Full Code of Const-me/Whisper for AI

master 306aadd1fce4 cached
485 files
3.3 MB
899.8k tokens
3982 symbols
1 requests
Download .txt
Showing preview only (3,590K chars total). Download the full file or copy to clipboard to get everything.
Repository: Const-me/Whisper
Branch: master
Commit: 306aadd1fce4
Files: 485
Total size: 3.3 MB

Directory structure:
gitextract_nzqawnur/

├── .gitignore
├── ComLightLib/
│   ├── ComLightLib.vcxproj
│   ├── ComLightLib.vcxproj.filters
│   ├── Exception.hpp
│   ├── Readme.txt
│   ├── client/
│   │   └── CComPtr.hpp
│   ├── comLightClient.h
│   ├── comLightCommon.h
│   ├── comLightServer.h
│   ├── hresult.h
│   ├── pal/
│   │   ├── guiddef.h
│   │   └── hresult.h
│   ├── server/
│   │   ├── Object.hpp
│   │   ├── ObjectRoot.hpp
│   │   ├── RefCounter.hpp
│   │   ├── freeThreadedMarshaller.cpp
│   │   ├── freeThreadedMarshaller.h
│   │   └── interfaceMap.h
│   ├── streams.h
│   ├── unknwn.h
│   └── utils/
│       ├── guid_parse.hpp
│       └── typeTraits.hpp
├── ComputeShaders/
│   ├── ComputeShaders.cpp
│   ├── ComputeShaders.vcxproj
│   ├── ComputeShaders.vcxproj.filters
│   ├── Readme.txt
│   ├── add.hlsl
│   ├── addInPlace.hlsl
│   ├── addRepeat.hlsl
│   ├── addRepeat64.hlsl
│   ├── addRepeatEx.hlsl
│   ├── addRepeatGelu.hlsl
│   ├── addRepeatGelu64.hlsl
│   ├── addRepeatScale.hlsl
│   ├── addRows.hlsl
│   ├── componentwiseBinaryOp.hlsli
│   ├── convolutionMain.hlsl
│   ├── convolutionMain2.hlsl
│   ├── convolutionMain2Fixed.hlsl
│   ├── convolutionPrep1.hlsl
│   ├── convolutionPrep2.hlsl
│   ├── copyConvert.hlsl
│   ├── copyTranspose.hlsl
│   ├── dbgFindNaN.hlsl
│   ├── diagMaskInf.hlsl
│   ├── flashAttention.hlsl
│   ├── flashAttentionCommon.hlsli
│   ├── flashAttentionCompat1.hlsl
│   ├── flashAttentionCompat2.hlsl
│   ├── flashAttentionCompat3.hlsl
│   ├── fmaRepeat1.hlsl
│   ├── fmaRepeat164.hlsl
│   ├── fmaRepeat2.hlsl
│   ├── fp64Utils.hlsli
│   ├── groupReduce.hlsli
│   ├── groupReduce64.hlsli
│   ├── matReshapePanels.hlsl
│   ├── miscUtils.hlsli
│   ├── mulMatByRow.hlsl
│   ├── mulMatByRow64.hlsl
│   ├── mulMatByRowTiled.hlsl
│   ├── mulMatByRowTiledEx.hlsl
│   ├── mulMatByScalar.hlsl
│   ├── mulMatDotMain.hlsl
│   ├── mulMatDotReshape.hlsl
│   ├── mulMatMadMain.hlsl
│   ├── mulMatTiled.hlsl
│   ├── mulMatTiledEx.hlsl
│   ├── norm.hlsl
│   ├── normCompat.hlsl
│   ├── normFixed.hlsl
│   ├── normFixed64.hlsl
│   ├── repeatUtils.hlsli
│   ├── scaleInPlace.hlsl
│   ├── softMax.hlsl
│   ├── softMax64.hlsl
│   ├── softMaxCompat.hlsl
│   ├── softMaxFixed.hlsl
│   ├── softMaxLong.hlsl
│   └── zeroMemory.hlsl
├── Examples/
│   ├── MicrophoneCS/
│   │   ├── CaptureThread.cs
│   │   ├── CommandLineArgs.cs
│   │   ├── MicrophoneCS.cs
│   │   ├── MicrophoneCS.csproj
│   │   ├── Readme.txt
│   │   └── TranscribeCallbacks.cs
│   ├── OldMain/
│   │   ├── OldMain.vcxproj
│   │   ├── OldMain.vcxproj.filters
│   │   ├── Readme.txt
│   │   ├── Utils/
│   │   │   ├── Logger.cpp
│   │   │   └── Logger.h
│   │   ├── dr_wav.h
│   │   └── main.cpp
│   ├── TranscribeCS/
│   │   ├── AnsiCodes.cs
│   │   ├── CommandLineArgs.cs
│   │   ├── Readme.txt
│   │   ├── Transcribe.cs
│   │   ├── TranscribeCS.cs
│   │   └── TranscribeCS.csproj
│   ├── WhisperDesktop/
│   │   ├── AppState.cpp
│   │   ├── AppState.h
│   │   ├── CaptureDlg.cpp
│   │   ├── CaptureDlg.h
│   │   ├── CircleIndicator.cpp
│   │   ├── CircleIndicator.h
│   │   ├── LoadModelDlg.cpp
│   │   ├── LoadModelDlg.h
│   │   ├── ModelAdvancedDlg.cpp
│   │   ├── ModelAdvancedDlg.h
│   │   ├── Readme.txt
│   │   ├── Resource.h
│   │   ├── TranscribeDlg.cpp
│   │   ├── TranscribeDlg.h
│   │   ├── Utils/
│   │   │   ├── DebugConsole.cpp
│   │   │   ├── DebugConsole.h
│   │   │   ├── LanguageDropdown.cpp
│   │   │   ├── LanguageDropdown.h
│   │   │   ├── PendingState.cpp
│   │   │   ├── PendingState.h
│   │   │   ├── TranslateCheckbox.cpp
│   │   │   ├── TranslateCheckbox.h
│   │   │   ├── WTL/
│   │   │   │   ├── MS-PL.txt
│   │   │   │   ├── ReadMe.html
│   │   │   │   ├── atlapp.h
│   │   │   │   ├── atlcrack.h
│   │   │   │   ├── atlctrls.h
│   │   │   │   ├── atlddx.h
│   │   │   │   ├── atlgdi.h
│   │   │   │   ├── atlres.h
│   │   │   │   ├── atluser.h
│   │   │   │   └── atlwinx.h
│   │   │   ├── logger.cpp
│   │   │   ├── logger.h
│   │   │   ├── miscUtils.cpp
│   │   │   └── miscUtils.h
│   │   ├── WhisperDesktop.cpp
│   │   ├── WhisperDesktop.manifest
│   │   ├── WhisperDesktop.rc
│   │   ├── WhisperDesktop.vcxproj
│   │   ├── WhisperDesktop.vcxproj.filters
│   │   ├── framework.h
│   │   ├── stdafx.cpp
│   │   ├── stdafx.h
│   │   ├── targetver.h
│   │   └── useDiscreteGpu.c
│   └── main/
│       ├── Readme.txt
│       ├── main.cpp
│       ├── main.vcxproj
│       ├── main.vcxproj.filters
│       ├── miscUtils.cpp
│       ├── miscUtils.h
│       ├── params.cpp
│       ├── params.h
│       ├── textWriter.cpp
│       └── textWriter.h
├── LICENSE
├── Readme.md
├── SampleClips/
│   ├── Readme.txt
│   ├── columbia-large-1080ti.txt
│   ├── columbia-large-1650.txt
│   ├── columbia-large-vega7.txt
│   ├── columbia-large-vega8.txt
│   ├── columbia-medium-1080ti.txt
│   ├── columbia-medium-1650.txt
│   ├── columbia-medium-vega7.txt
│   ├── columbia-medium-vega8.txt
│   ├── columbia.wma
│   ├── jfk-large-1080ti.txt
│   ├── jfk-large-1650.txt
│   ├── jfk-large-vega7.txt
│   ├── jfk-large-vega8.txt
│   ├── jfk-medium-1080ti.txt
│   ├── jfk-medium-1650.txt
│   ├── jfk-medium-vega7.txt
│   ├── jfk-medium-vega8.txt
│   └── summary.tsv
├── Tools/
│   ├── CompressShaders/
│   │   ├── Cabinet.cs
│   │   ├── CompressShaders.cs
│   │   ├── CompressShaders.csproj
│   │   ├── DetectFp64.cs
│   │   ├── LZ4.cs
│   │   ├── LanguageCodes.cs
│   │   ├── Readme.txt
│   │   └── ShaderNames.cs
│   ├── CompressTables/
│   │   ├── CompressTables.cs
│   │   └── CompressTables.csproj
│   ├── PerfSummary/
│   │   ├── LogParser.cs
│   │   ├── PerfSummary.cs
│   │   ├── PerfSummary.csproj
│   │   └── Summary.cs
│   ├── compareTraces/
│   │   ├── CommandLineArgs.cpp
│   │   ├── CommandLineArgs.h
│   │   ├── Readme.txt
│   │   ├── TraceReader.cpp
│   │   ├── TraceReader.h
│   │   ├── compare.cpp
│   │   ├── compare.h
│   │   ├── compareTraces.cpp
│   │   ├── compareTraces.vcxproj
│   │   ├── compareTraces.vcxproj.filters
│   │   ├── stdafx.cpp
│   │   ├── stdafx.h
│   │   └── testUtils.cpp
│   └── copy-binaries.cmd
├── Whisper/
│   ├── API/
│   │   ├── MfStructs.h
│   │   ├── Readme.txt
│   │   ├── SpecialTokens.h
│   │   ├── TranscribeStructs.h
│   │   ├── iContext.cl.h
│   │   ├── iContext.h
│   │   ├── iMediaFoundation.cl.h
│   │   ├── iMediaFoundation.h
│   │   ├── iTranscribeResult.cl.h
│   │   ├── iTranscribeResult.h
│   │   ├── loggerApi.h
│   │   ├── sFullParams.h
│   │   ├── sLanguageList.h
│   │   ├── sLoadModelCallbacks.h
│   │   ├── sModelSetup.h
│   │   ├── whisperComLight.h
│   │   └── whisperWindows.h
│   ├── CPU/
│   │   ├── BufferAllocator.cpp
│   │   ├── BufferAllocator.h
│   │   ├── DecoderTensors.cpp
│   │   ├── DecoderTensors.h
│   │   ├── HybridLoader.cpp
│   │   ├── HybridLoader.h
│   │   ├── KvTensors.h
│   │   ├── KvTensorsCpu.cpp
│   │   ├── LargeBuffer.cpp
│   │   ├── LargeBuffer.h
│   │   ├── MlContext.h
│   │   ├── MlContextCpu.cpp
│   │   ├── ParallelForRunner.cpp
│   │   ├── ParallelForRunner.h
│   │   ├── Readme.txt
│   │   ├── Tensor.h
│   │   ├── TensorCpu.cpp
│   │   ├── mulMat.cpp
│   │   ├── mulMat.h
│   │   ├── mulMat.kernel.hpp
│   │   ├── mulMatImpl.avx2.cpp
│   │   ├── mulMatImpl.cpp
│   │   ├── mulMatImpl.h
│   │   ├── mulMatImpl.panel.cpp
│   │   ├── mulMatUtils.hpp
│   │   ├── simdUtils.cpp
│   │   └── simdUtils.h
│   ├── D3D/
│   │   ├── Binder.cpp
│   │   ├── Binder.h
│   │   ├── MappedResource.cpp
│   │   ├── MappedResource.h
│   │   ├── RenderDoc/
│   │   │   ├── renderDoc.cpp
│   │   │   ├── renderDoc.h
│   │   │   └── renderdoc_app.h
│   │   ├── createBuffer.cpp
│   │   ├── createBuffer.h
│   │   ├── createDevice.cpp
│   │   ├── createDevice.h
│   │   ├── device.h
│   │   ├── downloadBuffer.cpp
│   │   ├── downloadBuffer.h
│   │   ├── enums.cpp
│   │   ├── enums.h
│   │   ├── listGPUs.cpp
│   │   ├── listGPUs.h
│   │   ├── sGpuInfo.h
│   │   ├── shaderNames.cpp
│   │   ├── shaderNames.h
│   │   ├── shaders.cpp
│   │   └── shaders.h
│   ├── DllMain.cpp
│   ├── Hybrid/
│   │   ├── HybridContext.cpp
│   │   ├── HybridContext.h
│   │   ├── KeyValueDownloader.cpp
│   │   ├── KeyValueDownloader.h
│   │   └── Readme.txt
│   ├── MF/
│   │   ├── AudioBuffer.cpp
│   │   ├── AudioBuffer.h
│   │   ├── AudioCapture.cpp
│   │   ├── AudioCapture.h
│   │   ├── MediaFoundation.cpp
│   │   ├── PcmReader.cpp
│   │   ├── PcmReader.h
│   │   ├── loadAudioFile.cpp
│   │   ├── loadAudioFile.h
│   │   ├── mfStartup.cpp
│   │   ├── mfStartup.h
│   │   ├── mfUtils.cpp
│   │   └── mfUtils.h
│   ├── ML/
│   │   ├── ConstantBuffer.cpp
│   │   ├── ConstantBuffer.h
│   │   ├── Context.ops.cpp
│   │   ├── DbgNanTest.cpp
│   │   ├── DbgNanTest.h
│   │   ├── Device.cpp
│   │   ├── Device.h
│   │   ├── LookupTables.cpp
│   │   ├── LookupTables.h
│   │   ├── LookupTablesData.cpp
│   │   ├── LookupTablesData.h
│   │   ├── LookupTablesData.inl
│   │   ├── MlContext.cpp
│   │   ├── MlContext.dbg.cpp
│   │   ├── MlContext.h
│   │   ├── Reshaper.cpp
│   │   ├── Reshaper.h
│   │   ├── TempBuffers.cpp
│   │   ├── TempBuffers.h
│   │   ├── Tensor.cpp
│   │   ├── Tensor.h
│   │   ├── TensorEx.cpp
│   │   ├── TensorEx.h
│   │   ├── TensorGpuViews.cpp
│   │   ├── TensorGpuViews.h
│   │   ├── TensorShape.cpp
│   │   ├── TensorShape.h
│   │   ├── TensorsArena.cpp
│   │   ├── TensorsArena.h
│   │   ├── mlUtils.cpp
│   │   ├── mlUtils.h
│   │   ├── reshapedMultiply.h
│   │   ├── tensorOpsTests.cpp
│   │   ├── tensorOpsTests.h
│   │   ├── testUtils.cpp
│   │   ├── testUtils.h
│   │   └── testUtilsC.h
│   ├── Readme.txt
│   ├── Resource.rc
│   ├── Utils/
│   │   ├── CpuProfiler.cpp
│   │   ├── CpuProfiler.h
│   │   ├── DelayExecution.cpp
│   │   ├── DelayExecution.h
│   │   ├── GpuProfiler.cpp
│   │   ├── GpuProfiler.h
│   │   ├── GpuProfilerSimple.h
│   │   ├── LZ4/
│   │   │   ├── LICENSE
│   │   │   ├── lz4.c
│   │   │   └── lz4.h
│   │   ├── Logger.cpp
│   │   ├── Logger.h
│   │   ├── MurmurHash3.cpp
│   │   ├── MurmurHash3.h
│   │   ├── ProfileCollection.cpp
│   │   ├── ProfileCollection.h
│   │   ├── ReadStream.h
│   │   ├── Trace/
│   │   │   ├── TraceStructures.cpp
│   │   │   ├── TraceStructures.h
│   │   │   ├── TraceWriter.cpp
│   │   │   ├── TraceWriter.h
│   │   │   ├── tracing.cpp
│   │   │   └── tracing.h
│   │   ├── miscUtils.cpp
│   │   ├── miscUtils.h
│   │   ├── parallelFor.cpp
│   │   └── parallelFor.h
│   ├── Whisper/
│   │   ├── ContextImpl.capture.cpp
│   │   ├── ContextImpl.cpp
│   │   ├── ContextImpl.diarize.cpp
│   │   ├── ContextImpl.h
│   │   ├── ContextImpl.misc.cpp
│   │   ├── DecoderInputBuffers.cpp
│   │   ├── DecoderInputBuffers.h
│   │   ├── DecoderResultBuffer.cpp
│   │   ├── DecoderResultBuffer.h
│   │   ├── KeyValueBuffers.cpp
│   │   ├── KeyValueBuffers.h
│   │   ├── Languages.cpp
│   │   ├── Languages.h
│   │   ├── MelInputTensor.cpp
│   │   ├── MelInputTensor.h
│   │   ├── MelStreamer.cpp
│   │   ├── MelStreamer.h
│   │   ├── ModelBuffers.clone.cpp
│   │   ├── ModelBuffers.cpp
│   │   ├── ModelBuffers.h
│   │   ├── ModelImpl.cpp
│   │   ├── ModelImpl.h
│   │   ├── ModelLoader.h
│   │   ├── Spectrogram.cpp
│   │   ├── Spectrogram.h
│   │   ├── TranscribeResult.h
│   │   ├── Vocabulary.cpp
│   │   ├── Vocabulary.h
│   │   ├── WhisperContext.cpp
│   │   ├── WhisperContext.h
│   │   ├── WhisperModel.cpp
│   │   ├── WhisperModel.h
│   │   ├── audioConstants.h
│   │   ├── iSpectrogram.h
│   │   ├── languageCodez.inl
│   │   ├── languageCodez.tsv
│   │   ├── loaderUtils.h
│   │   ├── melSpectrogram.cpp
│   │   ├── melSpectrogram.h
│   │   ├── sEncodeParams.h
│   │   ├── sModelParams.h
│   │   ├── sTokenData.h
│   │   ├── voiceActivityDetection.cpp
│   │   └── voiceActivityDetection.h
│   ├── Whisper.vcxproj
│   ├── Whisper.vcxproj.filters
│   ├── misc.natvis
│   ├── modelFactory.cpp
│   ├── modelFactory.h
│   ├── resource.h
│   ├── source/
│   │   ├── LICENSE
│   │   ├── Readme.txt
│   │   ├── ggml.c
│   │   ├── ggml.h
│   │   ├── whisper.cpp
│   │   └── whisper.h
│   ├── source.compat/
│   │   ├── Readme.txt
│   │   ├── convertThings.cpp
│   │   ├── convertThings.h
│   │   └── ggmlMsvc.c
│   ├── stdafx.cpp
│   ├── stdafx.h
│   ├── whisper.def
│   └── whisperCom.cpp
├── WhisperCpp.sln
├── WhisperNet/
│   ├── API/
│   │   ├── CaptureDeviceId.cs
│   │   ├── Parameters.cs
│   │   ├── SpecialTokens.cs
│   │   ├── eCaptureStatus.cs
│   │   ├── eGpuModelFlags.cs
│   │   ├── eLanguage.cs
│   │   ├── eLogLevel.cs
│   │   ├── eModelImplementation.cs
│   │   ├── eResultFlags.cs
│   │   ├── eSpeakerChannel.cs
│   │   ├── iAudioBuffer.cs
│   │   ├── iAudioReader.cs
│   │   ├── iMediaFoundation.cs
│   │   ├── iModel.cs
│   │   └── sCaptureParams.cs
│   ├── AssemblyInfo.cs
│   ├── AssemblyTitle.cs
│   ├── Callbacks.cs
│   ├── CaptureCallbacks.cs
│   ├── Context.cs
│   ├── ExtensionMethods.cs
│   ├── Internal/
│   │   ├── NativeLogger.cs
│   │   ├── iContext.cs
│   │   ├── iTranscribeResult.cs
│   │   ├── sCaptureCallbacks.cs
│   │   ├── sCaptureDevice.cs
│   │   ├── sFullParams.cs
│   │   ├── sLoadModelCallbacks.cs
│   │   ├── sLoggerSetup.cs
│   │   ├── sModelSetup.cs
│   │   └── sProgressSink.cs
│   ├── Library.cs
│   ├── Readme.md
│   ├── WhisperNet.csproj
│   └── WhisperNet.nuspec
└── WhisperPS/
    ├── Commands/
    │   ├── ExportBase.cs
    │   ├── ExportSubrip.cs
    │   ├── ExportText.cs
    │   ├── ExportWebVtt.cs
    │   ├── FormatSegments.cs
    │   ├── ListAdapters.cs
    │   ├── LoadModel.cs
    │   ├── TranscribeBase.cs
    │   └── TranscribeFile.cs
    ├── Internal/
    │   ├── MarshalEx.cs
    │   ├── NativeLogger.cs
    │   ├── iTranscribeResult.cs
    │   ├── sCaptureDevice.cs
    │   ├── sFullParams.cs
    │   ├── sLoadModelCallbacks.cs
    │   ├── sModelSetup.cs
    │   └── sProgressSink.cs
    ├── Library.cs
    ├── Properties/
    │   └── AssemblyTitle.cs
    ├── Readme.md
    ├── Types/
    │   ├── Model.cs
    │   ├── Segment.cs
    │   └── Transcription.cs
    ├── Utils/
    │   ├── CommandLogger.cs
    │   └── MiscUtils.cs
    ├── WhisperPS.csproj
    ├── WhisperPS.psd1
    ├── app.config
    └── packages.config

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
.vs/
ComLightLib/x64/
Whisper/x64/
x64/
Tools/CompressShaders/bin/
Tools/CompressShaders/obj/
Whisper/D3D/shaderData-Debug.inl
Whisper/D3D/shaderData-Release.inl
WhisperNet/bin/
WhisperNet/obj/
Examples/TranscribeCS/bin/
Examples/TranscribeCS/obj/
*.aps
*.json
*.user
Examples/MicrophoneCS/obj/
Examples/MicrophoneCS/bin/
Tools/PerfSummary/bin/
Tools/PerfSummary/obj/
packages/
WhisperPS/obj/
WhisperPS/bin/
Tools/CompressTables/bin/
Tools/CompressTables/obj/

================================================
FILE: ComLightLib/ComLightLib.vcxproj
================================================
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  <ItemGroup Label="ProjectConfigurations">
    <ProjectConfiguration Include="Debug|x64">
      <Configuration>Debug</Configuration>
      <Platform>x64</Platform>
    </ProjectConfiguration>
    <ProjectConfiguration Include="Release|x64">
      <Configuration>Release</Configuration>
      <Platform>x64</Platform>
    </ProjectConfiguration>
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="comLightClient.h" />
    <ClInclude Include="client\CComPtr.hpp" />
    <ClInclude Include="comLightServer.h" />
    <ClInclude Include="comLightCommon.h" />
    <ClInclude Include="server\freeThreadedMarshaller.h" />
    <ClInclude Include="hresult.h" />
    <ClInclude Include="server\ObjectRoot.hpp" />
    <ClInclude Include="pal\guiddef.h" />
    <ClInclude Include="server\Object.hpp" />
    <ClInclude Include="server\interfaceMap.h" />
    <ClInclude Include="server\RefCounter.hpp" />
    <ClInclude Include="Exception.hpp" />
    <ClInclude Include="streams.h" />
    <ClInclude Include="utils\guid_parse.hpp" />
    <ClInclude Include="pal\hresult.h" />
    <ClInclude Include="unknwn.h" />
    <ClInclude Include="utils\typeTraits.hpp" />
  </ItemGroup>
  <ItemGroup>
    <ClCompile Include="server\freeThreadedMarshaller.cpp" />
  </ItemGroup>
  <ItemGroup>
    <Text Include="Readme.txt" />
  </ItemGroup>
  <PropertyGroup Label="Globals">
    <VCProjectVersion>15.0</VCProjectVersion>
    <ProjectGuid>{52F486E7-830C-45D8-BE47-E76B5AAB2772}</ProjectGuid>
    <Keyword>Win32Proj</Keyword>
    <RootNamespace>ComLightLib</RootNamespace>
    <WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
  </PropertyGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
    <ConfigurationType>StaticLibrary</ConfigurationType>
    <UseDebugLibraries>true</UseDebugLibraries>
    <PlatformToolset>v143</PlatformToolset>
    <CharacterSet>Unicode</CharacterSet>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
    <ConfigurationType>StaticLibrary</ConfigurationType>
    <UseDebugLibraries>false</UseDebugLibraries>
    <PlatformToolset>v143</PlatformToolset>
    <WholeProgramOptimization>true</WholeProgramOptimization>
    <CharacterSet>Unicode</CharacterSet>
  </PropertyGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
  <ImportGroup Label="ExtensionSettings">
  </ImportGroup>
  <ImportGroup Label="Shared">
  </ImportGroup>
  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
  </ImportGroup>
  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
  </ImportGroup>
  <PropertyGroup Label="UserMacros" />
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
    <LinkIncremental>true</LinkIncremental>
    <OutDir>$(Platform)\$(Configuration)\</OutDir>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
    <LinkIncremental>false</LinkIncremental>
    <OutDir>$(Platform)\$(Configuration)\</OutDir>
  </PropertyGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
    <ClCompile>
      <PrecompiledHeader>NotUsing</PrecompiledHeader>
      <WarningLevel>Level3</WarningLevel>
      <Optimization>Disabled</Optimization>
      <SDLCheck>true</SDLCheck>
      <PreprocessorDefinitions>_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <ConformanceMode>true</ConformanceMode>
      <LanguageStandard>stdcpp20</LanguageStandard>
      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
    </ClCompile>
    <Link>
      <SubSystem>Windows</SubSystem>
      <GenerateDebugInformation>true</GenerateDebugInformation>
    </Link>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
    <ClCompile>
      <PrecompiledHeader>NotUsing</PrecompiledHeader>
      <WarningLevel>Level3</WarningLevel>
      <Optimization>MaxSpeed</Optimization>
      <FunctionLevelLinking>true</FunctionLevelLinking>
      <IntrinsicFunctions>true</IntrinsicFunctions>
      <SDLCheck>true</SDLCheck>
      <PreprocessorDefinitions>NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <ConformanceMode>true</ConformanceMode>
      <LanguageStandard>stdcpp20</LanguageStandard>
      <EnableEnhancedInstructionSet>AdvancedVectorExtensions</EnableEnhancedInstructionSet>
    </ClCompile>
    <Link>
      <SubSystem>Windows</SubSystem>
      <EnableCOMDATFolding>true</EnableCOMDATFolding>
      <OptimizeReferences>true</OptimizeReferences>
      <GenerateDebugInformation>true</GenerateDebugInformation>
    </Link>
  </ItemDefinitionGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
  <ImportGroup Label="ExtensionTargets">
  </ImportGroup>
</Project>

================================================
FILE: ComLightLib/ComLightLib.vcxproj.filters
================================================
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  <ItemGroup>
    <ClInclude Include="pal\hresult.h" />
    <ClInclude Include="pal\guiddef.h" />
    <ClInclude Include="utils\guid_parse.hpp" />
    <ClInclude Include="unknwn.h" />
    <ClInclude Include="comLightClient.h" />
    <ClInclude Include="comLightServer.h" />
    <ClInclude Include="client\CComPtr.hpp" />
    <ClInclude Include="comLightCommon.h" />
    <ClInclude Include="server\RefCounter.hpp" />
    <ClInclude Include="server\interfaceMap.h" />
    <ClInclude Include="server\Object.hpp" />
    <ClInclude Include="utils\typeTraits.hpp" />
    <ClInclude Include="server\freeThreadedMarshaller.h" />
    <ClInclude Include="hresult.h" />
    <ClInclude Include="server\ObjectRoot.hpp" />
    <ClInclude Include="Exception.hpp" />
    <ClInclude Include="streams.h" />
  </ItemGroup>
  <ItemGroup>
    <ClCompile Include="server\freeThreadedMarshaller.cpp" />
  </ItemGroup>
  <ItemGroup>
    <Text Include="Readme.txt" />
  </ItemGroup>
</Project>

================================================
FILE: ComLightLib/Exception.hpp
================================================
#pragma once

namespace ComLight
{
	// C++ exception which carries an HRESULT status code.
	// It is safe to throw from a constructor or from the FinalConstruct() method: the library catches it there, and returns the code from the class factory function.
	// Interface methods are different: the C++ parts of the library can't catch exceptions thrown from them without very complex trickery like code generation.
	// When you throw this class from a method, catch it manually near the API boundary, otherwise the app will crash.
	// C++ doesn't have an ABI, that's why the framework can't catch C++ exceptions across the modules.
	class Exception : public std::runtime_error
	{
		const HRESULT m_code;

	public:

		// Wrap the status code; the message returned by what() is the same for all codes
		Exception( HRESULT hr ) :
			std::runtime_error( "ComLight HRESULT exception" ),
			m_code( hr )
		{
		}

		// The status code passed to the constructor
		HRESULT code() const
		{
			return m_code;
		}
	};
}

================================================
FILE: ComLightLib/Readme.txt
================================================
Copy-pasted from here:
https://github.com/Const-me/ComLightInterop/tree/master/ComLightLib
with only a few minor changes.

================================================
FILE: ComLightLib/client/CComPtr.hpp
================================================
#pragma once

namespace ComLight
{
	// COM smart pointer, very comparable to CComPtr from ATL.
	// Holds at most one reference to an object of type I, and calls Release() on it when destroyed.
	// The type I must expose AddRef() and Release() methods.
	template <class I>
	class CComPtr
	{
		I* p;

		// AddRef() the current pointer, does nothing when it's nullptr
		void callAddRef() const
		{
			if( nullptr == p )
				return;
			p->AddRef();
		}

	public:

		// Construct with nullptr
		CComPtr() : p( nullptr ) { }

		// Release the pointer, and set it to nullptr
		void release()
		{
			I* const old = p;
			if( nullptr == old )
				return;
			// Reset the field before calling Release(), so this smart pointer never exposes a dangling value while the object is being destroyed
			p = nullptr;
			old->Release();
		}

		~CComPtr()
		{
			release();
		}

		// Attach without AddRef(); the previously held pointer is released
		void attach( I* raw )
		{
			release();
			p = raw;
		}

		// Detach without Release(), set this pointer to nullptr
		I* detach()
		{
			I* const result = p;
			p = nullptr;
			return result;
		}

		// Detach without Release() and place to the specified address, set this pointer to nullptr
		template<class Other>
		void detach( Other** pp )
		{
			// If the argument points to a non-empty object, release the old instance: would leak memory otherwise.
			if( nullptr != *pp )
				( *pp )->Release();
			( *pp ) = detach();
		}

		// Set and AddRef()
		void assign( I* raw )
		{
			// AddRef() the new object before releasing the old one.
			// When both are the same object and this pointer holds the only reference, releasing first would destroy the object, and the subsequent AddRef() would touch freed memory.
			if( nullptr != raw )
				raw->AddRef();
			I* const old = p;
			p = raw;
			if( nullptr != old )
				old->Release();
		}

		// Exchange the pointers, reference counters aren't touched
		void swap( CComPtr<I>& that )
		{
			std::swap( p, that.p );
		}

		// Set and AddRef()
		CComPtr( I* raw ) : p( raw )
		{
			callAddRef();
		}

		// Set and AddRef()
		CComPtr( const CComPtr<I>& that ) : CComPtr( that.p ) { }
		// Move constructor
		CComPtr( CComPtr<I>&& that ) : p( that.p ) { that.p = nullptr; }

		// Set and AddRef()
		void operator=( I* raw )
		{
			assign( raw );
		}

		// Set and AddRef()
		void operator=( const CComPtr<I>& that )
		{
			assign( that.p );
		}

		// Move assignment operator, destroys the other one
		void operator=( CComPtr<I>&& that )
		{
			attach( that.detach() );
		}

		operator I*( ) const { return p; }
		I* operator -> () const { return p; }
		// Address of the internal raw pointer, to receive output arguments; doesn't release the current value
		I** operator &() { return &p; }

		// True when the pointer is not nullptr
		operator bool() const { return nullptr != p; }
	};
}

================================================
FILE: ComLightLib/comLightClient.h
================================================
#pragma once
#include "comLightCommon.h"
#include "client/CComPtr.hpp"
#include "utils/typeTraits.hpp"

namespace ComLight
{
	namespace details
	{
		template<typename T>
		inline constexpr void** castDoublePointerToVoid( T** pp )
		{
			static_assert( pointersAssignable<IUnknown, T>(), "IID_PPV_ARGS macro should be used with IUnknown interfaces" );
			return reinterpret_cast<void**>( pp );
		}
	}
}

// Drop any IID_PPV_ARGS macro defined by the headers included earlier, this library supplies its own version
#ifdef IID_PPV_ARGS
#undef IID_PPV_ARGS
#endif

// Expands into two comma-separated arguments: the `iid` member of the interface type the argument points to,
// and the argument itself converted to void** by castDoublePointerToVoid, which also validates the interface type at compile time.
#define IID_PPV_ARGS( pp ) decltype( **pp )::iid, ::ComLight::details::castDoublePointerToVoid( pp )

================================================
FILE: ComLightLib/comLightCommon.h
================================================
#pragma once
#include "hresult.h"

#ifdef _MSC_VER
#include <guiddef.h>
#else
#include "pal/guiddef.h"
using LPCTSTR = const char*;
#endif

#include "unknwn.h"

================================================
FILE: ComLightLib/comLightServer.h
================================================
#pragma once
#include "comLightCommon.h"
#include "client/CComPtr.hpp"

#include "server/ObjectRoot.hpp"
#include "server/interfaceMap.h"
#include "server/Object.hpp"
#include "server/freeThreadedMarshaller.h"

#ifdef _MSC_VER
// On Windows, the exports are controlled by the library.def module definition file, so the macro only needs to select C linkage.
// __declspec(dllexport) is an alternative, but according to the original author it adds an underscore to the name, which is undesirable.
#define DLLEXPORT extern "C"
#else
// Other compilers: C linkage, and default symbol visibility
#define DLLEXPORT extern "C" __attribute__((visibility("default")))
#endif

================================================
FILE: ComLightLib/hresult.h
================================================
#pragma once
#include <stdint.h>
#ifdef _MSC_VER
#include <winerror.h>
#include <OleCtl.h>
#else
#include "pal/hresult.h"
#endif

// Evaluate the expression once; when the result is a failed HRESULT, return that code from the calling function.
// NOTE(review): expands into a braced block and not `do { } while( 0 )`, so `CHECK( x );` placed directly before an `else` won't compile.
#define CHECK( hr ) { const HRESULT __hr = ( hr ); if( FAILED( __hr ) ) return __hr; }

#ifndef _MSC_VER
// Convert a Win32 error code into HRESULT, for builds without the Windows SDK.
// Zero and negative values are returned unchanged, same as HRESULT_FROM_WIN32 in the Windows SDK does: zero is success, negative values already are failed HRESULT codes.
// Positive values are converted into a failed HRESULT made of 0x80070000 and the lower 16 bits of the code.
inline constexpr HRESULT HRESULT_FROM_WIN32( int c )
{
	return c <= 0 ? (HRESULT)c : (HRESULT)( ( 0xFFFFu & (uint32_t)c ) | 0x80070000u );
}

// Status codes and Win32 error numbers for non-MSVC builds.
// NOTE(review): on MSVC these names presumably come from the Windows SDK headers included at the top of this file; confirm the values match.
constexpr HRESULT OLE_E_BLANK = _HRESULT_TYPEDEF_( 0x80040007 );
constexpr HRESULT E_BOUNDS = _HRESULT_TYPEDEF_( 0x8000000BL ); 

constexpr int ERROR_HANDLE_EOF = 38;
constexpr int ERROR_ALREADY_INITIALIZED = 1247;
#endif

// HRESULT equivalents of the Win32 errors ERROR_HANDLE_EOF and ERROR_ALREADY_INITIALIZED, available on all platforms
constexpr HRESULT E_EOF = HRESULT_FROM_WIN32( ERROR_HANDLE_EOF );
constexpr HRESULT E_ALREADY_INITIALIZED = HRESULT_FROM_WIN32( ERROR_ALREADY_INITIALIZED );

================================================
FILE: ComLightLib/pal/guiddef.h
================================================
#pragma once
#include <stdint.h>
#include <array>
#ifndef GUID_DEFINED
#define GUID_DEFINED
#endif

// GUID structure for the builds which include this header instead of the <guiddef.h> from the Windows SDK.
// Four fields: 32-bit integer, two 16-bit integers, and an array of 8 bytes.
struct GUID
{
	uint32_t Data1;
	uint16_t Data2;
	uint16_t Data3;
	std::array<uint8_t, 8> Data4;

	// Two GUIDs are equal when all four fields are equal
	constexpr inline bool operator==( const GUID& that ) const
	{
		if( Data1 != that.Data1 )
			return false;
		if( Data2 != that.Data2 )
			return false;
		if( Data3 != that.Data3 )
			return false;
		return Data4 == that.Data4;
	}
};

using REFIID = const GUID&;

================================================
FILE: ComLightLib/pal/hresult.h
================================================
#pragma once
#include <stdint.h>
// Status code type for the builds without Windows SDK: 32-bit signed integer
typedef int32_t HRESULT;
#define _HRESULT_TYPEDEF_(_sc) ((HRESULT)_sc)
#define SEVERITY_ERROR        1
#define FACILITY_CONTROL      10

// Pack the three pieces into a status code: severity is shifted to bit 31, facility is shifted by 16 bits, the code stays in the lowest bits
inline constexpr HRESULT MAKE_SCODE( uint32_t sev, uint32_t fac, uint32_t code )
{
	const uint32_t packed = ( sev << 31 ) | ( fac << 16 ) | code;
	return (HRESULT)packed;
}

// ==== Copy-pasted from coreclr-master\src\pal\inc\rt\palrt.h ====
#define S_OK                             _HRESULT_TYPEDEF_(0x00000000L)
#define S_FALSE                          _HRESULT_TYPEDEF_(0x00000001L)

#define E_NOTIMPL                        _HRESULT_TYPEDEF_(0x80004001L)
#define E_NOINTERFACE                    _HRESULT_TYPEDEF_(0x80004002L)
#define E_UNEXPECTED                     _HRESULT_TYPEDEF_(0x8000FFFFL)
#define E_OUTOFMEMORY                    _HRESULT_TYPEDEF_(0x8007000EL)
#define E_INVALIDARG                     _HRESULT_TYPEDEF_(0x80070057L)
#define E_POINTER                        _HRESULT_TYPEDEF_(0x80004003L)
#define E_HANDLE                         _HRESULT_TYPEDEF_(0x80070006L)
#define E_ABORT                          _HRESULT_TYPEDEF_(0x80004004L)
#define E_FAIL                           _HRESULT_TYPEDEF_(0x80004005L)
#define E_ACCESSDENIED                   _HRESULT_TYPEDEF_(0x80070005L)
#define E_PENDING                        _HRESULT_TYPEDEF_(0x8000000AL)

#define DISP_E_PARAMNOTFOUND             _HRESULT_TYPEDEF_(0x80020004L)
#define DISP_E_TYPEMISMATCH              _HRESULT_TYPEDEF_(0x80020005L)
#define DISP_E_BADVARTYPE                _HRESULT_TYPEDEF_(0x80020008L)
#define DISP_E_OVERFLOW                  _HRESULT_TYPEDEF_(0x8002000AL)
#define DISP_E_DIVBYZERO                 _HRESULT_TYPEDEF_(0x80020012L)

#define CLASS_E_CLASSNOTAVAILABLE        _HRESULT_TYPEDEF_(0x80040111L)
#define CLASS_E_NOAGGREGATION            _HRESULT_TYPEDEF_(0x80040110L)

#define CO_E_CLASSSTRING                 _HRESULT_TYPEDEF_(0x800401F3L)

#define MK_E_SYNTAX                      _HRESULT_TYPEDEF_(0x800401E4L)

#define STG_E_INVALIDFUNCTION            _HRESULT_TYPEDEF_(0x80030001L)
#define STG_E_FILENOTFOUND               _HRESULT_TYPEDEF_(0x80030002L)
#define STG_E_PATHNOTFOUND               _HRESULT_TYPEDEF_(0x80030003L)
#define STG_E_WRITEFAULT                 _HRESULT_TYPEDEF_(0x8003001DL)
#define STG_E_FILEALREADYEXISTS          _HRESULT_TYPEDEF_(0x80030050L)
#define STG_E_ABNORMALAPIEXIT            _HRESULT_TYPEDEF_(0x800300FAL)

#define NTE_BAD_UID                      _HRESULT_TYPEDEF_(0x80090001L)
#define NTE_BAD_HASH                     _HRESULT_TYPEDEF_(0x80090002L)
#define NTE_BAD_KEY                      _HRESULT_TYPEDEF_(0x80090003L)
#define NTE_BAD_LEN                      _HRESULT_TYPEDEF_(0x80090004L)
#define NTE_BAD_DATA                     _HRESULT_TYPEDEF_(0x80090005L)
#define NTE_BAD_SIGNATURE                _HRESULT_TYPEDEF_(0x80090006L)
#define NTE_BAD_VER                      _HRESULT_TYPEDEF_(0x80090007L)
#define NTE_BAD_ALGID                    _HRESULT_TYPEDEF_(0x80090008L)
#define NTE_BAD_FLAGS                    _HRESULT_TYPEDEF_(0x80090009L)
#define NTE_BAD_TYPE                     _HRESULT_TYPEDEF_(0x8009000AL)
#define NTE_BAD_KEY_STATE                _HRESULT_TYPEDEF_(0x8009000BL)
#define NTE_BAD_HASH_STATE               _HRESULT_TYPEDEF_(0x8009000CL)
#define NTE_NO_KEY                       _HRESULT_TYPEDEF_(0x8009000DL)
#define NTE_NO_MEMORY                    _HRESULT_TYPEDEF_(0x8009000EL)
#define NTE_SIGNATURE_FILE_BAD           _HRESULT_TYPEDEF_(0x8009001CL)
#define NTE_FAIL                         _HRESULT_TYPEDEF_(0x80090020L)

#define CRYPT_E_HASH_VALUE               _HRESULT_TYPEDEF_(0x80091007L)

#define TYPE_E_SIZETOOBIG                _HRESULT_TYPEDEF_(0x800288C5L)
#define TYPE_E_DUPLICATEID               _HRESULT_TYPEDEF_(0x800288C6L)

#define STD_CTL_SCODE(n) MAKE_SCODE(SEVERITY_ERROR, FACILITY_CONTROL, n)
#define CTL_E_OVERFLOW                  STD_CTL_SCODE(6)
#define CTL_E_OUTOFMEMORY               STD_CTL_SCODE(7)
#define CTL_E_DIVISIONBYZERO            STD_CTL_SCODE(11)
#define CTL_E_OUTOFSTACKSPACE           STD_CTL_SCODE(28)
#define CTL_E_FILENOTFOUND              STD_CTL_SCODE(53)
#define CTL_E_DEVICEIOERROR             STD_CTL_SCODE(57)
#define CTL_E_PERMISSIONDENIED          STD_CTL_SCODE(70)
#define CTL_E_PATHFILEACCESSERROR       STD_CTL_SCODE(75)
#define CTL_E_PATHNOTFOUND              STD_CTL_SCODE(76)

#define INET_E_CANNOT_CONNECT            _HRESULT_TYPEDEF_(0x800C0004L)
#define INET_E_RESOURCE_NOT_FOUND        _HRESULT_TYPEDEF_(0x800C0005L)
#define INET_E_OBJECT_NOT_FOUND          _HRESULT_TYPEDEF_(0x800C0006L)
#define INET_E_DATA_NOT_AVAILABLE        _HRESULT_TYPEDEF_(0x800C0007L)
#define INET_E_DOWNLOAD_FAILURE          _HRESULT_TYPEDEF_(0x800C0008L)
#define INET_E_CONNECTION_TIMEOUT        _HRESULT_TYPEDEF_(0x800C000BL)
#define INET_E_UNKNOWN_PROTOCOL          _HRESULT_TYPEDEF_(0x800C000DL)

#define DBG_PRINTEXCEPTION_C             _HRESULT_TYPEDEF_(0x40010006L)
// ==== Done pasting ====

// True when the status code is non-negative, i.e. it does not have the sign bit set.
inline constexpr bool SUCCEEDED( HRESULT hr )
{
	return !( hr < 0 );
}

// True when the status code is negative, i.e. it has the sign bit set.
inline constexpr bool FAILED( HRESULT hr )
{
	return !( hr >= 0 );
}

================================================
FILE: ComLightLib/server/Object.hpp
================================================
#pragma once
#include <type_traits>
#include "../comLightClient.h"
#include "../utils/typeTraits.hpp"
#include "../Exception.hpp"

namespace ComLight
{
	namespace details
	{
		// Generate the has_member_XXX type traits which Object<T> uses in its static_assert checks,
		// to produce a readable compiler error when T doesn't inherit from ObjectRoot.
		GENERATE_HAS_MEMBER( implQueryInterface );
		GENERATE_HAS_MEMBER( implAddRef );
		GENERATE_HAS_MEMBER( implRelease );
	}

	// Outer class of objects, implements IUnknown methods, also the class factory. The type argument must be your class implementing your interfaces, inherited from ObjectRoot<I>
	template<class T>
	class Object : public T
	{
	public:
		Object() = default;

		// Forward arbitrary constructor arguments to the constructor of T
		template<typename ... Args>
		Object( Args&& ... args ) : T{ std::forward<Args>( args )... } {};

		inline virtual ~Object() override { }

		// Implement IUnknown methods

		// Try the interface map of T first, then the optional extra interfaces, and finally IUnknown.
		// Returns E_POINTER when ppvObject is null. When the interface is not supported, returns E_NOINTERFACE and writes nullptr to *ppvObject.
		HRESULT COMLIGHTCALL QueryInterface( REFIID riid, void** ppvObject ) override
		{
			static_assert( details::has_member_implQueryInterface<T>::value, "Your object class must inherit from ComLight::ObjectRoot" );

			if( nullptr == ppvObject )
				return E_POINTER;

			if( T::implQueryInterface( riid, ppvObject ) )
				return S_OK;
			if( T::queryExtraInterfaces( riid, ppvObject ) )
				return S_OK;

			if( riid == IUnknown::iid() )
			{
				ComLight::IUnknown* unk = T::getUnknown();
				unk->AddRef();
				*ppvObject = unk;
				return S_OK;
			}

			// COM requires the output pointer to be null when the query fails; don't leave the caller with a stale value
			*ppvObject = nullptr;
			return E_NOINTERFACE;
		}

		// Increment the reference counter, return the new value
		uint32_t COMLIGHTCALL AddRef() override
		{
			static_assert( details::has_member_implAddRef<T>::value, "Your object class must inherit from ComLight::ObjectRoot" );
			return T::implAddRef();
		}

		// Decrement the reference counter, return the new value. When it reaches zero, call FinalRelease and destroy the object.
		uint32_t COMLIGHTCALL Release() override
		{
			static_assert( details::has_member_implRelease<T>::value, "Your object class must inherit from ComLight::ObjectRoot" );
			const uint32_t ret = T::implRelease();
			if( 0 == ret )
			{
				T::FinalRelease();
				delete this;
			}
			return ret;
		}

		// Create a new object on the heap, store in smart pointer.
		// Runs internalFinalConstruct and then FinalConstruct; when either fails, the new object is released and the failure code is returned, leaving `result` untouched.
		// Exceptions of type ComLight::Exception and thrown HRESULT values are converted into return codes.
		static inline HRESULT create( CComPtr<Object<T>>& result )
		{
			CComPtr<Object<T>> ptr;
			try
			{
				ptr = new Object<T>();	// The RefCounter constructor creates it with ref.counter 0. But then CComPtr constructor calls AddRef so we have RC=1 after this line.

				HRESULT hr = ptr->internalFinalConstruct();
				if( FAILED( hr ) )
					return hr;

				hr = ptr->FinalConstruct();
				if( FAILED( hr ) )
					return hr;

				ptr.swap( result );
				return S_OK;
			}
			catch( const Exception& ex )
			{
				return ex.code();
			}
			catch( HRESULT hr )
			{
				// Same handling as in the overload with constructor arguments
				return hr;
			}
		}

		// Create a new object on the heap passing the arguments to the constructor of T, store in smart pointer.
		// The construction sequence and the error handling are the same as in the overload without arguments.
		template<typename ... Args>
		static inline HRESULT create( CComPtr<Object<T>>& result, Args&& ... args )
		{
			CComPtr<Object<T>> ptr;
			try
			{
				ptr = new Object<T>( std::forward<Args>( args )... );

				HRESULT hr = ptr->internalFinalConstruct();
				if( FAILED( hr ) )
					return hr;

				hr = ptr->FinalConstruct();
				if( FAILED( hr ) )
					return hr;

				ptr.swap( result );
				return S_OK;
			}
			catch( const Exception& ex )
			{
				return ex.code();
			}
			catch( HRESULT hr )
			{
				return hr;
			}
		}

		// Create a new object on the heap, return one of its interfaces. The caller is assumed to take ownership of the new object.
		// Returns E_POINTER when `pp` is null.
		template<class I>
		static inline HRESULT create( I** pp )
		{
			if( pp == nullptr )
				return E_POINTER;

			static_assert( details::pointersAssignable<I, T>(), "Object::create can't cast object to the requested interface" );
			CComPtr<Object<T>> ptr;
			CHECK( create( ptr ) );
			ptr.detach( pp );
			return S_OK;
		}
	};
}

================================================
FILE: ComLightLib/server/ObjectRoot.hpp
================================================
#pragma once
#include "RefCounter.hpp"
#include "../comLightCommon.h"
#include "../utils/typeTraits.hpp"

namespace ComLight
{
	// Root class for user objects: combines the reference counter with the primary interface, and supplies default versions of the lifetime hooks.
	// `I` is the interface clients receive when they ask for IID_IUnknown; by convention that pointer is the identity of the object.
	template<class I>
	class ObjectRoot : public RefCounter, public I
	{
	protected:

		// Default first-stage construction hook, does nothing and reports S_FALSE
		inline HRESULT internalFinalConstruct() { return S_FALSE; }

		// Default second-stage construction hook, does nothing and reports S_FALSE
		inline HRESULT FinalConstruct() { return S_FALSE; }

		// Default hook called by Object<T>::Release right before the object is deleted; does nothing
		inline void FinalRelease() { }

		// The pointer returned to clients who query for IUnknown
		IUnknown* getUnknown()
		{
			static_assert( details::pointersAssignable<IUnknown, I>(), "The interface doesn't derive from IUnknown" );
			I* const primary = this;
			return primary;
		}

		// Default version supports no extra interfaces
		bool queryExtraInterfaces( REFIID riid, void **ppvObject ) const
		{
			return false;
		}

		// Default interface map with just 2 entries, I and IUnknown; both resolve to the I pointer.
		// On a match, adds a reference, writes the pointer and returns true.
		bool implQueryInterface( REFIID riid, void** ppvObject )
		{
			const bool supported = ( riid == I::iid() ) || ( riid == IUnknown::iid() );
			if( !supported )
				return false;

			I* const iface = this;
			iface->AddRef();
			*ppvObject = iface;
			return true;
		}
	};
}

================================================
FILE: ComLightLib/server/RefCounter.hpp
================================================
#pragma once
#include <atomic>
#include <assert.h>
#include <limits.h>

namespace ComLight
{
	// Innermost base class of all objects, it owns the atomic reference counter.
	class RefCounter
	{
		std::atomic_uint m_refs;

	public:

		// New objects start with zero references
		RefCounter() : m_refs( 0u ) { }

		inline virtual ~RefCounter() { }

		// Neither copyable nor movable
		RefCounter( const RefCounter &that ) = delete;
		RefCounter( RefCounter &&that ) = delete;

	protected:

		// Atomically add one reference, return the updated count
		uint32_t implAddRef()
		{
			return m_refs.fetch_add( 1u ) + 1u;
		}

		// Atomically drop one reference, return the updated count
		uint32_t implRelease()
		{
			// Might be a good idea to use locks, at least in debug builds. They're much slower than atomics, but with locks it's possible to detect when 2 threads call release at the same time, for object with counter = 1.
			// It's a memory management bug, but it would be nice if debug builds would handle that case gracefully.
			const uint32_t after = m_refs.fetch_sub( 1u ) - 1u;
			// Wrapping around to UINT_MAX means the counter was already zero before this call
			assert( after != UINT_MAX );
			return after;
		}
	};
}

================================================
FILE: ComLightLib/server/freeThreadedMarshaller.cpp
================================================
#include "freeThreadedMarshaller.h"
#ifdef _MSC_VER
#include <combaseapi.h>

// Thin wrapper over the OS API, converts ComLight pointer types into the ones the Windows SDK expects
HRESULT ComLight::details::createFreeThreadedMarshaller( IUnknown* pUnkOuter, IUnknown** ppUnkMarshal )
{
	LPUNKNOWN const outer = (LPUNKNOWN)pUnkOuter;
	LPUNKNOWN* const marshal = (LPUNKNOWN *)ppUnkMarshal;
	return ::CoCreateFreeThreadedMarshaler( outer, marshal );
}

// Forward IMarshal queries to the marshaller object, when there is one. Returns true when the interface was written to *ppvObject.
bool ComLight::details::queryMarshallerInterface( REFIID riid, void **ppvObject, IUnknown* marshaller )
{
	// Only IMarshal is handled here
	if( riid != IID_IMarshal )
		return false;
	// No marshaller was created
	if( nullptr == marshaller )
		return false;

	if( SUCCEEDED( marshaller->QueryInterface( IID_IMarshal, ppvObject ) ) )
		return true;
	return false;
}
#endif

================================================
FILE: ComLightLib/server/freeThreadedMarshaller.h
================================================
#pragma once
#ifdef _MSC_VER
#include "../comLightCommon.h"

namespace ComLight
{
	namespace details
	{
		// Create a free threaded marshaller aggregated into the object identified by pUnkOuter; the implementation calls CoCreateFreeThreadedMarshaler
		HRESULT createFreeThreadedMarshaller( IUnknown* pUnkOuter, IUnknown** ppUnkMarshal );
		// When riid is IID_IMarshal and the marshaller pointer is not null, query the marshaller for that interface. Returns true on success.
		bool queryMarshallerInterface( REFIID riid, void **ppvObject, IUnknown* marshaller );
	}
}

// Place this macro inside the class of your object to support the IMarshal interface through a free threaded marshaller.
// It declares a smart pointer field for the marshaller, an internalFinalConstruct() method which creates the marshaller,
// and a queryExtraInterfaces() method which forwards IID_IMarshal queries to it.
// Both methods have the same names as the default ones in ObjectRoot, and hide them.
// Note the macro leaves the class in the `protected:` access section.
#define DECLARE_FREE_THREADED_MARSHALLER()                                                              \
private:                                                                                                \
ComLight::CComPtr<ComLight::IUnknown> m_freeThreadedMarshaller;                                         \
protected:                                                                                              \
HRESULT internalFinalConstruct()                                                                        \
{                                                                                                       \
	return ComLight::details::createFreeThreadedMarshaller( getUnknown(), &m_freeThreadedMarshaller );  \
}                                                                                                       \
bool queryExtraInterfaces( REFIID riid, void **ppvObject ) const                                        \
{                                                                                                       \
	return ComLight::details::queryMarshallerInterface( riid, ppvObject, m_freeThreadedMarshaller );    \
}

#else
#define DECLARE_FREE_THREADED_MARSHALLER()
#endif

================================================
FILE: ComLightLib/server/interfaceMap.h
================================================
#pragma once
#include "../utils/typeTraits.hpp"

// Unlike ATL, the interface map is optional for ComLight.
// If you don't declare a map, the object will support 2 interfaces: IUnknown, and whatever template argument was passed to ObjectRoot class.
// BEGIN_COM_MAP opens the body of the implQueryInterface method in the `protected:` section of your class.
// Put COM_INTERFACE_ENTRY lines after it, one per supported interface, then close the method with END_COM_MAP.
#define BEGIN_COM_MAP()                                      \
protected:                                                   \
bool implQueryInterface( REFIID iid, void** ppvObject ) {

// Closes the method opened by BEGIN_COM_MAP; reached only when none of the entries matched the requested interface ID.
#define END_COM_MAP() return false; }

namespace ComLight
{
	namespace details
	{
		// Single entry of the interface map: when the requested ID is the ID of the interface I,
		// cast the object to I, add a reference, write the pointer to the output and return true.
		// Fails to compile when I doesn't derive from IUnknown, or when C* doesn't convert to I*.
		template<typename I, typename C>
		inline bool tryReturnInterface( REFIID iid, C* pThis, void** ppvResult )
		{
			static_assert( pointersAssignable<IUnknown, I>(), "Trying to implement an interface that doesn't derive from IUnknown" );
			static_assert( pointersAssignable<I, C>(), "Declared support for an interface, but the class doesn't implement it" );

			if( iid == I::iid() )
			{
				I* const iface = pThis;
				iface->AddRef();
				*ppvResult = iface;
				return true;
			}
			return false;
		}
	}
}

// Entry of the interface map, only usable between BEGIN_COM_MAP and END_COM_MAP because it relies on the `iid` and `ppvObject` arguments declared there.
#define COM_INTERFACE_ENTRY( I ) if( ComLight::details::tryReturnInterface<I>( iid, this, ppvObject ) ) return true;

================================================
FILE: ComLightLib/streams.h
================================================
#pragma once
#include <vector>
#include "comLightCommon.h"

// COM interfaces to marshal streams across the interop.
namespace ComLight
{
	// Reference point of the offset passed to iReadStream::seek
	enum struct eSeekOrigin : uint8_t
	{
		Begin = 0,
		Current = 1,
		End = 2
	};

	namespace details
	{
		// Count of bytes occupied by the elements of the vector
		template<class E>
		inline size_t sizeofVector( const std::vector<E>& vec )
		{
			return sizeof( E ) * vec.size();
		}
	}

	// COM interface for readonly stream. You'll get these interfaces when you use [ReadStream] attribute in C#.
	struct DECLSPEC_NOVTABLE iReadStream : public IUnknown
	{
		DEFINE_INTERFACE_ID( "006af6db-734e-4595-8c94-19304b2389ac" );

		virtual HRESULT COMLIGHTCALL read( void* lpBuffer, int nNumberOfBytesToRead, int &lpNumberOfBytesRead ) = 0;
		virtual HRESULT COMLIGHTCALL seek( int64_t offset, eSeekOrigin origin ) = 0;
		virtual HRESULT COMLIGHTCALL getPosition( int64_t& position ) = 0;
		virtual HRESULT COMLIGHTCALL getLength( int64_t& length ) = 0;

		// Fill the complete vector with bytes from the stream.
		// Returns S_OK when all the bytes were read, E_EOF when the stream delivered fewer bytes than requested, or the failure code of the underlying read.
		// The interface method takes the length as `int`, that's why large vectors are read in multiple chunks instead of truncating the byte count.
		template<class E>
		inline HRESULT read( std::vector<E>& vec )
		{
			// Largest length representable in the `int` argument, assuming 32-bit int
			constexpr size_t maxChunk = 0x7FFFFFFF;

			uint8_t* dest = reinterpret_cast<uint8_t*>( vec.data() );
			size_t remaining = details::sizeofVector( vec );
			// do/while so that an empty vector still results in a single zero-length read, same as before
			do
			{
				const int cb = (int)( remaining < maxChunk ? remaining : maxChunk );
				int cbRead = 0;
				CHECK( read( dest, cb, cbRead ) );
				if( cbRead < cb )
					return E_EOF;
				dest += cb;
				remaining -= (size_t)cb;
			}
			while( remaining > 0 );
			return S_OK;
		}
	};

	// COM interface for write-only stream. You'll get these interfaces when you use [WriteStream] attribute in C#.
	struct DECLSPEC_NOVTABLE iWriteStream : public IUnknown
	{
		DEFINE_INTERFACE_ID( "d7c3eb39-9170-43b9-ba98-2ea1f2fed8a8" );

		virtual HRESULT COMLIGHTCALL write( const void* lpBuffer, int nNumberOfBytesToWrite ) = 0;
		virtual HRESULT COMLIGHTCALL flush() = 0;

		// Write all bytes of the vector to the stream.
		// Returns the status of the final write call, or the failure code of the first failed one.
		// The interface method takes the length as `int`, that's why large vectors are written in multiple chunks instead of truncating the byte count.
		template<class E>
		inline HRESULT write( const std::vector<E>& vec )
		{
			// Largest length representable in the `int` argument, assuming 32-bit int
			constexpr size_t maxChunk = 0x7FFFFFFF;

			const uint8_t* src = reinterpret_cast<const uint8_t*>( vec.data() );
			size_t remaining = details::sizeofVector( vec );
			for( ;; )
			{
				const int cb = (int)( remaining < maxChunk ? remaining : maxChunk );
				remaining -= (size_t)cb;
				// The last chunk, which is the only one for vectors under 2GB: return the status as is
				if( 0 == remaining )
					return write( src, cb );
				CHECK( write( src, cb ) );
				src += cb;
			}
		}
	};
}

================================================
FILE: ComLightLib/unknwn.h
================================================
#pragma once
#include <type_traits>

// Calling conventions
#ifdef _MSC_VER
#define COMLIGHTCALL __stdcall
#define DECLSPEC_NOVTABLE   __declspec(novtable)
#elif defined(__GNUC__) || defined(__clang__)
#if defined(__i386__)
#define COMLIGHTCALL __attribute__((stdcall))
#else
#define COMLIGHTCALL
#endif
#define DECLSPEC_NOVTABLE
#else
#error Unsupported C++ compiler
#endif

#include "utils/guid_parse.hpp"

// Use inside an interface declaration. Adds a static constexpr iid() method which returns the GUID parsed from the string literal by ComLight::make_guid.
#define DEFINE_INTERFACE_ID( guidString ) static constexpr GUID iid() { return ::ComLight::make_guid( guidString ); }

namespace ComLight
{
	// This thing is binary compatible with IUnknown from Windows SDK. See DesktopClient demo project, it uses normal COM interop in .NET framework 4.7 to call my implementation.
	// Because of that compatibility, don't reorder the virtual methods or add new ones.
	struct DECLSPEC_NOVTABLE IUnknown
	{
		DEFINE_INTERFACE_ID( "00000000-0000-0000-c000-000000000046" );

		// Ask the object for a pointer to another interface, identified by riid. On success the pointer is written to *ppvObject.
		virtual HRESULT COMLIGHTCALL QueryInterface( REFIID riid, void **ppvObject ) = 0;

		// Increment the reference counter of the object
		virtual uint32_t COMLIGHTCALL AddRef() = 0;

		// Decrement the reference counter of the object
		virtual uint32_t COMLIGHTCALL Release() = 0;
	};
}

================================================
FILE: ComLightLib/utils/guid_parse.hpp
================================================
// https://github.com/tobias-loew/constexpr-GUID-cpp-11

//-------------------------------------------------------------------------------------------------------
// constexpr GUID parsing
// Written by Alexander Bessonov
// Written by Tobias Loew
//
// Licensed under the MIT license.
//-------------------------------------------------------------------------------------------------------

#pragma once
#include <stdexcept>
#include <string>
#include <cassert>
#include <cstdint>

#if !defined(GUID_DEFINED)
#define GUID_DEFINED
// Fallback definition of the GUID structure, for when no other header has defined it
struct GUID {
	uint32_t Data1;
	uint16_t Data2;
	uint16_t Data3;
	uint8_t Data4[ 8 ];
};
#endif

namespace ComLight
{
	namespace details
	{
		constexpr const size_t short_guid_form_length = 36;	// XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
		constexpr const size_t long_guid_form_length = 38;	// {XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX}

		// Value of a single hexadecimal digit, either case. Throws std::domain_error for any other character.
		constexpr uint8_t parse_hex_digit( const char c )
		{
			using namespace std::string_literals;
			return
				( '0' <= c && c <= '9' )
				? c - '0'
				: ( 'a' <= c && c <= 'f' )
				? 10 + c - 'a'
				: ( 'A' <= c && c <= 'F' )
				? 10 + c - 'A'
				:
				throw std::domain_error{ "invalid character in GUID"s };
		}

		// Parse 2 hexadecimal digits at the pointer
		constexpr uint8_t parse_hex_uint8_t( const char *ptr )
		{
			return ( parse_hex_digit( ptr[ 0 ] ) << 4 ) + parse_hex_digit( ptr[ 1 ] );
		}

		// Parse 4 hexadecimal digits at the pointer, most significant digit first
		constexpr uint16_t parse_hex_uint16_t( const char *ptr )
		{
			return ( parse_hex_uint8_t( ptr ) << 8 ) + parse_hex_uint8_t( ptr + 2 );
		}

		// Parse 8 hexadecimal digits at the pointer, most significant digit first
		constexpr uint32_t parse_hex_uint32_t( const char *ptr )
		{
			// Widen before shifting: uint16_t promotes to signed int, and shifting such a value by 16 bits overflows the int when the first digit is 8 or higher
			return ( static_cast<uint32_t>( parse_hex_uint16_t( ptr ) ) << 16 ) + parse_hex_uint16_t( ptr + 4 );
		}

		// Parse the short form of the GUID, XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
		// The separators between the groups are skipped without being verified.
		constexpr GUID parse_guid( const char *begin )
		{
			return GUID{
				parse_hex_uint32_t( begin ),		// 8 digits at offset 0
				parse_hex_uint16_t( begin + 9 ),	// 4 digits after the first separator
				parse_hex_uint16_t( begin + 14 ),	// 4 digits after the second separator
				{
					// 2 bytes of the fourth group
					parse_hex_uint8_t( begin + 19 ),
					parse_hex_uint8_t( begin + 21 ),
					// 6 bytes of the last group, it starts after the separator at offset 23
					parse_hex_uint8_t( begin + 24 ),
					parse_hex_uint8_t( begin + 26 ),
					parse_hex_uint8_t( begin + 28 ),
					parse_hex_uint8_t( begin + 30 ),
					parse_hex_uint8_t( begin + 32 ),
					parse_hex_uint8_t( begin + 34 )
				}
			};
		}

		// Validate length and the optional braces of the string of N characters, then parse the GUID.
		// Throws std::domain_error when the length is neither 36 nor 38, or when the long form is missing one of the braces.
		constexpr GUID make_guid_helper( const char *str, size_t N )
		{
			using namespace std::string_literals;

			return ( !( N == long_guid_form_length || N == short_guid_form_length ) )
				? throw std::domain_error{ "String GUID of the form {XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX} or XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX is expected"s }
				: ( N == long_guid_form_length && ( str[ 0 ] != '{' || str[ long_guid_form_length - 1 ] != '}' ) )
				? throw std::domain_error{ "Missing opening or closing brace"s }

			: parse_guid( str + ( N == long_guid_form_length ? 1 : 0 ) );
		}

		// Make GUID from a string literal. N counts the terminating zero, that's why the helper gets N - 1.
		template<size_t N>
		constexpr GUID make_guid( const char( &str )[ N ] )
		{
			return make_guid_helper( str, N - 1 );
		}
	}
	using details::make_guid;
}

================================================
FILE: ComLightLib/utils/typeTraits.hpp
================================================
#pragma once
#include <type_traits>

namespace ComLight
{
	namespace details
	{
		// Compile-time test: can a value of type TValue* be assigned to a variable of type TResult*?
		template<class TResult, class TValue>
		constexpr bool pointersAssignable()
		{
			// The left hand side has to be an lvalue reference, here's why: https://stackoverflow.com/a/52429468/126995
			return std::is_assignable<typename std::add_lvalue_reference<TResult*>::type, TValue*>::value;
		}
	}
}

// https://en.wikibooks.org/wiki/More_C++_Idioms/Member_Detector
// GENERATE_HAS_MEMBER( name ) defines 2 templates in the current scope:
// HasMember_name<T> is the implementation detail with the boolean RESULT constant,
// has_member_name<T> is the public trait, it inherits from std::integral_constant<bool, RESULT>.
// The detector derives a helper struct from both T and a Fallback struct which has a member with that name.
// The test() overload taking decltype(U::member)* is only well-formed when the name is not ambiguous in that helper struct.
#define GENERATE_HAS_MEMBER(member)                                               \
                                                                                  \
template < class T >                                                              \
class HasMember_##member                                                          \
{                                                                                 \
private:                                                                          \
    using Yes = char[2];                                                          \
    using  No = char[1];                                                          \
                                                                                  \
    struct Fallback { int member; };                                              \
    struct Derived : T, Fallback { };                                             \
                                                                                  \
    template < class U >                                                          \
    static No& test ( decltype(U::member)* );                                     \
    template < typename U >                                                       \
    static Yes& test ( U* );                                                      \
                                                                                  \
public:                                                                           \
    static constexpr bool RESULT = sizeof(test<Derived>(nullptr)) == sizeof(Yes); \
};                                                                                \
                                                                                  \
template < class T >                                                              \
struct has_member_##member                                                        \
: public std::integral_constant<bool, HasMember_##member<T>::RESULT>              \
{                                                                                 \
};


================================================
FILE: ComputeShaders/ComputeShaders.cpp
================================================
// Placeholder function with an empty body.
// NOTE(review): presumably it only exists so this static library project, which otherwise contains just HLSL shaders, has a C++ source file to compile — confirm.
void fnComputeShaders()
{
}

================================================
FILE: ComputeShaders/ComputeShaders.vcxproj
================================================
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  <ItemGroup Label="ProjectConfigurations">
    <ProjectConfiguration Include="Release|x64">
      <Configuration>Release</Configuration>
      <Platform>x64</Platform>
    </ProjectConfiguration>
    <ProjectConfiguration Include="Debug|x64">
      <Configuration>Debug</Configuration>
      <Platform>x64</Platform>
    </ProjectConfiguration>
    <ProjectConfiguration Include="Release|x64">
      <Configuration>Release</Configuration>
      <Platform>x64</Platform>
    </ProjectConfiguration>
    <ProjectConfiguration Include="Debug|x64">
      <Configuration>Debug</Configuration>
      <Platform>x64</Platform>
    </ProjectConfiguration>
  </ItemGroup>
  <PropertyGroup Label="Globals">
    <VCProjectVersion>16.0</VCProjectVersion>
    <Keyword>Win32Proj</Keyword>
    <ProjectGuid>{1c39d386-96d0-47a1-bbfa-68bbdb24439c}</ProjectGuid>
    <RootNamespace>ComputeShaders</RootNamespace>
    <WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
  </PropertyGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
    <ConfigurationType>StaticLibrary</ConfigurationType>
    <UseDebugLibraries>false</UseDebugLibraries>
    <PlatformToolset>v143</PlatformToolset>
    <WholeProgramOptimization>true</WholeProgramOptimization>
    <CharacterSet>Unicode</CharacterSet>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
    <ConfigurationType>StaticLibrary</ConfigurationType>
    <UseDebugLibraries>false</UseDebugLibraries>
    <PlatformToolset>v143</PlatformToolset>
    <WholeProgramOptimization>true</WholeProgramOptimization>
    <CharacterSet>Unicode</CharacterSet>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
    <ConfigurationType>StaticLibrary</ConfigurationType>
    <UseDebugLibraries>false</UseDebugLibraries>
    <PlatformToolset>v143</PlatformToolset>
    <WholeProgramOptimization>true</WholeProgramOptimization>
    <CharacterSet>Unicode</CharacterSet>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
    <ConfigurationType>StaticLibrary</ConfigurationType>
    <UseDebugLibraries>false</UseDebugLibraries>
    <PlatformToolset>v143</PlatformToolset>
    <WholeProgramOptimization>true</WholeProgramOptimization>
    <CharacterSet>Unicode</CharacterSet>
  </PropertyGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
  <ImportGroup Label="ExtensionSettings">
  </ImportGroup>
  <ImportGroup Label="Shared">
  </ImportGroup>
  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
  </ImportGroup>
  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
  </ImportGroup>
  <PropertyGroup Label="UserMacros" />
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
    <MultiProcFXC>true</MultiProcFXC>
    <OutDir>$(Platform)\$(Configuration)\</OutDir>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
    <OutDir>$(Platform)\$(Configuration)\</OutDir>
    <MultiProcFXC>true</MultiProcFXC>
  </PropertyGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
    <ClCompile>
      <WarningLevel>Level3</WarningLevel>
      <FunctionLevelLinking>true</FunctionLevelLinking>
      <IntrinsicFunctions>true</IntrinsicFunctions>
      <SDLCheck>true</SDLCheck>
      <PreprocessorDefinitions>NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <ConformanceMode>true</ConformanceMode>
    </ClCompile>
    <Link>
      <SubSystem>
      </SubSystem>
      <EnableCOMDATFolding>true</EnableCOMDATFolding>
      <OptimizeReferences>true</OptimizeReferences>
      <GenerateDebugInformation>true</GenerateDebugInformation>
    </Link>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
    <ClCompile>
      <WarningLevel>Level3</WarningLevel>
      <FunctionLevelLinking>true</FunctionLevelLinking>
      <IntrinsicFunctions>true</IntrinsicFunctions>
      <SDLCheck>true</SDLCheck>
      <PreprocessorDefinitions>NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <ConformanceMode>true</ConformanceMode>
    </ClCompile>
    <Link>
      <SubSystem>
      </SubSystem>
      <EnableCOMDATFolding>true</EnableCOMDATFolding>
      <OptimizeReferences>true</OptimizeReferences>
      <GenerateDebugInformation>true</GenerateDebugInformation>
    </Link>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
    <ClCompile>
      <WarningLevel>Level3</WarningLevel>
      <FunctionLevelLinking>true</FunctionLevelLinking>
      <IntrinsicFunctions>true</IntrinsicFunctions>
      <SDLCheck>true</SDLCheck>
      <PreprocessorDefinitions>NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <ConformanceMode>true</ConformanceMode>
    </ClCompile>
    <Link>
      <SubSystem>
      </SubSystem>
      <EnableCOMDATFolding>true</EnableCOMDATFolding>
      <OptimizeReferences>true</OptimizeReferences>
      <GenerateDebugInformation>true</GenerateDebugInformation>
    </Link>
    <FxCompile>
      <ShaderModel>5.0</ShaderModel>
      <ShaderType>Compute</ShaderType>
    </FxCompile>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
    <ClCompile>
      <WarningLevel>Level3</WarningLevel>
      <FunctionLevelLinking>true</FunctionLevelLinking>
      <IntrinsicFunctions>true</IntrinsicFunctions>
      <SDLCheck>true</SDLCheck>
      <PreprocessorDefinitions>NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <ConformanceMode>true</ConformanceMode>
    </ClCompile>
    <Link>
      <SubSystem>
      </SubSystem>
      <EnableCOMDATFolding>true</EnableCOMDATFolding>
      <OptimizeReferences>true</OptimizeReferences>
      <GenerateDebugInformation>true</GenerateDebugInformation>
    </Link>
    <FxCompile>
      <ShaderModel>5.0</ShaderModel>
      <ShaderType>Compute</ShaderType>
      <DisableOptimizations>true</DisableOptimizations>
      <EnableDebuggingInformation>true</EnableDebuggingInformation>
    </FxCompile>
  </ItemDefinitionGroup>
  <ItemGroup>
    <ClCompile Include="ComputeShaders.cpp" />
  </ItemGroup>
  <ItemGroup>
    <FxCompile Include="add.hlsl" />
    <FxCompile Include="addInPlace.hlsl" />
    <FxCompile Include="addRepeat.hlsl" />
    <FxCompile Include="addRepeat64.hlsl" />
    <FxCompile Include="addRepeatEx.hlsl" />
    <FxCompile Include="addRepeatGelu.hlsl" />
    <FxCompile Include="addRepeatGelu64.hlsl" />
    <FxCompile Include="addRepeatScale.hlsl" />
    <FxCompile Include="addRows.hlsl" />
    <FxCompile Include="convolutionMain.hlsl" />
    <FxCompile Include="convolutionMain2.hlsl" />
    <FxCompile Include="convolutionMain2Fixed.hlsl" />
    <FxCompile Include="convolutionPrep1.hlsl" />
    <FxCompile Include="convolutionPrep2.hlsl" />
    <FxCompile Include="copyConvert.hlsl" />
    <FxCompile Include="copyTranspose.hlsl" />
    <FxCompile Include="dbgFindNaN.hlsl" />
    <FxCompile Include="diagMaskInf.hlsl" />
    <FxCompile Include="flashAttention.hlsl" />
    <FxCompile Include="flashAttentionCompat1.hlsl" />
    <FxCompile Include="flashAttentionCompat2.hlsl" />
    <FxCompile Include="flashAttentionCompat3.hlsl" />
    <FxCompile Include="fmaRepeat1.hlsl" />
    <FxCompile Include="fmaRepeat164.hlsl" />
    <FxCompile Include="fmaRepeat2.hlsl" />
    <FxCompile Include="matReshapePanels.hlsl" />
    <FxCompile Include="mulMatByRow.hlsl" />
    <FxCompile Include="mulMatByRow64.hlsl" />
    <FxCompile Include="mulMatByRowTiled.hlsl" />
    <FxCompile Include="mulMatByRowTiledEx.hlsl" />
    <FxCompile Include="mulMatByScalar.hlsl" />
    <FxCompile Include="mulMatDotMain.hlsl" />
    <FxCompile Include="mulMatDotReshape.hlsl" />
    <FxCompile Include="mulMatMadMain.hlsl" />
    <FxCompile Include="mulMatTiled.hlsl" />
    <FxCompile Include="mulMatTiledEx.hlsl" />
    <FxCompile Include="norm.hlsl" />
    <FxCompile Include="normCompat.hlsl" />
    <FxCompile Include="normFixed.hlsl" />
    <FxCompile Include="normFixed64.hlsl" />
    <FxCompile Include="scaleInPlace.hlsl" />
    <FxCompile Include="softMax.hlsl" />
    <FxCompile Include="softMax64.hlsl" />
    <FxCompile Include="softMaxCompat.hlsl" />
    <FxCompile Include="softMaxFixed.hlsl" />
    <FxCompile Include="softMaxLong.hlsl" />
    <FxCompile Include="zeroMemory.hlsl" />
  </ItemGroup>
  <ItemGroup>
    <None Include="componentwiseBinaryOp.hlsli" />
    <None Include="flashAttentionCommon.hlsli" />
    <None Include="fp64Utils.hlsli" />
    <None Include="groupReduce.hlsli" />
    <None Include="groupReduce64.hlsli" />
    <None Include="miscUtils.hlsli" />
    <None Include="repeatUtils.hlsli" />
  </ItemGroup>
  <ItemGroup>
    <Text Include="Readme.txt" />
  </ItemGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
  <ImportGroup Label="ExtensionTargets">
  </ImportGroup>
</Project>

================================================
FILE: ComputeShaders/ComputeShaders.vcxproj.filters
================================================
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  <ItemGroup>
    <ClCompile Include="ComputeShaders.cpp" />
  </ItemGroup>
  <ItemGroup>
    <FxCompile Include="mulMatDotMain.hlsl" />
    <FxCompile Include="mulMatDotReshape.hlsl" />
    <FxCompile Include="convolutionMain.hlsl" />
    <FxCompile Include="convolutionPrep1.hlsl" />
    <FxCompile Include="convolutionPrep2.hlsl" />
    <FxCompile Include="add.hlsl" />
    <FxCompile Include="flashAttention.hlsl" />
    <FxCompile Include="convolutionMain2.hlsl" />
    <FxCompile Include="norm.hlsl" />
    <FxCompile Include="copyConvert.hlsl" />
    <FxCompile Include="copyTranspose.hlsl" />
    <FxCompile Include="normCompat.hlsl" />
    <FxCompile Include="flashAttentionCompat1.hlsl" />
    <FxCompile Include="flashAttentionCompat3.hlsl" />
    <FxCompile Include="flashAttentionCompat2.hlsl" />
    <FxCompile Include="scaleInPlace.hlsl" />
    <FxCompile Include="diagMaskInf.hlsl" />
    <FxCompile Include="softMaxCompat.hlsl" />
    <FxCompile Include="mulMatMadMain.hlsl" />
    <FxCompile Include="addRepeat.hlsl" />
    <FxCompile Include="fmaRepeat1.hlsl" />
    <FxCompile Include="fmaRepeat2.hlsl" />
    <FxCompile Include="addInPlace.hlsl" />
    <FxCompile Include="softMax.hlsl" />
    <FxCompile Include="addRepeatScale.hlsl" />
    <FxCompile Include="mulMatByRow.hlsl" />
    <FxCompile Include="mulMatByScalar.hlsl" />
    <FxCompile Include="mulMatTiled.hlsl" />
    <FxCompile Include="mulMatByRow64.hlsl" />
    <FxCompile Include="softMax64.hlsl" />
    <FxCompile Include="softMaxFixed.hlsl" />
    <FxCompile Include="addRepeat64.hlsl" />
    <FxCompile Include="fmaRepeat164.hlsl" />
    <FxCompile Include="addRepeatGelu.hlsl" />
    <FxCompile Include="addRepeatGelu64.hlsl" />
    <FxCompile Include="normFixed.hlsl" />
    <FxCompile Include="normFixed64.hlsl" />
    <FxCompile Include="mulMatByRowTiled.hlsl" />
    <FxCompile Include="convolutionMain2Fixed.hlsl" />
    <FxCompile Include="addRows.hlsl" />
    <FxCompile Include="zeroMemory.hlsl" />
    <FxCompile Include="mulMatTiledEx.hlsl" />
    <FxCompile Include="matReshapePanels.hlsl" />
    <FxCompile Include="mulMatByRowTiledEx.hlsl" />
    <FxCompile Include="addRepeatEx.hlsl" />
    <FxCompile Include="softMaxLong.hlsl" />
    <FxCompile Include="dbgFindNaN.hlsl" />
  </ItemGroup>
  <ItemGroup>
    <None Include="componentwiseBinaryOp.hlsli" />
    <None Include="miscUtils.hlsli" />
    <None Include="groupReduce.hlsli" />
    <None Include="fp64Utils.hlsli" />
    <None Include="flashAttentionCommon.hlsli" />
    <None Include="repeatUtils.hlsli" />
    <None Include="groupReduce64.hlsli" />
  </ItemGroup>
  <ItemGroup>
    <Text Include="Readme.txt" />
  </ItemGroup>
</Project>

================================================
FILE: ComputeShaders/Readme.txt
================================================
This project compiles all the compute shaders which implement the model.

Many shaders come in 2 versions, something.hlsl and something64.hlsl

The version with the `64` suffix is used on AMD GPUs, the version without suffix is used on nVidia and Intel GPUs.

Not all of these shaders are actually used for anything.
Some of them implement binary compatibility with the reference CPU version, and are not used unless you change the `constexpr` flags in the MlContext C++ class.
Such shaders often require FP64 support, which is an optional feature in D3D11.
CompressShaders tool detects such shaders by looking at the SFI0 chunk in the binary, and outputs a bitmap of the FP64 shaders.
This way, missing FP64 hardware support shouldn’t break the library.

================================================
FILE: ComputeShaders/add.hlsl
================================================
// The binary operation of this shader: sum of the two arguments
inline float compute( float a, float b )
{
	const float sum = a + b;
	return sum;
}

#include "componentwiseBinaryOp.hlsli"

================================================
FILE: ComputeShaders/addInPlace.hlsl
================================================
#ifndef THREADS
#define THREADS 512
#endif

Buffer<float> arg0: register( t0 );
RWBuffer<float> result: register( u0 );

cbuffer Constants: register( b0 )
{
	uint4 size: packoffset( c0 );
	uint4 strides: packoffset( c1 );
	uint4 argStrides: packoffset( c3 );
}

// Offset of a row in a buffer: the 3 indices multiplied by the last 3 strides, and summed.
// strides[ 0 ] is not used here.
inline uint rowOffset( uint3 idx, uint4 strides )
{
	const uint3 products = idx * strides.yzw;
	return products.x + products.y + products.z;
}

// Computes result += arg0 for one row of the tensors.
// The row is selected by the thread group index; the threads of the group are striding over the elements of that row.
[ numthreads( THREADS, 1, 1 ) ]
void main( uint3 group: SV_GroupID, uint thread : SV_GroupIndex )
{
	// In these vectors, x is for the result buffer, and y is for the arg0 buffer
	const uint2 stridesX = uint2( strides[ 0 ], argStrides[ 0 ] );
	const uint2 rowStart = uint2( rowOffset( group, strides ), rowOffset( group, argStrides ) );
	// Offset one past the last element of the destination row
	const uint rdiEnd = rowStart.x + size[ 0 ] * stridesX.x;

	const uint2 inc = stridesX * THREADS;
	for( uint2 pos = rowStart + stridesX * thread; pos.x < rdiEnd; pos += inc )
		result[ pos.x ] = result[ pos.x ] + arg0[ pos.y ];
}

================================================
FILE: ComputeShaders/addRepeat.hlsl
================================================
// Compute tensor = tensor + repeat( pattern, tensor ) in 1 shot, without VRAM allocations
// Dispatch [ nb[ 1 ], nb[ 2 ], nb[ 3 ] ] thread groups of this shader, where nb is size of the destination tensor
RWBuffer<float> tensor: register( u0 );
Buffer<float> pattern: register( t0 );

cbuffer Constants: register( b0 )
{
	uint4 tensorSize: packoffset( c0 );
	uint4 tensorStrides: packoffset( c1 );
	uint4 patternSize: packoffset( c2 );
	uint4 patternStrides: packoffset( c3 );
}

#ifndef THREADS
#define THREADS 256
#endif

#include "repeatUtils.hlsli"

// Adds the supplied value to a single element of the tensor, in place
inline void computeSimple( uint idx, float add )
{
	tensor[ idx ] = tensor[ idx ] + add;
}

// Adds a row of the pattern to a row of the tensor, repeating the pattern as needed.
// There are 3 code paths, selected by the length of the pattern row relative to the size of the thread group.
[ numthreads( THREADS, 1, 1 ) ]
void main( uint3 group: SV_GroupID, uint thread : SV_GroupIndex )
{
	uint3 it = tensorIteratorState( group, thread, tensorSize, tensorStrides );
	const uint patternLength = patternSize.x;
	const uint patternStep = patternStrides.x;
	// Offset of the pattern row which corresponds to this thread group
	uint patternPos = rowOffset( group % patternSize.yzw, patternStrides );

	if( patternLength > THREADS )
	{
		// Pattern rows are larger than the thread group, need to stream from both buffers
		const uint wrapLength = patternLength * patternStep;
		const uint patternEnd = patternPos + wrapLength;
		const uint advance = THREADS * patternStep;
		patternPos += thread * patternStep;

		ROW_LOOP( it )
		{
			const float p = pattern[ patternPos ];
			tensor[ it.x ] = tensor[ it.x ] + p;

			// Advance the pattern offset, wrapping around at the end of the pattern row
			patternPos += advance;
			if( patternPos >= patternEnd )
				patternPos -= wrapLength;
		}
	}
	else if( 1 == patternLength )
	{
		// The pattern only has 1 column - broadcasting over the row
		const float p = pattern[ patternPos ];
		ROW_LOOP( it )
			computeSimple( it.x, p );
	}
	else
	{
		// Pattern size doesn't exceed thread group size: every active thread needs just 1 pattern value, loaded outside of the loop.
		// The count of active threads is rounded down to a multiple of the pattern length.
		const uint threadsPerGroup = THREADS - ( THREADS % patternLength );
		if( thread < threadsPerGroup )
		{
			const float p = pattern[ patternPos + ( thread % patternLength ) * patternStep ];
			ROW_LOOP_EX( it, threadsPerGroup, tensorStrides )
				computeSimple( it.x, p );
		}
	}
}

================================================
FILE: ComputeShaders/addRepeat64.hlsl
================================================
#define THREADS 64
#include "addRepeat.hlsl"

================================================
FILE: ComputeShaders/addRepeatEx.hlsl
================================================
// An equivalent of "addRepeat.hlsl" followed by "addInPlace.hlsl".
// Merging into a single shader saves some global memory bandwidth and reduces CPU overhead wasted binding resources and dispatching shaders
RWBuffer<float> tensor: register( u0 );
Buffer<float> pattern: register( t0 );
Buffer<float> finalAdd: register( t1 );

cbuffer Constants: register( b0 )
{
	uint4 tensorSize: packoffset( c0 );
	uint4 tensorStrides: packoffset( c1 );
	uint4 patternSize: packoffset( c2 );
	uint4 patternStrides: packoffset( c3 );
	// uint4 finalSize: packoffset( c4 );
	uint4 finalStrides: packoffset( c5 );
}

#ifndef THREADS
#define THREADS 256
#endif

#include "repeatUtils.hlsli"

// The micro-kernel of the shader: tensor[ rsi.x ] = ( tensor[ rsi.x ] + pattern ) + finalAdd[ rsi.y ]
inline void add2( uint2 rsi, float pattern )
{
	const float withPattern = tensor[ rsi.x ] + pattern;
	tensor[ rsi.x ] = withPattern + finalAdd[ rsi.y ];
}

// Computes tensor = tensor + repeat( pattern, tensor ) + finalAdd for one row of the tensor.
// The row is selected by the thread group index; the threads of the group are striding over the elements of the row.
// There are 3 code paths, selected by the length of the pattern row relative to the size of the thread group.
[ numthreads( THREADS, 1, 1 ) ]
void main( uint3 group: SV_GroupID, uint thread : SV_GroupIndex )
{
	// In the rsi vector, x is the offset in the tensor buffer, y is the offset in the finalAdd buffer
	const uint2 stridesX = uint2( tensorStrides.x, finalStrides.x );
	uint2 rsi;
	rsi.x = rowOffset( group, tensorStrides );
	rsi.y = rowOffset( group, finalStrides );
	const uint rsiEnd = rsi.x + tensorSize.x * stridesX.x;
	rsi += stridesX * thread;

	// Offset of the pattern row which corresponds to this thread group
	uint pat = rowOffset( group % patternSize.yzw, patternStrides );

	if( patternSize.x == 1 )
	{
		// The pattern only has 1 column, broadcasting over the row
		const uint2 rsiInc = stridesX * THREADS;
		const float p = pattern[ pat ];
		for( ; rsi.x < rsiEnd; rsi += rsiInc )
			add2( rsi, p );
	}
	else if( patternSize.x <= THREADS )
	{
		// pattern size doesn't exceed thread group size, load outside of the loop
		// The count of active threads is rounded down to a multiple of the pattern length, so every thread needs just 1 pattern value
		const uint threadsPerGroup = THREADS - ( THREADS % patternSize.x );
		if( thread >= threadsPerGroup )
			return;

		const uint2 rsiInc = stridesX * threadsPerGroup;
		pat += ( thread % patternSize.x ) * patternStrides.x;
		const float p = pattern[ pat ];
		for( ; rsi.x < rsiEnd; rsi += rsiInc )
			add2( rsi, p );
	}
	else
	{
		// Pattern rows are longer than the thread group, need to stream from both buffers
		// The wrap-around is computed in buffer offsets: it accounts for both the start of the pattern row, and the stride between pattern elements.
		// Same logic as rsiEnd / rsiDec in addRepeat.hlsl
		const uint patDec = patternSize.x * patternStrides.x;
		const uint patEnd = pat + patDec;

		uint3 rsi3;
		rsi3.xy = rsi;
		rsi3.z = pat + thread * patternStrides.x;

		const uint3 rsiInc = uint3( stridesX, patternStrides.x ) * THREADS;
		while( rsi3.x < rsiEnd )
		{
			add2( rsi3.xy, pattern[ rsi3.z ] );

			rsi3 += rsiInc;
			// The pattern row is longer than THREADS elements, a single subtraction is enough to wrap
			if( rsi3.z >= patEnd )
				rsi3.z -= patDec;
		}
	}
}

================================================
FILE: ComputeShaders/addRepeatGelu.hlsl
================================================
// Compute tensor = GELU( tensor + repeat( pattern, tensor ) ) in 1 shot, without VRAM allocations
// Dispatch [ nb[ 1 ], nb[ 2 ], nb[ 3 ] ] thread groups of this shader, where nb is size of the destination tensor
RWBuffer<float> tensor: register( u0 );
Buffer<float> pattern: register( t0 );
Buffer<uint> lookupTable: register( t1 );

cbuffer Constants: register( b0 )
{
	uint4 tensorSize: packoffset( c0 );
	uint4 tensorStrides: packoffset( c1 );
	uint4 patternSize: packoffset( c2 );
	uint4 patternStrides: packoffset( c3 );
}

#ifndef THREADS
#define THREADS 1024
#endif

#include "repeatUtils.hlsli"
#include "miscUtils.hlsli"

// GELU activation function.
// The enabled code path uses fp16Rounded( x ) as the index into the lookup table, and upcasts the loaded FP16 value.
inline float gelu( float x )
{
#if 1
	return f16tof32( lookupTable[ fp16Rounded( x ) ] );
#else
	// Computes the formula instead of the table lookup.
	// This version is much slower, at least on AMD, despite saving these VRAM loads.
	const float GELU_COEF_A = 0.044715;
	const float SQRT_2_OVER_PI = 0.79788456080286535587989211986876;
	const float inner = SQRT_2_OVER_PI * x * ( 1.0 + GELU_COEF_A * x * x );
	return 0.5 * x * ( 1.0 + tanh( inner ) );
#endif
}

// Adds the supplied value to a single element of the tensor, applies GELU to the sum, and stores the result in place
inline void computeSimple( uint idx, float add )
{
	const float sum = tensor[ idx ] + add;
	tensor[ idx ] = gelu( sum );
}

// Computes GELU( tensor + repeat( pattern, tensor ) ) for one row of the tensor, in place.
// There are 3 code paths, selected by the length of the pattern row relative to the size of the thread group.
[ numthreads( THREADS, 1, 1 ) ]
void main( uint3 group: SV_GroupID, uint thread : SV_GroupIndex )
{
	uint3 it = tensorIteratorState( group, thread, tensorSize, tensorStrides );
	const uint patternLength = patternSize.x;
	const uint patternStep = patternStrides.x;
	// Offset of the pattern row which corresponds to this thread group
	uint patternPos = rowOffset( group % patternSize.yzw, patternStrides );

	if( patternLength > THREADS )
	{
		// Pattern rows are larger than the thread group, need to stream from both buffers
		const uint wrapLength = patternLength * patternStep;
		const uint patternEnd = patternPos + wrapLength;
		const uint advance = THREADS * patternStep;
		patternPos += thread * patternStep;

		ROW_LOOP( it )
		{
			const float p = pattern[ patternPos ];
			tensor[ it.x ] = gelu( tensor[ it.x ] + p );

			// Advance the pattern offset, wrapping around at the end of the pattern row
			patternPos += advance;
			if( patternPos >= patternEnd )
				patternPos -= wrapLength;
		}
	}
	else if( 1 == patternLength )
	{
		// The pattern only has 1 column - broadcasting over the row
		const float p = pattern[ patternPos ];
		ROW_LOOP( it )
			computeSimple( it.x, p );
	}
	else
	{
		// Pattern size doesn't exceed thread group size: every active thread needs just 1 pattern value, loaded outside of the loop.
		// The count of active threads is rounded down to a multiple of the pattern length.
		const uint threadsPerGroup = THREADS - ( THREADS % patternLength );
		if( thread < threadsPerGroup )
		{
			const float p = pattern[ patternPos + ( thread % patternLength ) * patternStep ];
			ROW_LOOP_EX( it, threadsPerGroup, tensorStrides )
				computeSimple( it.x, p );
		}
	}
}

================================================
FILE: ComputeShaders/addRepeatGelu64.hlsl
================================================
#define THREADS 64
#include "addRepeatGelu.hlsl"

================================================
FILE: ComputeShaders/addRepeatScale.hlsl
================================================
// Compute tensor = ( tensor + repeat( pattern, tensor ) ) * scale in 1 shot, without VRAM allocations
// Dispatch [ nb[ 1 ], nb[ 2 ], nb[ 3 ] ] thread groups of this shader, where nb is size of the destination tensor
RWBuffer<float> tensor: register( u0 );
Buffer<float> pattern: register( t0 );

cbuffer Constants: register( b0 )
{
	uint4 tensorSize: packoffset( c0 );
	uint4 tensorStrides: packoffset( c1 );
	uint4 patternSize: packoffset( c2 );
	uint4 patternStrides: packoffset( c3 );
	float scalingMul : packoffset( c4.x );
}

#ifndef THREADS
#define THREADS 512
#endif

#include "repeatUtils.hlsli"

// Adds the supplied value to a single element of the tensor, multiplies the sum by scalingMul, and stores the result in place
inline void computeSimple( uint idx, float add )
{
	const float sum = tensor[ idx ] + add;
	tensor[ idx ] = sum * scalingMul;
}

// Computes ( tensor + repeat( pattern, tensor ) ) * scalingMul for one row of the tensor, in place.
// There are 3 code paths, selected by the length of the pattern row relative to the size of the thread group.
[ numthreads( THREADS, 1, 1 ) ]
void main( uint3 group: SV_GroupID, uint thread : SV_GroupIndex )
{
	uint3 it = tensorIteratorState( group, thread, tensorSize, tensorStrides );
	const uint patternLength = patternSize.x;
	const uint patternStep = patternStrides.x;
	// Offset of the pattern row which corresponds to this thread group
	uint patternPos = rowOffset( group % patternSize.yzw, patternStrides );

	if( patternLength > THREADS )
	{
		// Pattern rows are larger than the thread group, need to stream from both buffers
		const uint wrapLength = patternLength * patternStep;
		const uint patternEnd = patternPos + wrapLength;
		const uint advance = THREADS * patternStep;
		patternPos += thread * patternStep;

		ROW_LOOP( it )
		{
			const float p = pattern[ patternPos ];
			tensor[ it.x ] = ( tensor[ it.x ] + p ) * scalingMul;

			// Advance the pattern offset, wrapping around at the end of the pattern row
			patternPos += advance;
			if( patternPos >= patternEnd )
				patternPos -= wrapLength;
		}
	}
	else if( 1 == patternLength )
	{
		// The pattern only has 1 column - broadcasting over the row
		const float p = pattern[ patternPos ];
		ROW_LOOP( it )
			computeSimple( it.x, p );
	}
	else
	{
		// Pattern size doesn't exceed thread group size: every active thread needs just 1 pattern value, loaded outside of the loop.
		// The count of active threads is rounded down to a multiple of the pattern length.
		const uint threadsPerGroup = THREADS - ( THREADS % patternLength );
		if( thread < threadsPerGroup )
		{
			const float p = pattern[ patternPos + ( thread % patternLength ) * patternStep ];
			ROW_LOOP_EX( it, threadsPerGroup, tensorStrides )
				computeSimple( it.x, p );
		}
	}
}

================================================
FILE: ComputeShaders/addRows.hlsl
================================================
#ifndef THREADS
#define THREADS 256
#endif

// dec.tokenEmbedding tensor
Buffer<float> tokenEmbedding: register( t0 );
// dec.positionalEmbedding tensor
Buffer<float> positionalEmbedding: register( t1 );
// R32_UINT buffer with the input tokens
Buffer<uint> embd: register( t2 );
// Output tensor
RWBuffer<float> result: register( u0 );

cbuffer Constants: register( b0 )
{
	uint rowLength: packoffset( c0.x );
	uint pastTokensCount: packoffset( c0.y );
	uint outputRowStride: packoffset( c0.z );
	uint2 embStrides: packoffset( c1.x );
	uint2 posStrides: packoffset( c1.z );
}

// Produces one row of the output: the row of tokenEmbedding selected by the input token,
// plus the row of positionalEmbedding at index ( row + pastTokensCount ).
// The output row is selected by the X coordinate of the thread group.
[ numthreads( THREADS, 1, 1 ) ]
void main( uint3 group: SV_GroupID, uint thread : SV_GroupIndex )
{
	const uint row = group.x;

	// In these vectors, x is for the result, y is for tokenEmbedding, z is for positionalEmbedding
	const uint3 elementStride = uint3( 1u, embStrides.x, posStrides.x );
	uint3 pos;
	pos.x = row * outputRowStride;
	pos.y = embd[ row ] * embStrides.y;
	pos.z = ( row + pastTokensCount ) * posStrides.y;
	// Offset one past the last element of the output row
	const uint rdiEnd = pos.x + rowLength;

	const uint3 inc = elementStride * THREADS;
	for( pos += elementStride * thread; pos.x < rdiEnd; pos += inc )
		result[ pos.x ] = tokenEmbedding[ pos.y ] + positionalEmbedding[ pos.z ];
}

================================================
FILE: ComputeShaders/componentwiseBinaryOp.hlsli
================================================
Buffer<float> arg0: register( t0 );
Buffer<float> arg1: register( t1 );
RWBuffer<float> result: register( u0 );

cbuffer Constants: register( b0 )
{
	uint4 src0_elements: packoffset( c0 );
	uint4 src0_strides: packoffset( c1 );
	uint4 src1_elements: packoffset( c2 );
	uint4 src1_strides: packoffset( c3 );
	uint4 result_elements: packoffset( c4 );
	uint4 result_strides: packoffset( c5 );
}

// Applies compute( a, b ) to the corresponding elements of one row of the two arguments.
// The compute function is supplied by the shader which includes this file.
// The row is selected by the X coordinate of the thread group; elements of arg0 and of the result are read and written with unit stride.
[ numthreads( 32, 1, 1 ) ]
void main( uint3 group: SV_GroupID, uint thread : SV_GroupIndex )
{
	const uint row = group.x;
	const uint nc = src0_elements[ 0 ];

	// In these vectors, x is for arg0, y is for arg1, z is for the result
	const uint3 elementStride = uint3( 1u, src1_strides[ 0 ], 1u );
	const uint3 rowStride = uint3( src0_strides[ 1 ], src1_strides[ 1 ], result_strides[ 1 ] );
	const uint3 rowStart = rowStride * row;
	// Offset one past the last element of the arg0 row
	const uint rsi0End = rowStart.x + nc;

	const uint3 inc = elementStride * 32u;
	for( uint3 pos = rowStart + elementStride * thread; pos.x < rsi0End; pos += inc )
		result[ pos.z ] = compute( arg0[ pos.x ], arg1[ pos.y ] );
}

================================================
FILE: ComputeShaders/convolutionMain.hlsl
================================================
// ggml_compute_forward_conv_1d_1s_f16_f32, GGML_TASK_COMPUTE implementation
// Dispatch [ ne10, ne02, 1 ] thread groups
Buffer<float> arg0: register( t0 );
Buffer<float> arg1: register( t1 );
RWBuffer<float> result: register( u0 );

cbuffer Constants: register( b0 )
{
	uint4 src0_elements: packoffset( c0 );
	uint4 src0_strides: packoffset( c1 );
	uint4 src1_elements: packoffset( c2 );
	uint4 result_elements: packoffset( c4 );
	uint4 result_strides: packoffset( c5 );
}

#include "groupReduce.hlsli"

// Dot product of two vectors of `len` elements, starting at arg0[ s0 ] and arg1[ s1 ].
// Only the thread #0 adds the product to `acc`; in the other threads `acc` is left unchanged.
inline void computeDotProduct( uint s0, uint s1, uint len, uint thread, inout float acc )
{
	float curr = 0;
	// Count of complete chunks of 32 elements; every thread handles 1 element from each chunk
	const uint completeVectors = len / 32;
	uint i;
	for( i = 0; i < completeVectors; i++, s0 += 32, s1 += 32 )
		curr = mad( arg0[ s0 + thread ], arg1[ s1 + thread ], curr );

	// NOTE(review): presumably reduces the per-thread values into the thread #0, implemented in groupReduce.hlsli - confirm
	horizontalSumCompatNew( thread, curr );

	if( 0 == thread )
	{
		// The remainder, when the length is not a multiple of 32, is accumulated sequentially in FP64.
		// At this point, s0 and s1 are offsets of the first remainder element.
		const uint rem = len % 32;
		if( 0 != rem )
		{
			double f64 = curr;
			for( i = 0; i < rem; i++ )
			{
				// The `precise` keyword on these locals keeps the product as a separate FP32 multiplication
				precise float a = arg0[ s0 + i ];
				precise float b = arg1[ s1 + i ];
				precise float prod = a * b;
				f64 += prod;
			}
			curr = (float)f64;
		}
		acc += curr;
	}
}

#include "miscUtils.hlsli"

// Computes 1 element of the output: the sum of dot products over all taps of the kernel.
// group.x is the index of the output element within the row, group.y is the index of the output row.
[ numthreads( 32, 1, 1 ) ]
void main( uint3 group: SV_GroupID, uint thread : SV_GroupIndex )
{
	const uint ne00 = src0_elements[ 0 ];
	// Length of the vectors being multiplied: src0_elements[ 1 ] after roundUp32
	const uint ew0 = roundUp32( src0_elements[ 1 ] );

	// The reference code iterates k over [ -nh .. +nh ] where nh = ne00 / 2, and only uses ( nh + k ).
	// This loop iterates over tap = nh + k directly, in the same order.
	const uint lastTap = ( ne00 / 2 ) * 2;
	const uint kernelStart = group.y * ew0 * ne00;

	float res = 0;
	for( uint tap = 0; tap <= lastTap; tap++ )
		computeDotProduct( kernelStart + tap * ew0, ( group.x + tap ) * ew0, ew0, thread, res );

	// Only the thread #0 has the result
	if( 0 == thread )
		result[ group.y * result_strides[ 1 ] + group.x ] = res;
}

================================================
FILE: ComputeShaders/convolutionMain2.hlsl
================================================
// ggml_compute_forward_conv_1d_2s_f16_f32, GGML_TASK_COMPUTE implementation
// Dispatch [ ne10 / 2, ne02, 1 ] thread groups
Buffer<float> arg0: register( t0 );
Buffer<float> arg1: register( t1 );
RWBuffer<float> result: register( u0 );

cbuffer Constants: register( b0 )
{
	uint4 src0_elements: packoffset( c0 );
	uint4 src0_strides: packoffset( c1 );
	uint4 src1_elements: packoffset( c2 );
	uint4 result_elements: packoffset( c4 );
	uint4 result_strides: packoffset( c5 );
}

#include "groupReduce.hlsli"

// Dot product of two vectors of `len` elements, starting at arg0[ s0 ] and arg1[ s1 ].
// Only the thread #0 adds the product to `acc`; in the other threads `acc` is left unchanged.
inline void computeDotProduct( uint s0, uint s1, uint len, uint thread, inout float acc )
{
	const uint s0End = s0 + len;
	// x is the offset in arg0, y is the offset in arg1
	uint2 pos = uint2( s0, s1 ) + thread;

	float curr = 0;
	while( pos.x < s0End )
	{
		curr = mad( arg0[ pos.x ], arg1[ pos.y ], curr );
		pos += 32u;
	}

	horizontalSumCompatNew( thread, curr );
	if( 0 == thread )
		acc += curr;
}

#include "miscUtils.hlsli"

// Computes 1 element of the output: the sum of dot products over all taps of the kernel, with the stride of 2 source rows per output element.
// group.x is the index of the output element within the row, group.y is the index of the output row.
[ numthreads( 32, 1, 1 ) ]
void main( uint3 group: SV_GroupID, uint thread : SV_GroupIndex )
{
	const uint ne00 = src0_elements[ 0 ];
	// Length of the vectors being multiplied: src0_elements[ 1 ] after roundUp32
	const uint ew0 = roundUp32( src0_elements[ 1 ] );

	// x is the offset in arg0, y is the offset in arg1
	uint2 src = uint2( group.y * ew0 * ne00, group.x * 2 * ew0 );

	// The original implementation did the following:
	// int nh = (int)( nk / 2 );
	// for( int k = -nh; k <= nh; k++ )
	// That loop runs ( nk / 2 ) * 2 + 1 iterations, which is equal to ( nk | 1 )
	const uint s0End = src.x + ( ne00 | 1u ) * ew0;

	float res = 0;
	while( src.x < s0End )
	{
		computeDotProduct( src.x, src.y, ew0, thread, res );
		src += ew0;
	}

	// Only the thread #0 has the result
	if( 0 == thread )
		result[ group.y * result_strides[ 1 ] + group.x ] = res;
}

================================================
FILE: ComputeShaders/convolutionMain2Fixed.hlsl
================================================
// Optimized version of convolutionMain2.hlsl for kernel size = 3
// Dispatch [ ( ( ne10 / 2 ) + TILE_Y - 1 ) / TILE_Y, ne02, 1 ] thread groups of this shader
#ifndef TILE_Y
static const uint TILE_Y = 8;
#endif
#ifndef THREADS
static const uint THREADS = 64;
#endif

Buffer<float> arg0: register( t0 );
Buffer<float> arg1: register( t1 );
RWBuffer<float> result: register( u0 );

cbuffer Constants: register( b0 )
{
	uint4 src0_elements: packoffset( c0 );
	uint4 src0_strides: packoffset( c1 );
	uint4 src1_elements: packoffset( c2 );
	uint4 result_elements: packoffset( c4 );
	uint4 result_strides: packoffset( c5 );
}

// The accumulators we're after
groupshared float resTemp[ TILE_Y ][ THREADS ];

// Multiply the two numbers, and add the product to the accumulator of the specified output row, in the column of the current thread
inline void accumulate( float a0, float a1, const uint resultRow, const uint thread )
{
	resTemp[ resultRow ][ thread ] = mad( a0, a1, resTemp[ resultRow ][ thread ] );
}

// Accumulate products for one column of the tile, for all `height` output rows.
// s0 and s1 are offsets of the first element to load from arg0 and arg1; `stride` is the distance between consecutive rows in both buffers.
// The function loads 3 values from arg0, and ( height * 2 + 1 ) values from arg1.
inline void convolutionTile( const uint s0, uint s1, const uint thread, const uint stride, const uint height )
{
	// Load 3 rows from arg0
	const float3 a0 = float3( arg0[ s0 ], arg0[ s0 + stride ], arg0[ s0 + stride * 2 ] );

	// Row 0
	float a1 = arg1[ s1 ];
	accumulate( a0[ 0 ], a1, 0, thread );
	s1 += stride;

	for( uint i = 1; i < height; i++ )
	{
		// Row i*2-1
		// Odd-indexed rows only contribute to a single output row, after being multiplied by kernel row #1
		a1 = arg1[ s1 ];
		accumulate( a0[ 1 ], a1, i - 1, thread );
		s1 += stride;

		// Row i*2, contributes to 2 output rows corresponding to kernel rows #0 and #2
		a1 = arg1[ s1 ];
		accumulate( a0[ 2 ], a1, i - 1, thread );
		accumulate( a0[ 0 ], a1, i, thread );
		s1 += stride;
	}

	// Row height*2 - 1
	a1 = arg1[ s1 ];
	accumulate( a0[ 1 ], a1, height - 1, thread );
	s1 += stride;

	// Row height*2
	a1 = arg1[ s1 ];
	accumulate( a0[ 2 ], a1, height - 1, thread );
}

#include "miscUtils.hlsli"

// Computes up to TILE_Y consecutive elements of an output row.
// group.x is the index of the tile within the row, group.y is the index of the output row.
[ numthreads( THREADS, 1, 1 ) ]
void main( uint3 group: SV_GroupID, uint thread : SV_GroupIndex )
{
	// Zero out the accumulators; every thread clears its own column
	for( uint row = 0; row < TILE_Y; row++ )
		resTemp[ row ][ thread ] = 0.0;
	GroupMemoryBarrierWithGroupSync();

	// Index of the first output element of this tile
	const uint tileStart = group.x * TILE_Y;
	// Count of output elements in this tile; less than TILE_Y for the incomplete last tile
	const uint height = min( TILE_Y, ( src1_elements.x / 2 ) - tileStart );
	const uint ew0 = roundUp32( src0_elements[ 1 ] );

	const uint kernelStart = group.y * ew0 * src0_elements[ 0 ];
	const uint s0End = kernelStart + ew0;
	// x is the offset in arg0, y is the offset in arg1
	uint2 pos = uint2( kernelStart, tileStart * 2 * ew0 ) + thread;
	for( ; pos.x < s0End; pos += THREADS )
		convolutionTile( pos.x, pos.y, thread, ew0, height );

	GroupMemoryBarrierWithGroupSync();

	// Now we need horizontal sums of these shared accumulators, i.e. reduce [height][THREADS] shared array into [height][1] column
	// Each step adds the upper half of the remaining columns to the lower half
	for( uint span = THREADS / 2; span > 0; span >>= 1 )
	{
		if( thread < span )
		{
			for( uint j = 0; j < height; j++ )
				resTemp[ j ][ thread ] = resTemp[ j ][ thread ] + resTemp[ j ][ thread + span ];
		}
		GroupMemoryBarrierWithGroupSync();
	}

	// And finally, store that column to global memory, 1 element per thread
	if( thread < height )
		result[ group.y * result_strides[ 1 ] + tileStart + thread ] = resTemp[ thread ][ 0 ];
}

================================================
FILE: ComputeShaders/convolutionPrep1.hlsl
================================================
// ggml_compute_forward_conv_1d_1s_f16_f32, prepare kernel data (src0)
// Dispatch [ ne01, ne02, 1 ] thread groups
Buffer<float> arg0: register( t0 );
RWBuffer<float> result: register( u0 );

cbuffer Constants: register( b0 )
{
	uint4 src0_elements: packoffset( c0 );
	uint4 src0_strides: packoffset( c1 );
}

// Round the number up to the next multiple of 32; multiples of 32 are returned unchanged
inline uint roundUp32( uint x )
{
	// Clearing the lowest 5 bits rounds down to a multiple of 32
	const uint mask = 0xFFFFFFE0u;
	return ( x + 31u ) & mask;
}

// Copies one row of the source tensor into a column of the result.
// Consecutive elements of the source row are written ew0 elements apart, where ew0 is src0_elements[ 1 ] rounded up to a multiple of 32.
// group.x and group.y are indices of the source row in dimensions 1 and 2.
[ numthreads( 32, 1, 1 ) ]
void main( uint3 group: SV_GroupID, uint thread : SV_GroupIndex )
{
	const uint ne00 = src0_elements[ 0 ];
	const uint ew0 = roundUp32( src0_elements[ 1 ] );

	// In these vectors, x is for the source buffer, y is for the result
	const uint2 elementStride = uint2( 1u, ew0 );
	uint2 pos;
	pos.x = group.y * src0_strides[ 2 ] + group.x * src0_strides[ 1 ];
	pos.y = group.y * ew0 * ne00 + group.x;
	// Offset one past the last element of the source row
	const uint rsiEnd = pos.x + ne00;

	const uint2 inc = elementStride * 32u;
	for( pos += elementStride * thread; pos.x < rsiEnd; pos += inc )
		result[ pos.y ] = arg0[ pos.x ];
}

================================================
FILE: ComputeShaders/convolutionPrep2.hlsl
================================================
// ggml_compute_forward_conv_1d_1s_f16_f32, prepare source data (src1)
// Dispatch [ ne11, 1, 1 ] thread groups
Buffer<float> arg1: register( t0 );
RWBuffer<float> result: register( u0 );

cbuffer Constants: register( b0 )
{
	uint4 src0_elements: packoffset( c0 );
	uint4 src1_elements: packoffset( c2 );
	uint4 src1_strides: packoffset( c3 );
}

#include "miscUtils.hlsli"

// Copies one row of the source tensor into a column of the result, passing every element through adjustFp16.
// Consecutive elements of the source row are written ew0 elements apart; the output starts at the offset of ( ne00 / 2 ) * ew0 elements.
// The source row is selected by the X coordinate of the thread group.
[ numthreads( 32, 1, 1 ) ]
void main( uint3 group: SV_GroupID, uint thread : SV_GroupIndex )
{
	const uint i11 = group.x;
	const uint nh = src0_elements[ 0 ] / 2;
	const uint ew0 = roundUp32( src0_elements[ 1 ] );

	// In these vectors, x is for the source buffer, y is for the result
	const uint2 elementStride = uint2( 1u, ew0 );
	uint2 pos;
	pos.x = i11 * src1_strides[ 1 ];
	pos.y = nh * ew0 + i11;
	// Offset one past the last element of the source row
	const uint rsiEnd = pos.x + src1_elements[ 0 ];

	const uint2 inc = elementStride * 32u;
	for( pos += elementStride * thread; pos.x < rsiEnd; pos += inc )
		result[ pos.y ] = adjustFp16( arg1[ pos.x ] );
}

================================================
FILE: ComputeShaders/copyConvert.hlsl
================================================
// ggml_compute_forward_dup_f32 when we only need to convert types, but not reshape the tensor
// Dispatch [ ne01, ne02, ne03 ] thread groups of this shader
Buffer<float> arg0: register( t0 );
RWBuffer<float> result: register( u0 );

cbuffer Constants: register( b0 )
{
	uint4 src0_elements: packoffset( c0 );
	uint4 src0_strides: packoffset( c1 );
	bool downcastFp32 : packoffset( c2.x );
}

#include "miscUtils.hlsli"

// Copies one row of the source tensor into the dense output tensor, optionally passing the elements through adjustFp16.
// The thread group index [ i01, i02, i03 ] selects the source row; the threads of the group are striding over the elements of the row.
[ numthreads( 32, 1, 1 ) ]
void main( uint3 group: SV_GroupID, uint thread : SV_GroupIndex )
{
	const uint nb00 = src0_strides[ 0 ];
	const uint nb01 = src0_strides[ 1 ];
	const uint nb02 = src0_strides[ 2 ];
	const uint nb03 = src0_strides[ 3 ];

	const uint ne00 = src0_elements[ 0 ];
	const uint ne01 = src0_elements[ 1 ];
	const uint ne02 = src0_elements[ 2 ];

	const uint i01 = group.x;
	const uint i02 = group.y;
	const uint i03 = group.z;

	// Length of a row
	const uint rs = ne00 * nb00;
	// Index of the destination row in the dense output: i01 + i02 * ne01 + i03 * ne01 * ne02
	// i01 is in [ 0 .. ne01 ) and i02 is in [ 0 .. ne02 ), that's why i02 is scaled by ne01, same as in copyTranspose.hlsl
	// Scaling i02 by ne02 instead maps different source rows to the same destination row whenever ne01 != ne02 and ne02 > 1
	const uint id = ( i03 * ne02 + i02 ) * ne01 + i01;

	uint rsi = i01 * nb01 + i02 * nb02 + i03 * nb03;
	uint rdi = id * rs;

	const uint rsiEnd = rsi + rs;
	rsi += thread;
	rdi += thread;
	for( ; rsi < rsiEnd; rsi += 32, rdi += 32 )
	{
		float f = arg0[ rsi ];
		[branch]
		if( downcastFp32 )
			f = adjustFp16( f );
		result[ rdi ] = f;
	}
}

================================================
FILE: ComputeShaders/copyTranspose.hlsl
================================================
// ggml_compute_forward_dup_f32 when we actually need to reshape the tensor
// Dispatch [ ne01, ne02, ne03 ] thread groups of this shader
Buffer<float> arg0: register( t0 );
RWBuffer<float> result: register( u0 );

cbuffer Constants: register( b0 )
{
	uint4 src0_elements: packoffset( c0 );
	uint4 src0_strides: packoffset( c1 );
	bool downcastFp32 : packoffset( c2.x );
}

#include "miscUtils.hlsli"

// Copies one row of the strided source tensor into the dense output tensor, optionally passing the elements through adjustFp16.
// The thread group index [ i01, i02, i03 ] selects the row; the threads of the group are striding over the elements of the row.
[ numthreads( 32, 1, 1 ) ]
void main( uint3 group: SV_GroupID, uint thread : SV_GroupIndex )
{
	const uint ne00 = src0_elements.x;
	const uint elementStride = src0_strides.x;

	// Offset of the destination row: i01*ne00 + i02*ne00*ne01 + i03*ne00*ne01*ne02
	// Refactored into this form to reduce the count of integer multiplications:
	// ( ( i03*ne02 + i02 ) * ne01 + i01 ) * ne00
	const uint rowIndex = ( group.z * src0_elements.z + group.y ) * src0_elements.y + group.x;
	const uint rdiStart = rowIndex * ne00;
	const uint rdiEnd = rdiStart + ne00;

	// Offset of the source row
	const uint3 products = group * src0_strides.yzw;
	const uint rsiStart = products.x + products.y + products.z;

	// x is the offset in the result, y is the offset in the source
	const uint2 inc = uint2( 32u, 32u * elementStride );
	uint2 pos = uint2( rdiStart + thread, rsiStart + thread * elementStride );
	while( pos.x < rdiEnd )
	{
		float f = arg0[ pos.y ];
		[branch]
		if( downcastFp32 )
			f = adjustFp16( f );
		result[ pos.x ] = f;
		pos += inc;
	}
}

================================================
FILE: ComputeShaders/dbgFindNaN.hlsl
================================================
// When reset = TRUE, write zero to the output buffer
// When reset = FALSE, test input tensor for NaN, when found at least 1 NaN element, write 1 to the output buffer

// FP32 or FP16 tensor to test for NAN
Buffer<float> tensor: register( t0 );
// A buffer with a single element for the output boolean. Zero means there were no NAN values in the tensor.
RWBuffer<uint> result: register( u0 );

cbuffer Constants: register( b0 )
{
	uint elements: packoffset( c0.x );
	bool reset : packoffset( c0.y );
}

// Thread group index is 16 bits per coordinate:
// https://learn.microsoft.com/en-us/windows/win32/api/d3d11/nf-d3d11-id3d11devicecontext-dispatch
// We want this shader to support buffers up to 2 GB.
#ifndef THREADS
static const uint THREADS = 512;
#endif
#ifndef ITERATIONS
static const uint ITERATIONS = 128;
#endif

static const uint itemsPerGroup = THREADS * ITERATIONS;

// True when the argument is a NaN: all exponent bits are set, and the mantissa is not zero.
// Tests the bits of the value instead of comparing floats.
inline bool isNaN( float x )
{
	// https://sakibsaikia.github.io/graphics/2022/01/04/Nan-Checks-In-HLSL.html
	// The left shift discards the sign bit; infinity becomes exactly 0xff000000, and NaN values are above that
	const uint withoutSign = asuint( x ) << 1;
	return withoutSign > 0xff000000u;
}

groupshared uint reductionBuffer;

// When reset is set, writes zero to the output. Otherwise, tests a slice of itemsPerGroup elements of the tensor,
// and when the slice contains at least 1 NaN, sets the output to 1 with an atomic OR.
[numthreads( THREADS, 1, 1 )]
void main( uint3 group: SV_GroupID, uint thread : SV_GroupIndex )
{
	if( reset )
	{
		if( 0 == thread )
			result[ 0 ] = 0;
		return;
	}

	// The slice of the tensor to test in this thread group
	const uint groupStart = group.x * itemsPerGroup;
	const uint rsiEnd = min( groupStart + itemsPerGroup, elements );

	// The main loop updates a local variable, and stops on the first NaN found by this thread
	bool foundNan = false;
	for( uint rsi = groupStart + thread; rsi < rsiEnd && !foundNan; rsi += THREADS )
		foundNan = isNaN( tensor[ rsi ] );

	// Reduce THREADS booleans to a single one, using group shared memory atomics
	if( 0 == thread )
		reductionBuffer = 0;
	GroupMemoryBarrierWithGroupSync();

	if( foundNan )
		InterlockedOr( reductionBuffer, 1u );

	GroupMemoryBarrierWithGroupSync();

	// When found, the first thread of the group updates the output value with global memory atomic
	if( 0 == thread )
	{
		if( 0 != reductionBuffer )
			InterlockedOr( result[ 0 ], 1u );
	}
}

================================================
FILE: ComputeShaders/diagMaskInf.hlsl
================================================
// ggml_compute_forward_diag_mask_inf_f32
RWBuffer<float> result: register( u0 );

cbuffer Constants: register( b0 )
{
	uint4 elements: packoffset( c0 );
	uint4 strides: packoffset( c1 );
	uint n_past : packoffset( c2.x );
}

static const float negativeInfinity = asfloat( 0xff800000 );

// Writes negative infinity into the elements of one row, starting at the column index ( n_past + j + 1 ), where j is the index of the row.
// group.x is the index of the row, group.y is the index in the next dimension.
[numthreads( 32, 1, 1 )]
void main( uint3 group: SV_GroupID, uint thread : SV_GroupIndex )
{
	const uint j = group.x;
	const uint elementStride = strides[ 0 ];

	// Start of the row
	const uint rowStart = group.y * strides[ 2 ] + j * strides[ 1 ];
	// End of the row
	const uint rdiEnd = rowStart + elements[ 0 ] * elementStride;
	// Index increment
	const uint rdiInc = 32 * elementStride;

	// The first index to write in this thread
	uint rdi = rowStart + ( n_past + j + thread + 1 ) * elementStride;
	while( rdi < rdiEnd )
	{
		result[ rdi ] = negativeInfinity;
		rdi += rdiInc;
	}
}

================================================
FILE: ComputeShaders/flashAttention.hlsl
================================================
// Ported from ggml_compute_forward_flash_attn_f16
// Dispatch with [ neq1*neq2*neq3, 1, 1 ] thread groups

#include "flashAttentionCommon.hlsli"
Buffer<uint> lookupTable: register( t3 );
#include "groupReduce.hlsli"

// Dot product of two vectors of `len` elements, starting at buff0[ s0 ] and buff1[ s1 ]; both buffers are read only.
// The incoming value of `acc` is ignored. Every thread accumulates its share of the products, the final reduction is up to horizontalSum.
inline void computeDotProduct( Buffer<float> buff0, Buffer<float> buff1, uint s0, uint s1, const uint len, const uint thread, inout float acc )
{
	const uint s0End = s0 + len;
	// x is the offset in buff0, y is the offset in buff1
	uint2 pos = uint2( s0, s1 ) + thread;

	float sum = 0;
	while( pos.x < s0End )
	{
		sum = mad( buff0[ pos.x ], buff1[ pos.y ], sum );
		pos += 32u;
	}

	horizontalSum( thread, sum );
	acc = sum;
}

// Dot product of two vectors of `len` elements, starting at buff0[ s0 ] and buff1[ s1 ]; this overload takes a writeable buffer for the second vector.
// The incoming value of `acc` is ignored. Every thread accumulates its share of the products, the final reduction is up to horizontalSum.
inline void computeDotProduct( Buffer<float> buff0, RWBuffer<float> buff1, uint s0, uint s1, const uint len, const uint thread, inout float acc )
{
	const uint s0End = s0 + len;
	// x is the offset in buff0, y is the offset in buff1
	uint2 pos = uint2( s0, s1 ) + thread;

	float sum = 0;
	while( pos.x < s0End )
	{
		sum = mad( buff0[ pos.x ], buff1[ pos.y ], sum );
		pos += 32u;
	}

	horizontalSum( thread, sum );
	acc = sum;
}

// Multiply `length` elements of the temp buffer, starting at index i, by the multiplier.
// When `round` is set, the products are passed through roundToFp16 before the store.
void scaleTempVector( uint i, const uint length, const uint thread, const float multiplier, bool round )
{
	const uint end = i + length;
	uint idx = i + thread;
	while( idx < end )
	{
		float scaled = temp[ idx ] * multiplier;
		if( round )
			scaled = roundToFp16( scaled );
		temp[ idx ] = scaled;
		idx += 32;
	}
}

#include "miscUtils.hlsli"

// Replaces `length` elements of the temp buffer, starting at index i, with values loaded from the lookup table
// at index fp16Rounded( temp[ i ] - tempMax ); elements equal to negative infinity become 0.
// Returns the sum of the new values, after horizontalSumBroadcast over the threads of the group.
inline float applySoftMax( uint i, const uint length, const uint thread, const float tempMax )
{
	const uint end = i + length;

	// Transform the values, and compute per-thread sum
	float sum = 0;
	uint idx = i + thread;
	while( idx < end )
	{
		float f = temp[ idx ];
		[branch]
		if( f == negativeInfinity )
			f = 0;
		else
			f = f16tof32( lookupTable[ fp16Rounded( f - tempMax ) ] );

		temp[ idx ] = f;
		sum += f;
		idx += 32;
	}

	// Reduce per-thread sum to the global one, over all threads of the group
	horizontalSumBroadcast( thread, sum );
	return sum;
}

// Entry point. Each group of 32 threads handles one ( iq1, iq2, iq3 ) row of the query tensor, in 3 steps:
// 1. temp[ ic ] = scale * dot( K row #ic, Q row ) for all nek1 rows of K, or -INF for the masked elements
// 2. softmax over these nek1 numbers, in place in the temp buffer
// 3. result[ ic ] = dot( V row #ic, temp ) for all nev1 rows of V
[ numthreads( 32, 1, 1 ) ]
void main( uint3 group: SV_GroupID, uint thread : SV_GroupIndex )
{
	// Sizes of the tensors; only the dimensions used by this shader are loaded
	const uint neq0 = q_elements[ 0 ];
	const uint neq1 = q_elements[ 1 ];
	const uint neq2 = q_elements[ 2 ];
	const uint nek1 = k_elements[ 1 ];
	const uint nev1 = v_elements[ 1 ];

	// Strides of the tensors. They are used as indices into the typed buffers, i.e. they are expressed in elements.
	// The innermost strides of q, k and v are not loaded: computeDotProduct() reads consecutive elements.
	const uint nbk1 = k_strides[ 1 ];
	const uint nbk2 = k_strides[ 2 ];
	const uint nbk3 = k_strides[ 3 ];

	const uint nbq1 = q_strides[ 1 ];
	const uint nbq2 = q_strides[ 2 ];
	const uint nbq3 = q_strides[ 3 ];

	const uint nbv1 = v_strides[ 1 ];
	const uint nbv2 = v_strides[ 2 ];
	const uint nbv3 = v_strides[ 3 ];

	const uint nb0 = res_strides[ 0 ];
	const uint nb1 = res_strides[ 1 ];
	const uint nb2 = res_strides[ 2 ];
	const uint nb3 = res_strides[ 3 ];

	const uint N = neq1;
	const uint P = nek1 - N;
	const uint M = nek1;

	// Unpack the flat index of the thread group into the 3 outer coordinates of the query tensor
	const uint ir = group.x;
	const uint iq3 = ir / ( neq2 * neq1 );
	const uint iq2 = ( ir - iq3 * neq2 * neq1 ) / neq1;
	const uint iq1 = ( ir - iq3 * neq2 * neq1 - iq2 * neq1 );

	// Start of the slice of the temp buffer which belongs to this thread group
	const uint tempIndex = ir * tempBufferStride;

	// Step 1: scaled dot products. Thread #0 stores them, and tracks the maximum in tvm.
	uint ic;
	float tvm = negativeInfinity;
	const uint s1 = iq1 * nbq1 + iq2 * nbq2 + iq3 * nbq3;
	uint s0 = iq2 * nbk2 + iq3 * nbk3;
	for( ic = 0; ic < nek1; ic++, s0 += nbk1 )
	{
		if( masked )
		{
			if( ic > P + iq1 )
			{
				// Masked element; applySoftMax() replaces -INF with 0
				if( 0 == thread )
					temp[ tempIndex + ic ] = negativeInfinity;
				continue;
			}
		}

		float dp;
		computeDotProduct( k, q, s0, s1, neq0, thread, dp );
		if( 0 == thread )
		{
			dp *= scale;
			temp[ tempIndex + ic ] = dp;
			tvm = max( tvm, dp );
		}
	}

	// Broadcast the maximum from thread #0 to the complete group
	if( 0 == thread )
		sharedAccumulators[ 0 ] = tvm;
	// Up to this point only thread #0 was writing to the temp buffer, while the softmax below loads these values on all 32 threads.
	// The temp buffer is an UAV, and a barrier for the group shared memory alone does not order UAV accesses.
	// For this reason, the barrier here covers both group shared and device memory.
	AllMemoryBarrierWithGroupSync();
	tvm = sharedAccumulators[ 0 ];

	// Step 2: softmax. Both functions distribute the elements across threads in the same way.
	{
		float sum = applySoftMax( tempIndex, M, thread, tvm );
		scaleTempVector( tempIndex, M, thread, 1.0 / sum, true );
	}

	// Step 3: products with the rows of V.
	// No barrier is required before this loop: computeDotProduct() loads temp[ tempIndex + thread + 32 * n ],
	// these are the same elements the current thread has written in scaleTempVector().
	s0 = iq2 * nbv2 + iq3 * nbv3;
	uint rdi = iq1 * nb1 + iq2 * nb2 + iq3 * nb3;
	for( ic = 0; ic < nev1; ic++, s0 += nbv1, rdi += nb0 )
	{
		float dp;
		computeDotProduct( v, temp, s0, tempIndex, nek1, thread, dp );
		if( 0 == thread )
			result[ rdi ] = dp;
	}
}

================================================
FILE: ComputeShaders/flashAttentionCommon.hlsli
================================================
// Ported from ggml_compute_forward_flash_attn_f16
// Dispatch with [ neq1*neq2*neq3, 1, 1 ] thread groups
// Input tensors of the attention. NOTE(review): the names suggest query, key and value; confirm against the code which binds these resources.
Buffer<float> q: register( t0 );
Buffer<float> k: register( t1 );
Buffer<float> v: register( t2 );

// Output tensor
RWBuffer<float> result: register( u0 );
// This temporary buffer should fit tempBufferStride * neq1 * neq2 * neq3 elements, FP32 precision
// Each thread group works with its own slice of the buffer, which starts at the element ( group index * tempBufferStride )
RWBuffer<float> temp: register( u1 );

// Constant buffer with sizes and strides of the 4 tensors, followed by the parameters of the algorithm
cbuffer Constants: register( b0 )
{
	// For each tensor: count of elements along the 4 dimensions, then the 4 strides.
	// The shaders which include this file use the strides as indices into the typed buffers, i.e. they are expressed in elements.
	uint4 q_elements: packoffset( c0 );
	uint4 q_strides: packoffset( c1 );
	uint4 k_elements: packoffset( c2 );
	uint4 k_strides: packoffset( c3 );
	uint4 v_elements: packoffset( c4 );
	uint4 v_strides: packoffset( c5 );
	uint4 res_elements: packoffset( c6 );
	uint4 res_strides: packoffset( c7 );

	// When true, the shaders store -INF instead of the dot product for the elements where ic > P + iq1
	bool masked : packoffset( c8.x );
	// 1.0 / sqrt( (double) D )
	float scale : packoffset( c8.y );
	// This number is required to be >= nek1, and ideally rounded up to either 32 (L2 line) or 128 (L1 line) bytes
	uint tempBufferStride: packoffset( c8.z );
}

// 0xff800000 has the sign bit set, all 8 exponent bits set, and zero mantissa: negative infinity in FP32
static const float negativeInfinity = asfloat( 0xff800000 );

// Round an FP32 number to the nearest value representable in FP16, and return that value upcasted back to FP32.
// Two candidates are compared: the number produced by f32tof16(), and the next FP16 bit pattern, which has a larger magnitude.
// The candidate closer to the source wins; ties are resolved towards the candidate with the even bit pattern.
// INF and NAN are passed through f32tof16() / f16tof32() without further processing.
inline float roundToFp16( const float src )
{
	const uint lo16 = f32tof16( src );
	const float lo32 = f16tof32( lo16 );

	// All 5 exponent bits set: INF or NAN
	const uint expBits = ( lo16 >> 10 ) & 0x1F;
	if( expBits == 0x1F )
		return lo32;

	const uint hi16 = lo16 + 1;
	const float hi32 = f16tof32( hi16 );

	const float errLo = abs( src - lo32 );
	const float errHi = abs( src - hi32 );

	// The first candidate was closer to the source
	if( errLo < errHi )
		return lo32;
	// The second candidate was closer to the source
	if( errLo > errHi )
		return hi32;

	// Exactly half, doing banker's rounding to nearest even
	const bool lowIsEven = ( 0 == ( lo16 & 1 ) );
	return lowIsEven ? lo32 : hi32;
}

================================================
FILE: ComputeShaders/flashAttentionCompat1.hlsl
================================================
// Dispatch with [ neq1*neq2*neq3, 1, 1 ] thread groups
#include "flashAttentionCommon.hlsli"
#include "groupReduce.hlsli"

// Dot product of `len` elements at buff0[ s0 .. ] and buff1[ s1 .. ]; the result is delivered in `acc` on thread #0 only.
// The summation order is fixed: complete 32-element vectors are accumulated with one lane per thread and reduced with horizontalSumCompatNew(),
// then thread #0 adds the remaining ( len % 32 ) products one by one.
// A simpler version, where all threads stride over the complete length, is not used here; it sums the numbers in a different order.
inline void computeDotProduct( Buffer<float> buff0, Buffer<float> buff1, uint s0, uint s1, const uint len, const uint thread, inout float acc )
{
	acc = 0;

	// Complete vectors, 32 elements each
	const uint fullVectors = len / 32;
	uint n;
	for( n = 0; n < fullVectors; n++ )
	{
		acc = mad( buff0[ s0 + thread ], buff1[ s1 + thread ], acc );
		s0 += 32;
		s1 += 32;
	}

	horizontalSumCompatNew( thread, acc );

	// Remainder; at this point s0 and s1 are pointing to the first element after the last complete vector
	if( 0 == thread )
	{
		const uint tail = len % 32;
		for( n = 0; n < tail; n++ )
		{
			precise float a = buff0[ s0 + n ];
			precise float b = buff1[ s1 + n ];
			precise float prod = a * b;
			acc += prod;
		}
	}
}

// Multiply `length` elements of the temp buffer, starting at index i, by the multiplier.
// Every thread of the group handles each 32-nd element.
void scaleTempVector( uint i, const uint length, const uint thread, const float multiplier )
{
	const uint end = i + length;
	i += thread;
	while( i < end )
	{
		const float scaled = temp[ i ] * multiplier;
		temp[ i ] = scaled;
		i += 32;
	}
}

// Entry point. Each group of 32 threads handles one ( iq1, iq2, iq3 ) row of the query tensor,
// and fills the slice of the temp buffer with nek1 numbers: scale * dot( K row #ic, Q row ), or -INF for the masked elements.
// This shader does not compute softmax, and does not write the result tensor.
[ numthreads( 32, 1, 1 ) ]
void main( uint3 group: SV_GroupID, uint thread : SV_GroupIndex )
{
	// Sizes of the tensors; only the dimensions used by this shader are loaded
	const uint neq0 = q_elements[ 0 ];
	const uint neq1 = q_elements[ 1 ];
	const uint neq2 = q_elements[ 2 ];
	const uint nek1 = k_elements[ 1 ];

	// Strides, used as indices into the typed buffers
	const uint nbk1 = k_strides[ 1 ];
	const uint nbk2 = k_strides[ 2 ];
	const uint nbk3 = k_strides[ 3 ];

	const uint nbq1 = q_strides[ 1 ];
	const uint nbq2 = q_strides[ 2 ];
	const uint nbq3 = q_strides[ 3 ];

	const uint N = neq1;
	const uint P = nek1 - N;

	// Unpack the flat index of the thread group into the 3 outer coordinates of the query tensor
	const uint ir = group.x;
	const uint iq3 = ir / ( neq2 * neq1 );
	const uint iq2 = ( ir - iq3 * neq2 * neq1 ) / neq1;
	const uint iq1 = ( ir - iq3 * neq2 * neq1 - iq2 * neq1 );

	// Start of the slice of the temp buffer which belongs to this thread group
	const uint tempIndex = ir * tempBufferStride;

	// The row of Q is the same for all iterations of the loop
	const uint s1 = iq1 * nbq1 + iq2 * nbq2 + iq3 * nbq3;

	for( uint ic = 0; ic < nek1; ic++ )
	{
		if( masked )
		{
			if( ic > P + iq1 )
			{
				if( 0 == thread )
					temp[ tempIndex + ic ] = negativeInfinity;
				continue;
			}
		}

		// Row #ic of K, from the same 2 outer coordinates as the query
		const uint s0 = ic * nbk1 + iq2 * nbk2 + iq3 * nbk3;
		float dp;
		computeDotProduct( k, q, s0, s1, neq0, thread, dp );
		// The product is only complete on thread #0
		if( 0 == thread )
			temp[ tempIndex + ic ] = dp * scale;
	}
}

================================================
FILE: ComputeShaders/flashAttentionCompat2.hlsl
================================================
// Dispatch with [ ( neq1*neq2*neq3 + 31 ) / 32, 1, 1 ] thread groups
#include "flashAttentionCommon.hlsli"
// Lookup table indexed with FP16 bit patterns; applySoftMax() treats the loaded entries as FP16 bit patterns too, and upcasts them with f16tof32()
Buffer<uint> lookupTable: register( t3 );

// Multiply `length` consecutive elements of the temp buffer, starting at index i, by the multiplier.
// The complete range is processed sequentially by the calling thread.
void scaleTempVector( uint i, const uint length, const float multiplier )
{
	const uint end = i + length;
	while( i < end )
	{
		const float scaled = temp[ i ] * multiplier;
		// Rounding in this shader causes numerical errors on my GeForce 1080 Ti GPU, driver 527.56
		// scaled = roundToFp16( scaled );
		temp[ i ] = scaled;
		i++;
	}
}

// Compute maximum of `length` consecutive elements of the temp buffer, starting at index i.
// The complete range is scanned sequentially by the calling thread. Returns -INF when the length is 0.
inline float computeTempVectorMax( uint i, const uint length )
{
	const uint end = i + length;
	float best = negativeInfinity;
	while( i < end )
	{
		best = max( best, temp[ i ] );
		i++;
	}
	return best;
}

#include "miscUtils.hlsli"
#include "fp64Utils.hlsli"

// Transform temp[ i ] = exp( temp[ i ] - tempMax ) for `length` consecutive elements starting at index i, and return the sum of the transformed values.
// The exponent is not computed: the difference is rounded to FP16, and the bits are used as an index into lookupTable.
// NOTE(review): this assumes the table holds FP16 values of exp( x ) for every FP16 x; the content of the table is not visible in this file.
// Elements equal to -INF are replaced with 0, and they do not contribute to the sum.
// The sum is accumulated in FP64 precision. The complete range is processed sequentially by the calling thread.
inline double applySoftMax( uint i, const uint length, const float tempMax )
{
	const uint end = i + length;
	double total = 0;
	while( i < end )
	{
		const float src = temp[ i ];
		float res = 0;
		[branch]
		if( src != negativeInfinity )
		{
			const uint key = fp16Rounded( src - tempMax );
			res = f16tof32( lookupTable[ key ] );
			total += res;
		}

		temp[ i ] = res;
		i++;
	}
	return total;
}

// Entry point. Unlike the other flash attention shaders, here each thread, not each thread group, handles a complete row of the temp buffer:
// the thread computes softmax over the nek1 numbers of the row, in place.
// The dispatch is rounded up to a multiple of 32 threads, the surplus threads exit immediately.
[ numthreads( 32, 1, 1 ) ]
void main( uint3 dtid: SV_DispatchThreadID )
{
	// Only the sizes are required; this shader reads and writes nothing but the temp buffer and the lookup table
	const uint neq1 = q_elements[ 1 ];
	const uint neq2 = q_elements[ 2 ];
	const uint neq3 = q_elements[ 3 ];
	const uint nek1 = k_elements[ 1 ];

	// Length of the rows
	const uint M = nek1;

	// Index of the row; the count of rows is neq1 * neq2 * neq3
	const uint ir = dtid.x;
	if( ir >= neq1 * neq2 * neq3 )
		return;

	const uint tempIndex = ir * tempBufferStride;

	// Softmax
	float tvm = computeTempVectorMax( tempIndex, M );
	double sum = applySoftMax( tempIndex, M, tvm );

	// NOTE(review): fp64Utils.hlsli is included above and implements div64(), yet the division here uses the operator.
	// Presumably FP64 division requires the ExtendedDoublesShaderInstructions hardware feature; confirm whether div64( 1.0, sum ) was intended.
	scaleTempVector( tempIndex, M, (float)( 1.0 / sum ) );
}

================================================
FILE: ComputeShaders/flashAttentionCompat3.hlsl
================================================
// Dispatch with [ neq1*neq2*neq3, 1, 1 ] thread groups
#include "flashAttentionCommon.hlsli"
#include "groupReduce.hlsli"
#include "miscUtils.hlsli"

// Round `len` elements of the temp buffer, starting at index i, to FP16 precision; the values stay in FP32 format.
// Every thread of the group handles each 32-nd element.
inline void roundTempVector( uint i, const uint len, const uint thread )
{
	const uint end = i + len;
	i += thread;
	while( i < end )
	{
		const float rounded = roundToFp16( temp[ i ] );
		temp[ i ] = rounded;
		i += 32;
	}
}

// Dot product of `len` elements at buff0[ s0 .. ] and buff1[ s1 .. ]; the result is delivered in `acc` on thread #0 only.
// The summation order is fixed: complete 32-element vectors are accumulated with one lane per thread and reduced with horizontalSumCompatNew(),
// then thread #0 adds the remaining ( len % 32 ) products one by one, with the accumulator temporarily upcasted to FP64.
// A simpler version, where all threads stride over the complete length, is not used here; it sums the numbers in a different order.
inline void computeDotProduct( Buffer<float> buff0, RWBuffer<float> buff1, uint s0, uint s1, const uint len, const uint thread, inout float acc )
{
	acc = 0;

	// Complete vectors, 32 elements each
	const uint fullVectors = len / 32;
	uint n;
	for( n = 0; n < fullVectors; n++ )
	{
		acc = mad( buff0[ s0 + thread ], buff1[ s1 + thread ], acc );
		s0 += 32;
		s1 += 32;
	}

	horizontalSumCompatNew( thread, acc );

	// Remainder; at this point s0 and s1 are pointing to the first element after the last complete vector
	const uint tail = len % 32;
	if( 0 == thread && 0 != tail )
	{
		double wide = acc;
		for( n = 0; n < tail; n++ )
		{
			precise float a = buff0[ s0 + n ];
			precise float b = buff1[ s1 + n ];
			precise float prod = a * b;
			wide += prod;
		}
		acc = (float)wide;
	}
}

// Entry point. Each group of 32 threads handles one ( iq1, iq2, iq3 ) row of the query tensor:
// rounds the nek1 numbers in the slice of the temp buffer to FP16 precision, then computes result[ ic ] = dot( V row #ic, temp ) for all nev1 rows of V.
[ numthreads( 32, 1, 1 ) ]
void main( uint3 group: SV_GroupID, uint thread : SV_GroupIndex )
{
	// Sizes of the tensors; only the dimensions used by this shader are loaded
	const uint neq1 = q_elements[ 1 ];
	const uint neq2 = q_elements[ 2 ];
	const uint nek1 = k_elements[ 1 ];
	const uint nev1 = v_elements[ 1 ];

	// Strides, used as indices into the typed buffers
	const uint nbv1 = v_strides[ 1 ];
	const uint nbv2 = v_strides[ 2 ];
	const uint nbv3 = v_strides[ 3 ];

	const uint nb0 = res_strides[ 0 ];
	const uint nb1 = res_strides[ 1 ];
	const uint nb2 = res_strides[ 2 ];
	const uint nb3 = res_strides[ 3 ];

	// Unpack the flat index of the thread group into the 3 outer coordinates of the query tensor
	const uint ir = group.x;
	const uint iq3 = ir / ( neq2 * neq1 );
	const uint iq2 = ( ir - iq3 * neq2 * neq1 ) / neq1;
	const uint iq1 = ( ir - iq3 * neq2 * neq1 - iq2 * neq1 );

	// Start of the slice of the temp buffer which belongs to this thread group
	const uint tempIndex = ir * tempBufferStride;

	roundTempVector( tempIndex, nek1, thread );
	// computeDotProduct() loads some elements of temp on thread #0 after they were rounded by other threads, need a barrier which covers UAV writes
	AllMemoryBarrierWithGroupSync();

	uint rdi = iq1 * nb1 + iq2 * nb2 + iq3 * nb3;
	for( uint ic = 0; ic < nev1; ic++, rdi += nb0 )
	{
		// Row #ic of V, from the same 2 outer coordinates as the query
		const uint s0 = ic * nbv1 + iq2 * nbv2 + iq3 * nbv3;
		float dp;
		computeDotProduct( v, temp, s0, tempIndex, nek1, thread, dp );
		// The product is only complete on thread #0
		if( 0 == thread )
			result[ rdi ] = dp;
	}
}

================================================
FILE: ComputeShaders/fmaRepeat1.hlsl
================================================
// Implementation of fmaRepeat() when both source arguments have same size and strides
// Dispatch [ nb[ 1 ], nb[ 2 ], nb[ 3 ] ] thread groups of this shader, where nb is size of the destination tensor
// The tensor being updated in place: every element is multiplied by a number from patternMul, then incremented by a number from patternAdd
RWBuffer<float> tensor: register( u0 );
// The two patterns; in this shader they share the same size and strides
Buffer<float> patternMul: register( t0 );
Buffer<float> patternAdd: register( t1 );

cbuffer Constants: register( b0 )
{
	// Size and strides of the tensor being updated
	uint4 tensorSize: packoffset( c0 );
	uint4 tensorStrides: packoffset( c1 );
	// Size and strides of both patterns
	uint4 patternSize: packoffset( c2 );
	uint4 patternStrides: packoffset( c3 );
}

// Count of threads in the group; another value can be supplied by defining the macro before this point
#ifndef THREADS
#define THREADS 512
#endif

#include "repeatUtils.hlsli"

// Update a single element of the tensor: tensor[ idx ] = tensor[ idx ] * mul + add
// Both intermediate values are declared precise, the multiplication and the addition stay two separate operations.
inline void computeSimple( uint idx, float mul, float add )
{
	precise float product = tensor[ idx ] * mul;
	precise float updated = product + add;
	tensor[ idx ] = updated;
}

// Entry point. Each thread group updates one row of the tensor; the 3 components of the group index select the row.
// The pattern is repeated over the tensor: the row of the pattern is selected with ( group % patternSize.yzw ).
// There are 3 code paths, selected by the width of the pattern.
[ numthreads( THREADS, 1, 1 ) ]
void main( uint3 group: SV_GroupID, uint thread : SV_GroupIndex )
{
	// Iterator over the row of the tensor; tensorIteratorState and the ROW_LOOP macros are defined in repeatUtils.hlsli
	uint3 it = tensorIteratorState( group, thread, tensorSize, tensorStrides );
	// Index of the first element in the row of the patterns
	uint rsi = rowOffset( group % patternSize.yzw, patternStrides );

	if( patternSize[ 0 ] == 1 )
	{
		// The pattern only has 1 column - broadcasting over the row
		const float pMul = patternMul[ rsi ];
		const float pAdd = patternAdd[ rsi ];
		ROW_LOOP( it )
			computeSimple( it.x, pMul, pAdd );
	}
	else if( patternSize[ 0 ] <= THREADS )
	{
		// pattern size doesn't exceed thread group size: load pattern value outside of the loop
		// Only use the largest count of threads which is a multiple of the pattern width; the remaining threads of the group exit.
		// NOTE(review): presumably ROW_LOOP_EX advances by threadsPerGroup columns, which keeps the pattern column of each thread constant; confirm in repeatUtils.hlsli
		const uint threadsPerGroup = THREADS - ( THREADS % patternSize[ 0 ] );
		if( thread >= threadsPerGroup )
			return;

		rsi += ( thread % patternSize[ 0 ] ) * patternStrides[ 0 ];
		const float pMul = patternMul[ rsi ];
		const float pAdd = patternAdd[ rsi ];
		ROW_LOOP_EX( it, threadsPerGroup, tensorStrides )
			computeSimple( it.x, pMul, pAdd );
	}
	else
	{
		// Pattern rows are larger than the thread group, need to stream from both buffers
		// rsiInc is the per-iteration advance of THREADS columns, rsiDec is the length of the complete pattern row
		const uint rsiInc = THREADS * patternStrides[ 0 ];
		const uint rsiDec = patternSize[ 0 ] * patternStrides[ 0 ];
		const uint rsiEnd = rsi + rsiDec;
		rsi += thread * patternStrides[ 0 ];

		ROW_LOOP( it )
		{
			precise float f = tensor[ it.x ];
			float mul = patternMul[ rsi ];
			float add = patternAdd[ rsi ];
			// Advance to the next column of the pattern, wrapping around at the end of the row.
			// A single subtraction is sufficient because in this branch the pattern is wider than THREADS, i.e. rsiInc < rsiDec
			rsi += rsiInc;
			if( rsi >= rsiEnd )
				rsi -= rsiDec;
			f *= mul;
			f += add;
			tensor[ it.x ] = f;
		}
	}
}

================================================
FILE: ComputeShaders/fmaRepeat164.hlsl
================================================
// Same shader as fmaRepeat1.hlsl, compiled with 64 threads per group instead of the default 512
#define THREADS 64
#include "fmaRepeat1.hlsl"

================================================
FILE: ComputeShaders/fmaRepeat2.hlsl
================================================
// Implementation of fmaRepeat() when source arguments have different shape or VRAM layout
// Dispatch [ nb[ 1 ], nb[ 2 ], nb[ 3 ] ] thread groups of this shader, where nb is size of the destination tensor
// The tensor being updated in place: every element is multiplied by a number from patternMul, then incremented by a number from patternAdd
RWBuffer<float> tensor: register( u0 );
// The two patterns; in this shader each of them has its own size and strides
Buffer<float> patternMul: register( t0 );
Buffer<float> patternAdd: register( t1 );

cbuffer Constants: register( b0 )
{
	// Size and strides of the tensor being updated
	uint4 tensorSize: packoffset( c0 );
	uint4 tensorStrides: packoffset( c1 );
	// Size and strides of the multipliers
	uint4 patternSizeMul: packoffset( c2 );
	uint4 patternStridesMul: packoffset( c3 );
	// Size and strides of the addends
	uint4 patternSizeAdd: packoffset( c4 );
	uint4 patternStridesAdd: packoffset( c5 );
}

// Count of threads in the group; another value can be supplied by defining the macro before this point
#ifndef THREADS
#define THREADS 32
#endif

#include "repeatUtils.hlsli"

// Load an element from a row of the pattern, repeating the pattern along the row.
// rowStart is the index of the first element of the row, i is the column in the destination tensor; it wraps around the width of the pattern, size.x
inline float loadPattern( Buffer<float> buffer, uint rowStart, uint i, uint4 size, uint4 stride )
{
	const uint column = i % size.x;
	const uint index = column * stride.x + rowStart;
	return buffer[ index ];
}

// Entry point. Each thread group updates one row of the tensor; the 3 components of the group index select the row.
// Both patterns are repeated over the tensor: their rows are selected with ( group % size.yzw ), their columns with ( column % size.x )
[ numthreads( THREADS, 1, 1 ) ]
void main( uint3 group: SV_GroupID, uint thread : SV_GroupIndex )
{
	// Iterator over the row of the tensor: x = current index, y = increment, z = end of the row
	uint3 it = tensorIteratorState( group, thread, tensorSize, tensorStrides );
	const uint rsiMul = rowOffset( group % patternSizeMul.yzw, patternStridesMul );
	const uint rsiAdd = rowOffset( group % patternSizeAdd.yzw, patternStridesAdd );

	// `i` is the column of the element at it.x; loadPattern() needs it to find the matching column of the patterns.
	// The thread starts at the column #thread, and the complete group covers the row in parallel,
	// so every iteration moves the thread THREADS columns forward. Incrementing `i` by 1 would load wrong pattern elements
	// for all iterations but the first one, unless the pattern is 1 column wide.
	// NOTE(review): this assumes tensorIteratorState() sets it.y to the step of THREADS columns, same as fmaRepeat1.hlsl relies on; confirm in repeatUtils.hlsli
	for( uint i = thread; it.x < it.z; it.x += it.y, i += THREADS )
	{
		precise float f = tensor[ it.x ];
		float mul = loadPattern( patternMul, rsiMul, i, patternSizeMul, patternStridesMul );
		float add = loadPattern( patternAdd, rsiAdd, i, patternSizeAdd, patternStridesAdd );
		f *= mul;
		f += add;
		tensor[ it.x ] = f;
	}
}

================================================
FILE: ComputeShaders/fp64Utils.hlsli
================================================
// TODO: compile another version of these shaders, and use it on GPUs with the ExtendedDoublesShaderInstructions flag; it will be slightly faster
// https://learn.microsoft.com/en-us/windows/win32/api/d3d11/ns-d3d11-d3d11_feature_data_d3d11_options
// When the macro is 0, which is the default, div64() below avoids the FP64 division operator
#ifndef ExtendedDoublesShaderInstructions
#define ExtendedDoublesShaderInstructions 0
#endif

// Compute num/den in FP64 precision
// When ExtendedDoublesShaderInstructions is non-zero, this is the division operator.
// Otherwise the function starts with the FP32 reciprocal of the denominator, and refines it with 2 Newton-Raphson steps in FP64.
inline double div64( double num, double den )
{
#if ExtendedDoublesShaderInstructions
	return num / den;
#else
	// https://en.wikipedia.org/wiki/Division_algorithm#Newton%E2%80%93Raphson_division
	// Initial estimate in FP32 precision
	double inv = 1.0f / (float)den;
	// Each step is inv = inv + inv * ( 1 - den * inv )
	inv += inv * ( 1.0 - den * inv );
	inv += inv * ( 1.0 - den * inv );
	return num * inv;
#endif
}

// Compute sqrt(x) in FP64 precision
// The function starts with the FP32 square root, and refines it with 2 steps of the Babylonian method: root = ( root + x / root ) / 2
// Zero is returned as is. Negative inputs are not handled specially, the outcome is whatever the FP32 sqrt() produces for them.
inline double sqrt64( double x )
{
	// Without this check, the estimate is 0.0, and the first refinement step evaluates 0 / 0, which is NaN
	if( x == 0.0 )
		return x;

	double root = sqrt( (float)x );
	root = 0.5 * ( root + div64( x, root ) );
	root = 0.5 * ( root + div64( x, root ) );
	return root;
}

================================================
FILE: ComputeShaders/groupReduce.hlsli
================================================
// Scratch buffer in the group shared memory for the reductions in this file, one FP32 slot per thread of a 32-thread group
groupshared float sharedAccumulators[ 32 ];

// Compute horizontal sum of the numbers, over the 32 threads of the group. The result is only correct on the thread #0 of the group.
// This is a tree reduction in the shared buffer: the count of active threads halves on each step, 16, 8, 4, then 2.
// NOTE(review): the function begins with a store to the shared buffer, without a barrier. When it is called repeatedly,
// nothing orders that store against the final loads of the previous reduction; confirm this is safe on the target GPUs.
void horizontalSum( const uint thread, inout float sum )
{
	sharedAccumulators[ thread ] = sum;

	for( uint span = 16; span >= 2; span >>= 1 )
	{
		GroupMemoryBarrierWithGroupSync();
		if( thread < span )
		{
			// Threads [ 0 .. span - 1 ] load from [ span .. 2 * span - 1 ], the loads and stores of a step never overlap
			sum += sharedAccumulators[ thread + span ];
			sharedAccumulators[ thread ] = sum;
		}
	}

	// The last step doesn't need a store, the value stays in the register of the thread #0
	GroupMemoryBarrierWithGroupSync();
	if( 0 == thread )
		sum += sharedAccumulators[ 1 ];
}

// Compute horizontal sum of the numbers, and broadcast to all threads of the group.
// Implemented as horizontalSum(), followed by a round trip of the total through the slot #0 of the shared buffer.
void horizontalSumBroadcast( const uint thread, inout float sum )
{
	// After this call, the total is only available on the thread #0
	horizontalSum( thread, sum );

	const bool isFirstThread = ( 0 == thread );
	if( isFirstThread )
		sharedAccumulators[ 0 ] = sum;

	GroupMemoryBarrierWithGroupSync();
	sum = sharedAccumulators[ 0 ];
}

// Compute horizontal sum of the numbers, in the order equal to the CPU-running dot product implementation.
// The result is only correct on the thread #0 of the group.
// The 32 threads are treated as 4 AVX vectors of 8 lanes; the comments in the body quote the CPU instructions of the corresponding steps.
void horizontalSumCompat( const uint thread, inout float sum )
{
	sharedAccumulators[ thread ] = sum;
	GroupMemoryBarrierWithGroupSync();

	// Step 1, threads [ 0 .. 7 ] and [ 16 .. 23 ]: these are the threads with the bit #3 of the index cleared
	// sum01 = _mm256_add_ps( sum0, sum1 );
	// sum23 = _mm256_add_ps( sum2, sum3 );
	if( ( thread & 8 ) == 0 )
	{
		sum += sharedAccumulators[ thread + 8 ];
		sharedAccumulators[ thread ] = sum;
	}
	GroupMemoryBarrierWithGroupSync();

	// Step 2, threads [ 0 .. 7 ]
	// sum0123 = _mm256_add_ps( sum01, sum23 );
	if( thread < 8 )
	{
		sum += sharedAccumulators[ thread + 16 ];
		sharedAccumulators[ thread ] = sum;
	}
	GroupMemoryBarrierWithGroupSync();

	// Step 3, threads [ 0 .. 3 ]
	// const __m128 r4 = _mm_add_ps( _mm256_castps256_ps128( sum0123 ), _mm256_extractf128_ps( sum0123, 1 ) );
	if( thread < 4 )
	{
		sum += sharedAccumulators[ thread + 4 ];
		sharedAccumulators[ thread ] = sum;
	}
	GroupMemoryBarrierWithGroupSync();

	// Step 4, threads [ 0 .. 1 ]
	// const __m128 r2 = _mm_add_ps( r4, _mm_movehl_ps( r4, r4 ) );
	if( thread < 2 )
	{
		sum += sharedAccumulators[ thread + 2 ];
		sharedAccumulators[ thread ] = sum;
	}
	GroupMemoryBarrierWithGroupSync();

	// Step 5, thread #0 only
	// const __m128 r1 = _mm_add_ss( r2, _mm_movehdup_ps( r2 ) );
	if( 0 == thread )
		sum += sharedAccumulators[ 1 ];
}

// Compute horizontal sum of the numbers, in yet another creative summation order recently implemented in the upstream: GGML_F32x8_REDUCE
// The result is only correct on the thread #0 of the group.
// The partner of each active thread is found by setting one bit in the index of the thread: 8, then 16, 4, 1, and finally 2.
void horizontalSumCompatNew( const uint thread, inout float sum )
{
	sharedAccumulators[ thread ] = sum;
	GroupMemoryBarrierWithGroupSync();

	// Threads [ 0 .. 7 ] and [ 16 .. 23 ]
	if( ( thread & 8 ) == 0 )
	{
		sum += sharedAccumulators[ thread | 8 ];
		sharedAccumulators[ thread ] = sum;
	}
	GroupMemoryBarrierWithGroupSync();

	// Threads [ 0 .. 7 ]
	if( thread < 8 )
	{
		sum += sharedAccumulators[ thread | 0x10 ];
		sharedAccumulators[ thread ] = sum;
	}
	GroupMemoryBarrierWithGroupSync();

	// Threads [ 0 .. 3 ]
	if( thread < 4 )
	{
		sum += sharedAccumulators[ thread | 4 ];
		sharedAccumulators[ thread ] = sum;
	}
	GroupMemoryBarrierWithGroupSync();

	// Threads [ 0, 2 ], they add the values of the threads [ 1, 3 ]
	if( 0 == thread || 2 == thread )
	{
		sum += sharedAccumulators[ thread | 1 ];
		sharedAccumulators[ thread ] = sum;
	}
	GroupMemoryBarrierWithGroupSync();

	// Thread #0 adds the value of the thread #2
	if( 0 == thread )
		sum += sharedAccumulators[ 2 ];
}


// Compute horizontal maximum of the numbers, and broadcast to all threads of the group.
// This is a tree reduction in the shared buffer. Unlike horizontalSum(), the loop runs all the way down to a single thread,
// the final result ends up in the slot #0 of the buffer where all threads load it from.
void horizontalMaxBroadcast( const uint thread, inout float ax )
{
	sharedAccumulators[ thread ] = ax;

	for( uint span = 16; span >= 1; span >>= 1 )
	{
		GroupMemoryBarrierWithGroupSync();
		if( thread < span )
		{
			const float other = sharedAccumulators[ thread + span ];
			ax = max( ax, other );
			sharedAccumulators[ thread ] = ax;
		}
	}

	GroupMemoryBarrierWithGroupSync();
	ax = sharedAccumulators[ 0 ];
}

================================================
FILE: ComputeShaders/groupReduce64.hlsli
================================================
// Scratch buffer in the group shared memory for the reductions in this file, one FP32 slot per thread of a 64-thread group
groupshared float sharedAccumulators[ 64 ];

// Compute horizontal sum of the numbers, over the 64 threads of the group. The result is only correct on the thread #0 of the group.
// This is a tree reduction in the shared buffer: the count of active threads halves on each step, 32, 16, 8, 4, then 2.
void horizontalSum( const uint thread, inout float sum )
{
	sharedAccumulators[ thread ] = sum;

	for( uint span = 32; span >= 2; span >>= 1 )
	{
		GroupMemoryBarrierWithGroupSync();
		if( thread < span )
		{
			// Threads [ 0 .. span - 1 ] load from [ span .. 2 * span - 1 ], the loads and stores of a step never overlap
			sum += sharedAccumulators[ thread + span ];
			sharedAccumulators[ thread ] = sum;
		}
	}

	// The last step doesn't need a store, the value stays in the register of the thread #0
	GroupMemoryBarrierWithGroupSync();
	if( 0 == thread )
		sum += sharedAccumulators[ 1 ];
}

// Compute horizontal sum of the numbers, and broadcast to all threads of the group.
// Implemented as horizontalSum(), followed by a round trip of the total through the slot #0 of the shared buffer.
void horizontalSumBroadcast( const uint thread, inout float sum )
{
	// After this call, the total is only available on the thread #0
	horizontalSum( thread, sum );

	const bool isFirstThread = ( 0 == thread );
	if( isFirstThread )
		sharedAccumulators[ 0 ] = sum;

	GroupMemoryBarrierWithGroupSync();
	sum = sharedAccumulators[ 0 ];
}

// Compute horizontal maximum of the numbers, and broadcast to all threads of the group.
// This is a tree reduction in the shared buffer which runs all the way down to a single thread,
// the final result ends up in the slot #0 of the buffer where all 64 threads load it from.
void horizontalMaxBroadcast( const uint thread, inout float ax )
{
	sharedAccumulators[ thread ] = ax;

	for( uint span = 32; span >= 1; span >>= 1 )
	{
		GroupMemoryBarrierWithGroupSync();
		if( thread < span )
		{
			const float other = sharedAccumulators[ thread + span ];
			ax = max( ax, other );
			sharedAccumulators[ thread ] = ax;
		}
	}

	GroupMemoryBarrierWithGroupSync();
	ax = sharedAccumulators[ 0 ];
}

================================================
FILE: ComputeShaders/matReshapePanels.hlsl
================================================
// This shader reshapes a matrix into the shape expected by mulMatTiledEx.hlsl and mulMatByRowTiledEx.hlsl compute shaders
// It's called in runtime, also while loading models from disk.
// So far, it's only used when running on AMD GPUs.
// Height of the panels, and the count of threads in the group.
// The default below is only declared when no TILE_SIZE macro was supplied from outside.
#ifndef TILE_SIZE
static const uint TILE_SIZE = 32;
#endif

// Input tensor
Buffer<float> source: register( t0 );
// Output tensor
RWBuffer<float> result: register( u0 );

cbuffer Constants: register( b0 )
{
	// Size and strides of the input tensor
	uint4 arg0Size: packoffset( c0 );
	uint4 arg0Strides: packoffset( c1 );
	// Count of elements per panel
	// Note the variable is in the second lane of the register, this buffer has nothing at c2.x
	uint panelSize : packoffset( c2.y );
	// Layer strides of the output matrix
	// They are multiplied by the Y and Z components of the thread group index
	uint2 layerStrides: packoffset( c2.z );
}

// Horizontal add: sum of the two components of the vector
inline uint hadd( uint2 v2 )
{
	const uint total = v2.x + v2.y;
	return total;
}

// Staging buffer for one square tile of the matrix; the shader stores rows of the source there in transposed order
groupshared float tileBuffer[ TILE_SIZE ][ TILE_SIZE ];

// Entry point. Each thread group produces one panel of the output: TILE_SIZE rows of the source matrix, stored column by column.
// group.x is the index of the panel, group.yz select the layer of the tensor.
// Within the panel, every column takes TILE_SIZE consecutive elements, and the thread index is the row within the panel.
[ numthreads( TILE_SIZE, 1, 1 ) ]
void main( const uint3 group: SV_GroupID, const uint thread : SV_GroupIndex )
{
	// Output index: start of the layer, plus start of the panel, plus the row of this thread
	uint rdi = hadd( group.yz * layerStrides );
	rdi += group.x * panelSize;
	rdi += thread;

	// Input index: for now, start of the layer
	uint rsi = hadd( group.yz * arg0Strides.zw );
	// First source row of the panel, and the source row handled by this thread
	const uint baseY = group.x * TILE_SIZE;
	const uint dispatchThread = baseY + thread;
	// Reshaping into a column major horizontal panel, height = TILE_SIZE, width = width of the source matrix
	uint width = arg0Size.x;
	// Usually TILE_SIZE; can be less for the last panel on the matrix when we need to generate zeros instead of loading these numbers
	const uint height = min( TILE_SIZE, arg0Size.y - baseY );

	if( arg0Strides.x == 1 )
	{
		// The input matrix is row major, can improve performance with coalesced loads and group shared buffer.
		rsi += baseY * arg0Strides.y;

		const uint widthCompleteTiles = width / TILE_SIZE;

		if( height < TILE_SIZE )
		{
			// This thread group was dispatched for the last panel of the matrix, it doesn't have enough rows
			// Write zeros to the corresponding elements of the groupshared buffer
			// The loads below only store elements with j < height, these zeros stay there for all tiles of the panel
			for( uint j = height; j < TILE_SIZE; j++ )
				tileBuffer[ thread ][ j ] = 0.0;
		}

		for( uint i = 0; i < widthCompleteTiles; i++, rsi += TILE_SIZE )
		{
			// Load [ TILE_SIZE ] * [ TILE_SIZE ] block with fully coalesced loads, store to group shared buffer in transposed order
			// In this loop, the thread index is the column within the tile, and j is the row
			uint rsiTile = rsi + thread;
			uint j;
			for( j = 0; j < height; j++, rsiTile += arg0Strides.y )
			{
				// Each iteration of the loop loads a row of [ TILE_SIZE ] elements from the corresponding row of the source tensor
				// Fully coalesced load
				float f = source[ rsiTile ];
				// Random store but the local memory's fast, this works rather well in practice
				tileBuffer[ thread ][ j ] = f;
			}

			// Wait for the complete tile to arrive into the buffer
			GroupMemoryBarrierWithGroupSync();

			// Copy from group shared buffer to output tensor
			// In this loop, j is the column within the tile, and the thread index is the row
			for( j = 0; j < TILE_SIZE; j++, rdi += TILE_SIZE )
			{
				// Fully coalesced loads and stores
				float f = tileBuffer[ j ][ thread ];
				result[ rdi ] = f;
			}

			// The next iteration overwrites the buffer, need to wait until all threads have consumed the current tile
			GroupMemoryBarrierWithGroupSync();
		}

		// The remaining columns, less than TILE_SIZE of them, are handled by the generic loop at the end of the function
		width %= TILE_SIZE;
		if( 0 == width )
			return;
		// rsi already includes the offset of the first row of the panel, and of the columns consumed so far; move to the row of this thread
		rsi += thread * arg0Strides.y;
	}
	else
		rsi += dispatchThread * arg0Strides.y;

	// Generic version without the shared buffer: each thread walks along its row of the source, and stores 1 element per column of the panel.
	// Threads outside of the matrix never load anything, they only generate zeros.
	for( uint i = 0; i < width; i++ )
	{
		float f;
		[branch]
		if( thread < height )
			f = source[ rsi ];
		else
			f = 0.0;
		rsi += arg0Strides.x;

		result[ rdi ] = f;
		rdi += TILE_SIZE;
	}
}

================================================
FILE: ComputeShaders/miscUtils.hlsli
================================================
// When GPUs are converting FP32 to FP16, they always truncate towards 0, documented there:
// https://learn.microsoft.com/en-us/windows/win32/direct3d10/d3d10-graphics-programming-guide-resources-data-conversion#conververting-from-a-higher-range-representation-to-a-lower-range-representation
// Whisper code uses _mm_cvtps_ph( x, 0 ), the 0 stands for "Round to nearest even": https://www.felixcloutier.com/x86/vcvtps2ph
// This function adjusts the FP32 value so that the truncation towards 0 results in the value equal to what the CPU is doing:
// when the truncated number is the correctly rounded one, the source is returned unchanged,
// otherwise the function returns the next FP16 number, which has a larger magnitude, upcasted to FP32.
inline float adjustFp16( const float src )
{
	const uint lo16 = f32tof16( src );

	// All 5 exponent bits set: INF or NAN, nothing to adjust
	const uint expBits = ( lo16 >> 10 ) & 0x1F;
	if( expBits == 0x1F )
		return src;

	const float lo32 = f16tof32( lo16 );
	const float hi32 = f16tof32( lo16 + 1 );

	const float errLo = abs( src - lo32 );
	const float errHi = abs( src - hi32 );

	// Truncated was closer to the source
	if( errLo < errHi )
		return src;
	// Truncated + 1 was closer to the source
	if( errLo > errHi )
		return hi32;

	// Exactly half, doing banker's rounding to nearest even
	const bool lowIsEven = ( 0 == ( lo16 & 1 ) );
	return lowIsEven ? src : hi32;
}

// Convert FP32 number to FP16, using rounding to nearest; returns the bits of the FP16 number.
// Two candidates are compared: the number produced by f32tof16(), and the next FP16 bit pattern, which has a larger magnitude.
// The candidate closer to the source wins; ties are resolved towards the candidate with the even bit pattern.
// For INF and NAN the function returns the output of f32tof16()
inline uint fp16Rounded( const float src )
{
	const uint lo16 = f32tof16( src );

	// All 5 exponent bits set: INF or NAN
	const uint expBits = ( lo16 >> 10 ) & 0x1F;
	if( expBits == 0x1F )
		return lo16;

	const uint hi16 = lo16 + 1;
	const float lo32 = f16tof32( lo16 );
	const float hi32 = f16tof32( hi16 );

	const float errLo = abs( src - lo32 );
	const float errHi = abs( src - hi32 );

	// Truncated was closer to the source
	if( errLo < errHi )
		return lo16;
	// Truncated + 1 was closer to the source
	if( errLo > errHi )
		return hi16;

	// Exactly half, doing banker's rounding to nearest even
	const bool lowIsEven = ( 0 == ( lo16 & 1 ) );
	return lowIsEven ? lo16 : hi16;
}

// Round up the number to be a multiple of 32
// Multiples of 32 are returned unchanged. The addition wraps around for inputs within 31 of the maximum uint value.
inline uint roundUp32( uint x )
{
	const uint bumped = x + 31;
	// Subtracting the lowest 5 bits is the same thing as clearing them
	return bumped - ( bumped & 31u );
}

================================================
FILE: ComputeShaders/mulMatByRow.hlsl
================================================
// Matrix * row product, like [ E0, E1, E2, E3 ] * [ E0, 1, E2, E3 ] = [ E1, 1, E2, E3 ]
// Dispatch [ E1, E2, E3 ] groups of this shader
// The matrix, size [ E0, E1, E2, E3 ]
Buffer<float> arg0: register( t0 );
// The row, size [ E0, 1, E2, E3 ]
Buffer<float> arg1: register( t1 );
// The product, size [ E1, 1, E2, E3 ]
RWBuffer<float> result: register( u0 );

// Sizes and strides of the 3 tensors. The strides are used as indices into the typed buffers, i.e. they are expressed in elements.
cbuffer Constants: register( b0 )
{
	uint4 arg0Size: packoffset( c0 );
	uint4 arg0Strides: packoffset( c1 );
	uint4 arg1Size: packoffset( c2 );
	uint4 arg1Strides: packoffset( c3 );
	uint4 resultSize: packoffset( c4 );
	uint4 resultStrides: packoffset( c5 );
}

#include "groupReduce.hlsli"

// Horizontal add: sum of the three components of the vector
inline uint hadd( uint3 vec )
{
	const uint xy = vec.x + vec.y;
	return xy + vec.z;
}
// Horizontal add: sum of the two components of the vector
inline uint hadd( uint2 vec )
{
	const uint total = vec.x + vec.y;
	return total;
}

// Entry point. Each group of 32 threads computes one element of the result:
// the dot product of the row of arg0 selected by the group index, and the row of arg1 selected by the Y and Z components of the group index.
[ numthreads( 32, 1, 1 ) ]
void main( uint3 group: SV_GroupID, uint thread : SV_GroupIndex )
{
	// Start of the two rows, and the end of the row of the matrix
	const uint row0 = hadd( group * arg0Strides.yzw );
	const uint row1 = hadd( group.yz * arg1Strides.zw );
	const uint end0 = row0 + arg0Size.x * arg0Strides.x;

	// Every thread handles each 32-nd element of the rows
	const uint inc0 = 32 * arg0Strides.x;
	const uint inc1 = 32 * arg1Strides.x;
	uint i0 = row0 + thread * arg0Strides.x;
	uint i1 = row1 + thread * arg1Strides.x;

	float dp = 0;
	while( i0 < end0 )
	{
		dp = mad( arg0[ i0 ], arg1[ i1 ], dp );
		i0 += inc0;
		i1 += inc1;
	}

	// Reduce the per-thread sums; the complete product is only available on the thread #0
	horizontalSum( thread, dp );
	if( 0 == thread )
	{
		// The X component of the group index is used as is, without a stride
		const uint rdi = group.x + hadd( group.yz * resultStrides.zw );
		result[ rdi ] = dp;
	}
}

================================================
FILE: ComputeShaders/mulMatByRow64.hlsl
================================================
// Matrix * row product, like [ E0, E1, E2, E3 ] * [ E0, 1, E2, E3 ] = [ E1, 1, E2, E3 ]
// Dispatch [ E1, E2, E3 ] groups of this shader
// The matrix, size [ E0, E1, E2, E3 ]
Buffer<float> arg0: register( t0 );
// The row, size [ E0, 1, E2, E3 ]
Buffer<float> arg1: register( t1 );
// The product, size [ E1, 1, E2, E3 ]
RWBuffer<float> result: register( u0 );

// Sizes and strides of the 3 tensors. The strides are used as indices into the typed buffers, i.e. they are expressed in elements.
cbuffer Constants: register( b0 )
{
	uint4 arg0Size: packoffset( c0 );
	uint4 arg0Strides: packoffset( c1 );
	uint4 arg1Size: packoffset( c2 );
	uint4 arg1Strides: packoffset( c3 );
	uint4 resultSize: packoffset( c4 );
	uint4 resultStrides: packoffset( c5 );
}

// Horizontal add: sum of the three components of the vector
inline uint hadd( uint3 vec )
{
	const uint xy = vec.x + vec.y;
	return xy + vec.z;
}
// Horizontal add: sum of the two components of the vector
inline uint hadd( uint2 vec )
{
	const uint total = vec.x + vec.y;
	return total;
}

// No idea why, but that particular configuration appears to be the fastest one on Ryzen 7 5700G iGPU
// Not by much, though: when trying a few numbers I saw 1.30 - 1.42 seconds for this compute shader
// Count of threads in the group, see the numthreads attribute of main()
static const uint THREADS = 64;
// Count of floats in the groupshared buffer used by horizontalSum()
static const uint REDUCTION_BUFFER = 32;
// Scratch buffer for the reduction, shared by all threads of the group
groupshared float sharedAccumulators[ REDUCTION_BUFFER ];

// Compute horizontal sum of the numbers across the thread group. The result is only correct on the thread #0 of the group.
// thread: index of the calling thread within the group
// sum: [in] the value contributed by the calling thread; [out] the group total on thread #0, an intermediate partial sum on the other threads
// The function contains group barriers, all threads of the group must call it.
void horizontalSum( const uint thread, inout float sum )
{
	if( THREADS > REDUCTION_BUFFER )
	{
		// Fold the upper slices of the group, REDUCTION_BUFFER threads each, into the first slice
		for( uint t = REDUCTION_BUFFER; t < THREADS; t += REDUCTION_BUFFER )
		{
			// The buffer is reused for every slice. Starting from the second slice, wait until the threads [ 0 .. REDUCTION_BUFFER ]
			// have loaded the values of the previous slice, otherwise the stores below could overwrite them too early.
			// The condition is uniform across the group; with THREADS = 2 * REDUCTION_BUFFER it's never true.
			if( t > REDUCTION_BUFFER )
				GroupMemoryBarrierWithGroupSync();

			// Threads [ t .. t + REDUCTION_BUFFER ] store into the buffer
			if( thread >= t && thread < t + REDUCTION_BUFFER )
				sharedAccumulators[ thread - t ] = sum;

			GroupMemoryBarrierWithGroupSync();

			// Threads [ 0 .. REDUCTION_BUFFER ] increment their local sum with the value loaded from the buffer
			if( thread < REDUCTION_BUFFER )
				sum += sharedAccumulators[ thread ];
		}
	}

	// Every thread of the first slice only overwrites the slot it has just read, no barrier is needed in between
	if( thread < REDUCTION_BUFFER )
		sharedAccumulators[ thread ] = sum;

	// Tree reduction: on every step, the lower half of the remaining range adds the upper half.
	// Reads come from [ i .. 2 * i ), writes go to [ 0 .. i ), so a single barrier per step is sufficient.
	for( uint i = REDUCTION_BUFFER / 2; i > 1; i /= 2 )
	{
		GroupMemoryBarrierWithGroupSync();
		if( thread < i )
		{
			sum += sharedAccumulators[ thread + i ];
			sharedAccumulators[ thread ] = sum;
		}
	}

	// The last step of the reduction only runs on the first thread, and doesn't store the result back
	GroupMemoryBarrierWithGroupSync();
	if( 0 == thread )
		sum += sharedAccumulators[ 1 ];
}

// Every thread group produces one element of the output: the dot product of one row of arg0 with the arg1 vector of the same layer.
// The THREADS threads of the group stride over the row, then horizontalSum() reduces their partial sums.
[ numthreads( THREADS, 1, 1 ) ]
void main( uint3 group: SV_GroupID, uint thread : SV_GroupIndex )
{
	// Start of the arg0 row and of the arg1 vector for the complete group
	const uint rowBegin = hadd( group * arg0Strides.yzw );
	const uint vecBegin = hadd( group.yz * arg1Strides.zw );
	const uint rowEnd = rowBegin + arg0Size.x * arg0Strides.x;

	// Load positions of this thread, and the distance between two elements consumed by the same thread
	uint idx0 = rowBegin + thread * arg0Strides.x;
	uint idx1 = vecBegin + thread * arg1Strides.x;
	const uint step0 = THREADS * arg0Strides.x;
	const uint step1 = THREADS * arg1Strides.x;

	float partial = 0;
	while( idx0 < rowEnd )
	{
		partial = mad( arg0[ idx0 ], arg1[ idx1 ], partial );
		idx0 += step0;
		idx1 += step1;
	}

	// After the reduction, only the first thread of the group stores the value
	horizontalSum( thread, partial );
	if( 0 == thread )
	{
		const uint rdi = group.x + hadd( group.yz * resultStrides.zw );
		result[ rdi ] = partial;
	}
}

================================================
FILE: ComputeShaders/mulMatByRowTiled.hlsl
================================================
// Matrix * row product, like [ E0, E1, E2, E3 ] * [ E0, 1, E2, E3 ] = [ E1, 1, E2, E3 ]
// Dispatch [ ( E1 + TILE_Y - 1 ) / TILE_Y, E2, E3 ] thread groups of this shader
// This one here is the second most expensive shader in the model, after matrix*matrix product.
// Optimized heavily, as a result the readability ain't great.

#ifndef TILE_Y
// Max. count of output rows computed by one thread group
static const uint TILE_Y = 64;
#endif
#ifndef THREADS_X
// X size of the thread group; threads with different X consume different elements of the same row
static const uint THREADS_X = 32;
#endif
#ifndef THREADS_Y
// Y size of the thread group; threads with different Y handle different rows of the tile
static const uint THREADS_Y = 16;
#endif

// First argument, the matrix; indexed with arg0Strides
Buffer<float> arg0: register( t0 );
// Second argument, the row vector; indexed with arg1Strides
Buffer<float> arg1: register( t1 );
// Output tensor
RWBuffer<float> result: register( u0 );

// Sizes and strides of the three tensors.
// The strides are used directly as element indices into the buffers above.
// This shader does not read arg1Size nor resultSize.
cbuffer Constants: register( b0 )
{
	uint4 arg0Size: packoffset( c0 );
	uint4 arg0Strides: packoffset( c1 );
	uint4 arg1Size: packoffset( c2 );
	uint4 arg1Strides: packoffset( c3 );
	uint4 resultSize: packoffset( c4 );
	uint4 resultStrides: packoffset( c5 );
}

// Horizontal add of the two lanes
inline uint hadd( uint2 vec )
{
	const uint first = vec.x;
	const uint second = vec.y;
	return first + second;
}

// Count of FP32 accumulators we need in every thread of the shader
static const uint heightScalars = TILE_Y / THREADS_Y;
// The local accumulators are float4 vectors, compute count of these vectors
static const uint heightVectors = ( heightScalars + 3 ) / 4;

// Scratch buffer for the final reduction along X: one slot per accumulator vector, per thread of the group
groupshared float4 reductionBuffer[ heightVectors ][ THREADS_Y ][ THREADS_X ];

// Every thread group computes up to TILE_Y elements of the output, for the layer selected by group.yz.
// Threads with the same Y accumulate partial dot products for the same rows; the partials are then summed along X in groupshared memory.
[numthreads( THREADS_X, THREADS_Y, 1 )]
void main( uint3 group: SV_GroupID, uint3 thread : SV_GroupThreadID )
{
	uint i;
	// Despite being inside GPU cores, the shared memory is still much slower than registers
	// For this reason, this shader accumulates numbers in local variables. Only uses groupshared buffer for the final reduction.
	float4 acc[ heightVectors ];
	// Zero out the accumulators
	[unroll]
	for( i = 0; i < heightVectors; i++ )
		acc[ i ] = 0.0;

	// Count of rows to compute in this thread group
	const uint height = min( TILE_Y, arg0Size.y - group.x * TILE_Y );

	uint s0 = hadd( group.yz * arg0Strides.zw );   //< arg0 layer for the thread group
	s0 += group.x * TILE_Y * arg0Strides.y;        //< arg0 first row for the thread group
	s0 += hadd( arg0Strides.xy * thread.xy );      //< arg0 load index for the thread

	uint s1 = hadd( group.yz * arg1Strides.zw );   //< arg1 layer for the thread group
	s1 += thread.x * arg1Strides.x;                //< arg1 load index for the thread

	const uint completeTiles = arg0Size.x / THREADS_X;
	// Each iteration of that loop loads THREADS_X elements from arg1,
	// a block of [ THREADS_X, height ] elements from arg0,
	// and accumulates these dot products in the local variables
	for( uint t = 0; t < completeTiles; t++, s0 += THREADS_X * arg0Strides.x, s1 += THREADS_X * arg1Strides.x )
	{
		// Load THREADS_X elements from arg1, one per thread
		const float v1 = arg1[ s1 ];

		// Running load index; unlike s0, it moves down the rows of the tile
		uint rsi = s0;
		[unroll]
		for( i = 0; i < heightVectors; i++ )
		{
			float4 v0 = 0.0;
			// Load up to 4*THREADS_X elements from arg0
			[unroll]
			for( uint j = 0; j < 4; j++, rsi += arg0Strides.y * THREADS_Y )
			{
				// Row of the tile which corresponds to the lane j of the accumulator i in this thread
				const uint y = ( i * 4 + j ) * THREADS_Y + thread.y;
				// Rows outside of the matrix keep the zero, and contribute nothing to the accumulator
				[branch]
				if( y < height )
					v0[ j ] = arg0[ rsi ];
			}
			// Multiply + accumulate
			acc[ i ] = mad( v0, v1, acc[ i ] );
		}
	}

	const uint rem = arg0Size.x % THREADS_X;
	if( thread.x < rem )
	{
		// E0 ain't a multiple of THREADS_X, we have a remainder

		// Load `rem` elements from arg1
		const float v1 = arg1[ s1 ];

		[unroll]
		for( i = 0; i < heightVectors; i++ )
		{
			float4 v0 = 0.0;
			// Load up to 4*rem elements from arg0
			// This is the last use of s0, that's why this loop advances the variable directly
			[unroll]
			for( uint j = 0; j < 4; j++, s0 += arg0Strides.y * THREADS_Y )
			{
				const uint y = ( i * 4 + j ) * THREADS_Y + thread.y;
				[branch]
				if( y < height )
					v0[ j ] = arg0[ s0 ];
			}
			// Multiply + accumulate
			acc[ i ] = mad( v0, v1, acc[ i ] );
		}
	}

	// Now we need horizontal sum of these accumulators, reducing [height][THREADS_X] of them into [height][1] column
	// First, store local variables into the shared memory.
	[ unroll ]
	for( i = 0; i < heightVectors; i++ )
		reductionBuffer[ i ][ thread.y ][ thread.x ] = acc[ i ];
	GroupMemoryBarrierWithGroupSync();

	// Run reduction using that shared memory buffer
	// On every step the threads with X in [ 0 .. i ) add the values stored by the threads with X in [ i .. 2 * i )
	for( i = THREADS_X / 2; i > 1; i /= 2 )
	{
		if( thread.x < i )
		{
			[unroll]
			for( uint iv = 0; iv < heightVectors; iv++ )
			{
				float4 that = reductionBuffer[ iv ][ thread.y ][ thread.x + i ];
				float4 tmp = acc[ iv ];
				tmp += that;
				// The sum is kept both in the buffer for the next step, and in the local variable
				reductionBuffer[ iv ][ thread.y ][ thread.x ] = tmp;
				acc[ iv ] = tmp;
			}
		}
		GroupMemoryBarrierWithGroupSync();
	}

	// And finally, store that column to global memory.
	// Only running that code on the threads of the group with thread.x = 0, to save a few loads from the groupshared buffer
	// This allows to use registers instead, faster to access
	if( thread.x != 0 )
		return;

	uint rdi = hadd( group.yz * resultStrides.zw );
	rdi += ( group.x * TILE_Y + thread.y ) * resultStrides.x;
	// Distance in the output between two consecutive rows handled by the same thread
	const uint rdiInc = THREADS_Y * resultStrides.x;

	[unroll]
	for( i = 0; i < heightVectors; i++ )
	{
		// The previous loop had "i > 1" continue condition, it didn't complete the last step of the reduction
		// The following line is doing that last reduction step
		const float4 resultVec = acc[ i ] + reductionBuffer[ i ][ thread.y ][ 1 ];

		// Conditionally store these 4 floats to the output tensor
		[unroll]
		for( uint j = 0; j < 4; j++, rdi += rdiInc )
		{
			const uint y = ( i * 4 + j ) * THREADS_Y + thread.y;
			[branch]
			if( y < height )
				result[ rdi ] = resultVec[ j ];
		}
	}
}

================================================
FILE: ComputeShaders/mulMatByRowTiledEx.hlsl
================================================
// matrix*row vector product, needs first argument reshaped into a sequence of horizontal column major panels
#ifndef TILE_SIZE
// Height of the arg0 panels; also the X size of the thread group, and the max. count of elements produced by a group
static const uint TILE_SIZE = 32;
#endif
#ifndef THREADS_Y
// Y size of the thread group; threads with different Y consume different columns of the panel
static const uint THREADS_Y = 8;
#endif

// First tensor, reshaped into dense column major horizontal panels of size [ width, TILE_SIZE ]
Buffer<float> arg0: register( t0 );
// Second tensor, the row vector. This shader addresses it with arg1Strides, not as panels.
Buffer<float> arg1: register( t1 );
// FP32 output tensor, row major and continuous
RWBuffer<float> result: register( u0 );

cbuffer Constants: register( b0 )
{
	uint4 arg0Size: packoffset( c0 );
	// Distance between the starts of two consecutive panels of arg0, in elements
	uint arg0panel: packoffset( c1.y );
	// Strides of the two outer dimensions of arg0, in elements
	uint2 arg0LayerStrides: packoffset( c1.z );
	// uint4 arg1Size: packoffset( c2 );
	uint4 arg1Strides: packoffset( c3 );
	uint4 resultSize: packoffset( c4 );
	uint4 resultStrides: packoffset( c5 );
}

// Horizontal add of the 4 lanes, computed as ( x + z ) + ( y + w )
inline uint hadd4( const uint4 v )
{
	const uint xz = v.x + v.z;
	const uint yw = v.y + v.w;
	return xz + yw;
}

// Horizontal add of the 4 lanes, computed as ( x + z ) + ( y + w )
// The order of the additions matters for FP32 rounding; don't rearrange.
inline float hadd4( const float4 v )
{
	const float xz = v.x + v.z;
	const float yw = v.y + v.w;
	return xz + yw;
}

// Scratch buffer for the final reduction along Y, one float per thread of the group
groupshared float reductionBuffer[ THREADS_Y ][ TILE_SIZE ];

// Every thread group handles one panel of arg0 and produces up to TILE_SIZE elements of the output.
// thread.x selects the row within the panel; threads with different Y split the columns of the panel between them.
[numthreads( TILE_SIZE, THREADS_Y, 1 )]
void main( const uint3 group: SV_GroupID, const uint3 thread : SV_GroupThreadID )
{
	const uint2 layer = group.yz;
	// Source offsets for the complete thread group; X is the index in arg0, Y is the index in arg1
	uint2 rsi;
	rsi.x = group.x * arg0panel + layer.x * arg0LayerStrides.x + layer.y * arg0LayerStrides.y;
	rsi.y = layer.x * arg1Strides.z + layer.y * arg1Strides.w;
	// Apply source offsets for this particular thread
	rsi.x += thread.y * TILE_SIZE + thread.x;
	rsi.y += thread.y * arg1Strides.x;

	// Both indices advance by THREADS_Y columns at a time
	const uint2 rsiInc = uint2( THREADS_Y * TILE_SIZE, THREADS_Y * arg1Strides.x );

	const uint completeTiles = arg0Size.x / ( THREADS_Y * 4 );
	uint i;
	float4 acc = 0.0;
	for( i = 0; i < completeTiles; i++ )
	{
		// Each iteration of this loop consumes THREADS_Y*4 columns from the arg0 panel, and THREADS_Y*4 values from arg1
		float4 v0, v1;
		[unroll]
		for( uint j = 0; j < 4; j++, rsi += rsiInc )
		{
			// Load [ TILE_SIZE, THREADS_Y ] block from the first source tensor
			v0[ j ] = arg0[ rsi.x ];
			// Broadcast [ THREADS_Y ] row from the second source tensor
			v1[ j ] = arg1[ rsi.y ];
		}

		// Now we have [ TILE_SIZE, THREADS_Y * 4 ] block from the first source tensor in the v0 vector,
		// and [ THREADS_Y * 4 ] row from the second one in the v1 vector
		// Multiply and accumulate.
		acc = mad( v0, v1, acc );
	}

	// Handle the remainder columns, if any.
	// When present, their count is in [ 1 .. THREADS_Y * 4 - 1 ] interval
	const uint rem = arg0Size.x % ( THREADS_Y * 4 );
	if( rem != 0 )
	{
		// The lanes which correspond to the columns outside of the matrix stay zero
		float4 v0 = 0.0, v1 = 0.0;
		[unroll]
		for( uint j = 0; j < 4; j++, rsi += rsiInc )
		{
			// Index of the column within the remainder
			const uint x = ( j * THREADS_Y ) + thread.y;
			if( x < rem )
			{
				v0[ j ] = arg0[ rsi.x ];
				v1[ j ] = arg1[ rsi.y ];
			}
		}
		acc = mad( v0, v1, acc );
	}

	// We now have [ TILE_SIZE, THREADS_Y * 4 ] block in the local variables of this thread group
	// The group however only outputs [ TILE_SIZE ] elements max, need a reduction
	float acc1 = hadd4( acc );
	reductionBuffer[ thread.y ][ thread.x ] = acc1;
	GroupMemoryBarrierWithGroupSync();

	// On every step the threads with Y in [ 0 .. i ) add the values stored by the threads with Y in [ i .. 2 * i )
	for( i = THREADS_Y / 2; i > 1; i /= 2 )
	{
		if( thread.y < i )
		{
			acc1 += reductionBuffer[ thread.y + i ][ thread.x ];
			reductionBuffer[ thread.y ][ thread.x ] = acc1;
		}
		GroupMemoryBarrierWithGroupSync();
	}

	// Only the first row of threads stores the output
	if( thread.y != 0 )
		return;

	const uint resultPos = group.x * TILE_SIZE;
	// The last group of the dispatch may produce less than TILE_SIZE elements
	const uint outputSize = min( TILE_SIZE, resultSize.x - resultPos );
	if( thread.x >= outputSize )
		return;

	const uint4 resultPos4 = uint4( resultPos + thread.x, 0, layer );
	const uint rdi = hadd4( resultPos4 * resultStrides );
	// The loop above had "i > 1" continue condition; the addition below is the last step of the reduction
	result[ rdi ] = acc1 + reductionBuffer[ 1 ][ thread.x ];
}

================================================
FILE: ComputeShaders/mulMatByScalar.hlsl
================================================
// Matrix * scalar product, like [ 1, E1, E2, E3 ] * [ 1, 1, E2, E3 ] = [ E1, 1, E2, E3 ]
// Dispatch [ E2, E3, 1 ] thread groups of this shader
// First argument, the tensor to scale; indexed with arg0Strides
Buffer<float> arg0: register( t0 );
// Second argument; the shader loads a single multiplier per thread group from there
Buffer<float> arg1: register( t1 );
// Output tensor
RWBuffer<float> result: register( u0 );

// Sizes and strides of the three tensors.
// The strides are used directly as element indices into the buffers above.
// This shader does not read arg1Size nor resultSize.
cbuffer Constants: register( b0 )
{
	uint4 arg0Size: packoffset( c0 );
	uint4 arg0Strides: packoffset( c1 );
	uint4 arg1Size: packoffset( c2 );
	uint4 arg1Strides: packoffset( c3 );
	uint4 resultSize: packoffset( c4 );
	uint4 resultStrides: packoffset( c5 );
}

// Sum of the two lanes of the vector
inline uint hadd( uint2 vec )
{
	return vec[ 0 ] + vec[ 1 ];
}

// Every thread group multiplies arg0Size.y elements of arg0 by a single value loaded from arg1.
// The 32 threads of the group stride over these elements; the output is written with unit stride.
[ numthreads( 32, 1, 1 ) ]
void main( uint3 group: SV_GroupID, uint thread : SV_GroupIndex )
{
	// The multiplier is the same for all threads of the group
	const uint scalarIndex = hadd( group.xy * arg1Strides.zw );
	const float scalarValue = arg1[ scalarIndex ];

	// Load index for this thread; the elements being scaled are arg0Strides.y apart
	const uint sourceStep = 32 * arg0Strides.y;
	uint source = hadd( group.xy * arg0Strides.zw ) + thread * arg0Strides.y;

	// Store range for the group, and the store index for this thread
	const uint destBegin = hadd( group.xy * resultStrides.zw );
	const uint destEnd = destBegin + arg0Size.y;
	uint dest = destBegin + thread;

	while( dest < destEnd )
	{
		result[ dest ] = arg0[ source ] * scalarValue;
		dest += 32;
		source += sourceStep;
	}
}

================================================
FILE: ComputeShaders/mulMatDotMain.hlsl
================================================
// GGML_TASK_COMPUTE step for matrix*matrix product, where nb01 >= nb00;
// Dispatch with [ ne11, ne01*ne02*ne03 ] thread groups
// Each thread group computes a single dot product
// First source tensor; rows are located with src0_strides
Buffer<float> arg0: register( t0 );
// Second source tensor; the shader indexes it as a dense array of rows, ne00 elements each
Buffer<float> arg1: register( t1 );
// Output tensor
RWBuffer<float> result: register( u0 );

cbuffer Constants: register( b0 )
{
	uint4 src0_elements: packoffset( c0 );
	uint4 src0_strides: packoffset( c1 );
	uint4 src1_elements: packoffset( c2 );
	// The c3 register is not declared here
	uint4 result_elements: packoffset( c4 );
	uint4 result_strides: packoffset( c5 );
}

// Horizontal product of the 3 lanes
inline uint product( uint3 vec )
{
	const uint xy = vec.x * vec.y;
	return xy * vec.z;
}

// Horizontal product of the 4 lanes
inline uint product( uint4 vec )
{
	const uint xz = vec.x * vec.z;
	const uint yw = vec.y * vec.w;
	return xz * yw;
}

// The portion of a dot product computed by a single thread of the group:
// accumulates arg0[ i0 + i ] * arg1[ i1 + i ] for i = thread, thread + 32, thread + 64, etc., while i < length
inline float dotProductInner( uint i0, uint i1, uint length, uint thread )
{
	float acc = 0;
	uint pos = thread;
	while( pos < length )
	{
		acc = mad( arg0[ i0 + pos ], arg1[ i1 + pos ], acc );
		pos += 32;
	}
	return acc;
}

#include "groupReduce.hlsli"

// Every thread group computes one dot product.
// group.x is the column of src1 ( `ic` in the ggml code ), group.y is the flattened index of the src0 row ( `ir` ).
[numthreads( 32, 1, 1 )]
void main( uint3 group: SV_GroupID, uint thread : SV_GroupIndex )
{
	const uint ic = group.x;
	const uint ir = group.y;

	const uint ne00 = src0_elements.x;
	const uint ne01 = src0_elements.y;
	// Count of src0 rows per index of the outermost dimension: ne02 * ne01
	const uint rowsPerCube = src0_elements.z * ne01;

	// Unflatten the row index into the src0 indices
	const uint i03 = ir / rowsPerCube;
	const uint rest = ir - i03 * rowsPerCube;
	const uint i02 = rest / ne01;
	const uint i01 = rest - i02 * ne01;
	const uint3 rowIndex = uint3( i01, i02, i03 );

	// src0_row = (ggml_fp16_t *) ((char *) src0->data + (i01*nb01 + i02*nb02 + i03*nb03));
	const uint3 srcOffsets = rowIndex * src0_strides.yzw;
	const uint src0_row = srcOffsets.x + srcOffsets.y + srcOffsets.z;

	// src1_col = wdata + ( i13 * ne12 * ne11 + i12 * ne11 + ic ) * ne00, where i13 = i03 and i12 = i02
	const uint ne11 = src1_elements.y;
	const uint ne12 = src1_elements.z;
	const uint src1_col = ( ( i03 * ne12 + i02 ) * ne11 + ic ) * ne00;

	float curr = dotProductInner( src0_row, src1_col, ne00, thread );
	horizontalSumCompatNew( thread, curr );

	// Only the first thread of the group stores the value
	if( 0 == thread )
	{
		// float * dst_col = (float *) ((char *) dst->data + (i0*nb0 + 0*nb1 + i2*nb2 + i3*nb3));
		const uint3 dstOffsets = rowIndex * result_strides.xzw;
		const uint dst_col = dstOffsets.x + dstOffsets.y + dstOffsets.z;
		result[ dst_col + ic * result_elements.x ] = curr;
	}
}

================================================
FILE: ComputeShaders/mulMatDotReshape.hlsl
================================================
// GGML_TASK_INIT step for matrix*matrix product, where nb01 >= nb00;
// Dispatch with [ ne11, ne12 ] groups
// Source tensor; rows are located with src0_strides.yz, elements within a row are read with unit stride
Buffer<float> arg0: register( t0 );
// Destination buffer; the shader writes the rows there densely, one after another
RWBuffer<float> result: register( u0 );

cbuffer Constants: register( b0 )
{
	uint4 src0_elements: packoffset( c0 );
	uint4 src0_strides: packoffset( c1 );
}

#include "miscUtils.hlsli"

// Each thread group of this shader copies a single row of the matrix, passing every element through adjustFp16()
// group.x is the index of the row, group.y is the index of the layer
[ numthreads( 32, 1, 1 ) ]
void main( uint3 group: SV_GroupID, uint thread : SV_GroupIndex )
{
	const uint rowLength = src0_elements.x;
	const uint rowsCount = src0_elements.y;

	// The destination is dense: rows of the same layer are adjacent, layers are adjacent as well
	const uint destBegin = ( group.y * rowsCount + group.x ) * rowLength;
	const uint destEnd = destBegin + rowLength;

	// The source row is located with the strides from the constant buffer
	const uint sourceBegin = group.y * src0_strides.z + group.x * src0_strides.y;

	uint dest = destBegin + thread;
	uint source = sourceBegin + thread;
	while( dest < destEnd )
	{
		result[ dest ] = adjustFp16( arg0[ source ] );
		dest += 32;
		source += 32;
	}
}

================================================
FILE: ComputeShaders/mulMatMadMain.hlsl
================================================
// GGML_TASK_COMPUTE step for matrix*matrix product, where nb01 < nb00
// First source tensor; the shader reads runs of consecutive elements from there
Buffer<float> arg0: register( t0 );
// Second source tensor; the shader loads individual multipliers from there
Buffer<float> arg1: register( t1 );
// Output tensor
RWBuffer<float> resultTensor: register( u0 );
// Temporary buffer with the partial sums, one set of rows per simulated CPU thread
RWBuffer<float> tempBuffer: register( u1 );

cbuffer Constants: register( b0 )
{
	uint4 aSize: packoffset( c0 );
	uint4 aStride: packoffset( c1 );
	uint4 bSize: packoffset( c2 );
	uint4 bStride: packoffset( c3 );
	uint4 resSize: packoffset( c4 );
	// When set, vectorMad() passes every accumulated value through adjustFp16()
	bool resultFp16 : packoffset( c5.x );
	// Distance in tempBuffer between the rows of two consecutive simulated CPU threads, in elements
	uint ne: packoffset( c5.y );
}

#include "miscUtils.hlsli"

// tempBuffer[ rdi .. rdi + len ] = 0.0
// Every thread of the group only writes the elements thread, thread + 32, thread + 64, etc. of that range
inline void writeTempZeros( uint rdi, const uint len, const uint thread )
{
	const uint destEnd = rdi + len;
	rdi += thread;
	while( rdi < destEnd )
	{
		tempBuffer[ rdi ] = 0.0;
		rdi += 32;
	}
}

// tempBuffer[ rdi .. ] += mul * arg0[ rsi .. ], for len elements
// When resultFp16 is set, every updated value is passed through adjustFp16() before being stored.
// Every thread of the group only updates the elements thread, thread + 32, thread + 64, etc. of that range
inline void vectorMad( uint rsi, uint rdi, const uint len, const float mul, const uint thread )
{
	const uint sourceEnd = rsi + len;
	rsi += thread;
	rdi += thread;
	while( rsi < sourceEnd )
	{
		float updated = mad( mul, arg0[ rsi ], tempBuffer[ rdi ] );
		[branch]
		if( resultFp16 )
			updated = adjustFp16( updated );
		tempBuffer[ rdi ] = updated;

		rsi += 32;
		rdi += 32;
	}
}

// resultTensor[ rdi .. ] = tempBuffer[ rsi .. ], for len elements
// Every thread of the group only copies the elements thread, thread + 32, thread + 64, etc. of that range
inline void copyRow( uint rsi, uint rdi, const uint len, const uint thread )
{
	const uint sourceEnd = rsi + len;
	rsi += thread;
	rdi += thread;
	while( rsi < sourceEnd )
	{
		resultTensor[ rdi ] = tempBuffer[ rsi ];
		rsi += 32;
		rdi += 32;
	}
}

// resultTensor[ rdi .. ] += tempBuffer[ rsi .. ], for len elements
// Every thread of the group only updates the elements thread, thread + 32, thread + 64, etc. of that range
inline void addRow( uint rsi, uint rdi, const uint len, const uint thread )
{
	const uint sourceEnd = rsi + len;
	rsi += thread;
	rdi += thread;
	while( rsi < sourceEnd )
	{
		float total = resultTensor[ rdi ];
		total += tempBuffer[ rsi ];
		resultTensor[ rdi ] = total;

		rsi += 32;
		rdi += 32;
	}
}

// Every thread group computes one row of the output, selected by the [ i1, i2, i3 ] = group index.
// The columns of src1 are split into 4 slices; each slice is accumulated into its own row of tempBuffer,
// then the 4 rows are summed into the output in a fixed order.
[numthreads( 32, 1, 1 )]
void main( const uint3 group: SV_GroupID, const uint thread : SV_GroupIndex )
{
	const uint i1 = group.x;
	const uint i2 = group.y;
	const uint i3 = group.z;

	// Length of the rows being accumulated and copied by this group
	const uint ne01 = aSize.y;

	// dst_row = wdata + wo + i3*ne2*ne1*ne0 + i2*ne1*ne0 + i1*ne0;
	const uint tempRowThread0 = ( ( i3 * resSize.z + i2 ) * resSize.y + i1 ) * resSize.x;

	// Faking 4 CPU threads trying to achieve bitwise compatibility with the CPU version
	const uint nth = 4;
	uint ith;

	// GGML_TASK_COMPUTE

	// src0_col = src0->data + ( i00 * nb00 + i02 * nb02 + i03 * nb03 );
	const uint aBase = i2 * aStride.z + i3 * aStride.w;
	// src1_val = *      (float *) ((char *) src1->data + (i10*nb10 + i11*nb11 + i12*nb12 + i13*nb13));
	const uint bBase = i1 * bStride.y + i2 * bStride.z + i3 * bStride.w;

	// total columns in src1
	const uint nc = bSize.x;
	// columns per thread
	const uint dc = ( nc + nth - 1 ) / nth;

	for( ith = 0; ith < nth; ith++ )
	{
		// Row of the temporary buffer for this simulated CPU thread
		const uint tempRow = tempRowThread0 + ith * ne;
		writeTempZeros( tempRow, ne01, thread );

		// column range for this thread
		const uint ic0 = dc * ith;
		const uint ic1 = min( ic0 + dc, nc );

		for( uint ic = ic0; ic < ic1; ic++ )
		{
			const float bValue = arg1[ bBase + ic * bStride.x ];
			vectorMad( aBase + ic * aStride.x, tempRow, ne01, bValue, thread );
		}
	}

	// GGML_TASK_FINALIZE

	// The output row has the same index as the first temporary row
	copyRow( tempRowThread0, tempRowThread0, ne01, thread );
	for( ith = 1; ith < nth; ith++ )
		addRow( tempRowThread0 + ith * ne, tempRowThread0, ne01, thread );
}

================================================
FILE: ComputeShaders/mulMatTiled.hlsl
================================================
// This compute shader implements matrix*matrix product, using tiling and many other tricks to improve the performance
// This one here is _the_ most expensive shader in the model. Optimized heavily, as a result the readability ain't great.

#ifndef TILE_SIZE
// Width and height of the square tiles; also the X size of the thread group
static const uint TILE_SIZE = 32;
#endif
#ifndef THREADS_Y
// Y size of the thread group
static const uint THREADS_Y = 8;
#endif
// The above values have the following constraint: TILE_SIZE = THREADS_Y * N * 4 where N is an integer

#ifndef STREAM_SECOND_MATRIX
// When non-zero, the shader doesn't keep a tile of the second matrix in groupshared memory, and loads arg1 inside multiplyTiles() instead
// Fun fact: enabling this on 1080Ti ruins the performance, by a factor of 3.5
#define STREAM_SECOND_MATRIX 0
#endif

#ifndef LOAD_ORDER
// LOAD_ORDER is a bool2 expression: X selects the branch of loadTile0(), Y selects the branch of loadTile1()

// Load with coalesced loads from global memory whenever possible, store into groupshared buffer with random stores
// #define LOAD_ORDER bool2( ( 1 == arg0Strides[ 0 ] ) || ( 1 != arg0Strides[ 1 ] ), ( 1 == arg1Strides[ 0 ] ) || ( 1 != arg1Strides[ 1 ] ) )

// Load with random loads from global memory, store into groupshared buffer with coalesced stores
// On my AMD iGPU inside Ryzen 7 5700G, there's a whopping 15% performance win with that tactic, from 6.67 to 5.66 seconds for this shader.
// My nVidia GPU does about the same
#define LOAD_ORDER bool2( false, true )

#endif

// First argument; indexed with arg0Strides
Buffer<float> arg0: register( t0 );
// Second argument; indexed with arg1Strides
Buffer<float> arg1: register( t1 );
// Output tensor; indexed with resultStrides
RWBuffer<float> result: register( u0 );

// Sizes and strides of the tensors. The strides are used directly as element indices into the buffers above.
// The c2 register is not declared here.
cbuffer Constants: register( b0 )
{
	uint4 arg0Size: packoffset( c0 );
	uint4 arg0Strides: packoffset( c1 );
	uint4 arg1Strides: packoffset( c3 );
	uint4 resultSize: packoffset( c4 );
	uint4 resultStrides: packoffset( c5 );
}

// Tile of the first matrix, written by loadTile0()
groupshared float tile0[ TILE_SIZE ][ TILE_SIZE ];
#if !STREAM_SECOND_MATRIX
// Tile of the second matrix, written by loadTile1()
groupshared float tile1[ TILE_SIZE ][ TILE_SIZE ];
#endif

// Count of FP32 accumulators we need in every thread of the shader
static const uint heightScalars = TILE_SIZE / THREADS_Y;
// The local accumulators are float4 vectors, compute count of these vectors
static const uint heightVectors = ( heightScalars + 3 ) / 4;

#if STREAM_SECOND_MATRIX
// Compute acc += tile0 * ( a block of arg1 ), loading the second matrix directly from the arg1 buffer
// rsi: index in arg1 of the first element for this thread; main() has already applied the thread.y offset
// thread: SV_GroupThreadID of the calling thread
// w: count of tile0 rows to consume, i.e. the length of the dot products accumulated by this call
// h: height of the output tile; the lanes of the accumulators at or beyond that height only receive zeros
// acc: local accumulators of the calling thread
void multiplyTiles( uint rsi, const uint3 thread, const uint w, const uint h, inout float4 acc[ heightVectors ] )
{
	// Load indices for the 4 lanes of the first accumulator; the lanes are THREADS_Y rows apart
	uint4 rsi4 = ( THREADS_Y * arg1Strides.y ) * uint4( 0, 1, 2, 3 ) + rsi;
	[unroll]
	for( uint iv = 0; iv < heightVectors; iv++, rsi4 += THREADS_Y * 4 * arg1Strides.y )
	{
		float4 r = 0;
		uint4 rsiRow = rsi4;
		for( uint j = 0; j < w; j++, rsiRow += arg1Strides.x )
		{
			// One TILE_SIZE * 4 bytes coalesced load, broadcasted into THREADS_Y copies
			const float s0 = tile0[ j ][ thread.x ];
			float4 s1 = 0.0;
			[unroll]
			for( uint k = 0; k < 4; k++ )
			{
				// Row of the output tile which corresponds to the lane k of the accumulator iv
				const uint i = ( iv * 4 + k ) * THREADS_Y + thread.y;
				if( i < h )
					s1[ k ] = arg1[ rsiRow[ k ] ];
			}
			// Multiply and accumulate
			r = mad( s0, s1, r );
		}
		// Accumulate into the output tile
		acc[ iv ] += r;
	}
}
#else
// Compute acc += tile0 * tile1, for TILE_SIZE^2 square matrices in the groupshared buffers
// The group size is TILE_SIZE*THREADS_Y threads in this shader
// thread: SV_GroupThreadID of the calling thread
// acc: local accumulators of the calling thread; the lane k of the vector iv corresponds to the tile1 row ( iv * 4 + k ) * THREADS_Y + thread.y
void multiplyTiles( const uint3 thread, inout float4 acc[ heightVectors ] )
{
	[unroll]
	for( uint iv = 0; iv < heightVectors; iv++ )
	{
		// Rows of tile1 for the 4 lanes of this accumulator
		const uint4 rows = ( iv * 4 + uint4( 0, 1, 2, 3 ) ) * THREADS_Y + thread.y;

		float4 partial = 0;
		for( uint j = 0; j < TILE_SIZE; j++ )
		{
			// One TILE_SIZE * 4 bytes coalesced load, broadcasted into THREADS_Y copies
			const float s0 = tile0[ j ][ thread.x ];
			// THREADS_Y broadcasts, each one is 4 bytes broadcasted into TILE_SIZE copies
			const float4 s1 = float4( tile1[ rows.x ][ j ], tile1[ rows.y ][ j ], tile1[ rows.z ][ j ], tile1[ rows.w ][ j ] );
			// Multiply and accumulate
			partial = mad( s0, s1, partial );
		}
		// Accumulate into the output tile
		acc[ iv ] += partial;
	}
}
#endif

// Load a tile of the first matrix into the tile0 groupshared buffer.
// Note we transposed these tiles while loading: the first index of tile0 is the position along arg0Strides.x, the second one is along arg0Strides.y
// rsi: index of the first element of the tile in arg0
// thread: SV_GroupThreadID of the calling thread
// w: count of valid elements along arg0Strides.x; the rest of the tile in that direction is filled with zeros
// h: count of valid elements along arg0Strides.y
// rowMajor: selects one of the two mappings of threads to elements; both fill the buffer with the same layout
void loadTile0( uint rsi, const uint3 thread, const uint w, const uint h, const bool rowMajor )
{
	if( rowMajor )
	{
		// thread.x runs along arg0Strides.x, the loop runs along arg0Strides.y
		const bool inside = thread.x < w;
		const uint rowStep = arg0Strides.y * THREADS_Y;
		uint srcRow = rsi + arg0Strides.y * thread.y + thread.x * arg0Strides.x;
		for( uint row = thread.y; row < h; row += THREADS_Y, srcRow += rowStep )
		{
			if( inside )
				tile0[ thread.x ][ row ] = arg0[ srcRow ];
			else
				tile0[ thread.x ][ row ] = 0.0;
		}
	}
	else if( thread.x < h )
	{
		// Unlike width which is smaller for the last tile, the height is always the same, and all these tiles are zero-initialized
		// thread.x runs along arg0Strides.y, the loops run along arg0Strides.x
		const uint colStep = arg0Strides.x * THREADS_Y;
		uint srcCol = rsi + arg0Strides.x * thread.y + thread.x * arg0Strides.y;
		uint col = thread.y;
		for( ; col < w; col += THREADS_Y, srcCol += colStep )
			tile0[ col ][ thread.x ] = arg0[ srcCol ];

		// Pad the remainder of the tile with zeros
		for( ; col < TILE_SIZE; col += THREADS_Y )
			tile0[ col ][ thread.x ] = 0.0;
	}
}

#if !STREAM_SECOND_MATRIX
// Load a tile of the second matrix into the tile1 groupshared buffer.
// The first index of tile1 is the position along arg1Strides.y, the second one is along arg1Strides.x
// rsi: index of the first element of the tile in arg1
// thread: SV_GroupThreadID of the calling thread
// w: count of valid elements along arg1Strides.x; the rest of the tile in that direction is filled with zeros
// h: count of valid elements along arg1Strides.y
// rowMajor: selects one of the two mappings of threads to elements; both fill the buffer with the same layout
void loadTile1( uint rsi, const uint3 thread, const uint w, const uint h, const bool rowMajor )
{
	uint i;
	if( rowMajor )
	{
		// thread.x runs along arg1Strides.x, the loop runs along arg1Strides.y
		rsi += thread.y * arg1Strides.y;

		for( i = thread.y; i < h; i += THREADS_Y, rsi += arg1Strides.y * THREADS_Y )
		{
			if( thread.x < w )
				tile1[ i ][ thread.x ] = arg1[ rsi + thread.x * arg1Strides.x ];
			else
				tile1[ i ][ thread.x ] = 0.0;
		}
	}
	else
	{
		// Unlike width which is smaller for the last tile, the height is always the same, and all these tiles are zero-initialized
		if( thread.x >= h )
			return;

		// thread.x runs along arg1Strides.y, the loops run along arg1Strides.x
		rsi += thread.y * arg1Strides.x;
		for( i = thread.y; i < w; i += THREADS_Y, rsi += arg1Strides.x * THREADS_Y )
		{
			// The row offset must use the stride of the second matrix; arg0Strides.y belongs to the other tensor
			tile1[ thread.x ][ i ] = arg1[ rsi + thread.x * arg1Strides.y ];
		}
		if( i >= TILE_SIZE )
			return;
		// Pad the remainder of the tile with zeros
		for( ; i < TILE_SIZE; i += THREADS_Y )
			tile1[ thread.x ][ i ] = 0.0;
	}
}
#endif

// Store the accumulators of the calling thread into the output tensor
// thread: SV_GroupThreadID of the calling thread
// pos: 4D position of the first element of the tile in the output tensor
// size: width and height of the tile; the elements outside are not stored
// acc: local accumulators of the calling thread; the lane k of the vector iv is the row ( iv * 4 + k ) * THREADS_Y + thread.y of the tile
void storeTile( const uint3 thread, const uint4 pos, const uint2 size, in float4 acc[ heightVectors ] )
{
	if( thread.x < size.x )
	{
		// Index of the first element stored by this thread
		const uint4 scaled = pos * resultStrides;
		uint dest = ( scaled.x + scaled.z ) + ( scaled.y + scaled.w );
		dest += resultStrides.y * thread.y;
		dest += resultStrides.x * thread.x;

		// Two consecutive lanes of the accumulators are THREADS_Y rows apart
		const uint destStep = resultStrides.y * THREADS_Y;
		uint row = thread.y;

		[unroll]
		for( uint iv = 0; iv < heightVectors; iv++ )
		{
			const float4 source = acc[ iv ];
			[unroll]
			for( uint k = 0; k < 4; k++, row += THREADS_Y, dest += destStep )
			{
				if( row < size.y )
					result[ dest ] = source[ k ];
			}
		}
	}
}

// Every thread group computes one tile of the output, up to TILE_SIZE^2 elements.
// group.xy is the position of the tile in the output matrix, in tiles; group.z is the flattened index of the two outer dimensions.
[ numthreads( TILE_SIZE, THREADS_Y, 1 ) ]
void main( uint3 group: SV_GroupID, uint3 thread : SV_GroupThreadID )
{
	// Zero out these shared buffers
	for( uint i = 0; i < TILE_SIZE; i += THREADS_Y )
	{
		tile0[ i + thread.y ][ thread.x ] = 0.0;
#if !STREAM_SECOND_MATRIX
		tile1[ i + thread.y ][ thread.x ] = 0.0;
#endif
	}
	// Despite being inside GPU cores, the shared memory is still much slower than registers
	// For this reason, this shader accumulates numbers in local variables. Only uses groupshared memory for tiles of the argument matrices.
	float4 acc[ heightVectors ];
	// Zero out the accumulators
	// Note the loop reuses the variable declared in the previous one
	[unroll]
	for( i = 0; i < heightVectors; i++ )
		acc[ i ] = 0.0;

	// Position of the output tile, in elements
	const uint2 resultPos = group.xy * TILE_SIZE;
	// Unflatten group.z into the indices of the two outer dimensions
	const uint2 layer = uint2( group.z % resultSize.z, group.z / resultSize.z );
	// Indices of the first elements of the source tiles; the X position of the output selects the rows of arg0, the Y position selects the rows of arg1
	uint rsi0 = resultPos.x * arg0Strides.y + layer.x * arg0Strides.z + layer.y * arg0Strides.w;
	uint rsi1 = resultPos.y * arg1Strides.y + layer.x * arg1Strides.z + layer.y * arg1Strides.w;

	// Distance between two consecutive source tiles
	const uint rsi0Inc = TILE_SIZE * arg0Strides.x;
	const uint rsi1Inc = TILE_SIZE * arg1Strides.x;

	const uint completeTiles = arg0Size.x / TILE_SIZE;
	// Value of rsi0 after the last complete tile
	const uint rsi0AndAligned = rsi0 + rsi0Inc * completeTiles;
	// Output tile size
	// Normally TILE_SIZE^2, less than that for the tiles at the right and bottom edges of the output matrix
	const uint2 outputSize = min( TILE_SIZE, resultSize.xy - resultPos );

	const bool2 loadOrder = LOAD_ORDER;

#if STREAM_SECOND_MATRIX
	rsi1 += thread.y * arg1Strides.y;
#endif
	for( ; rsi0 < rsi0AndAligned; rsi0 += rsi0Inc, rsi1 += rsi1Inc )
	{
		loadTile0( rsi0, thread, TILE_SIZE, outputSize.x, loadOrder.x );
#if STREAM_SECOND_MATRIX
		GroupMemoryBarrierWithGroupSync();
		multiplyTiles( rsi1, thread, TILE_SIZE, outputSize.y, acc );
#else
		loadTile1( rsi1, thread, TILE_SIZE, outputSize.y, loadOrder.y );
		GroupMemoryBarrierWithGroupSync();
		multiplyTiles( thread, acc );
#endif
		// Need one more barrier here.
		// Otherwise, some threads of the group are loading the next tile into tile0/tile1 groupshared buffers on the next iteration of the loop,
		// while other threads of the same group are still computing the matrix product, and getting incorrect values from that groupshared buffer.
		// The missing barrier only caused a bug on AMD, and only with "ggml-large.bin" model; no idea why that is.
		GroupMemoryBarrierWithGroupSync();
	}

	// The last, incomplete pair of tiles; the load functions pad them with zeros
	const uint rem = arg0Size.x % TILE_SIZE;
	if( 0 != rem )
	{
		loadTile0( rsi0, thread, rem, outputSize.x, loadOrder.x );
#if STREAM_SECOND_MATRIX
		GroupMemoryBarrierWithGroupSync();
		multiplyTiles( rsi1, thread, rem, outputSize.y, acc );
#else
		loadTile1( rsi1, thread, rem, outputSize.y, loadOrder.y );
		GroupMemoryBarrierWithGroupSync();
		multiplyTiles( thread, acc );
#endif
		// No barrier is needed after the last product, the groupshared buffers are not written anymore
	}

	storeTile( thread, uint4( resultPos, layer ), outputSize, acc );
}

================================================
FILE: ComputeShaders/mulMatTiledEx.hlsl
================================================
// This compute shader implements yet another version of matrix*matrix product
// For optimal VRAM access pattern, it requires both arguments to be reshaped into a sequence of horizontal column major panels.
// The panel height is TILE_SIZE, and the last panel of the matrix needs to be padded with zeros; see matReshapePanels.hlsl shader for the reshaping.
// So far, it's only used when running on AMD GPUs.
#ifndef TILE_SIZE
// Height of the panels of both source matrices
static const uint TILE_SIZE = 32;
#endif
#ifndef TILE_HEIGHT
// First dimension of the groupshared tiles
static const uint TILE_HEIGHT = 64;
#endif
#ifndef THREADS_Y
static const uint THREADS_Y = 8;
#endif
// The above values have the following constraint: TILE_SIZE = THREADS_Y * N * 4 where N is an integer

#ifndef STREAM_SECOND_MATRIX
// When non-zero, the shader doesn't keep a tile of the second matrix in groupshared memory, and loads arg1 inside multiplyTiles() instead
#define STREAM_SECOND_MATRIX 1
#endif

// First tensor, reshaped into dense column major horizontal panels of size [ width, TILE_SIZE ]
Buffer<float> arg0: register( t0 );
// Second tensor, reshaped into dense column major horizontal panels of size [ width, TILE_SIZE ]
Buffer<float> arg1: register( t1 );
// FP32 output tensor, row major and continuous
RWBuffer<float> result: register( u0 );

cbuffer Constants: register( b0 )
{
	uint4 arg0Size: packoffset( c0 );
	// These two fields replace the Y, Z and W strides of arg0 in the c1 register
	uint arg0panel: packoffset( c1.y );
	uint2 arg0LayerStrides: packoffset( c1.z );

	// uint4 arg1Size: packoffset( c2 );
	// These two fields replace the Y, Z and W strides of arg1 in the c3 register
	uint arg1panel: packoffset( c3.y );
	uint2 arg1LayerStrides: packoffset( c3.z );

	uint4 resultSize: packoffset( c4 );
	uint4 resultStrides: packoffset( c5 );
}

// A smaller tile loaded from the first source matrix
groupshared float tile0[ TILE_HEIGHT ][ TILE_SIZE ];
#if !STREAM_SECOND_MATRIX
// A smaller tile loaded from the second source matrix
groupshared float tile1[ TILE_HEIGHT ][ TILE_SIZE ];
#endif

// Count of FP32 accumulators we need in every thread of the shader
static const uint heightScalars = TILE_SIZE / THREADS_Y;
// The local accumulators are float4 vectors, compute count of these vectors
static const uint heightVectors = ( heightScalars + 3 ) / 4;

#if STREAM_SECOND_MATRIX
// Multiply the first `h` rows of tile0 by the matching slice of the second matrix, and add the products to the accumulators.
// This version does not use a groupshared tile for the second matrix: the values are loaded directly from the arg1 buffer.
// `rsi` is the index in arg1 of the first element this thread needs; consecutive accumulator lanes are THREADS_Y elements apart.
void multiplyTiles( const uint3 thread, uint rsi, const uint h, inout float4 acc[ heightVectors ] )
{
	// Load positions for the 4 lanes of the first accumulator vector
	const uint4 laneOffsets = uint4( 0, THREADS_Y, THREADS_Y * 2, THREADS_Y * 3 );
	uint4 panelPos = rsi + laneOffsets;

	[unroll]
	for( uint iv = 0; iv < heightVectors; iv++ )
	{
		float4 partial = 0.0;
		uint4 src = panelPos;
		for( uint j = 0; j < h; j++ )
		{
			// Same value from the first matrix is used for all 4 lanes
			const float a = tile0[ j ][ thread.x ];
			const float4 b = float4( arg1[ src.x ], arg1[ src.y ], arg1[ src.z ], arg1[ src.w ] );
			partial = mad( a, b, partial );
			// Advance by TILE_SIZE elements for the next value of j
			src += TILE_SIZE;
		}
		acc[ iv ] += partial;
		// The next accumulator vector covers the next 4 * THREADS_Y elements
		panelPos += THREADS_Y * 4;
	}
}
#else
// Multiply the two tiles collected in the groupshared buffers, and add the products to the accumulators.
// Always processes the complete TILE_HEIGHT; the caller zero-fills the unused part of the tiles.
void multiplyTiles( const uint3 thread, inout float4 acc[ heightVectors ] )
{
	[unroll]
	for( uint iv = 0; iv < heightVectors; iv++ )
	{
		// Second index into tile1 for each of the 4 lanes of this accumulator vector
		const uint4 rows = ( iv * 4 + uint4( 0, 1, 2, 3 ) ) * THREADS_Y + thread.y;

		float4 partial = 0.0;
		for( uint j = 0; j < TILE_HEIGHT; j++ )
		{
			const float a = tile0[ j ][ thread.x ];
			const float4 b = float4( tile1[ j ][ rows.x ], tile1[ j ][ rows.y ], tile1[ j ][ rows.z ], tile1[ j ][ rows.w ] );
			partial = mad( a, b, partial );
		}
		acc[ iv ] += partial;
	}
}
#endif

// Store the accumulators of this thread into the result buffer.
// `pos` is the 4D position of the output tile, multiplied by resultStrides to get the buffer offset.
// `size` is the count of valid elements in the tile along X and Y; elements outside of that range are not written.
void storeTile( const uint3 thread, const uint4 pos, const uint2 size, in float4 acc[ heightVectors ] )
{
	// Threads outside of the valid width have nothing to store
	if( thread.x >= size.x )
		return;

	// Offset of the element [ thread.x, thread.y ] of the tile
	const uint4 scaled = pos * resultStrides;
	uint base = ( scaled.x + scaled.z ) + ( scaled.y + scaled.w );
	base += resultStrides.y * thread.y;
	base += resultStrides.x * thread.x;

	const uint4 lanes = uint4( 0, 1, 2, 3 );
	// Distance in the output buffer between two consecutive lanes of an accumulator
	const uint laneStep = resultStrides.y * THREADS_Y;
	uint4 dest = base + laneStep * lanes;
	// Y coordinates within the tile for the 4 lanes
	uint4 rows = thread.y + THREADS_Y * lanes;

	[unroll]
	for( uint iv = 0; iv < heightVectors; iv++ )
	{
		const float4 values = acc[ iv ];
		[unroll]
		for( uint k = 0; k < 4; k++ )
		{
			if( rows[ k ] < size.y )
				result[ dest[ k ] ] = values[ k ];
		}
		dest += laneStep * 4;
		rows += THREADS_Y * 4;
	}
}

// Entry point. Each thread group computes one [ TILE_SIZE, TILE_SIZE ] tile of the output, for one layer.
// group.xy selects the output tile, and also the panels of the two arguments; group.z encodes the two layer indices.
[numthreads( TILE_SIZE, THREADS_Y, 1 )]
void main( const uint3 group: SV_GroupID, const uint3 thread : SV_GroupThreadID )
{
	uint i;
	// Zero all shared buffers
	for( i = thread.y; i < TILE_HEIGHT; i += THREADS_Y )
	{
		tile0[ i ][ thread.x ] = 0.0;
#if !STREAM_SECOND_MATRIX
		tile1[ i ][ thread.x ] = 0.0;
#endif
	}
	// Even though the groupshared memory is inside GPU cores, it is still much slower than registers.
	// For this reason, this shader accumulates numbers in local variables, and only uses groupshared memory for tiles of the argument matrices.
	float4 acc[ heightVectors ];
	// Zero out the accumulators
	[unroll]
	for( i = 0; i < heightVectors; i++ )
		acc[ i ] = 0.0;

	// Split group.z into the two layer indices, using resultSize.z as the count of the first one
	const uint2 layer = uint2( group.z % resultSize.z, group.z / resultSize.z );

	// Start of the source panels for this group: panel selected by group.x / group.y, plus the offset of the layer
	uint rsi0 = group.x * arg0panel + layer.x * arg0LayerStrides.x + layer.y * arg0LayerStrides.y;
	uint rsi1 = group.y * arg1panel + layer.x * arg1LayerStrides.x + layer.y * arg1LayerStrides.y;

	// Offset of the first element loaded by this thread, within a panel
	const uint threadOffset = thread.y * TILE_SIZE + thread.x;
	rsi0 += threadOffset;
#if STREAM_SECOND_MATRIX
	// When streaming, multiplyTiles() reads arg1 directly, and only needs the thread.y offset here
	rsi1 += thread.y;
#else
	rsi1 += threadOffset;
#endif

	// arg0Size.x is the length of the dimension being reduced; process it in blocks of TILE_HEIGHT elements
	const uint completeTiles = arg0Size.x / TILE_HEIGHT;
	for( i = 0; i < completeTiles; i++ )
	{
		// Load [ TILE_SIZE, TILE_HEIGHT ] block into the groupshared buffers: from the first tensor, and unless streaming, from the second one
		for( uint j = thread.y; j < TILE_HEIGHT; j += THREADS_Y )
		{
			tile0[ j ][ thread.x ] = arg0[ rsi0 ];
			rsi0 += THREADS_Y * TILE_SIZE;
#if !STREAM_SECOND_MATRIX
			tile1[ j ][ thread.x ] = arg1[ rsi1 ];
			rsi1 += THREADS_Y * TILE_SIZE;
#endif
		}

		// Wait for all threads in the group to complete these loads
		GroupMemoryBarrierWithGroupSync();

#if STREAM_SECOND_MATRIX
		// Multiply + accumulate, loading the second matrix directly from the arg1 buffer
		multiplyTiles( thread, rsi1, TILE_HEIGHT, acc );
		// Advance the second matrix by the complete block consumed above
		rsi1 += TILE_HEIGHT * TILE_SIZE;
#else
		// Multiply + accumulate the elements collected in the groupshared buffers
		multiplyTiles( thread, acc );
#endif
		// Wait for all threads to finish reading the tiles, the next iteration overwrites them
		GroupMemoryBarrierWithGroupSync();
	}

	// Handle the incomplete last block, if any
	const uint rem = arg0Size.x % TILE_HEIGHT;
	if( rem != 0 )
	{
		// Load [ TILE_SIZE, rem ] block from the source tensors, and zero out the padding elements
		for( uint j = thread.y; j < TILE_HEIGHT; j += THREADS_Y )
		{
			[branch]
			if( j < rem )
			{
				tile0[ j ][ thread.x ] = arg0[ rsi0 ];
				rsi0 += THREADS_Y * TILE_SIZE;
#if !STREAM_SECOND_MATRIX
				tile1[ j ][ thread.x ] = arg1[ rsi1 ];
				rsi1 += THREADS_Y * TILE_SIZE;
#endif
			}
			else
			{
				tile0[ j ][ thread.x ] = 0.0;
#if !STREAM_SECOND_MATRIX
				tile1[ j ][ thread.x ] = 0.0;
#endif
			}
		}

		// Wait for all threads in the group to complete these loads
		GroupMemoryBarrierWithGroupSync();

		// Multiply + accumulate the elements collected in the groupshared buffers
#if STREAM_SECOND_MATRIX
		// The streaming version only processes the `rem` valid rows
		multiplyTiles( thread, rsi1, rem, acc );
#else
		multiplyTiles( thread, acc );
#endif
		GroupMemoryBarrierWithGroupSync();
	}

	// Position of the output tile, and count of valid elements in it: tiles on the edge of the result are clipped
	const uint2 resultPos = group.xy * TILE_SIZE;
	const uint2 outputSize = min( TILE_SIZE, resultSize.xy - resultPos );
	storeTile( thread, uint4( resultPos, layer ), outputSize, acc );
}

================================================
FILE: ComputeShaders/norm.hlsl
================================================
// Ported from ggml_compute_forward_norm_f32
// Dispatch [ ne01, ne02, ne03 ] thread groups of this shader
Buffer<float> arg0: register( t0 );
RWBuffer<float> result: register( u0 );

cbuffer Constants: register( b0 )
{
	uint4 src0_elements: packoffset( c0 );
	uint4 src0_strides: packoffset( c1 );
	uint4 result_strides: packoffset( c3 );
}

static const float eps = 1e-5f; // TODO: make this a parameter

#include "groupReduce.hlsli"

// Compute the sum of `length` elements of arg0 starting at index `i`.
// The 32 threads of the group each add every 32-nd element, then the partial sums are combined with horizontalSumBroadcast().
// NOTE(review): horizontalSumBroadcast() comes from groupReduce.hlsli; presumably every thread gets the total - confirm there.
float computeVectorSum( uint i, const uint length, const uint thread )
{
	const uint rowEnd = i + length;

	float partial = 0.0;
	for( uint idx = i + thread; idx < rowEnd; idx += 32 )
		partial += arg0[ idx ];

	horizontalSumBroadcast( thread, partial );
	return partial;
}

// Subtract `mean` from `length` elements of arg0 starting at `rsi`, store the differences into result starting at `rdi`,
// and compute the sum of squares of these differences. Every thread handles every 32-nd element.
// The per-thread sums are combined with horizontalSumBroadcast() from groupReduce.hlsli.
float offsetAndComputeSumSquares( uint rsi, uint rdi, const float mean, const uint length, const uint thread )
{
	const uint sourceEnd = rsi + length;
	uint source = rsi + thread;
	uint dest = rdi + thread;

	float partial = 0.0;
	while( source < sourceEnd )
	{
		const float centered = arg0[ source ] - mean;
		result[ dest ] = centered;
		partial = mad( centered, centered, partial );

		source += 32;
		dest += 32;
	}

	horizontalSumBroadcast( thread, partial );
	return partial;
}

// Multiply `length` elements of the result buffer starting at `rdi` by `scale`, in place.
// Every thread handles every 32-nd element.
void scaleVector( uint rdi, const float scale, const uint length, const uint thread )
{
	const uint rowEnd = rdi + length;
	for( uint idx = rdi + thread; idx < rowEnd; idx += 32 )
	{
		const float scaled = result[ idx ] * scale;
		result[ idx ] = scaled;
	}
}

// Entry point: each thread group normalizes one row of the source tensor, selected by the group index
[ numthreads( 32, 1, 1 ) ]
void main( uint3 group: SV_GroupID, uint thread : SV_GroupIndex )
{
	// Row length, as an integer and as a float
	const uint rowLength = src0_elements[ 0 ];
	const float rowLengthF = (float)(int)rowLength;

	// Start of the row in the source and destination buffers
	const uint source = group.x * src0_strides[ 1 ] + group.y * src0_strides[ 2 ] + group.z * src0_strides[ 3 ];
	const uint dest = group.x * result_strides[ 1 ] + group.y * result_strides[ 2 ] + group.z * result_strides[ 3 ];

	// Average of the row
	float mean = computeVectorSum( source, rowLength, thread );
	mean /= rowLengthF;

	// Write ( x - mean ) into the output, and get the sum of squares of these values
	const float sum2 = offsetAndComputeSumSquares( source, dest, mean, rowLength, thread );

	// 1 / sqrt( variance + eps )
	const float scale = 1.0 / sqrt( sum2 / rowLengthF + eps );

	// Apply the scale to the values already in the output
	scaleVector( dest, scale, rowLength, thread );
}

================================================
FILE: ComputeShaders/normCompat.hlsl
================================================
// Ported from ggml_compute_forward_norm_f32
// Dispatch [ ( ne01 + 31 ) / 32, ne02, ne03 ] thread groups of this shader
Buffer<float> arg0: register( t0 );
RWBuffer<float> result: register( u0 );

cbuffer Constants: register( b0 )
{
	uint4 src0_elements: packoffset( c0 );
	uint4 src0_strides: packoffset( c1 );
	uint4 result_strides: packoffset( c3 );
}

static const double eps = 1e-5; // TODO: make this a parameter

#include "groupReduce.hlsli"

// Compute the sum of `length` consecutive elements of arg0 starting at index `i`, accumulating in FP64.
// The complete row is handled by the calling thread.
double computeVectorSum( uint i, const uint length )
{
	const uint rowEnd = i + length;
	double total = 0.0;
	while( i < rowEnd )
	{
		total += arg0[ i ];
		i++;
	}
	return total;
}

// Subtract `mean` from `length` elements of arg0 starting at `rsi`, store the differences (rounded to FP32) into result starting at `rdi`,
// and return the FP64 sum of squares of these differences. The complete row is handled by the calling thread.
double offsetAndComputeSumSquares( uint rsi, uint rdi, const double mean, const uint length )
{
	// The accumulator is marked precise, and the product is computed in a separate statement from the addition
	precise double sum2 = 0.0;
	const uint sourceEnd = rsi + length;
	while( rsi < sourceEnd )
	{
		double centered = arg0[ rsi ];
		centered -= mean;
		result[ rdi ] = (float)centered;

		double squared = centered * centered;
		sum2 += squared;

		rsi++;
		rdi++;
	}
	return sum2;
}

// Multiply `length` consecutive elements of the result buffer starting at `rdi` by `scale`, in place
void scaleVector( uint rdi, const float scale, const uint length )
{
	const uint rowEnd = rdi + length;
	while( rdi < rowEnd )
	{
		const float scaled = result[ rdi ] * scale;
		result[ rdi ] = scaled;
		rdi++;
	}
}

#include "fp64Utils.hlsli"

// Entry point: unlike the group-parallel version, each thread of this shader normalizes one complete row.
// dtid.x is the row index within the layer, dtid.yz select the layer.
[ numthreads( 32, 1, 1 ) ]
void main( uint3 dtid: SV_DispatchThreadID )
{
	// The dispatch is rounded up to 32 rows per group, the extra threads have nothing to do
	if( dtid.x >= src0_elements[ 1 ] )
		return;

	const uint rowLength = src0_elements[ 0 ];

	// Start of the row in the source and destination buffers
	const uint source = dtid.x * src0_strides[ 1 ] + dtid.y * src0_strides[ 2 ] + dtid.z * src0_strides[ 3 ];
	const uint dest = dtid.x * result_strides[ 1 ] + dtid.y * result_strides[ 2 ] + dtid.z * result_strides[ 3 ];

	// Average of the row, in FP64
	const double total = computeVectorSum( source, rowLength );
	const double mean = div64( total, (double)(int)rowLength );

	// Write ( x - mean ) into the output, and get the sum of squares of these values
	const double sum2 = offsetAndComputeSumSquares( source, dest, mean, rowLength );

	// NOTE(review): this is the only FP64 division in this shader which is not routed through div64() - confirm that's intentional
	const double variance = sum2 / (float)(int)rowLength + eps;
	const float scale = (float)div64( 1.0, sqrt64( variance ) );

	// Apply the scale to the values already in the output
	scaleVector( dest, scale, rowLength );
}

================================================
FILE: ComputeShaders/normFixed.hlsl
================================================
// Ported from ggml_compute_forward_norm_f32
// Dispatch [ ne01, ne02, ne03 ] thread groups of this shader
Buffer<float> arg0: register( t0 );
RWBuffer<float> result: register( u0 );

cbuffer Constants: register( b0 )
{
	uint4 src0_elements: packoffset( c0 );
	uint4 src0_strides: packoffset( c1 );
	uint4 result_strides: packoffset( c3 );
}

static const float eps = 1e-5f; // TODO: make this a parameter

// #include "groupReduce.hlsli"

#ifndef THREADS
static const uint THREADS = 32;
#endif
static const uint ROW_LENGTH = 1024;
groupshared float rowBuffer[ ROW_LENGTH ];

static const uint REDUCTION_BUFFER = 32;
groupshared float sharedAccumulators[ REDUCTION_BUFFER ];

// Compute horizontal sum of the numbers. The result is only correct on the thread #0 of the group.
// The function contains group-wide barriers: every thread of the group must call it, from uniform control flow.
void horizontalSum( const uint thread, inout float sum )
{
	if( THREADS > REDUCTION_BUFFER )
	{
		// Fold the upper batches of REDUCTION_BUFFER threads into the first REDUCTION_BUFFER threads, one batch per iteration
		for( uint t = REDUCTION_BUFFER; t < THREADS; t += REDUCTION_BUFFER )
		{
			// Other threads may still be reading the buffer: either the caller's value in sharedAccumulators[ 0 ]
			// left there after the previous reduction, or the batch stored on the previous iteration of this loop.
			// Wait for them before overwriting the buffer.
			GroupMemoryBarrierWithGroupSync();

			// Threads [ t .. t + REDUCTION_BUFFER ] store into the buffer
			if( thread >= t && thread < t + REDUCTION_BUFFER )
				sharedAccumulators[ thread - t ] = sum;

			GroupMemoryBarrierWithGroupSync();

			// Threads [ 0 .. REDUCTION_BUFFER ] increment their local sum with the value loaded from the buffer
			if( thread < REDUCTION_BUFFER )
				sum += sharedAccumulators[ thread ];
		}
	}
	else
	{
		// The buffer is shared with the caller, who reads sharedAccumulators[ 0 ] between the reductions.
		// Wait for all threads to complete these reads before overwriting the buffer.
		GroupMemoryBarrierWithGroupSync();
	}

	// No barrier is needed here after the loop above: each thread overwrites the same element it was reading
	if( thread < REDUCTION_BUFFER )
		sharedAccumulators[ thread ] = sum;

	// Tree reduction, halving the count of active threads on every step
	for( uint i = REDUCTION_BUFFER / 2; i > 1; i /= 2 )
	{
		GroupMemoryBarrierWithGroupSync();
		if( thread < i )
		{
			sum += sharedAccumulators[ thread + i ];
			sharedAccumulators[ thread ] = sum;
		}
	}

	// The last step only needs the thread #0, and doesn't need to store the result
	GroupMemoryBarrierWithGroupSync();
	if( 0 == thread )
		sum += sharedAccumulators[ 1 ];
}

// Entry point: each thread group normalizes one row of exactly ROW_LENGTH elements.
// The row is kept in the groupshared rowBuffer between the passes, so the source is only loaded once.
[ numthreads( THREADS, 1, 1 ) ]
void main( uint3 group: SV_GroupID, uint thread : SV_GroupIndex )
{
	const uint ne00 = ROW_LENGTH;
	const float rowLengthF = (float)(int)ne00;
	uint i;

	// First pass: copy the data to local buffer, and compute sum
	const uint source = group.x * src0_strides[ 1 ] + group.y * src0_strides[ 2 ] + group.z * src0_strides[ 3 ];
	float sum = 0;
	for( i = thread; i < ne00; i += THREADS )
	{
		const float f = arg0[ source + i ];
		rowBuffer[ i ] = f;
		sum += f;
	}
	horizontalSum( thread, sum );
	// Only the thread #0 has the correct sum; it publishes the average for the rest of the group
	if( 0 == thread )
		sharedAccumulators[ 0 ] = sum / rowLengthF;
	GroupMemoryBarrierWithGroupSync();
	const float mean = sharedAccumulators[ 0 ];

	// Second pass: offset and compute sum of squares
	float sum2 = 0;
	for( i = thread; i < ne00; i += THREADS )
	{
		const float v = rowBuffer[ i ] - mean;
		rowBuffer[ i ] = v;
		sum2 = mad( v, v, sum2 );
	}
	horizontalSum( thread, sum2 );
	// Same as above, this time the thread #0 publishes the scaling factor
	if( 0 == thread )
		sharedAccumulators[ 0 ] = 1.0 / sqrt( sum2 / rowLengthF + eps );
	GroupMemoryBarrierWithGroupSync();
	const float scale = sharedAccumulators[ 0 ];

	// Final pass: apply the scale, and copy from group shared buffer to the destination
	const uint dest = group.x * result_strides[ 1 ] + group.y * result_strides[ 2 ] + group.z * result_strides[ 3 ];
	for( i = thread; i < ne00; i += THREADS )
	{
		const float v = rowBuffer[ i ] * scale;
		result[ dest + i ] = v;
	}
}

================================================
FILE: ComputeShaders/normFixed64.hlsl
================================================
#define THREADS 64
#include "normFixed.hlsl"

================================================
FILE: ComputeShaders/repeatUtils.hlsli
================================================
// Offset of a row of a tensor: the 3 indices are multiplied by the last 3 strides; the first stride is not used
inline uint rowOffset( uint3 idx, uint4 strides )
{
	const uint3 products = idx * strides.yzw;
	return products.x + products.y + products.z;
}

// Initial iterator state for a row of the output tensor
// x = current index, y = index increment, z = end of the index
// The row is selected by `group`; `thread` is the index of the first element for the calling thread, and the increment is THREADS elements.
inline uint3 tensorIteratorState( uint3 group, uint thread, uint4 size, uint4 stride )
{
	const uint elementStride = stride[ 0 ];
	const uint rowStart = rowOffset( group, stride );

	const uint current = rowStart + thread * elementStride;
	const uint increment = THREADS * elementStride;
	const uint end = rowStart + size[ 0 ] * elementStride;
	return uint3( current, increment, end );
}

// Handle a complete row of output tensor, using the iterator made by tensorIteratorState() function
#define ROW_LOOP( ts ) for( ; ts.x < ts.z; ts.x += ts.y )
// Same as above, using different row length
#define ROW_LOOP_EX( ts, len, stride ) for( ; ts.x < ts.z; ts.x += len * stride[ 0 ] )

================================================
FILE: ComputeShaders/scaleInPlace.hlsl
================================================
RWBuffer<float> buffer: register( u0 );

cbuffer Constants: register( b0 )
{
	uint4 src0_elements: packoffset( c0 );
	uint4 src0_strides: packoffset( c1 );
	float multiplier: packoffset( c2.x );
}

// Entry point: each thread group multiplies one row of the buffer by the constant, in place.
// The row is selected by group.x, every thread handles every 32-nd element of the row.
[ numthreads( 32, 1, 1 ) ]
void main( uint3 group: SV_GroupID, uint thread : SV_GroupIndex )
{
	const uint rowStart = group.x * src0_strides[ 1 ];
	const uint rowEnd = rowStart + src0_elements[ 0 ];
	const float mul = multiplier;

	for( uint idx = rowStart + thread; idx < rowEnd; idx += 32 )
	{
		const float scaled = buffer[ idx ] * mul;
		buffer[ idx ] = scaled;
	}
}

================================================
FILE: ComputeShaders/softMax.hlsl
================================================
// Dispatch [ nr, 1, 1 ] thread groups of this shader
RWBuffer<float> result: register( u0 );

cbuffer Constants: register( b0 )
{
	uint4 elements: packoffset( c0 );
	uint4 strides: packoffset( c1 );
	uint nr: packoffset( c2.x );
	float inputScale: packoffset( c2.y );
}

#ifndef THREADS
static const uint THREADS = 32;
#endif

groupshared float sharedAccumulators[ THREADS ];

// Compute horizontal maximum of the numbers, and broadcast to all threads of the group.
// Uses sharedAccumulators as the scratch space; on return, every thread has loaded the result from sharedAccumulators[ 0 ].
void horizontalMaxBroadcast( const uint thread, inout float ax )
{
	sharedAccumulators[ thread ] = ax;

	// Tree reduction: on every step, the lower half of the active threads merges the values from the upper half
	for( uint step = THREADS >> 1; step != 0; step >>= 1 )
	{
		GroupMemoryBarrierWithGroupSync();
		if( thread < step )
		{
			const float other = sharedAccumulators[ thread + step ];
			ax = max( ax, other );
			sharedAccumulators[ thread ] = ax;
		}
	}

	// Wait for the thread #0 to store the final value, then load it on all threads
	GroupMemoryBarrierWithGroupSync();
	ax = sharedAccumulators[ 0 ];
}

// Compute horizontal sum of the numbers. The result is only correct on the thread #0 of the group.
// The function contains group-wide barriers: every thread of the group must call it, from uniform control flow.
void horizontalSum( const uint thread, inout float sum )
{
	// The buffer is shared with horizontalMaxBroadcast(), which ends with all threads reading sharedAccumulators[ 0 ].
	// Wait for every thread to complete that read before overwriting the buffer, otherwise a slow thread may load a partial sum instead of the maximum.
	GroupMemoryBarrierWithGroupSync();

	sharedAccumulators[ thread ] = sum;

	// Tree reduction, halving the count of active threads on every step
	for( uint i = THREADS / 2; i > 1; i /= 2 )
	{
		GroupMemoryBarrierWithGroupSync();
		if( thread < i )
		{
			sum += sharedAccumulators[ thread + i ];
			sharedAccumulators[ thread ] = sum;
		}
	}

	// The last step only needs the thread #0, and doesn't need to store the result
	GroupMemoryBarrierWithGroupSync();
	if( 0 == thread )
		sum += sharedAccumulators[ 1 ];
}

static const float negativeInfinity = asfloat( 0xff800000 );

// Entry point: each thread group computes softmax of one row, in place.
// The row is selected by group.x, every thread handles every THREADS-th element of the row.
[numthreads( THREADS, 1, 1 )]
void main( uint3 group: SV_GroupID, uint thread : SV_GroupIndex )
{
	const uint rowBegin = group.x * strides[ 1 ];
	const uint rowEnd = rowBegin + elements[ 0 ];
	// The first element of the row for this thread
	const uint first = rowBegin + thread;
	uint i;

	// First pass: maximum of the row
	float m = negativeInfinity;
	for( i = first; i < rowEnd; i += THREADS )
		m = max( m, result[ i ] );
	horizontalMaxBroadcast( thread, m );

	// Second pass: replace the values with exponents, and compute their sum
	float sum = 0;
	for( i = first; i < rowEnd; i += THREADS )
	{
		const float f = result[ i ];
		// Negative infinity inputs are replaced with zeros, without evaluating the exponent
		float e = 0;

		[branch]
		if( f != negativeInfinity )
		{
			// On both Radeon Graphics and nVidia 1080Ti, computing the exponent is slightly faster than loading from the lookup table
			e = exp( ( f - m ) * inputScale );
			sum += e;
		}

		result[ i ] = e;
	}

	// Only the thread #0 has the correct sum; it publishes the reciprocal for the rest of the group
	horizontalSum( thread, sum );
	if( 0 == thread )
		sharedAccumulators[ 0 ] = 1.0 / sum;
	GroupMemoryBarrierWithGroupSync();
	const float scale = sharedAccumulators[ 0 ];

	// Final pass, ggml_vec_scale_f32
	for( i = first; i < rowEnd; i += THREADS )
	{
		const float scaled = result[ i ] * scale;
		result[ i ] = scaled;
	}
}

================================================
FILE: ComputeShaders/softMax64.hlsl
================================================
#define THREADS 64
#include "softMax.hlsl"

================================================
FILE: ComputeShaders/softMaxCompat.hlsl
================================================
// ggml_compute_forward_soft_max_f32
// Dispatch [ ( nr + 31 ) / 32, 1, 1 ] thread groups of this shader
RWBuffer<float> result: register( u0 );

// table_exp_f16
Buffer<uint> lookupTable: register( t0 );

cbuffer Constants: register( b0 )
{
	uint4 elements: packoffset( c0 );
	uint4 strides: packoffset( c1 );
	uint nr: packoffset( c2.x );
}

#include "miscUtils.hlsli"
#include "fp64Utils.hlsli"

static const float negativeInfinity = asfloat( 0xff800000 );

// Entry point: each thread of this shader computes softmax of one complete row, in place.
// The exponents are loaded from the lookup table indexed with the FP16 value, and the sum is accumulated in FP64.
[ numthreads( 32, 1, 1 ) ]
void main( uint3 dtid: SV_DispatchThreadID )
{
	// The dispatch is rounded up to 32 rows per group, the extra threads have nothing to do
	if( dtid.x >= nr )
		return;

	const uint rowBegin = dtid.x * strides[ 1 ];
	const uint rowEnd = rowBegin + elements[ 0 ];
	uint i;

	// First pass: maximum of the row
	float m = negativeInfinity;
	for( i = rowBegin; i < rowEnd; i++ )
		m = max( m, result[ i ] );

	// Second pass: replace the values with exponents, and compute their sum
	double sum = 0;
	for( i = rowBegin; i < rowEnd; i++ )
	{
		const float f = result[ i ];
		// Negative infinity inputs are replaced with zeros, without the table lookup
		float e = 0;

		[branch]
		if( f != negativeInfinity )
		{
			const uint key = fp16Rounded( f - m );
			const uint entry = lookupTable[ key ];
			e = f16tof32( entry );
			sum += e;
		}

		result[ i ] = e;
	}

	const float scale = (float)div64( 1.0, sum );
	// Final pass, ggml_vec_scale_f32
	for( i = rowBegin; i < rowEnd; i++ )
	{
		const float scaled = result[ i ] * scale;
		result[ i ] = scaled;
	}
}

================================================
FILE: ComputeShaders/softMaxFixed.hlsl
================================================
// Special softMax shader for matrices with rows of 1500 elements.
// Uses group shared buffer of that length to save global memory bandwidth, more than 2x faster than the original.
// Dispatch [ nr, 1, 1 ] thread groups of this shader
RWBuffer<float> result: register( u0 );

cbuffer Constants: register( b0 )
{
	uint4 elements: packoffset( c0 );
	uint4 strides: packoffset( c1 );
	uint nr: packoffset( c2.x );
	float inputScale: packoffset( c2.y );
}

#include "miscUtils.hlsli"
#include "groupReduce64.hlsli"

static const uint THREADS = 64;
static const uint ROW_LENGTH = 1500;
groupshared float rowBuffer[ ROW_LENGTH ];

static const float negativeInfinity = asfloat( 0xff800000 );

// Entry point: each thread group computes softmax of one row of exactly ROW_LENGTH elements, in place.
// The row is kept in the groupshared rowBuffer between the passes.
[ numthreads( THREADS, 1, 1 ) ]
void main( uint3 group: SV_GroupID, uint thread : SV_GroupIndex )
{
	const uint rowBegin = group.x * strides[ 1 ];
	const uint nc = ROW_LENGTH;
	uint i;

	// First pass: compute maximum, and copy the row into the group shared buffer
	float m = negativeInfinity;
	for( i = thread; i < nc; i += THREADS )
	{
		const float f = result[ rowBegin + i ];
		m = max( m, f );
		rowBuffer[ i ] = f;
	}
	horizontalMaxBroadcast( thread, m );

	// Second pass: apply initial scale, compute the exponent, and compute total sum over the row
	float sum = 0;
	for( i = thread; i < nc; i += THREADS )
	{
		const float f = rowBuffer[ i ];
		// Negative infinity inputs are replaced with zeros
		float e = 0;

		[branch]
		if( f != negativeInfinity )
		{
			const float x = ( f - m ) * inputScale;
#if 1
			// At least on Radeon Graphics GPU inside Ryzen 7 5700G, computing exponent instead of loading from the buffer improves the performance
			e = exp( x );
#else
			const uint s = fp16Rounded( x );
			e = f16tof32( lookupTable[ s ] );
#endif
			sum += e;
		}

		rowBuffer[ i ] = e;
	}

	// Only the thread #0 has the correct sum; it publishes the reciprocal for the rest of the group
	horizontalSum( thread, sum );
	if( 0 == thread )
		sharedAccumulators[ 0 ] = 1.0 / sum;
	GroupMemoryBarrierWithGroupSync();
	const float scale = sharedAccumulators[ 0 ];

	// Final pass: apply the final scale, and copy the row from the group shared buffer back into the global memory
	for( i = thread; i < nc; i += THREADS )
	{
		const float scaled = rowBuffer[ i ] * scale;
		result[ rowBegin + i ] = scaled;
	}
}

================================================
FILE: ComputeShaders/softMaxLong.hlsl
================================================
// This version is for the "dec.probs" shader tag
// The input tensor has a size [ 51865, 3 ], a very long tensor with just 3 rows.
// Even though the shader only runs on 3 GPU cores, a large count of threads helps substantially: this shader is about 50% faster.
#define THREADS 1024

#include "softMax.hlsl"

================================================
FILE: ComputeShaders/zeroMemory.hlsl
================================================
RWBuffer<float> result: register( u0 );

cbuffer Constants: register( b0 )
{
	uint elements: packoffset( c0.x );
	bool writeNan: packoffset( c0.y );
}

// Thread group index is 16 bits per coordinate:
// https://learn.microsoft.com/en-us/windows/win32/api/d3d11/nf-d3d11-id3d11devicecontext-dispatch
// We want this shader to support buffers up to 2 GB.
#ifndef THREADS
static const uint THREADS = 512;
#endif
#ifndef ITERATIONS
static const uint ITERATIONS = 128;
#endif

static const uint itemsPerGroup = THREADS * ITERATIONS;

// Entry point: each thread group fills up to itemsPerGroup consecutive elements of the buffer, selected by group.x.
// Writes either zeros, or the 0x7FFFFFFF NaN bit pattern when writeNan is set.
[numthreads( THREADS, 1, 1 )]
void main( uint3 group: SV_GroupID, uint thread : SV_GroupIndex )
{
	// Range of the elements for this group, clipped to the length of the buffer
	const uint groupBegin = group.x * itemsPerGroup;
	const uint groupEnd = min( groupBegin + itemsPerGroup, elements );

	// https://www.h-schmidt.net/FloatConverter/IEEE754.html
	float pattern = 0.0;
	if( writeNan )
		pattern = asfloat( 0x7FFFFFFFu );

	for( uint idx = groupBegin + thread; idx < groupEnd; idx += THREADS )
		result[ idx ] = pattern;
}

================================================
FILE: Examples/MicrophoneCS/CaptureThread.cs
================================================
using System.Runtime.ExceptionServices;
using Whisper;

namespace MicrophoneCS
{
	// Runs the audio capture on a dedicated thread, until the user presses a key or the capture fails
	sealed class CaptureThread: CaptureCallbacks
	{
		readonly TranscribeCallbacks callbacks;
		readonly Context context;
		readonly iAudioCapture source;
		readonly Thread thread;

		// Set from a thread pool thread once a key is pressed, polled through shouldCancel()
		volatile bool shouldQuit = false;
		// Exception which terminated the capture thread, if any
		ExceptionDispatchInfo? edi = null;

		// Create the callbacks, and immediately start the capture thread
		public CaptureThread( CommandLineArgs args, Context context, iAudioCapture source )
		{
			this.callbacks = new TranscribeCallbacks( args );
			this.context = context;
			this.source = source;

			Thread worker = new Thread( threadMain );
			worker.Name = "Capture Thread";
			this.thread = worker;

			Console.WriteLine( "Press any key to quit" );
			worker.Start();
		}

		// Wait for the capture thread to finish; re-throws the exception if the capture has failed.
		// The wait for a key press happens on a thread pool thread, so this method also returns when the capture ends for another reason.
		public void join()
		{
			ThreadPool.QueueUserWorkItem( readKeyCallback, this );
			thread.Join();
			if( null != edi )
				edi.Throw();
		}

		protected override bool shouldCancel( Context sender )
		{
			return shouldQuit;
		}

		protected override void captureStatusChanged( Context sender, eCaptureStatus status )
		{
			Console.WriteLine( $"CaptureStatusChanged: {status}" );
		}

		// Thread pool callback: block until a key is pressed, then request the capture to stop
		static void readKeyCallback( object? state )
		{
			if( state is not CaptureThread ct )
				throw new ApplicationException();
			Console.ReadKey();
			ct.shouldQuit = true;
		}

		// Entry point of the capture thread. Exceptions are captured, and re-thrown on the thread which calls join()
		void threadMain()
		{
			try
			{
				context.runCapture( source, callbacks, this );
			}
			catch( Exception ex )
			{
				edi = ExceptionDispatchInfo.Capture( ex );
			}
		}
	}
}

================================================
FILE: Examples/MicrophoneCS/CommandLineArgs.cs
================================================
using System.Globalization;
using System.Reflection;
using Whisper;

namespace MicrophoneCS
{
	// Command-line options of the microphone example.
	// The constructor parses the arguments; apply() and resultFlags() translate them into the library parameters.
	sealed record class CommandLineArgs
	{
		public int n_threads = Environment.ProcessorCount;
		public int offset_t_ms = 0;
		public int offset_n = 0;
		public int duration_ms = 0;
		public int max_context = -1;
		public int max_len = 0;

		public float word_thold = 0.01f;

		public bool speed_up = false;
		public bool translate = false;
		public bool diarize = false;
		public bool output_txt = false;
		public bool print_special = false;
		public bool print_progress = false;
		public bool print_colors = true;
		public bool no_timestamps = false;
		public int[]? prompt = null;
		public int captureDeviceIndex = 0;

		public eLanguage language = eLanguage.English;
		public string model = string.Empty;

		const bool output_wts = false;
		public bool listDevices = false;

		// Copy the options into the parameters of the context
		public void apply( ref Parameters p )
		{
			p.setFlag( eFullParamsFlags.PrintRealtime, false );
			p.setFlag( eFullParamsFlags.PrintProgress, print_progress );
			p.setFlag( eFullParamsFlags.PrintTimestamps, !no_timestamps );
			p.setFlag( eFullParamsFlags.PrintSpecial, print_special );
			p.setFlag( eFullParamsFlags.Translate, translate );
			p.language = language;
			p.cpuThreads = n_threads;
			// Negative value means "keep the default of the library"
			if( max_context >= 0 )
				p.n_max_text_ctx = max_context;
			p.offset_ms = offset_t_ms;
			p.duration_ms = duration_ms;
			p.setFlag( eFullParamsFlags.TokenTimestamps, output_wts || max_len > 0 );
			p.thold_pt = word_thold;
			p.max_len = output_wts && max_len == 0 ? 60 : max_len;
			p.setFlag( eFullParamsFlags.SpeedupAudio, speed_up );
		}

		// Compute the flags for the results, based on the options
		public eResultFlags resultFlags()
		{
			eResultFlags flags = eResultFlags.None;
			bool wts = output_wts || max_len > 0;
			if( !no_timestamps || wts )
				flags |= eResultFlags.Timestamps;
			if( wts || print_colors )
				flags |= eResultFlags.Tokens;
			return flags;
		}

		static eLanguage parseLanguage( string lang ) =>
			Library.languageFromCode( lang ) ?? throw new ArgumentException( $"Unknown language code \"{lang}\"" );

		// Consume the value which follows the option at argv[ i ], and advance the index to that value.
		// Throws ArgumentException when the option is the last argument, instead of reading past the end of the array.
		static string optionValue( string[] argv, ref int i )
		{
			string option = argv[ i ];
			int next = i + 1;
			if( next >= argv.Length )
				throw new ArgumentException( $"The \"{option}\" argument requires a value" );
			i = next;
			return argv[ next ];
		}

		// Same as above for integer options. The invariant culture makes the parsing independent of the regional settings,
		// consistent with the floating-point options.
		static int intValue( string[] argv, ref int i ) =>
			int.Parse( optionValue( argv, ref i ), CultureInfo.InvariantCulture );

		// Parse the command-line arguments.
		// Throws OperationCanceledException after printing the usage when help was requested,
		// ArgumentException for unknown or incomplete arguments, FileNotFoundException when the model file does not exist.
		public CommandLineArgs( string[] argv )
		{
			for( int i = 0; i < argv.Length; i++ )
			{
				string arg = argv[ i ];
				switch( arg )
				{
					case "-h":
					case "--help":
						printUsage();
						throw new OperationCanceledException();
					case "-c":
					case "--capture":
						captureDeviceIndex = intValue( argv, ref i );
						break;
					case "-ld":
					case "--list-devices":
						listDevices = true;
						break;
					case "-t":
					case "--threads":
						n_threads = intValue( argv, ref i );
						break;
					case "-ot":
					case "--offset-t":
						offset_t_ms = intValue( argv, ref i );
						break;
					case "-on":
					case "--offset-n":
						offset_n = intValue( argv, ref i );
						break;
					case "-d":
					case "--duration":
						duration_ms = intValue( argv, ref i );
						break;
					case "-mc":
					case "--max-context":
						max_context = intValue( argv, ref i );
						break;
					case "-ml":
					case "--max-len":
						max_len = intValue( argv, ref i );
						break;
					case "-wt":
					case "--word-thold":
						word_thold = float.Parse( optionValue( argv, ref i ), CultureInfo.InvariantCulture );
						break;
					case "-su":
					case "--speed-up":
						speed_up = true;
						break;
					case "-tr":
					case "--translate":
						translate = true;
						break;
					case "-di":
					case "--diarize":
						diarize = true;
						break;
					case "-otxt":
					case "--output-txt":
						output_txt = true;
						break;
					case "-ps":
					case "--print-special":
						print_special = true;
						break;
					case "-nc":
					case "--no-colors":
						print_colors = false;
						break;
					case "-pp":
					case "--print-progress":
						print_progress = true;
						break;
					case "-nt":
					case "--no-timestamps":
						no_timestamps = true;
						break;
					case "-l":
					case "--language":
						language = parseLanguage( optionValue( argv, ref i ) );
						break;
					case "--prompt":
						prompt = parsePrompt( optionValue( argv, ref i ) );
						break;
					case "-m":
					case "--model":
						model = optionValue( argv, ref i );
						break;
					default:
						throw new ArgumentException( $"Unknown argument: \"{arg}\"" );
				}
			}

			// Listing the devices doesn't need a model
			if( listDevices )
				return;
			if( string.IsNullOrWhiteSpace( model ) )
				throw new ArgumentException( "The model file is not provided in the arguments" );
			if( !File.Exists( model ) )
				throw new FileNotFoundException( "Model not found", model );
		}

		static string cstr( bool b ) => b.ToString();

		static int[]? parsePrompt( string str )
		{
			if( string.IsNullOrWhiteSpace( str ) )
				return null;
			// TODO: expose whisper_tokenize function, as a method of iModel COM interface
			throw new NotImplementedException();
		}

		// Print the list of the options accepted by the constructor.
		// The values in the square brackets are the current ones, i.e. the defaults unless overridden by the preceding arguments.
		void printUsage()
		{
			Console.WriteLine();

			Console.WriteLine( "usage: {0} [options]", Path.GetFileName( Assembly.GetExecutingAssembly().Location ) );
			Console.WriteLine();
			Console.WriteLine( "options:" );
			Console.WriteLine( "  -h,       --help          [default] show this help message and exit" );
			Console.WriteLine( "  -c N,     --capture N     [{0,-7:D}] index of the audio capture device", captureDeviceIndex );
			Console.WriteLine( "  -ld,      --list-devices  [{0,-7}] list audio capture devices and exit", cstr( listDevices ) );
			Console.WriteLine( "  -t N,     --threads N     [{0,-7:D}] number of threads to use during computation", n_threads );
			Console.WriteLine( "  -ot N,    --offset-t N    [{0,-7:D}] time offset in milliseconds", offset_t_ms );
			Console.WriteLine( "  -on N,    --offset-n N    [{0,-7:D}] segment index offset", offset_n );
			Console.WriteLine( "  -d  N,    --duration N    [{0,-7:D}] duration of audio to process in milliseconds", duration_ms );
			Console.WriteLine( "  -mc N,    --max-context N [{0,-7:D}] maximum number of text context tokens to store", max_context );
			Console.WriteLine( "  -ml N,    --max-len N     [{0,-7:D}] maximum segment length in characters", max_len );
			Console.WriteLine( "  -wt N,    --word-thold N  [{0,-7:F2}] word timestamp probability threshold", word_thold );
			Console.WriteLine( "  -su,      --speed-up      [{0,-7}] speed up audio by x2 (reduced accuracy)", cstr( speed_up ) );
			Console.WriteLine( "  -tr,      --translate     [{0,-7}] translate from source language to english", cstr( translate ) );
			Console.WriteLine( "  -di,      --diarize       [{0,-7}] stereo audio diarization", cstr( diarize ) );
			Console.WriteLine( "  -otxt,    --output-txt    [{0,-7}] output result in a text file", cstr( output_txt ) );
			Console.WriteLine( "  -ps,      --print-special [{0,-7}] print special tokens", cstr( print_special ) );
			Console.WriteLine( "  -nc,      --no-colors     [{0,-7}] do not print colors", cstr( !print_colors ) );
			Console.WriteLine( "  -pp,      --print-progress [{0,-7}] print progress", cstr( print_progress ) );
			Console.WriteLine( "  -nt,      --no-timestamps [{0,-7}] do not print timestamps", cstr( no_timestamps ) );
			Console.WriteLine( "  -l LANG,  --language LANG [{0,-7}] spoken language", language.getCode() );
			Console.WriteLine( "            --prompt PROMPT [       ] initial prompt" );
			Console.WriteLine( "  -m FNAME, --model FNAME   [{0,-7}] model path", model );
		}
	}
}

================================================
FILE: Examples/MicrophoneCS/MicrophoneCS.cs
================================================
using Whisper;

namespace MicrophoneCS
{
	/// <summary>Entry point of the microphone capture example</summary>
	static class Program
	{
		/// <summary>Build the command line arguments object.</summary>
		/// <returns>False when the constructor signalled cancellation with <see cref="OperationCanceledException" />; any other exception propagates to the caller.</returns>
		static bool tryParseArguments( string[] args, out CommandLineArgs parsed )
		{
			try
			{
				parsed = new CommandLineArgs( args );
				return true;
			}
			catch( OperationCanceledException )
			{
				parsed = default!;
				return false;
			}
		}

		/// <summary>Parse arguments, open the selected capture device, load the model, and run the capture thread until it completes.</summary>
		/// <returns>0 on success or after listing devices, 1 when argument parsing was cancelled, otherwise the HRESULT of the failure.</returns>
		static int Main( string[] args )
		{
			try
			{
				if( !tryParseArguments( args, out CommandLineArgs cla ) )
					return 1;

				Library.setLogSink( eLogLevel.Debug, eLoggerFlags.UseStandardError | eLoggerFlags.SkipFormatMessage );

				using iMediaFoundation mf = Library.initMediaFoundation();
				var devices = mf.listCaptureDevices();
				if( null == devices )
					throw new ApplicationException( "This computer has no audio capture devices" );

				if( cla.listDevices )
				{
					// Print every device together with the index expected on the command line
					int index = 0;
					foreach( var device in devices )
					{
						Console.WriteLine( "#{0}: {1}", index, device.displayName );
						index++;
					}
					return 0;
				}

				var selected = cla.captureDeviceIndex;
				bool inRange = selected >= 0 && selected < devices.Length;
				if( !inRange )
					throw new ApplicationException( $"Capture device index is out of range; the valid range is [ 0 .. {devices.Length - 1} ]" );

				// Diarization requires both channels, so request stereo capture for it
				sCaptureParams captureParams = new sCaptureParams( true );
				if( cla.diarize )
					captureParams.flags |= eCaptureFlags.Stereo;
				using iAudioCapture captureDev = mf.openCaptureDevice( devices[ selected ], captureParams );

				using iModel model = Library.loadModel( cla.model );
				using Context context = model.createContext();
				cla.apply( ref context.parameters );

				CaptureThread worker = new CaptureThread( cla, context, captureDev );
				worker.join();

				context.timingsPrint();
				return 0;
			}
			catch( Exception ex )
			{
				// Print the complete exception, including the stack trace
				Console.WriteLine( ex );
				return ex.HResult;
			}
		}
	}
}

================================================
FILE: Examples/MicrophoneCS/MicrophoneCS.csproj
================================================
<Project Sdk="Microsoft.NET.Sdk">

	<!-- Console executable targeting .NET 6 on Windows; x64 is the only declared platform. -->
	<PropertyGroup>
		<OutputType>Exe</OutputType>
		<TargetFramework>net6.0-windows</TargetFramework>
		<ImplicitUsings>enable</ImplicitUsings>
		<Nullable>enable</Nullable>
		<!-- Integer arithmetic is overflow-checked by default in this project. -->
		<CheckForOverflowUnderflow>true</CheckForOverflowUnderflow>
		<!-- Build output goes directly into the configuration folder, without a target framework sub-folder. -->
		<AppendTargetFrameworkToOutputPath>false</AppendTargetFrameworkToOutputPath>
		<Platforms>x64</Platforms>
	</PropertyGroup>

	<!-- AnsiCodes.cs is shared with the TranscribeCS example; it is compiled from there as a linked file. -->
	<ItemGroup>
	  <Compile Include="..\TranscribeCS\AnsiCodes.cs" Link="AnsiCodes.cs" />
	</ItemGroup>

	<!-- Copy Whisper.dll from the solution's x64 output folder of the same configuration next to the executable. -->
	<ItemGroup>
		<Content Include="..\..\x64\$(Configuration)\Whisper.dll" Link="Whisper.dll">
			<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
		</Content>
	</ItemGroup>

	<!-- The .NET wrapper library used by this example. -->
	<ItemGroup>
		<ProjectReference Include="..\..\WhisperNet\WhisperNet.csproj" />
	</ItemGroup>

</Project>

================================================
FILE: Examples/MicrophoneCS/Readme.txt
================================================
This example builds a .NET 6 console application that shows how to use the audio capture API of the .NET wrapper.

================================================
FILE: Examples/MicrophoneCS/TranscribeCallbacks.cs
================================================
using System.Globalization;
using Whisper;

namespace MicrophoneCS
{
	/// <summary>Implementation of the <see cref="Callbacks" /> abstract class which prints new segments as soon as they're produced by the library.</summary>
	sealed class TranscribeCallbacks: Callbacks
	{
		readonly CommandLineArgs args;
		readonly eResultFlags resultFlags;

		/// <summary>Remember the command line arguments, cache the result flags they imply, and switch console output to UTF-8.</summary>
		public TranscribeCallbacks( CommandLineArgs args )
		{
			this.args = args;
			resultFlags = args.resultFlags();
			Console.OutputEncoding = System.Text.Encoding.UTF8;
		}

		// Terminal color map. 10 colors grouped in ranges [0.0, 0.1, ..., 0.9]
		// Lowest is red, middle is yellow, highest is green.
		static readonly string[] k_colors = new string[]
		{
			"\x1B[38;5;196m", "\x1B[38;5;202m", "\x1B[38;5;208m", "\x1B[38;5;214m", "\x1B[38;5;220m",
			"\x1B[38;5;226m", "\x1B[38;5;190m", "\x1B[38;5;154m", "\x1B[38;5;118m", "\x1B[38;5;82m"
		};

		/// <summary>Escape sequence written after every colored token</summary>
		const string k_resetColor = "\x1B[0m";

		/// <summary>Index into <see cref="k_colors" /> for the token: the cube of its probability scaled to the palette size, clamped to the valid range.</summary>
		static int colorIndex( in sToken tok )
		{
			float p = tok.probability;
			float p3 = p * p * p;
			int col = (int)( p3 * k_colors.Length );
			return Math.Clamp( col, 0, k_colors.Length - 1 );
		}

		/// <summary>Format a timestamp as hh:mm:ss.fff</summary>
		public static string printTime( TimeSpan ts ) =>
			ts.ToString( "hh':'mm':'ss'.'fff", CultureInfo.InvariantCulture );

		/// <summary>Format a timestamp as hh:mm:ss,fff</summary>
		public static string printTimeWithComma( TimeSpan ts ) =>
			ts.ToString( "hh':'mm':'ss','fff", CultureInfo.InvariantCulture );

		/// <summary>Write the tokens of one segment, each wrapped in the color matching its probability.</summary>
		/// <remarks>Tokens flagged as special are skipped unless the print_special argument is set.</remarks>
		void writeColoredTokens( TranscribeResult res, sSegment seg )
		{
			foreach( sToken tok in res.getTokens( seg ) )
			{
				if( !args.print_special && tok.hasFlag( eTokenFlags.Special ) )
					continue;
				Console.Write( "{0}{1}{2}", k_colors[ colorIndex( tok ) ], tok.text, k_resetColor );
			}
		}

		/// <summary>Print the last <paramref name="countNew" /> segments of the current results.</summary>
		/// <param name="sender">Context which produced the segments</param>
		/// <param name="countNew">Count of segments added since the previous call</param>
		protected override void onNewSegment( Context sender, int countNew )
		{
			TranscribeResult res = sender.results( resultFlags );

			int s0 = res.segments.Length - countNew;
			// Start with an empty line when the new segments are the first ones in the results
			if( s0 == 0 )
				Console.WriteLine();

			bool useColors = args.print_colors && AnsiCodes.enabled;

			for( int i = s0; i < res.segments.Length; i++ )
			{
				sSegment seg = res.segments[ i ];

				if( args.no_timestamps )
				{
					// Without timestamps the text is written as one continuous stream, with no line breaks between segments
					if( useColors )
						writeColoredTokens( res, seg );
					else
						Console.Write( seg.text );
					Console.Out.Flush();
					continue;
				}

				string speaker = "";
				if( args.diarize )
				{
					speaker = sender.detectSpeaker( seg.time ) switch
					{
						eSpeakerChannel.Unsure => "(speaker ?)",
						eSpeakerChannel.Left => "(speaker 0)",
						eSpeakerChannel.Right => "(speaker 1)",
						_ => ""
					};
				}

				if( useColors )
				{
					Console.Write( "[{0} --> {1}] {2} ", printTime( seg.time.begin ), printTime( seg.time.end ), speaker );
					writeColoredTokens( res, seg );
					Console.WriteLine();
				}
				else
					Console.WriteLine( "[{0} --> {1}] {2} {3}", printTime( seg.time.begin ), printTime( seg.time.end ), speaker, seg.text );
			}
		}
	}
}

================================================
FILE: Examples/OldMain/OldMain.vcxproj
================================================
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  <ItemGroup Label="ProjectConfigurations">
    <ProjectConfiguration Include="Debug|x64">
      <Configuration>Debug</Configuration>
      <Platform>x64</Platform>
    </ProjectConfiguration>
    <ProjectConfiguration Include="Release|x64">
      <Configuration>Release</Configuration>
      <Platform>x64</Platform>
    </ProjectConfiguration>
  </ItemGroup>
  <PropertyGroup Label="Globals">
    <VCProjectVersion>16.0</VCProjectVersion>
    <Keyword>Win32Proj</Keyword>
    <ProjectGuid>{596f9770-9aeb-49d3-86ca-4200197df12b}</ProjectGuid>
    <RootNamespace>OldMain</RootNamespace>
    <WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
  </PropertyGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>true</UseDebugLibraries>
    <PlatformToolset>v143</PlatformToolset>
    <CharacterSet>Unicode</CharacterSet>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
    <ConfigurationType>Application</ConfigurationType>
    <UseDebugLibraries>false</UseDebugLibraries>
    <PlatformToolset>v143</PlatformToolset>
    <WholeProgramOptimization>true</WholeProgramOptimization>
    <CharacterSet>Unicode</CharacterSet>
  </PropertyGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
  <ImportGroup Label="ExtensionSettings">
  </ImportGroup>
  <ImportGroup Label="Shared">
  </ImportGroup>
  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
  </ImportGroup>
  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
  </ImportGroup>
  <PropertyGroup Label="UserMacros" />
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
    <IncludePath>$(ProjectDir);$(SolutionDir)Whisper\Source\;$(IncludePath)</IncludePath>
  </PropertyGroup>
  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
    <IncludePath>$(ProjectDir);$(SolutionDir)Whisper\Source\;$(IncludePath)</IncludePath>
  </PropertyGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
    <ClCompile>
      <WarningLevel>Level3</WarningLevel>
      <SDLCheck>true</SDLCheck>
      <PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <ConformanceMode>true</ConformanceMode>
      <EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
      <LanguageStandard>stdcpp20</LanguageStandard>
    </ClCompile>
    <Link>
      <SubSystem>Console</SubSystem>
      <GenerateDebugInformation>true</GenerateDebugInformation>
    </Link>
  </ItemDefinitionGroup>
  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
    <ClCompile>
      <WarningLevel>Level3</WarningLevel>
      <FunctionLevelLinking>true</FunctionLevelLinking>
      <IntrinsicFunctions>true</IntrinsicFunctions>
      <SDLCheck>true</SDLCheck>
      <PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
      <ConformanceMode>true</ConformanceMode>
      <EnableEnhancedInstructionSet>AdvancedVectorExtensions2</EnableEnhancedInstructionSet>
      <LanguageStandard>stdcpp20</LanguageStandard>
    </ClCompile>
    <Link>
      <SubSystem>Console</SubSystem>
      <EnableCOMDATFolding>true</EnableCOMDATFolding>
      <OptimizeReferences>true</OptimizeReferences>
      <GenerateDebugInformation>true</GenerateDebugInformation>
      <LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
    </Link>
  </ItemDefinitionGroup>
  <ItemGroup>
    <!-- Translation units of the sample. ggml.c stays listed in the project but is excluded from the build, while ggmlMsvc.c from source.compat is compiled; presumably the latter supplies the ggml code for this compiler (confirm against that file). -->
    <ClCompile Include="..\..\Whisper\source.compat\ggmlMsvc.c" />
    <ClCompile Include="..\..\Whisper\source\ggml.c">
      <ExcludedFromBuild>true</ExcludedFromBuild>
    </ClCompile>
    <ClCompile Include="..\..\Whisper\source\whisper.cpp" />
    <ClCompile Include="main.cpp" />
    <ClCompile Include="Utils\Logger.cpp" />
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="..\..\Whisper\source\ggml.h" />
    <ClInclude Include="..\..\Whisper\source\whisper.h" />
    <ClInclude Include="dr_wav.h" />
    <ClInclude Include="Utils\Logger.h" />
  </ItemGroup>
  <ItemGroup>
    <Text Include="Readme.txt" />
  </ItemGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
  <ImportGroup Label="ExtensionTargets">
  </ImportGroup>
</Project>

================================================
FILE: Examples/OldMain/OldMain.vcxproj.filters
================================================
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  <ItemGroup>
    <ClCompile Include="..\..\Whisper\source\whisper.cpp" />
    <ClCompile Include="main.cpp" />
    <ClCompile Include="..\..\Whisper\source\ggml.c" />
    <ClCompile Include="..\..\Whisper\source.compat\ggmlMsvc.c" />
    <ClCompile Include="Utils\Logger.cpp" />
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="..\..\Whisper\source\ggml.h" />
    <ClInclude Include="..\..\Whisper\source\whisper.h" />
    <ClInclude Include="dr_wav.h" />
    <ClInclude Include="Utils\Logger.h" />
  </ItemGroup>
  <ItemGroup>
    <Text Include="Readme.txt" />
  </ItemGroup>
</Project>

================================================
FILE: Examples/OldMain/Readme.txt
================================================
This project builds the original whisper.cpp command-line sample

================================================
FILE: Examples/OldMain/Utils/Logger.cpp
================================================
#include <stdint.h>
#include <vector>
#include <cstdarg>
#include "Logger.h"

namespace
{
	// Write a single log line to stderr: the level label, ": ", the printf-style formatted message, and a newline
	void logMessage( const char* lvl, const char8_t* pszFormat, std::va_list va )
	{
		FILE* const sink = stderr;
		// The C runtime formatting functions take char strings, so the UTF-8 format pointer is reinterpreted
		const char* const format = reinterpret_cast<const char*>( pszFormat );

		fprintf( sink, "%s: ", lvl );
		vfprintf( sink, format, va );
		fprintf( sink, "\n" );
	}
}

#define LOG_MESSAGE_IMPL( lvl )                \
	std::va_list args;                         \
	va_start( args, pszFormat );               \
	logMessage( lvl, pszFormat, args );        \
	va_end( args );

void logError( const char8_t* pszFormat, ... )
{
	LOG_MESSAGE_IMPL( "Error" );
}

void logWarning( const char8_t* pszFormat, ... )
{
	LOG_MESSAGE_IMPL( "Warning" );
}

void logInfo( const char8_t* pszFormat, ... )
{
	LOG_MESSAGE_IMPL( "Info" );
}

void logDebug( const char8_t* pszFormat, ... )
{
	LOG_MESSAGE_IMPL( "Debug" );
}

================================================
FILE: Examples/OldMain/Utils/Logger.h
================================================
#pragma once

#ifdef  __cplusplus
extern "C" {
#endif

struct ggml_tensor;

// Logging functions, one per severity level.
// pszFormat is a printf-style format string, followed by the values it refers to.
// The implementation in Logger.cpp writes "<level>: <message>" and a newline to stderr.
void logError( const char8_t* pszFormat, ... );
void logWarning( const char8_t* pszFormat, ... );
void logInfo( const char8_t* pszFormat, ... );
void logDebug( const char8_t* pszFormat, ... );

#ifdef  __cplusplus
}

// Stand-ins for the tracing API: every function here has an empty body, so calls to them do nothing in this project.
namespace Tracing
{
	// Name of a traced item; the constructors accept their arguments and discard them
	struct ItemName
	{
		ItemName( const char* ) {}
		ItemName( const char*, uint32_t ) {}
		ItemName( const char*, int ) {}
	};

	inline void tensor( const ItemName&, const ggml_tensor* ) {}
	inline void delayTensor( const ItemName&, const ggml_tensor* ) {}
	inline void vector( const ItemName&, const std::vector<float>& ) {}
	inline void writeDelayedTensors() {}
}
#endif

================================================
FILE: Examples/OldMain/dr_wav.h
================================================
/*
WAV audio loader and writer. Choice of public domain or MIT-0. See license statements at the end of this file.
dr_wav - v0.12.16 - 2020-12-02

David Reid - mackron@gmail.com

GitHub: https://github.com/mackron/dr_libs
*/

/*
RELEASE NOTES - VERSION 0.12
============================
Version 0.12 includes breaking changes to custom chunk handling.


Changes to Chunk Callback
-------------------------
dr_wav supports the ability to fire a callback when a chunk is encountered (except for WAVE and FMT chunks). The callback has been updated to include both the
container (RIFF or Wave64) and the FMT chunk which contains information about the format of the data in the wave file.

Previously, there was no direct way to determine the container, and therefore no way to discriminate against the different IDs in the chunk header (RIFF and
Wave64 containers encode chunk ID's differently). The `container` parameter can be used to know which ID to use.

Sometimes it can be useful to know the data format at the time the chunk callback is fired. A pointer to a `drwav_fmt` object is now passed into the chunk
callback which will give you information about the data format. To determine the sample format, use `drwav_fmt_get_format()`. This will return one of the
`DR_WAVE_FORMAT_*` tokens.
*/

/*
Introduction
============
This is a single file library. To use it, do something like the following in one .c file.
    
    ```c
    #define DR_WAV_IMPLEMENTATION
    #include "dr_wav.h"
    ```

You can then #include this file in other parts of the program as you would with any other header file. Do something like the following to read audio data:

    ```c
    drwav wav;
    if (!drwav_init_file(&wav, "my_song.wav", NULL)) {
        // Error opening WAV file.
    }

    drwav_int32* pDecodedInterleavedPCMFrames = malloc(wav.totalPCMFrameCount * wav.channels * sizeof(drwav_int32));
    size_t numberOfSamplesActuallyDecoded = drwav_read_pcm_frames_s32(&wav, wav.totalPCMFrameCount, pDecodedInterleavedPCMFrames);

    ...

    drwav_uninit(&wav);
    ```

If you just want to quickly open and read the audio data in a single operation you can do something like this:

    ```c
    unsigned int channels;
    unsigned int sampleRate;
    drwav_uint64 totalPCMFrameCount;
    float* pSampleData = drwav_open_file_and_read_pcm_frames_f32("my_song.wav", &channels, &sampleRate, &totalPCMFrameCount, NULL);
    if (pSampleData == NULL) {
        // Error opening and reading WAV file.
    }

    ...

    drwav_free(pSampleData);
    ```

The examples above use versions of the API that convert the audio data to a consistent format (32-bit signed PCM, in this case), but you can still output the
audio data in its internal format (see notes below for supported formats):

    ```c
    size_t framesRead = drwav_read_pcm_frames(&wav, wav.totalPCMFrameCount, pDecodedInterleavedPCMFrames);
    ```

You can also read the raw bytes of audio data, which could be useful if dr_wav does not have native support for a particular data format:

    ```c
    size_t bytesRead = drwav_read_raw(&wav, bytesToRead, pRawDataBuffer);
    ```

dr_wav can also be used to output WAV files. This does not currently support compressed formats. To use this, look at `drwav_init_write()`,
`drwav_init_file_write()`, etc. Use `drwav_write_pcm_frames()` to write samples, or `drwav_write_raw()` to write raw data in the "data" chunk.

    ```c
    drwav_data_format format;
    format.container = drwav_container_riff;     // <-- drwav_container_riff = normal WAV files, drwav_container_w64 = Sony Wave64.
    format.format = DR_WAVE_FORMAT_PCM;          // <-- Any of the DR_WAVE_FORMAT_* codes.
    format.channels = 2;
    format.sampleRate = 44100;
    format.bitsPerSample = 16;
    drwav_init_file_write(&wav, "data/recording.wav", &format, NULL);

    ...

    drwav_uint64 framesWritten = drwav_write_pcm_frames(pWav, frameCount, pSamples);
    ```

dr_wav has seamless support for the Sony Wave64 format. The decoder will automatically detect it and it should Just Work without any manual intervention.


Build Options
=============
#define these options before including this file.

#define DR_WAV_NO_CONVERSION_API
  Disables conversion APIs such as `drwav_read_pcm_frames_f32()` and `drwav_s16_to_f32()`.

#define DR_WAV_NO_STDIO
  Disables APIs that initialize a decoder from a file such as `drwav_init_file()`, `drwav_init_file_write()`, etc.



Notes
=====
- Samples are always interleaved.
- The default read function does not do any data conversion. Use `drwav_read_pcm_frames_f32()`, `drwav_read_pcm_frames_s32()` and `drwav_read_pcm_frames_s16()`
  to read and convert audio data to 32-bit floating point, signed 32-bit integer and signed 16-bit integer samples respectively. Tested and supported internal
  formats include the following:
  - Unsigned 8-bit PCM
  - Signed 12-bit PCM
  - Signed 16-bit PCM
  - Signed 24-bit PCM
  - Signed 32-bit PCM
  - IEEE 32-bit floating point
  - IEEE 64-bit floating point
  - A-law and u-law
  - Microsoft ADPCM
  - IMA ADPCM (DVI, format code 0x11)
- dr_wav will try to read the WAV file as best it can, even if it's not strictly conformant to the WAV format.
*/

#ifndef dr_wav_h
#define dr_wav_h

#ifdef __cplusplus
extern "C" {
#endif

#define DRWAV_STRINGIFY(x)      #x
#define DRWAV_XSTRINGIFY(x)     DRWAV_STRINGIFY(x)

#define DRWAV_VERSION_MAJOR     0
#define DRWAV_VERSION_MINOR     12
#define DRWAV_VERSION_REVISION  16
#define DRWAV_VERSION_STRING    DRWAV_XSTRINGIFY(DRWAV_VERSION_MAJOR) "." DRWAV_XSTRINGIFY(DRWAV_VERSION_MINOR) "." DRWAV_XSTRINGIFY(DRWAV_VERSION_REVISION)

#include <stddef.h> /* For size_t. */

/* Sized types. */
typedef   signed char           drwav_int8;
typedef unsigned char           drwav_uint8;
typedef   signed short          drwav_int16;
typedef unsigned short          drwav_uint16;
typedef   signed int            drwav_int32;
typedef unsigned int            drwav_uint32;
#if defined(_MSC_VER)
    typedef   signed __int64    drwav_int64;
    typedef unsigned __int64    drwav_uint64;
#else
    #if defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)))
        #pragma GCC diagnostic push
        #pragma GCC diagnostic ignored "-Wlong-long"
        #if defined(__clang__)
            #pragma GCC diagnostic ignored "-Wc++11-long-long"
        #endif
    #endif
    typedef   signed long long  drwav_int64;
    typedef unsigned long long  drwav_uint64;
    #if defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)))
        #pragma GCC diagnostic pop
    #endif
#endif
#if defined(__LP64__) || defined(_WIN64) || (defined(__x86_64__) && !defined(__ILP32__)) || defined(_M_X64) || defined(__ia64) || defined (_M_IA64) || defined(__aarch64__) || defined(__powerpc64__)
    typedef drwav_uint64        drwav_uintptr;
#else
    typedef drwav_
Download .txt
gitextract_nzqawnur/

├── .gitignore
├── ComLightLib/
│   ├── ComLightLib.vcxproj
│   ├── ComLightLib.vcxproj.filters
│   ├── Exception.hpp
│   ├── Readme.txt
│   ├── client/
│   │   └── CComPtr.hpp
│   ├── comLightClient.h
│   ├── comLightCommon.h
│   ├── comLightServer.h
│   ├── hresult.h
│   ├── pal/
│   │   ├── guiddef.h
│   │   └── hresult.h
│   ├── server/
│   │   ├── Object.hpp
│   │   ├── ObjectRoot.hpp
│   │   ├── RefCounter.hpp
│   │   ├── freeThreadedMarshaller.cpp
│   │   ├── freeThreadedMarshaller.h
│   │   └── interfaceMap.h
│   ├── streams.h
│   ├── unknwn.h
│   └── utils/
│       ├── guid_parse.hpp
│       └── typeTraits.hpp
├── ComputeShaders/
│   ├── ComputeShaders.cpp
│   ├── ComputeShaders.vcxproj
│   ├── ComputeShaders.vcxproj.filters
│   ├── Readme.txt
│   ├── add.hlsl
│   ├── addInPlace.hlsl
│   ├── addRepeat.hlsl
│   ├── addRepeat64.hlsl
│   ├── addRepeatEx.hlsl
│   ├── addRepeatGelu.hlsl
│   ├── addRepeatGelu64.hlsl
│   ├── addRepeatScale.hlsl
│   ├── addRows.hlsl
│   ├── componentwiseBinaryOp.hlsli
│   ├── convolutionMain.hlsl
│   ├── convolutionMain2.hlsl
│   ├── convolutionMain2Fixed.hlsl
│   ├── convolutionPrep1.hlsl
│   ├── convolutionPrep2.hlsl
│   ├── copyConvert.hlsl
│   ├── copyTranspose.hlsl
│   ├── dbgFindNaN.hlsl
│   ├── diagMaskInf.hlsl
│   ├── flashAttention.hlsl
│   ├── flashAttentionCommon.hlsli
│   ├── flashAttentionCompat1.hlsl
│   ├── flashAttentionCompat2.hlsl
│   ├── flashAttentionCompat3.hlsl
│   ├── fmaRepeat1.hlsl
│   ├── fmaRepeat164.hlsl
│   ├── fmaRepeat2.hlsl
│   ├── fp64Utils.hlsli
│   ├── groupReduce.hlsli
│   ├── groupReduce64.hlsli
│   ├── matReshapePanels.hlsl
│   ├── miscUtils.hlsli
│   ├── mulMatByRow.hlsl
│   ├── mulMatByRow64.hlsl
│   ├── mulMatByRowTiled.hlsl
│   ├── mulMatByRowTiledEx.hlsl
│   ├── mulMatByScalar.hlsl
│   ├── mulMatDotMain.hlsl
│   ├── mulMatDotReshape.hlsl
│   ├── mulMatMadMain.hlsl
│   ├── mulMatTiled.hlsl
│   ├── mulMatTiledEx.hlsl
│   ├── norm.hlsl
│   ├── normCompat.hlsl
│   ├── normFixed.hlsl
│   ├── normFixed64.hlsl
│   ├── repeatUtils.hlsli
│   ├── scaleInPlace.hlsl
│   ├── softMax.hlsl
│   ├── softMax64.hlsl
│   ├── softMaxCompat.hlsl
│   ├── softMaxFixed.hlsl
│   ├── softMaxLong.hlsl
│   └── zeroMemory.hlsl
├── Examples/
│   ├── MicrophoneCS/
│   │   ├── CaptureThread.cs
│   │   ├── CommandLineArgs.cs
│   │   ├── MicrophoneCS.cs
│   │   ├── MicrophoneCS.csproj
│   │   ├── Readme.txt
│   │   └── TranscribeCallbacks.cs
│   ├── OldMain/
│   │   ├── OldMain.vcxproj
│   │   ├── OldMain.vcxproj.filters
│   │   ├── Readme.txt
│   │   ├── Utils/
│   │   │   ├── Logger.cpp
│   │   │   └── Logger.h
│   │   ├── dr_wav.h
│   │   └── main.cpp
│   ├── TranscribeCS/
│   │   ├── AnsiCodes.cs
│   │   ├── CommandLineArgs.cs
│   │   ├── Readme.txt
│   │   ├── Transcribe.cs
│   │   ├── TranscribeCS.cs
│   │   └── TranscribeCS.csproj
│   ├── WhisperDesktop/
│   │   ├── AppState.cpp
│   │   ├── AppState.h
│   │   ├── CaptureDlg.cpp
│   │   ├── CaptureDlg.h
│   │   ├── CircleIndicator.cpp
│   │   ├── CircleIndicator.h
│   │   ├── LoadModelDlg.cpp
│   │   ├── LoadModelDlg.h
│   │   ├── ModelAdvancedDlg.cpp
│   │   ├── ModelAdvancedDlg.h
│   │   ├── Readme.txt
│   │   ├── Resource.h
│   │   ├── TranscribeDlg.cpp
│   │   ├── TranscribeDlg.h
│   │   ├── Utils/
│   │   │   ├── DebugConsole.cpp
│   │   │   ├── DebugConsole.h
│   │   │   ├── LanguageDropdown.cpp
│   │   │   ├── LanguageDropdown.h
│   │   │   ├── PendingState.cpp
│   │   │   ├── PendingState.h
│   │   │   ├── TranslateCheckbox.cpp
│   │   │   ├── TranslateCheckbox.h
│   │   │   ├── WTL/
│   │   │   │   ├── MS-PL.txt
│   │   │   │   ├── ReadMe.html
│   │   │   │   ├── atlapp.h
│   │   │   │   ├── atlcrack.h
│   │   │   │   ├── atlctrls.h
│   │   │   │   ├── atlddx.h
│   │   │   │   ├── atlgdi.h
│   │   │   │   ├── atlres.h
│   │   │   │   ├── atluser.h
│   │   │   │   └── atlwinx.h
│   │   │   ├── logger.cpp
│   │   │   ├── logger.h
│   │   │   ├── miscUtils.cpp
│   │   │   └── miscUtils.h
│   │   ├── WhisperDesktop.cpp
│   │   ├── WhisperDesktop.manifest
│   │   ├── WhisperDesktop.rc
│   │   ├── WhisperDesktop.vcxproj
│   │   ├── WhisperDesktop.vcxproj.filters
│   │   ├── framework.h
│   │   ├── stdafx.cpp
│   │   ├── stdafx.h
│   │   ├── targetver.h
│   │   └── useDiscreteGpu.c
│   └── main/
│       ├── Readme.txt
│       ├── main.cpp
│       ├── main.vcxproj
│       ├── main.vcxproj.filters
│       ├── miscUtils.cpp
│       ├── miscUtils.h
│       ├── params.cpp
│       ├── params.h
│       ├── textWriter.cpp
│       └── textWriter.h
├── LICENSE
├── Readme.md
├── SampleClips/
│   ├── Readme.txt
│   ├── columbia-large-1080ti.txt
│   ├── columbia-large-1650.txt
│   ├── columbia-large-vega7.txt
│   ├── columbia-large-vega8.txt
│   ├── columbia-medium-1080ti.txt
│   ├── columbia-medium-1650.txt
│   ├── columbia-medium-vega7.txt
│   ├── columbia-medium-vega8.txt
│   ├── columbia.wma
│   ├── jfk-large-1080ti.txt
│   ├── jfk-large-1650.txt
│   ├── jfk-large-vega7.txt
│   ├── jfk-large-vega8.txt
│   ├── jfk-medium-1080ti.txt
│   ├── jfk-medium-1650.txt
│   ├── jfk-medium-vega7.txt
│   ├── jfk-medium-vega8.txt
│   └── summary.tsv
├── Tools/
│   ├── CompressShaders/
│   │   ├── Cabinet.cs
│   │   ├── CompressShaders.cs
│   │   ├── CompressShaders.csproj
│   │   ├── DetectFp64.cs
│   │   ├── LZ4.cs
│   │   ├── LanguageCodes.cs
│   │   ├── Readme.txt
│   │   └── ShaderNames.cs
│   ├── CompressTables/
│   │   ├── CompressTables.cs
│   │   └── CompressTables.csproj
│   ├── PerfSummary/
│   │   ├── LogParser.cs
│   │   ├── PerfSummary.cs
│   │   ├── PerfSummary.csproj
│   │   └── Summary.cs
│   ├── compareTraces/
│   │   ├── CommandLineArgs.cpp
│   │   ├── CommandLineArgs.h
│   │   ├── Readme.txt
│   │   ├── TraceReader.cpp
│   │   ├── TraceReader.h
│   │   ├── compare.cpp
│   │   ├── compare.h
│   │   ├── compareTraces.cpp
│   │   ├── compareTraces.vcxproj
│   │   ├── compareTraces.vcxproj.filters
│   │   ├── stdafx.cpp
│   │   ├── stdafx.h
│   │   └── testUtils.cpp
│   └── copy-binaries.cmd
├── Whisper/
│   ├── API/
│   │   ├── MfStructs.h
│   │   ├── Readme.txt
│   │   ├── SpecialTokens.h
│   │   ├── TranscribeStructs.h
│   │   ├── iContext.cl.h
│   │   ├── iContext.h
│   │   ├── iMediaFoundation.cl.h
│   │   ├── iMediaFoundation.h
│   │   ├── iTranscribeResult.cl.h
│   │   ├── iTranscribeResult.h
│   │   ├── loggerApi.h
│   │   ├── sFullParams.h
│   │   ├── sLanguageList.h
│   │   ├── sLoadModelCallbacks.h
│   │   ├── sModelSetup.h
│   │   ├── whisperComLight.h
│   │   └── whisperWindows.h
│   ├── CPU/
│   │   ├── BufferAllocator.cpp
│   │   ├── BufferAllocator.h
│   │   ├── DecoderTensors.cpp
│   │   ├── DecoderTensors.h
│   │   ├── HybridLoader.cpp
│   │   ├── HybridLoader.h
│   │   ├── KvTensors.h
│   │   ├── KvTensorsCpu.cpp
│   │   ├── LargeBuffer.cpp
│   │   ├── LargeBuffer.h
│   │   ├── MlContext.h
│   │   ├── MlContextCpu.cpp
│   │   ├── ParallelForRunner.cpp
│   │   ├── ParallelForRunner.h
│   │   ├── Readme.txt
│   │   ├── Tensor.h
│   │   ├── TensorCpu.cpp
│   │   ├── mulMat.cpp
│   │   ├── mulMat.h
│   │   ├── mulMat.kernel.hpp
│   │   ├── mulMatImpl.avx2.cpp
│   │   ├── mulMatImpl.cpp
│   │   ├── mulMatImpl.h
│   │   ├── mulMatImpl.panel.cpp
│   │   ├── mulMatUtils.hpp
│   │   ├── simdUtils.cpp
│   │   └── simdUtils.h
│   ├── D3D/
│   │   ├── Binder.cpp
│   │   ├── Binder.h
│   │   ├── MappedResource.cpp
│   │   ├── MappedResource.h
│   │   ├── RenderDoc/
│   │   │   ├── renderDoc.cpp
│   │   │   ├── renderDoc.h
│   │   │   └── renderdoc_app.h
│   │   ├── createBuffer.cpp
│   │   ├── createBuffer.h
│   │   ├── createDevice.cpp
│   │   ├── createDevice.h
│   │   ├── device.h
│   │   ├── downloadBuffer.cpp
│   │   ├── downloadBuffer.h
│   │   ├── enums.cpp
│   │   ├── enums.h
│   │   ├── listGPUs.cpp
│   │   ├── listGPUs.h
│   │   ├── sGpuInfo.h
│   │   ├── shaderNames.cpp
│   │   ├── shaderNames.h
│   │   ├── shaders.cpp
│   │   └── shaders.h
│   ├── DllMain.cpp
│   ├── Hybrid/
│   │   ├── HybridContext.cpp
│   │   ├── HybridContext.h
│   │   ├── KeyValueDownloader.cpp
│   │   ├── KeyValueDownloader.h
│   │   └── Readme.txt
│   ├── MF/
│   │   ├── AudioBuffer.cpp
│   │   ├── AudioBuffer.h
│   │   ├── AudioCapture.cpp
│   │   ├── AudioCapture.h
│   │   ├── MediaFoundation.cpp
│   │   ├── PcmReader.cpp
│   │   ├── PcmReader.h
│   │   ├── loadAudioFile.cpp
│   │   ├── loadAudioFile.h
│   │   ├── mfStartup.cpp
│   │   ├── mfStartup.h
│   │   ├── mfUtils.cpp
│   │   └── mfUtils.h
│   ├── ML/
│   │   ├── ConstantBuffer.cpp
│   │   ├── ConstantBuffer.h
│   │   ├── Context.ops.cpp
│   │   ├── DbgNanTest.cpp
│   │   ├── DbgNanTest.h
│   │   ├── Device.cpp
│   │   ├── Device.h
│   │   ├── LookupTables.cpp
│   │   ├── LookupTables.h
│   │   ├── LookupTablesData.cpp
│   │   ├── LookupTablesData.h
│   │   ├── LookupTablesData.inl
│   │   ├── MlContext.cpp
│   │   ├── MlContext.dbg.cpp
│   │   ├── MlContext.h
│   │   ├── Reshaper.cpp
│   │   ├── Reshaper.h
│   │   ├── TempBuffers.cpp
│   │   ├── TempBuffers.h
│   │   ├── Tensor.cpp
│   │   ├── Tensor.h
│   │   ├── TensorEx.cpp
│   │   ├── TensorEx.h
│   │   ├── TensorGpuViews.cpp
│   │   ├── TensorGpuViews.h
│   │   ├── TensorShape.cpp
│   │   ├── TensorShape.h
│   │   ├── TensorsArena.cpp
│   │   ├── TensorsArena.h
│   │   ├── mlUtils.cpp
│   │   ├── mlUtils.h
│   │   ├── reshapedMultiply.h
│   │   ├── tensorOpsTests.cpp
│   │   ├── tensorOpsTests.h
│   │   ├── testUtils.cpp
│   │   ├── testUtils.h
│   │   └── testUtilsC.h
│   ├── Readme.txt
│   ├── Resource.rc
│   ├── Utils/
│   │   ├── CpuProfiler.cpp
│   │   ├── CpuProfiler.h
│   │   ├── DelayExecution.cpp
│   │   ├── DelayExecution.h
│   │   ├── GpuProfiler.cpp
│   │   ├── GpuProfiler.h
│   │   ├── GpuProfilerSimple.h
│   │   ├── LZ4/
│   │   │   ├── LICENSE
│   │   │   ├── lz4.c
│   │   │   └── lz4.h
│   │   ├── Logger.cpp
│   │   ├── Logger.h
│   │   ├── MurmurHash3.cpp
│   │   ├── MurmurHash3.h
│   │   ├── ProfileCollection.cpp
│   │   ├── ProfileCollection.h
│   │   ├── ReadStream.h
│   │   ├── Trace/
│   │   │   ├── TraceStructures.cpp
│   │   │   ├── TraceStructures.h
│   │   │   ├── TraceWriter.cpp
│   │   │   ├── TraceWriter.h
│   │   │   ├── tracing.cpp
│   │   │   └── tracing.h
│   │   ├── miscUtils.cpp
│   │   ├── miscUtils.h
│   │   ├── parallelFor.cpp
│   │   └── parallelFor.h
│   ├── Whisper/
│   │   ├── ContextImpl.capture.cpp
│   │   ├── ContextImpl.cpp
│   │   ├── ContextImpl.diarize.cpp
│   │   ├── ContextImpl.h
│   │   ├── ContextImpl.misc.cpp
│   │   ├── DecoderInputBuffers.cpp
│   │   ├── DecoderInputBuffers.h
│   │   ├── DecoderResultBuffer.cpp
│   │   ├── DecoderResultBuffer.h
│   │   ├── KeyValueBuffers.cpp
│   │   ├── KeyValueBuffers.h
│   │   ├── Languages.cpp
│   │   ├── Languages.h
│   │   ├── MelInputTensor.cpp
│   │   ├── MelInputTensor.h
│   │   ├── MelStreamer.cpp
│   │   ├── MelStreamer.h
│   │   ├── ModelBuffers.clone.cpp
│   │   ├── ModelBuffers.cpp
│   │   ├── ModelBuffers.h
│   │   ├── ModelImpl.cpp
│   │   ├── ModelImpl.h
│   │   ├── ModelLoader.h
│   │   ├── Spectrogram.cpp
│   │   ├── Spectrogram.h
│   │   ├── TranscribeResult.h
│   │   ├── Vocabulary.cpp
│   │   ├── Vocabulary.h
│   │   ├── WhisperContext.cpp
│   │   ├── WhisperContext.h
│   │   ├── WhisperModel.cpp
│   │   ├── WhisperModel.h
│   │   ├── audioConstants.h
│   │   ├── iSpectrogram.h
│   │   ├── languageCodez.inl
│   │   ├── languageCodez.tsv
│   │   ├── loaderUtils.h
│   │   ├── melSpectrogram.cpp
│   │   ├── melSpectrogram.h
│   │   ├── sEncodeParams.h
│   │   ├── sModelParams.h
│   │   ├── sTokenData.h
│   │   ├── voiceActivityDetection.cpp
│   │   └── voiceActivityDetection.h
│   ├── Whisper.vcxproj
│   ├── Whisper.vcxproj.filters
│   ├── misc.natvis
│   ├── modelFactory.cpp
│   ├── modelFactory.h
│   ├── resource.h
│   ├── source/
│   │   ├── LICENSE
│   │   ├── Readme.txt
│   │   ├── ggml.c
│   │   ├── ggml.h
│   │   ├── whisper.cpp
│   │   └── whisper.h
│   ├── source.compat/
│   │   ├── Readme.txt
│   │   ├── convertThings.cpp
│   │   ├── convertThings.h
│   │   └── ggmlMsvc.c
│   ├── stdafx.cpp
│   ├── stdafx.h
│   ├── whisper.def
│   └── whisperCom.cpp
├── WhisperCpp.sln
├── WhisperNet/
│   ├── API/
│   │   ├── CaptureDeviceId.cs
│   │   ├── Parameters.cs
│   │   ├── SpecialTokens.cs
│   │   ├── eCaptureStatus.cs
│   │   ├── eGpuModelFlags.cs
│   │   ├── eLanguage.cs
│   │   ├── eLogLevel.cs
│   │   ├── eModelImplementation.cs
│   │   ├── eResultFlags.cs
│   │   ├── eSpeakerChannel.cs
│   │   ├── iAudioBuffer.cs
│   │   ├── iAudioReader.cs
│   │   ├── iMediaFoundation.cs
│   │   ├── iModel.cs
│   │   └── sCaptureParams.cs
│   ├── AssemblyInfo.cs
│   ├── AssemblyTitle.cs
│   ├── Callbacks.cs
│   ├── CaptureCallbacks.cs
│   ├── Context.cs
│   ├── ExtensionMethods.cs
│   ├── Internal/
│   │   ├── NativeLogger.cs
│   │   ├── iContext.cs
│   │   ├── iTranscribeResult.cs
│   │   ├── sCaptureCallbacks.cs
│   │   ├── sCaptureDevice.cs
│   │   ├── sFullParams.cs
│   │   ├── sLoadModelCallbacks.cs
│   │   ├── sLoggerSetup.cs
│   │   ├── sModelSetup.cs
│   │   └── sProgressSink.cs
│   ├── Library.cs
│   ├── Readme.md
│   ├── WhisperNet.csproj
│   └── WhisperNet.nuspec
└── WhisperPS/
    ├── Commands/
    │   ├── ExportBase.cs
    │   ├── ExportSubrip.cs
    │   ├── ExportText.cs
    │   ├── ExportWebVtt.cs
    │   ├── FormatSegments.cs
    │   ├── ListAdapters.cs
    │   ├── LoadModel.cs
    │   ├── TranscribeBase.cs
    │   └── TranscribeFile.cs
    ├── Internal/
    │   ├── MarshalEx.cs
    │   ├── NativeLogger.cs
    │   ├── iTranscribeResult.cs
    │   ├── sCaptureDevice.cs
    │   ├── sFullParams.cs
    │   ├── sLoadModelCallbacks.cs
    │   ├── sModelSetup.cs
    │   └── sProgressSink.cs
    ├── Library.cs
    ├── Properties/
    │   └── AssemblyTitle.cs
    ├── Readme.md
    ├── Types/
    │   ├── Model.cs
    │   ├── Segment.cs
    │   └── Transcription.cs
    ├── Utils/
    │   ├── CommandLogger.cs
    │   └── MiscUtils.cs
    ├── WhisperPS.csproj
    ├── WhisperPS.psd1
    ├── app.config
    └── packages.config
Download .txt
Showing preview only (321K chars total). Download the full file or copy to clipboard to get everything.
SYMBOL INDEX (3982 symbols across 320 files)

FILE: ComLightLib/Exception.hpp
  type ComLight (line 3) | namespace ComLight
    class Exception (line 5) | class Exception : public std::runtime_error
      method Exception (line 16) | Exception( HRESULT hr ) : runtime_error( "ComLight HRESULT exception...
      method HRESULT (line 18) | HRESULT code() const { return m_code; }

FILE: ComLightLib/client/CComPtr.hpp
  type ComLight (line 3) | namespace ComLight
    class CComPtr (line 7) | class CComPtr
      method callAddRef (line 11) | void callAddRef() const
      method CComPtr (line 21) | CComPtr() : p( nullptr ) { }
      method release (line 24) | void release()
      method attach (line 38) | void attach( I* raw )
      method I (line 45) | I* detach()
      method detach (line 54) | void detach( Other** pp )
      method assign (line 63) | void assign( I* raw )
      method swap (line 70) | void swap( CComPtr<I>& that )
      method CComPtr (line 76) | CComPtr( I* raw ) : p( raw )
      method CComPtr (line 82) | CComPtr( const CComPtr<I>& that ) : CComPtr( that.p ) { }
      method CComPtr (line 84) | CComPtr( CComPtr<I>&& that ) : p( that.p ) { that.p = nullptr; }
      method I (line 105) | I* operator -> () const { return p; }
      method I (line 106) | I** operator &() { return &p; }

FILE: ComLightLib/comLightClient.h
  function namespace (line 6) | namespace ComLight

FILE: ComLightLib/hresult.h
  function HRESULT (line 13) | inline constexpr HRESULT HRESULT_FROM_WIN32( int c )

FILE: ComLightLib/pal/guiddef.h
  type GUID (line 8) | struct GUID

FILE: ComLightLib/pal/hresult.h
  function HRESULT (line 8) | inline constexpr HRESULT MAKE_SCODE( uint32_t sev, uint32_t fac, uint32_...
  function SUCCEEDED (line 93) | inline constexpr bool SUCCEEDED( HRESULT hr )
  function FAILED (line 98) | inline constexpr bool FAILED( HRESULT hr )

FILE: ComLightLib/server/Object.hpp
  type ComLight (line 7) | namespace ComLight
    type details (line 9) | namespace details
    class Object (line 18) | class Object : public T
      method Object (line 21) | Object() = default;
      method Object (line 24) | Object( Args&& ... args ) : T{ std::forward<Args>( args )... } {}
      method QueryInterface (line 29) | QueryInterface( REFIID riid, void** ppvObject ) override
      method AddRef (line 52) | AddRef() override
      method Release (line 58) | Release() override
      method HRESULT (line 71) | static inline HRESULT create( CComPtr<Object<T>>& result )
      method HRESULT (line 97) | static inline HRESULT create( CComPtr<Object<T>>& result, Args&& ......
      method HRESULT (line 127) | static inline HRESULT create( I** pp )

FILE: ComLightLib/server/ObjectRoot.hpp
  type ComLight (line 6) | namespace ComLight
    class ObjectRoot (line 11) | class ObjectRoot : public RefCounter, public I
      method HRESULT (line 15) | inline HRESULT internalFinalConstruct()
      method HRESULT (line 20) | inline HRESULT FinalConstruct()
      method FinalRelease (line 25) | inline void FinalRelease() { }
      method IUnknown (line 27) | IUnknown* getUnknown()
      method queryExtraInterfaces (line 33) | bool queryExtraInterfaces( REFIID riid, void **ppvObject ) const
      method implQueryInterface (line 39) | bool implQueryInterface( REFIID riid, void** ppvObject )

FILE: ComLightLib/server/RefCounter.hpp
  type ComLight (line 6) | namespace ComLight
    class RefCounter (line 9) | class RefCounter
      method RefCounter (line 15) | RefCounter() : referenceCounter( 0 ) { }
      method RefCounter (line 19) | RefCounter( const RefCounter &that ) = delete;
      method RefCounter (line 20) | RefCounter( RefCounter &&that ) = delete;
      method implAddRef (line 24) | uint32_t implAddRef()
      method implRelease (line 29) | uint32_t implRelease()

FILE: ComLightLib/server/freeThreadedMarshaller.cpp
  function HRESULT (line 5) | HRESULT ComLight::details::createFreeThreadedMarshaller( IUnknown* pUnkO...

FILE: ComLightLib/server/freeThreadedMarshaller.h
  function namespace (line 5) | namespace ComLight

FILE: ComLightLib/server/interfaceMap.h
  function namespace (line 12) | namespace ComLight

FILE: ComLightLib/streams.h
  function namespace (line 6) | namespace ComLight

FILE: ComLightLib/unknwn.h
  function namespace (line 23) | namespace ComLight

FILE: ComLightLib/utils/guid_parse.hpp
  type GUID (line 19) | struct GUID {
  type ComLight (line 27) | namespace ComLight
    type details (line 29) | namespace details
      function parse_hex_digit (line 34) | constexpr uint8_t parse_hex_digit( const char c )
      function parse_hex_uint8_t (line 48) | constexpr uint8_t parse_hex_uint8_t( const char *ptr )
      function parse_hex_uint16_t (line 53) | constexpr uint16_t parse_hex_uint16_t( const char *ptr )
      function parse_hex_uint32_t (line 58) | constexpr uint32_t parse_hex_uint32_t( const char *ptr )
      function GUID (line 63) | constexpr GUID parse_guid( const char *begin )
      function GUID (line 82) | constexpr GUID make_guid_helper( const char *str, size_t N )
      function GUID (line 97) | constexpr GUID make_guid( const char( &str )[ N ] )

FILE: ComLightLib/utils/typeTraits.hpp
  type ComLight (line 4) | namespace ComLight
    type details (line 6) | namespace details
      function pointersAssignable (line 9) | constexpr bool pointersAssignable()

FILE: ComputeShaders/ComputeShaders.cpp
  function fnComputeShaders (line 1) | void fnComputeShaders()

FILE: Examples/MicrophoneCS/CaptureThread.cs
  class CaptureThread (line 6) | sealed class CaptureThread: CaptureCallbacks
    method CaptureThread (line 8) | public CaptureThread( CommandLineArgs args, Context context, iAudioCap...
    method readKeyCallback (line 19) | static void readKeyCallback( object? state )
    method join (line 26) | public void join()
    method shouldCancel (line 35) | protected override bool shouldCancel( Context sender ) =>
    method captureStatusChanged (line 38) | protected override void captureStatusChanged( Context sender, eCapture...
    method threadMain (line 49) | void threadMain()

FILE: Examples/MicrophoneCS/CommandLineArgs.cs
  type CommandLineArgs (line 7) | sealed record class CommandLineArgs

FILE: Examples/MicrophoneCS/MicrophoneCS.cs
  class Program (line 5) | static class Program
    method Main (line 7) | static int Main( string[] args )

FILE: Examples/MicrophoneCS/TranscribeCallbacks.cs
  class TranscribeCallbacks (line 7) | sealed class TranscribeCallbacks: Callbacks
    method TranscribeCallbacks (line 12) | public TranscribeCallbacks( CommandLineArgs args )
    method colorIndex (line 27) | int colorIndex( in sToken tok )
    method printTime (line 36) | public static string printTime( TimeSpan ts ) =>
    method printTimeWithComma (line 38) | public static string printTimeWithComma( TimeSpan ts ) =>
    method onNewSegment (line 41) | protected override void onNewSegment( Context sender, int countNew )

FILE: Examples/OldMain/Utils/Logger.cpp
  function logMessage (line 8) | void logMessage( const char* lvl, const char8_t* pszFormat, std::va_list...
  function logError (line 22) | void logError( const char8_t* pszFormat, ... )
  function logWarning (line 27) | void logWarning( const char8_t* pszFormat, ... )
  function logInfo (line 32) | void logInfo( const char8_t* pszFormat, ... )
  function logDebug (line 37) | void logDebug( const char8_t* pszFormat, ... )

FILE: Examples/OldMain/Utils/Logger.h
  type ggml_tensor (line 7) | struct ggml_tensor
  function namespace (line 17) | namespace Tracing

FILE: Examples/OldMain/dr_wav.h
  type drwav_int8 (line 153) | typedef   signed char           drwav_int8;
  type drwav_uint8 (line 154) | typedef unsigned char           drwav_uint8;
  type drwav_int16 (line 155) | typedef   signed short          drwav_int16;
  type drwav_uint16 (line 156) | typedef unsigned short          drwav_uint16;
  type drwav_int32 (line 157) | typedef   signed int            drwav_int32;
  type drwav_uint32 (line 158) | typedef unsigned int            drwav_uint32;
  type drwav_int64 (line 160) | typedef   signed __int64    drwav_int64;
  type drwav_uint64 (line 161) | typedef unsigned __int64    drwav_uint64;
  type drwav_int64 (line 170) | typedef   signed long long  drwav_int64;
  type drwav_uint64 (line 171) | typedef unsigned long long  drwav_uint64;
  type drwav_uint64 (line 177) | typedef drwav_uint64        drwav_uintptr;
  type drwav_uint32 (line 179) | typedef drwav_uint32        drwav_uintptr;
  type drwav_uint8 (line 181) | typedef drwav_uint8             drwav_bool8;
  type drwav_uint32 (line 182) | typedef drwav_uint32            drwav_bool32;
  type drwav_int32 (line 216) | typedef drwav_int32 drwav_result;
  type drwav_seek_origin (line 292) | typedef enum
  type drwav_container (line 298) | typedef enum
  type drwav_chunk_header (line 305) | typedef struct
  type drwav_fmt (line 323) | typedef struct
  type drwav_bool32 (line 405) | typedef drwav_bool32 (* drwav_seek_proc)(void* pUserData, int offset, dr...
  type drwav_uint64 (line 431) | typedef drwav_uint64 (* drwav_chunk_proc)(void* pChunkUserData, drwav_re...
  type drwav_allocation_callbacks (line 433) | typedef struct
  type drwav__memory_stream (line 442) | typedef struct
  type drwav__memory_stream_write (line 450) | typedef struct
  type drwav_data_format (line 459) | typedef struct
  type drwav_smpl_loop (line 470) | typedef struct
  type drwav_smpl (line 480) | typedef struct
  type drwav (line 494) | typedef struct
  function DRWAV_API (line 1066) | DRWAV_API void drwav_version(drwav_uint32* pMajor, drwav_uint32* pMinor,...
  function DRWAV_API (line 1081) | DRWAV_API const char* drwav_version_string(void)
  function DRWAV_INLINE (line 1108) | static DRWAV_INLINE drwav_bool32 drwav__guid_equal(const drwav_uint8 a[1...
  function DRWAV_INLINE (line 1120) | static DRWAV_INLINE drwav_bool32 drwav__fourcc_equal(const drwav_uint8* ...
  function DRWAV_INLINE (line 1131) | static DRWAV_INLINE int drwav__is_little_endian(void)
  function DRWAV_INLINE (line 1143) | static DRWAV_INLINE drwav_uint16 drwav__bytes_to_u16(const drwav_uint8* ...
  function DRWAV_INLINE (line 1148) | static DRWAV_INLINE drwav_int16 drwav__bytes_to_s16(const drwav_uint8* d...
  function DRWAV_INLINE (line 1153) | static DRWAV_INLINE drwav_uint32 drwav__bytes_to_u32(const drwav_uint8* ...
  function DRWAV_INLINE (line 1158) | static DRWAV_INLINE drwav_int32 drwav__bytes_to_s32(const drwav_uint8* d...
  function DRWAV_INLINE (line 1163) | static DRWAV_INLINE drwav_uint64 drwav__bytes_to_u64(const drwav_uint8* ...
  function DRWAV_INLINE (line 1170) | static DRWAV_INLINE drwav_int64 drwav__bytes_to_s64(const drwav_uint8* d...
  function DRWAV_INLINE (line 1175) | static DRWAV_INLINE void drwav__bytes_to_guid(const drwav_uint8* data, d...
  function DRWAV_INLINE (line 1184) | static DRWAV_INLINE drwav_uint16 drwav__bswap16(drwav_uint16 n)
  function DRWAV_INLINE (line 1231) | static DRWAV_INLINE drwav_uint64 drwav__bswap64(drwav_uint64 n)
  function DRWAV_INLINE (line 1255) | static DRWAV_INLINE drwav_int16 drwav__bswap_s16(drwav_int16 n)
  function DRWAV_INLINE (line 1260) | static DRWAV_INLINE void drwav__bswap_samples_s16(drwav_int16* pSamples,...
  function DRWAV_INLINE (line 1269) | static DRWAV_INLINE void drwav__bswap_s24(drwav_uint8* p)
  function DRWAV_INLINE (line 1277) | static DRWAV_INLINE void drwav__bswap_samples_s24(drwav_uint8* pSamples,...
  function DRWAV_INLINE (line 1287) | static DRWAV_INLINE drwav_int32 drwav__bswap_s32(drwav_int32 n)
  function DRWAV_INLINE (line 1292) | static DRWAV_INLINE void drwav__bswap_samples_s32(drwav_int32* pSamples,...
  function DRWAV_INLINE (line 1301) | static DRWAV_INLINE float drwav__bswap_f32(float n)
  function DRWAV_INLINE (line 1313) | static DRWAV_INLINE void drwav__bswap_samples_f32(float* pSamples, drwav...
  function DRWAV_INLINE (line 1322) | static DRWAV_INLINE double drwav__bswap_f64(double n)
  function DRWAV_INLINE (line 1334) | static DRWAV_INLINE void drwav__bswap_samples_f64(double* pSamples, drwa...
  function DRWAV_INLINE (line 1343) | static DRWAV_INLINE void drwav__bswap_samples_pcm(void* pSamples, drwav_...
  function DRWAV_INLINE (line 1368) | static DRWAV_INLINE void drwav__bswap_samples_ieee(void* pSamples, drwav...
  function DRWAV_INLINE (line 1394) | static DRWAV_INLINE void drwav__bswap_samples(void* pSamples, drwav_uint...
  function drwav__free_default (line 1437) | static void drwav__free_default(void* p, void* pUserData)
  function drwav__free_from_callbacks (line 1492) | static void drwav__free_from_callbacks(void* p, const drwav_allocation_c...
  function drwav_allocation_callbacks (line 1504) | static drwav_allocation_callbacks drwav_copy_allocation_callbacks_or_def...
  function DRWAV_INLINE (line 1521) | static DRWAV_INLINE drwav_bool32 drwav__is_compressed_format_tag(drwav_u...
  function drwav__chunk_padding_size_riff (line 1528) | static unsigned int drwav__chunk_padding_size_riff(drwav_uint64 chunkSize)
  function drwav__chunk_padding_size_w64 (line 1533) | static unsigned int drwav__chunk_padding_size_w64(drwav_uint64 chunkSize)
  function drwav_result (line 1542) | static drwav_result drwav__read_chunk_header(drwav_read_proc onRead, voi...
  function drwav_bool32 (line 1577) | static drwav_bool32 drwav__seek_forward(drwav_seek_proc onSeek, drwav_ui...
  function drwav_bool32 (line 1597) | static drwav_bool32 drwav__seek_from_start(drwav_seek_proc onSeek, drwav...
  function drwav_bool32 (line 1625) | static drwav_bool32 drwav__read_fmt(drwav_read_proc onRead, drwav_seek_p...
  function drwav__on_read (line 1735) | static size_t drwav__on_read(drwav_read_proc onRead, void* pUserData, vo...
  function drwav_bool32 (line 1748) | static drwav_bool32 drwav__on_seek(drwav_seek_proc onSeek, void* pUserDa...
  function drwav_uint32 (line 1769) | static drwav_uint32 drwav_get_bytes_per_pcm_frame(drwav* pWav)
  function DRWAV_API (line 1783) | DRWAV_API drwav_uint16 drwav_fmt_get_format(const drwav_fmt* pFMT)
  function drwav_bool32 (line 1796) | static drwav_bool32 drwav_preinit(drwav* pWav, drwav_read_proc onRead, d...
  function drwav_bool32 (line 1815) | static drwav_bool32 drwav_init__internal(drwav* pWav, drwav_chunk_proc o...
  function DRWAV_API (line 2239) | DRWAV_API drwav_bool32 drwav_init(drwav* pWav, drwav_read_proc onRead, d...
  function DRWAV_API (line 2244) | DRWAV_API drwav_bool32 drwav_init_ex(drwav* pWav, drwav_read_proc onRead...
  function drwav_uint32 (line 2254) | static drwav_uint32 drwav__riff_chunk_size_riff(drwav_uint64 dataChunkSize)
  function drwav_uint32 (line 2264) | static drwav_uint32 drwav__data_chunk_size_riff(drwav_uint64 dataChunkSize)
  function drwav_uint64 (line 2273) | static drwav_uint64 drwav__riff_chunk_size_w64(drwav_uint64 dataChunkSize)
  function drwav_uint64 (line 2280) | static drwav_uint64 drwav__data_chunk_size_w64(drwav_uint64 dataChunkSize)
  function drwav_uint64 (line 2285) | static drwav_uint64 drwav__riff_chunk_size_rf64(drwav_uint64 dataChunkSize)
  function drwav_uint64 (line 2295) | static drwav_uint64 drwav__data_chunk_size_rf64(drwav_uint64 dataChunkSize)
  function drwav__write (line 2301) | static size_t drwav__write(drwav* pWav, const void* pData, size_t dataSize)
  function drwav__write_u16ne_to_le (line 2310) | static size_t drwav__write_u16ne_to_le(drwav* pWav, drwav_uint16 value)
  function drwav__write_u32ne_to_le (line 2322) | static size_t drwav__write_u32ne_to_le(drwav* pWav, drwav_uint32 value)
  function drwav__write_u64ne_to_le (line 2334) | static size_t drwav__write_u64ne_to_le(drwav* pWav, drwav_uint64 value)
  function drwav_bool32 (line 2347) | static drwav_bool32 drwav_preinit_write(drwav* pWav, const drwav_data_fo...
  function drwav_bool32 (line 2387) | static drwav_bool32 drwav_init_write__internal(drwav* pWav, const drwav_...
  function DRWAV_API (line 2501) | DRWAV_API drwav_bool32 drwav_init_write(drwav* pWav, const drwav_data_fo...
  function DRWAV_API (line 2510) | DRWAV_API drwav_bool32 drwav_init_write_sequential(drwav* pWav, const dr...
  function DRWAV_API (line 2519) | DRWAV_API drwav_bool32 drwav_init_write_sequential_pcm_frames(drwav* pWa...
  function DRWAV_API (line 2528) | DRWAV_API drwav_uint64 drwav_target_write_size_bytes(const drwav_data_fo...
  function drwav_result (line 2554) | static drwav_result drwav_result_from_errno(int e)
  function drwav_result (line 2956) | static drwav_result drwav_fopen(FILE** ppFile, const char* pFilePath, co...
  function drwav_result (line 3016) | static drwav_result drwav_wfopen(FILE** ppFile, const wchar_t* pFilePath...
  function drwav__on_read_stdio (line 3099) | static size_t drwav__on_read_stdio(void* pUserData, void* pBufferOut, si...
  function drwav__on_write_stdio (line 3104) | static size_t drwav__on_write_stdio(void* pUserData, const void* pData, ...
  function drwav_bool32 (line 3109) | static drwav_bool32 drwav__on_seek_stdio(void* pUserData, int offset, dr...
  function DRWAV_API (line 3114) | DRWAV_API drwav_bool32 drwav_init_file(drwav* pWav, const char* filename...
  function drwav_bool32 (line 3120) | static drwav_bool32 drwav_init_file__internal_FILE(drwav* pWav, FILE* pF...
  function DRWAV_API (line 3139) | DRWAV_API drwav_bool32 drwav_init_file_ex(drwav* pWav, const char* filen...
  function DRWAV_API (line 3150) | DRWAV_API drwav_bool32 drwav_init_file_w(drwav* pWav, const wchar_t* fil...
  function DRWAV_API (line 3155) | DRWAV_API drwav_bool32 drwav_init_file_ex_w(drwav* pWav, const wchar_t* ...
  function drwav_bool32 (line 3167) | static drwav_bool32 drwav_init_file_write__internal_FILE(drwav* pWav, FI...
  function drwav_bool32 (line 3186) | static drwav_bool32 drwav_init_file_write__internal(drwav* pWav, const c...
  function drwav_bool32 (line 3197) | static drwav_bool32 drwav_init_file_write_w__internal(drwav* pWav, const...
  function DRWAV_API (line 3208) | DRWAV_API drwav_bool32 drwav_init_file_write(drwav* pWav, const char* fi...
  function DRWAV_API (line 3213) | DRWAV_API drwav_bool32 drwav_init_file_write_sequential(drwav* pWav, con...
  function DRWAV_API (line 3218) | DRWAV_API drwav_bool32 drwav_init_file_write_sequential_pcm_frames(drwav...
  function DRWAV_API (line 3227) | DRWAV_API drwav_bool32 drwav_init_file_write_w(drwav* pWav, const wchar_...
  function DRWAV_API (line 3232) | DRWAV_API drwav_bool32 drwav_init_file_write_sequential_w(drwav* pWav, c...
  function DRWAV_API (line 3237) | DRWAV_API drwav_bool32 drwav_init_file_write_sequential_pcm_frames_w(drw...
  function drwav__on_read_memory (line 3248) | static size_t drwav__on_read_memory(void* pUserData, void* pBufferOut, s...
  function drwav_bool32 (line 3269) | static drwav_bool32 drwav__on_seek_memory(void* pUserData, int offset, d...
  function drwav__on_write_memory (line 3298) | static size_t drwav__on_write_memory(void* pUserData, const void* pDataI...
  function drwav_bool32 (line 3338) | static drwav_bool32 drwav__on_seek_memory_write(void* pUserData, int off...
  function DRWAV_API (line 3367) | DRWAV_API drwav_bool32 drwav_init_memory(drwav* pWav, const void* data, ...
  function DRWAV_API (line 3372) | DRWAV_API drwav_bool32 drwav_init_memory_ex(drwav* pWav, const void* dat...
  function drwav_bool32 (line 3390) | static drwav_bool32 drwav_init_memory_write__internal(drwav* pWav, void*...
  function DRWAV_API (line 3412) | DRWAV_API drwav_bool32 drwav_init_memory_write(drwav* pWav, void** ppDat...
  function DRWAV_API (line 3417) | DRWAV_API drwav_bool32 drwav_init_memory_write_sequential(drwav* pWav, v...
  function DRWAV_API (line 3422) | DRWAV_API drwav_bool32 drwav_init_memory_write_sequential_pcm_frames(drw...
  function DRWAV_API (line 3433) | DRWAV_API drwav_result drwav_uninit(drwav* pWav)
  function DRWAV_API (line 3531) | DRWAV_API size_t drwav_read_raw(drwav* pWav, size_t bytesToRead, void* p...
  function DRWAV_API (line 3585) | DRWAV_API drwav_uint64 drwav_read_pcm_frames_le(drwav* pWav, drwav_uint6...
  function DRWAV_API (line 3621) | DRWAV_API drwav_uint64 drwav_read_pcm_frames_be(drwav* pWav, drwav_uint6...
  function DRWAV_API (line 3632) | DRWAV_API drwav_uint64 drwav_read_pcm_frames(drwav* pWav, drwav_uint64 f...
  function DRWAV_API (line 3643) | DRWAV_API drwav_bool32 drwav_seek_to_first_pcm_frame(drwav* pWav)
  function DRWAV_API (line 3670) | DRWAV_API drwav_bool32 drwav_seek_to_pcm_frame(drwav* pWav, drwav_uint64...
  function DRWAV_API (line 3774) | DRWAV_API size_t drwav_write_raw(drwav* pWav, size_t bytesToWrite, const...
  function DRWAV_API (line 3789) | DRWAV_API drwav_uint64 drwav_write_pcm_frames_le(drwav* pWav, drwav_uint...
  function DRWAV_API (line 3827) | DRWAV_API drwav_uint64 drwav_write_pcm_frames_be(drwav* pWav, drwav_uint...
  function DRWAV_API (line 3883) | DRWAV_API drwav_uint64 drwav_write_pcm_frames(drwav* pWav, drwav_uint64 ...
  function drwav_uint64 (line 3893) | static drwav_uint64 drwav_read_pcm_frames_s16__msadpcm(drwav* pWav, drwa...
  function drwav_uint64 (line 4073) | static drwav_uint64 drwav_read_pcm_frames_s16__ima(drwav* pWav, drwav_ui...
  function DRWAV_INLINE (line 4269) | static DRWAV_INLINE drwav_int16 drwav__alaw_to_s16(drwav_uint8 sampleIn)
  function DRWAV_INLINE (line 4274) | static DRWAV_INLINE drwav_int16 drwav__mulaw_to_s16(drwav_uint8 sampleIn)
  function drwav__pcm_to_s16 (line 4281) | static void drwav__pcm_to_s16(drwav_int16* pOut, const drwav_uint8* pIn,...
  function drwav__ieee_to_s16 (line 4333) | static void drwav__ieee_to_s16(drwav_int16* pOut, const drwav_uint8* pIn...
  function drwav_uint64 (line 4348) | static drwav_uint64 drwav_read_pcm_frames_s16__pcm(drwav* pWav, drwav_ui...
  function drwav_uint64 (line 4382) | static drwav_uint64 drwav_read_pcm_frames_s16__ieee(drwav* pWav, drwav_u...
  function drwav_uint64 (line 4415) | static drwav_uint64 drwav_read_pcm_frames_s16__alaw(drwav* pWav, drwav_u...
  function drwav_uint64 (line 4448) | static drwav_uint64 drwav_read_pcm_frames_s16__mulaw(drwav* pWav, drwav_...
  function DRWAV_API (line 4481) | DRWAV_API drwav_uint64 drwav_read_pcm_frames_s16(drwav* pWav, drwav_uint...
  function DRWAV_API (line 4523) | DRWAV_API drwav_uint64 drwav_read_pcm_frames_s16le(drwav* pWav, drwav_ui...
  function DRWAV_API (line 4533) | DRWAV_API drwav_uint64 drwav_read_pcm_frames_s16be(drwav* pWav, drwav_ui...
  function DRWAV_API (line 4544) | DRWAV_API void drwav_u8_to_s16(drwav_int16* pOut, const drwav_uint8* pIn...
  function DRWAV_API (line 4556) | DRWAV_API void drwav_s24_to_s16(drwav_int16* pOut, const drwav_uint8* pI...
  function DRWAV_API (line 4567) | DRWAV_API void drwav_s32_to_s16(drwav_int16* pOut, const drwav_int32* pI...
  function DRWAV_API (line 4578) | DRWAV_API void drwav_f32_to_s16(drwav_int16* pOut, const float* pIn, siz...
  function DRWAV_API (line 4593) | DRWAV_API void drwav_f64_to_s16(drwav_int16* pOut, const double* pIn, si...
  function DRWAV_API (line 4608) | DRWAV_API void drwav_alaw_to_s16(drwav_int16* pOut, const drwav_uint8* p...
  function DRWAV_API (line 4616) | DRWAV_API void drwav_mulaw_to_s16(drwav_int16* pOut, const drwav_uint8* ...
  function drwav__pcm_to_f32 (line 4626) | static void drwav__pcm_to_f32(float* pOut, const drwav_uint8* pIn, size_...
  function drwav__ieee_to_f32 (line 4675) | static void drwav__ieee_to_f32(float* pOut, const drwav_uint8* pIn, size...
  function drwav_uint64 (line 4694) | static drwav_uint64 drwav_read_pcm_frames_f32__pcm(drwav* pWav, drwav_ui...
  function drwav_uint64 (line 4722) | static drwav_uint64 drwav_read_pcm_frames_f32__msadpcm(drwav* pWav, drwa...
  function drwav_uint64 (line 4746) | static drwav_uint64 drwav_read_pcm_frames_f32__ima(drwav* pWav, drwav_ui...
  function drwav_uint64 (line 4770) | static drwav_uint64 drwav_read_pcm_frames_f32__ieee(drwav* pWav, drwav_u...
  function drwav_uint64 (line 4804) | static drwav_uint64 drwav_read_pcm_frames_f32__alaw(drwav* pWav, drwav_u...
  function drwav_uint64 (line 4831) | static drwav_uint64 drwav_read_pcm_frames_f32__mulaw(drwav* pWav, drwav_...
  function DRWAV_API (line 4859) | DRWAV_API drwav_uint64 drwav_read_pcm_frames_f32(drwav* pWav, drwav_uint...
  function DRWAV_API (line 4901) | DRWAV_API drwav_uint64 drwav_read_pcm_frames_f32le(drwav* pWav, drwav_ui...
  function DRWAV_API (line 4911) | DRWAV_API drwav_uint64 drwav_read_pcm_frames_f32be(drwav* pWav, drwav_ui...
  function DRWAV_API (line 4922) | DRWAV_API void drwav_u8_to_f32(float* pOut, const drwav_uint8* pIn, size...
  function DRWAV_API (line 4951) | DRWAV_API void drwav_s16_to_f32(float* pOut, const drwav_int16* pIn, siz...
  function DRWAV_API (line 4964) | DRWAV_API void drwav_s24_to_f32(float* pOut, const drwav_uint8* pIn, siz...
  function DRWAV_API (line 4983) | DRWAV_API void drwav_s32_to_f32(float* pOut, const drwav_int32* pIn, siz...
  function DRWAV_API (line 4995) | DRWAV_API void drwav_f64_to_f32(float* pOut, const double* pIn, size_t s...
  function DRWAV_API (line 5008) | DRWAV_API void drwav_alaw_to_f32(float* pOut, const drwav_uint8* pIn, si...
  function DRWAV_API (line 5021) | DRWAV_API void drwav_mulaw_to_f32(float* pOut, const drwav_uint8* pIn, s...
  function drwav__pcm_to_s32 (line 5036) | static void drwav__pcm_to_s32(drwav_int32* pOut, const drwav_uint8* pIn,...
  function drwav__ieee_to_s32 (line 5087) | static void drwav__ieee_to_s32(drwav_int32* pOut, const drwav_uint8* pIn...
  function drwav_uint64 (line 5103) | static drwav_uint64 drwav_read_pcm_frames_s32__pcm(drwav* pWav, drwav_ui...
  function drwav_uint64 (line 5137) | static drwav_uint64 drwav_read_pcm_frames_s32__msadpcm(drwav* pWav, drwa...
  function drwav_uint64 (line 5161) | static drwav_uint64 drwav_read_pcm_frames_s32__ima(drwav* pWav, drwav_ui...
  function drwav_uint64 (line 5185) | static drwav_uint64 drwav_read_pcm_frames_s32__ieee(drwav* pWav, drwav_u...
  function drwav_uint64 (line 5213) | static drwav_uint64 drwav_read_pcm_frames_s32__alaw(drwav* pWav, drwav_u...
  function drwav_uint64 (line 5241) | static drwav_uint64 drwav_read_pcm_frames_s32__mulaw(drwav* pWav, drwav_...
  function DRWAV_API (line 5269) | DRWAV_API drwav_uint64 drwav_read_pcm_frames_s32(drwav* pWav, drwav_uint...
  function DRWAV_API (line 5311) | DRWAV_API drwav_uint64 drwav_read_pcm_frames_s32le(drwav* pWav, drwav_ui...
  function DRWAV_API (line 5321) | DRWAV_API drwav_uint64 drwav_read_pcm_frames_s32be(drwav* pWav, drwav_ui...
  function DRWAV_API (line 5332) | DRWAV_API void drwav_u8_to_s32(drwav_int32* pOut, const drwav_uint8* pIn...
  function DRWAV_API (line 5345) | DRWAV_API void drwav_s16_to_s32(drwav_int32* pOut, const drwav_int16* pI...
  function DRWAV_API (line 5358) | DRWAV_API void drwav_s24_to_s32(drwav_int32* pOut, const drwav_uint8* pI...
  function DRWAV_API (line 5376) | DRWAV_API void drwav_f32_to_s32(drwav_int32* pOut, const float* pIn, siz...
  function DRWAV_API (line 5389) | DRWAV_API void drwav_f64_to_s32(drwav_int32* pOut, const double* pIn, si...
  function DRWAV_API (line 5402) | DRWAV_API void drwav_alaw_to_s32(drwav_int32* pOut, const drwav_uint8* p...
  function DRWAV_API (line 5415) | DRWAV_API void drwav_mulaw_to_s32(drwav_int32* pOut, const drwav_uint8* ...
  function drwav_int16 (line 5430) | static drwav_int16* drwav__read_pcm_frames_and_close_s16(drwav* pWav, un...
  function drwav_int32 (line 5514) | static drwav_int32* drwav__read_pcm_frames_and_close_s32(drwav* pWav, un...
  function DRWAV_API (line 5558) | DRWAV_API drwav_int16* drwav_open_and_read_pcm_frames_s16(drwav_read_pro...
  function DRWAV_API (line 5579) | DRWAV_API float* drwav_open_and_read_pcm_frames_f32(drwav_read_proc onRe...
  function DRWAV_API (line 5600) | DRWAV_API drwav_int32* drwav_open_and_read_pcm_frames_s32(drwav_read_pro...
  function DRWAV_API (line 5622) | DRWAV_API drwav_int16* drwav_open_file_and_read_pcm_frames_s16(const cha...
  function DRWAV_API (line 5643) | DRWAV_API float* drwav_open_file_and_read_pcm_frames_f32(const char* fil...
  function DRWAV_API (line 5664) | DRWAV_API drwav_int32* drwav_open_file_and_read_pcm_frames_s32(const cha...
  function DRWAV_API (line 5686) | DRWAV_API drwav_int16* drwav_open_file_and_read_pcm_frames_s16_w(const w...
  function DRWAV_API (line 5707) | DRWAV_API float* drwav_open_file_and_read_pcm_frames_f32_w(const wchar_t...
  function DRWAV_API (line 5728) | DRWAV_API drwav_int32* drwav_open_file_and_read_pcm_frames_s32_w(const w...
  function DRWAV_API (line 5750) | DRWAV_API drwav_int16* drwav_open_memory_and_read_pcm_frames_s16(const v...
  function DRWAV_API (line 5771) | DRWAV_API float* drwav_open_memory_and_read_pcm_frames_f32(const void* d...
  function DRWAV_API (line 5792) | DRWAV_API drwav_int32* drwav_open_memory_and_read_pcm_frames_s32(const v...
  function DRWAV_API (line 5815) | DRWAV_API void drwav_free(void* p, const drwav_allocation_callbacks* pAl...
  function DRWAV_API (line 5824) | DRWAV_API drwav_uint16 drwav_bytes_to_u16(const drwav_uint8* data)
  function DRWAV_API (line 5829) | DRWAV_API drwav_int16 drwav_bytes_to_s16(const drwav_uint8* data)
  function DRWAV_API (line 5834) | DRWAV_API drwav_uint32 drwav_bytes_to_u32(const drwav_uint8* data)
  function DRWAV_API (line 5839) | DRWAV_API drwav_int32 drwav_bytes_to_s32(const drwav_uint8* data)
  function DRWAV_API (line 5844) | DRWAV_API drwav_uint64 drwav_bytes_to_u64(const drwav_uint8* data)
  function DRWAV_API (line 5849) | DRWAV_API drwav_int64 drwav_bytes_to_s64(const drwav_uint8* data)
  function DRWAV_API (line 5855) | DRWAV_API drwav_bool32 drwav_guid_equal(const drwav_uint8 a[16], const d...
  function DRWAV_API (line 5860) | DRWAV_API drwav_bool32 drwav_fourcc_equal(const drwav_uint8* a, const ch...

FILE: Examples/OldMain/main.cpp
  function to_timestamp (line 24) | std::string to_timestamp(int64_t t, bool comma = false) {
  function timestamp_to_sample (line 39) | int timestamp_to_sample(int64_t t, int n_samples) {
  function replace_all (line 44) | void replace_all(std::string & s, const std::string & search, const std:...
  type whisper_params (line 54) | struct whisper_params {
  function whisper_params_parse (line 86) | bool whisper_params_parse(int argc, char ** argv, whisper_params & param...
  function whisper_print_usage (line 132) | void whisper_print_usage(int /*argc*/, char ** argv, const whisper_param...
  type whisper_print_user_data (line 164) | struct whisper_print_user_data {
  function whisper_print_segment_callback (line 170) | void whisper_print_segment_callback(struct whisper_context * ctx, int n_...
  function output_txt (line 263) | bool output_txt(struct whisper_context * ctx, const char * fname) {
  function output_vtt (line 281) | bool output_vtt(struct whisper_context * ctx, const char * fname) {
  function output_srt (line 305) | bool output_srt(struct whisper_context * ctx, const char * fname, const ...
  function output_wts (line 331) | bool output_wts(struct whisper_context * ctx, const char * fname, const ...
  function main (line 440) | int main(int argc, char ** argv) {

FILE: Examples/TranscribeCS/AnsiCodes.cs
  class AnsiCodes (line 5) | static class AnsiCodes
    method GetStdHandle (line 9) | [DllImport( dll, SetLastError = true )]
    type ConsoleModes (line 14) | [Flags]
    method GetConsoleMode (line 37) | [DllImport( dll, SetLastError = true )]
    method SetConsoleMode (line 40) | [DllImport( dll, SetLastError = true )]
    method AnsiCodes (line 43) | static AnsiCodes()

FILE: Examples/TranscribeCS/CommandLineArgs.cs
  type CommandLineArgs (line 7) | sealed record class CommandLineArgs

FILE: Examples/TranscribeCS/Transcribe.cs
  class Transcribe (line 7) | sealed class Transcribe: Callbacks
    method Transcribe (line 12) | public Transcribe( CommandLineArgs args )
    method colorIndex (line 27) | int colorIndex( in sToken tok )
    method printTime (line 36) | public static string printTime( TimeSpan ts ) =>
    method printTimeWithComma (line 38) | public static string printTimeWithComma( TimeSpan ts ) =>
    method onNewSegment (line 41) | protected override void onNewSegment( Context sender, int countNew )

FILE: Examples/TranscribeCS/TranscribeCS.cs
  type eFileOpenMode (line 4) | enum eFileOpenMode: byte
  class Program (line 16) | static class Program
    method Main (line 22) | static int Main( string[] args )
    method writeTextFile (line 90) | static void writeTextFile( Context context, string audioPath )
    method writeSubRip (line 97) | static void writeSubRip( Context context, string audioPath, CommandLin...
    method writeWebVTT (line 114) | static void writeWebVTT( Context context, string audioPath )
    method dbgListGPUs (line 130) | static void dbgListGPUs()

FILE: Examples/WhisperDesktop/AppState.cpp
  function HRESULT (line 18) | static HRESULT readString( CRegKey& k, LPCTSTR name, CString& rdi )
  function HRESULT (line 41) | HRESULT AppState::startup()
  function HRESULT (line 102) | HRESULT AppState::findModelSource()
  function HRESULT (line 121) | HRESULT AppState::saveModelSource()
  function CString (line 151) | CString AppState::stringLoad( LPCTSTR name )
  function HRESULT (line 183) | HRESULT AppState::lastScreenLoad()

FILE: Examples/WhisperDesktop/AppState.h
  function class (line 4) | class AppState

FILE: Examples/WhisperDesktop/CaptureDlg.cpp
  function HRESULT (line 4) | HRESULT CaptureDlg::show()
  type CaptureDlg::eTextFlags (line 23) | enum struct CaptureDlg::eTextFlags : uint32_t
  function LRESULT (line 30) | LRESULT CaptureDlg::OnInitDialog( UINT nMessage, WPARAM wParam, LPARAM l...
  function HRESULT (line 88) | HRESULT __stdcall CaptureDlg::listDevicesCallback( int len, const Whispe...
  function HRESULT (line 269) | static HRESULT appendDate( CString& str, const SYSTEMTIME& time )
  function HRESULT (line 289) | static HRESULT appendTime( CString& str, const SYSTEMTIME& time )
  function HRESULT (line 309) | static HRESULT printDateTime( CAtlFile& file )
  function HRESULT (line 326) | inline HRESULT CaptureDlg::runCapture()
  function LRESULT (line 402) | LRESULT CaptureDlg::onThreadQuit( UINT nMessage, WPARAM wParam, LPARAM l...
  function LRESULT (line 432) | LRESULT CaptureDlg::onThreadStatus( UINT nMessage, WPARAM wParam, LPARAM...
  function HRESULT (line 443) | HRESULT __stdcall CaptureDlg::cbCancel( void* pv ) noexcept
  function HRESULT (line 449) | HRESULT __stdcall CaptureDlg::cbStatus( void* pv, Whisper::eCaptureStatu...
  function HRESULT (line 457) | HRESULT __cdecl CaptureDlg::newSegmentCallback( Whisper::iContext* ctx, ...
  function HRESULT (line 469) | HRESULT CaptureDlg::appendTextFile( Whisper::iTranscribeResult* results,...

FILE: Examples/WhisperDesktop/CaptureDlg.h
  function appState (line 22) | appState( app ) { }
  function onClose (line 64) | void onClose()
  function onBack (line 68) | void onBack()
  function onTranscribe (line 72) | void onTranscribe()
  type struct (line 90) | enum struct
  type sCaptureDevice (line 101) | struct sCaptureDevice
  type sThreadState (line 114) | struct sThreadState

FILE: Examples/WhisperDesktop/CircleIndicator.cpp
  function HRESULT (line 45) | HRESULT CircleIndicator::registerClass()
  function HRESULT (line 54) | HRESULT CircleIndicator::createFont( int height )

FILE: Examples/WhisperDesktop/CircleIndicator.h
  function class (line 6) | class CircleIndicator: public CWindowImpl<CircleIndicator>

FILE: Examples/WhisperDesktop/LoadModelDlg.cpp
  function HRESULT (line 9) | HRESULT LoadModelDlg::show()
  function LRESULT (line 29) | LRESULT LoadModelDlg::OnInitDialog( UINT nMessage, WPARAM wParam, LPARAM...
  function LRESULT (line 67) | LRESULT LoadModelDlg::OnBrowse( UINT, INT, HWND, BOOL& bHandled )
  function LRESULT (line 81) | LRESULT LoadModelDlg::validationError( LPCTSTR message )
  function LRESULT (line 87) | LRESULT LoadModelDlg::validationError( LPCTSTR message, HRESULT hr )
  function LRESULT (line 109) | LRESULT LoadModelDlg::OnOk( UINT, INT, HWND, BOOL& bHandled )
  function HRESULT (line 135) | static HRESULT loadModel( const wchar_t* path, Whisper::sModelSetup setu...
  function HRESULT (line 182) | HRESULT __stdcall LoadModelDlg::progressCallback( double val, void* pv )...
  function LRESULT (line 191) | LRESULT LoadModelDlg::OnCallbackStatus( UINT, WPARAM wParam, LPARAM, BOO...
  function LRESULT (line 219) | LRESULT LoadModelDlg::OnHyperlink( int idCtrl, LPNMHDR pnmh, BOOL& bHand...

FILE: Examples/WhisperDesktop/LoadModelDlg.h
  function appState (line 16) | appState( app ) { }
  function LRESULT (line 45) | LRESULT OnCommand( UINT, INT nIdentifier, HWND, BOOL& bHandled )

FILE: Examples/WhisperDesktop/ModelAdvancedDlg.cpp
  function addGpu (line 5) | static void __stdcall addGpu( const wchar_t* name, void* pv )
  function LRESULT (line 11) | LRESULT ModelAdvancedDlg::onInitDialog( UINT nMessage, WPARAM wParam, LP...

FILE: Examples/WhisperDesktop/ModelAdvancedDlg.h
  function appState (line 15) | appState( app ) { }
  function onCancel (line 31) | void onCancel()

FILE: Examples/WhisperDesktop/TranscribeDlg.cpp
  function HRESULT (line 5) | HRESULT TranscribeDlg::show()
  function LRESULT (line 27) | LRESULT TranscribeDlg::OnInitDialog( UINT nMessage, WPARAM wParam, LPARA...
  type TranscribeDlg::eOutputFormat (line 118) | enum struct TranscribeDlg::eOutputFormat : uint8_t
  type TranscribeDlg::eVisualState (line 127) | enum struct TranscribeDlg::eVisualState : uint8_t
  function LRESULT (line 135) | LRESULT TranscribeDlg::onOutFormatChange( UINT, INT, HWND, BOOL& bHandled )
  function LRESULT (line 152) | LRESULT TranscribeDlg::onInputChange( UINT, INT, HWND, BOOL& )
  function printTime (line 323) | static void printTime( CString& rdi, int64_t ticks )
  function LRESULT (line 341) | LRESULT TranscribeDlg::onCallbackStatus( UINT, WPARAM wParam, LPARAM, BO...
  function HRESULT (line 393) | HRESULT TranscribeDlg::transcribe()
  function HRESULT (line 465) | inline HRESULT TranscribeDlg::progressCallback( double p ) noexcept
  function HRESULT (line 473) | HRESULT __cdecl TranscribeDlg::progressCallbackStatic( double p, Whisper...
  function HRESULT (line 481) | HRESULT write( CAtlFile& file, const CStringA& line )
  function HRESULT (line 506) | HRESULT TranscribeDlg::writeTextFile( const sSegment* const segments, co...
  function HRESULT (line 533) | HRESULT TranscribeDlg::writeSubRip( const sSegment* const segments, cons...
  function HRESULT (line 553) | HRESULT TranscribeDlg::writeWebVTT( const sSegment* const segments, cons...
  function HRESULT (line 576) | inline HRESULT TranscribeDlg::newSegmentCallback( Whisper::iContext* ctx...
  function HRESULT (line 584) | HRESULT __cdecl TranscribeDlg::newSegmentCallbackStatic( Whisper::iConte...
  function HRESULT (line 590) | HRESULT __cdecl TranscribeDlg::encoderBeginCallback( Whisper::iContext* ...

FILE: Examples/WhisperDesktop/TranscribeDlg.h
  function appState (line 21) | appState( app ) { }
  function onClose (line 60) | void onClose()
  function onBack (line 64) | void onBack()
  function LRESULT (line 85) | LRESULT onOutFormatChange( UINT, INT, HWND, BOOL& bHandled );
  type struct (line 99) | enum struct
  type struct (line 100) | enum struct
  type TranscribeArgs (line 102) | struct TranscribeArgs

FILE: Examples/WhisperDesktop/Utils/DebugConsole.cpp
  function textAttributes (line 15) | inline uint16_t textAttributes( eLogLevel lvl )
  function HRESULT (line 42) | HRESULT DebugConsole::Entry::print( HANDLE hConsole, CString& tempString...
  function clearLastError (line 54) | void clearLastError()
  function getLastError (line 59) | bool getLastError( CString& rdi )
  function HRESULT (line 97) | HRESULT DebugConsole::initialize( Whisper::eLogLevel level )
  function BOOL (line 144) | BOOL __stdcall DebugConsole::consoleHandlerRoutine( DWORD dwCtrlType )
  function HRESULT (line 157) | HRESULT DebugConsole::show()
  function HRESULT (line 216) | HRESULT DebugConsole::hide()
  function HRESULT (line 234) | HRESULT ConsoleCheckbox::initialize( HWND dialog, int idc, AppState& sta...
  function logMessage (line 287) | void logMessage( Whisper::eLogLevel lvl, const char8_t* pczFormat, va_li...

FILE: Examples/WhisperDesktop/Utils/DebugConsole.h
  function class (line 7) | class DebugConsole
  function class (line 50) | class ConsoleCheckbox

FILE: Examples/WhisperDesktop/Utils/LanguageDropdown.cpp
  function wchar_t (line 7) | inline wchar_t toUpper( wchar_t c )
  function makeTitleCase (line 14) | void makeTitleCase( CString& s )

FILE: Examples/WhisperDesktop/Utils/LanguageDropdown.h
  function class (line 5) | class LanguageDropdown

FILE: Examples/WhisperDesktop/Utils/PendingState.h
  function class (line 4) | class PendingState

FILE: Examples/WhisperDesktop/Utils/TranslateCheckbox.h
  function class (line 4) | class TranslateCheckbox

FILE: Examples/WhisperDesktop/Utils/WTL/atlapp.h
  type _IMAGELIST (line 135) | struct _IMAGELIST { }
  type _TREEITEM (line 136) | struct _TREEITEM { }
  type _PSP (line 137) | struct _PSP { }
  function namespace (line 142) | namespace ATL { HRESULT AtlGetCommCtrlVersion(LPDWORD pdwMajor, LPDWORD ...
  function namespace (line 155) | namespace WTL
  function namespace (line 250) | namespace RunTimeHelper
  function namespace (line 379) | namespace ModuleHelper
  function namespace (line 407) | namespace SecureHelper
  function namespace (line 510) | namespace MinCrtHelper
  function namespace (line 545) | namespace GenericWndClass
  function class (line 575) | class ATL_NO_VTABLE CMessageFilter
  function class (line 585) | class ATL_NO_VTABLE CIdleHandler
  function class (line 595) | class CMessageLoop
  function class (line 724) | class CStaticDataInitCriticalSectionLock
  function class (line 744) | class CWindowCreateCriticalSectionLock
  function class (line 773) | class CAppModule : public ATL::CComModule
  function Term (line 801) | void Term()
  function BOOL (line 809) | BOOL AddMessageLoop(CMessageLoop* pMsgLoop)
  function BOOL (line 829) | BOOL RemoveMessageLoop()
  function CMessageLoop (line 846) | CMessageLoop* GetMessageLoop(DWORD dwThreadID = ::GetCurrentThreadId()) ...
  function BOOL (line 865) | BOOL InitSettingChangeNotify(DLGPROC pfnDlgProc = _SettingChangeDlgProc)
  function TermSettingChangeNotify (line 905) | void TermSettingChangeNotify()
  function BOOL (line 923) | BOOL AddSettingChangeNotify(HWND hWnd)
  function BOOL (line 943) | BOOL RemoveSettingChangeNotify(HWND hWnd)
  function DLGTEMPLATE (line 963) | struct _ATL_EMPTY_DLGTEMPLATE : DLGTEMPLATE
  function INT_PTR (line 973) | static INT_PTR CALLBACK _SettingChangeDlgProc(HWND hWnd, UINT uMsg, WPAR...
  function class (line 999) | class CServerAppModule : public CAppModule
  function Term (line 1018) | void Term()
  function LONG (line 1026) | LONG Unlock() throw()
  function MonitorShutdown (line 1037) | void MonitorShutdown()
  function StartMonitor (line 1066) | bool StartMonitor()
  function DWORD (line 1083) | static DWORD WINAPI MonitorProc(void* pv)
  type ATL (line 1095) | typedef ATL::CRegKey CRegKeyEx;
  function namespace (line 1119) | namespace ATL

FILE: Examples/WhisperDesktop/Utils/WTL/atlctrls.h
  function namespace (line 80) | namespace WTL
  function LPCTSTR (line 124) | static LPCTSTR GetWndClassName()
  function HICON (line 129) | HICON GetIcon() const
  function HENHMETAFILE (line 141) | HENHMETAFILE GetEnhMetaFile() const
  function CBitmapHandle (line 153) | CBitmapHandle GetBitmap() const
  type CStaticT (line 178) | typedef CStaticT<ATL::CWindow>   CStatic;
  function LPCTSTR (line 206) | static LPCTSTR GetWndClassName()
  function UINT (line 211) | UINT GetState() const
  function GetCheck (line 223) | int GetCheck() const
  function UINT (line 235) | UINT GetButtonStyle() const
  function HICON (line 253) | HICON SetIcon(HICON hIcon)
  function CBitmapHandle (line 259) | CBitmapHandle GetBitmap() const
  function BOOL (line 324) | BOOL SetSplitInfo(PBUTTON_SPLITINFO pSplitInfo)
  function GetNoteLength (line 331) | int GetNoteLength() const
  function BOOL (line 345) | BOOL SetNote(LPCWSTR lpstrNoteText)
  function LRESULT (line 352) | LRESULT SetElevationRequiredState(BOOL bSet)
  function Click (line 360) | void Click()
  type CButtonT (line 367) | typedef CButtonT<ATL::CWindow>   CButton;
  function LPCTSTR (line 395) | static LPCTSTR GetWndClassName()
  function GetCount (line 401) | int GetCount() const
  function GetHorizontalExtent (line 414) | int GetHorizontalExtent() const
  function GetTopIndex (line 426) | int GetTopIndex() const
  function LCID (line 438) | LCID GetLocale() const
  function DWORD (line 450) | DWORD GetListBoxInfo() const
  function SetCurSel (line 464) | int SetCurSel(int nSelect)
  function GetSel (line 472) | int GetSel(int nIndex) const           // also works for single-selection
  function GetSelItems (line 492) | int GetSelItems(int nMaxItems, LPINT rgIndex) const
  function GetAnchorIndex (line 499) | int GetAnchorIndex() const
  function GetCaretIndex (line 513) | int GetCaretIndex() const
  function SetItemData (line 532) | int SetItemData(int nIndex, DWORD_PTR dwItemData)
  function SetItemDataPtr (line 544) | int SetItemDataPtr(int nIndex, void* pData)
  function GetItemRect (line 550) | int GetItemRect(int nIndex, LPRECT lpRect) const
  function GetText (line 556) | int GetText(int nIndex, LPTSTR lpszBuffer) const
  function BOOL (line 563) | BOOL GetTextBSTR(int nIndex, BSTR& bstrText) const
  function GetText (line 587) | int GetText(int nIndex, ATL::CString& strText) const
  function GetTextLen (line 604) | int GetTextLen(int nIndex) const
  function GetItemHeight (line 610) | int GetItemHeight(int nIndex) const
  function SetItemHeight (line 616) | int SetItemHeight(int nIndex, UINT cyItemHeight)
  function SetColumnWidth (line 623) | void SetColumnWidth(int cxWidth)
  function BOOL (line 629) | BOOL SetTabStops(int nTabStops, LPINT rgTabStops)
  function BOOL (line 636) | BOOL SetTabStops()
  function BOOL (line 643) | BOOL SetTabStops(const int& cxEachStop)    // takes an 'int'
  function InitStorage (line 651) | int InitStorage(int nItems, UINT nBytes)
  function ResetContent (line 657) | void ResetContent()
  function UINT (line 663) | UINT ItemFromPoint(POINT pt, BOOL& bOutside) const
  function AddString (line 672) | int AddString(LPCTSTR lpszItem)
  function DeleteString (line 678) | int DeleteString(UINT nIndex)
  function InsertString (line 684) | int InsertString(int nIndex, LPCTSTR lpszItem)
  function Dir (line 690) | int Dir(UINT attr, LPCTSTR lpszWildCard)
  function AddFile (line 696) | int AddFile(LPCTSTR lpstrFileName)
  function FindString (line 703) | int FindString(int nStartAfter, LPCTSTR lpszItem) const
  function FindStringExact (line 709) | int FindStringExact(int nIndexStart, LPCTSTR lpszFind) const
  function SelectString (line 715) | int SelectString(int nStartAfter, LPCTSTR lpszItem)
  function SelItemRange (line 721) | int SelItemRange(BOOL bSelect, int nFirstItem, int nLastItem)
  type CListBoxT (line 730) | typedef CListBoxT<ATL::CWindow>   CListBox;
  function LPCTSTR (line 758) | static LPCTSTR GetWndClassName()
  function GetCount (line 764) | int GetCount() const
  function SetCurSel (line 776) | int SetCurSel(int nSelect)
  function LCID (line 782) | LCID GetLocale() const
  function GetTopIndex (line 794) | int GetTopIndex() const
  function UINT (line 806) | UINT GetHorizontalExtent() const
  function GetDroppedWidth (line 818) | int GetDroppedWidth() const
  function BOOL (line 830) | BOOL GetComboBoxInfo(PCOMBOBOXINFO pComboBoxInfo) const
  function DWORD (line 837) | DWORD GetEditSel() const
  function DWORD_PTR (line 850) | DWORD_PTR GetItemData(int nIndex) const
  function SetItemData (line 856) | int SetItemData(int nIndex, DWORD_PTR dwItemData)
  function SetItemDataPtr (line 868) | int SetItemDataPtr(int nIndex, void* pData)
  function GetLBText (line 874) | int GetLBText(int nIndex, LPTSTR lpszText) const
  function BOOL (line 880) | BOOL GetLBTextBSTR(int nIndex, BSTR& bstrText) const
  function GetLBText (line 903) | int GetLBText(int nIndex, ATL::CString& strText) const
  function GetLBTextLen (line 920) | int GetLBTextLen(int nIndex) const
  function GetItemHeight (line 926) | int GetItemHeight(int nIndex) const
  function SetItemHeight (line 932) | int SetItemHeight(int nIndex, UINT cyItemHeight)
  function BOOL (line 938) | BOOL GetExtendedUI() const
  function BOOL (line 956) | BOOL GetDroppedState() const
  function BOOL (line 968) | BOOL SetMinVisible(int nMinVisible)
  function BOOL (line 975) | BOOL GetCueBannerText(LPWSTR lpwText, int cchText) const
  function BOOL (line 982) | BOOL SetCueBannerText(LPCWSTR lpcwText)
  function InitStorage (line 989) | int InitStorage(int nItems, UINT nBytes)
  function ResetContent (line 995) | void ResetContent()
  function BOOL (line 1002) | BOOL LimitText(int nMaxChars)
  function AddString (line 1016) | int AddString(LPCTSTR lpszString)
  function DeleteString (line 1022) | int DeleteString(UINT nIndex)
  function InsertString (line 1028) | int InsertString(int nIndex, LPCTSTR lpszString)
  function Dir (line 1034) | int Dir(UINT attr, LPCTSTR lpszWildCard)
  function FindString (line 1041) | int FindString(int nStartAfter, LPCTSTR lpszString) const
  function FindStringExact (line 1047) | int FindStringExact(int nIndexStart, LPCTSTR lpszFind) const
  function SelectString (line 1053) | int SelectString(int nStartAfter, LPCTSTR lpszString)
  function Clear (line 1060) | void Clear()
  function Copy (line 1066) | void Copy()
  function Cut (line 1072) | void Cut()
  function Paste (line 1078) | void Paste()
  type CComboBoxT (line 1085) | typedef CComboBoxT<ATL::CWindow>   CComboBox;
  function LPCTSTR (line 1113) | static LPCTSTR GetWndClassName()
  function BOOL (line 1118) | BOOL CanUndo() const
  function BOOL (line 1130) | BOOL GetModify() const
  function DWORD (line 1148) | DWORD GetSel() const
  function HLOCAL (line 1160) | HLOCAL GetHandle() const
  function DWORD (line 1172) | DWORD GetMargins() const
  function UINT (line 1192) | UINT GetLimitText() const
  function POINT (line 1204) | POINT PosFromChar(UINT nChar) const
  function GetLine (line 1222) | int GetLine(int nIndex, LPTSTR lpszBuffer) const
  function GetLine (line 1228) | int GetLine(int nIndex, LPTSTR lpszBuffer, int nMaxLength) const
  function TCHAR (line 1235) | TCHAR GetPasswordChar() const
  function EDITWORDBREAKPROC (line 1247) | EDITWORDBREAKPROC GetWordBreakProc() const
  function GetFirstVisibleLine (line 1259) | int GetFirstVisibleLine() const
  function UINT (line 1278) | UINT GetImeStatus(UINT uStatus) const
  function UINT (line 1284) | UINT SetImeStatus(UINT uStatus, UINT uData)
  function BOOL (line 1290) | BOOL GetCueBannerText(LPCWSTR lpstrText, int cchText) const
  function EmptyUndoBuffer (line 1304) | void EmptyUndoBuffer()
  function BOOL (line 1310) | BOOL FmtLines(BOOL bAddEOL)
  function SetRect (line 1352) | void SetRect(LPCRECT lpRect)
  function SetRectNP (line 1358) | void SetRectNP(LPCRECT lpRect)
  function BOOL (line 1390) | BOOL SetTabStops(int nTabStops, LPINT rgTabStops)
  function BOOL (line 1396) | BOOL SetTabStops()
  function BOOL (line 1402) | BOOL SetTabStops(const int& cxEachStop)    // takes an 'int'
  function ScrollCaret (line 1408) | void ScrollCaret()
  function Scroll (line 1414) | int Scroll(int nScrollAction)
  function BOOL (line 1436) | BOOL ShowBalloonTip(PEDITBALLOONTIP pEditBaloonTip)
  function BOOL (line 1442) | BOOL HideBalloonTip()
  function DWORD (line 1449) | DWORD GetHilite() const
  function SetHilite (line 1463) | void SetHilite(int nStartChar, int nEndChar)
  function BOOL (line 1471) | BOOL Undo()
  function Clear (line 1477) | void Clear()
  function Copy (line 1483) | void Copy()
  function Cut (line 1489) | void Cut()
  function Paste (line 1495) | void Paste()
  function DWORD (line 1503) | DWORD SetExtendedStyle(DWORD dwStyle, DWORD dwMask)
  function DWORD (line 1509) | DWORD GetExtendedStyle() const
  function EC_ENDOFLINE (line 1521) | EC_ENDOFLINE GetEndOfLine() const
  function SearchWeb (line 1533) | void SearchWeb()
  function BOOL (line 1539) | BOOL SetCaretIndex(DWORD dwCaretIndex)
  function DWORD (line 1545) | DWORD GetCaretIndex() const
  function BOOL (line 1557) | BOOL SetZoom(int nNum, int nDen)
  function DWORD (line 1565) | DWORD GetFileLineFromChar(DWORD dwCharIndex) const
  function DWORD (line 1571) | DWORD GetFileLineIndex(DWORD dwLineNum) const
  function DWORD (line 1577) | DWORD GetFileLineLength(DWORD dwCharIndex) const
  function DWORD (line 1583) | DWORD GetFileLine(DWORD dwLineNum, LPTSTR lpstrLine, WORD wLen) const
  function DWORD (line 1611) | DWORD GetFileLineCount() const
  function LRESULT (line 1668) | LRESULT OnEditCopy(WORD /*wNotifyCode*/, WORD /*wID*/, HWND /*hWndCtl*/,...
  function LRESULT (line 1682) | LRESULT OnEditPaste(WORD /*wNotifyCode*/, WORD /*wID*/, HWND /*hWndCtl*/...
  function LRESULT (line 1696) | LRESULT OnEditUndo(WORD /*wNotifyCode*/, WORD /*wID*/, HWND /*hWndCtl*/,...
  function BOOL (line 1729) | BOOL HasSelection() const
  function LPCTSTR (line 1770) | static LPCTSTR GetWndClassName()
  function GetScrollPos (line 1775) | int GetScrollPos() const
  function BOOL (line 1799) | BOOL GetScrollInfo(LPSCROLLINFO lpScrollInfo) const
  function BOOL (line 1821) | BOOL GetScrollBarInfo(PSCROLLBARINFO pScrollBarInfo) const
  function BOOL (line 1834) | BOOL EnableScrollBar(UINT nArrowFlags = ESB_ENABLE_BOTH)
  type CScrollBarT (line 1841) | typedef CScrollBarT<ATL::CWindow>   CScrollBar;
  type CImageListT (line 1851) | typedef CImageListT<false>   CImageList;
  type CImageListT (line 1852) | typedef CImageListT<true>    CImageListManaged;
  function Attach (line 1878) | void Attach(HIMAGELIST hImageList)
  function HIMAGELIST (line 1885) | HIMAGELIST Detach()
  function COLORREF (line 1909) | COLORREF SetBkColor(COLORREF cr)
  function BOOL (line 1915) | BOOL GetImageInfo(int nImage, IMAGEINFO* pImageInfo) const
  function HICON (line 1921) | HICON GetIcon(int nIndex, UINT uFlags = ILD_NORMAL) const
  function BOOL (line 1927) | BOOL GetIconSize(int& cx, int& cy) const
  function BOOL (line 1933) | BOOL GetIconSize(SIZE& size) const
  function BOOL (line 1939) | BOOL SetIconSize(int cx, int cy)
  function BOOL (line 1945) | BOOL SetIconSize(SIZE size)
  function BOOL (line 1951) | BOOL SetImageCount(UINT uNewCount)
  function BOOL (line 1957) | BOOL SetOverlayImage(int nImage, int nOverlay)
  function BOOL (line 1964) | BOOL Create(int cx, int cy, UINT nFlags, int nInitial, int nGrow)
  function BOOL (line 1971) | BOOL Create(ATL::_U_STRINGorID bitmap, int cx, int nGrow, COLORREF crMask)
  function BOOL (line 1985) | BOOL Merge(HIMAGELIST hImageList1, int nImage1, HIMAGELIST hImageList2, ...
  function BOOL (line 1993) | BOOL CreateFromStream(LPSTREAM lpStream)
  function BOOL (line 2001) | BOOL Destroy()
  function Add (line 2017) | int Add(HBITMAP hBitmap, COLORREF crMask)
  function BOOL (line 2023) | BOOL Remove(int nImage)
  function BOOL (line 2029) | BOOL RemoveAll()
  function BOOL (line 2035) | BOOL Replace(int nImage, HBITMAP hBitmap, HBITMAP hBitmapMask)
  function AddIcon (line 2041) | int AddIcon(HICON hIcon)
  function ReplaceIcon (line 2047) | int ReplaceIcon(int nImage, HICON hIcon)
  function HICON (line 2053) | HICON ExtractIcon(int nImage)
  function BOOL (line 2059) | BOOL Draw(HDC hDC, int nImage, int x, int y, UINT nStyle)
  function BOOL (line 2066) | BOOL Draw(HDC hDC, int nImage, POINT pt, UINT nStyle)
  function BOOL (line 2073) | BOOL DrawEx(int nImage, HDC hDC, int x, int y, int dx, int dy, COLORREF ...
  function BOOL (line 2080) | BOOL DrawEx(int nImage, HDC hDC, RECT& rect, COLORREF rgbBk, COLORREF rg...
  function BOOL (line 2087) | static BOOL DrawIndirect(IMAGELISTDRAWPARAMS* pimldp)
  function BOOL (line 2092) | BOOL Copy(int nSrc, int nDst, UINT uFlags = ILCF_MOVE)
  function HIMAGELIST (line 2099) | static HIMAGELIST Read(LPSTREAM lpStream)
  function BOOL (line 2104) | BOOL Write(LPSTREAM lpStream)
  function HRESULT (line 2110) | static HRESULT ReadEx(DWORD dwFlags, LPSTREAM lpStream, REFIID riid, PVO...
  function HRESULT (line 2115) | HRESULT WriteEx(DWORD dwFlags, LPSTREAM lpStream)
  function BOOL (line 2123) | BOOL BeginDrag(int nImage, POINT ptHotSpot)
  function BOOL (line 2129) | BOOL BeginDrag(int nImage, int xHotSpot, int yHotSpot)
  function EndDrag (line 2135) | static void EndDrag()
  function BOOL (line 2140) | static BOOL DragMove(POINT pt)
  function BOOL (line 2145) | static BOOL DragMove(int x, int y)
  function BOOL (line 2150) | BOOL SetDragCursorImage(int nDrag, POINT ptHotSpot)
  function BOOL (line 2156) | BOOL SetDragCursorImage(int nDrag, int xHotSpot, int yHotSpot)
  function CImageList (line 2167) | static CImageList GetDragImage(LPPOINT lpPoint, LPPOINT lpPointHotSpot)
  function BOOL (line 2172) | static BOOL DragEnter(HWND hWnd, POINT point)
  function BOOL (line 2177) | static BOOL DragEnter(HWND hWnd, int x, int y)
  function BOOL (line 2182) | static BOOL DragLeave(HWND hWnd)
  function CImageList (line 2193) | static CImageList Duplicate(HIMAGELIST hImageList)
  function class (line 2204) | class CToolInfo : public TOOLINFO
  function operator (line 2212) | operator LPTOOLINFO() { return this; }
  function operator (line 2214) | operator LPARAM() { return (LPARAM)this; }
  function LPCTSTR (line 2263) | static LPCTSTR GetWndClassName()
  function GetText (line 2268) | void GetText(LPTOOLINFO lpToolInfo) const
  function BOOL (line 2282) | BOOL GetToolInfo(LPTOOLINFO lpToolInfo) const
  function BOOL (line 2288) | BOOL GetToolInfo(HWND hWnd, UINT_PTR nIDTool, UINT* puFlags, LPRECT lpRe...
  function SetToolInfo (line 2304) | void SetToolInfo(LPTOOLINFO lpToolInfo)
  function SetToolRect (line 2310) | void SetToolRect(LPTOOLINFO lpToolInfo)
  function SetToolRect (line 2316) | void SetToolRect(HWND hWnd, UINT_PTR nIDTool, LPCRECT lpRect)
  function GetToolCount (line 2326) | int GetToolCount() const
  function SetDelayTime (line 2338) | void SetDelayTime(DWORD dwType, int nTime)
  function GetMargin (line 2344) | void GetMargin(LPRECT lpRect) const
  function SetMargin (line 2350) | void SetMargin(LPRECT lpRect)
  function GetMaxTipWidth (line 2356) | int GetMaxTipWidth() const
  function COLORREF (line 2368) | COLORREF GetTipBkColor() const
  function COLORREF (line 2380) | COLORREF GetTipTextColor() const
  function BOOL (line 2392) | BOOL GetCurrentTool(LPTOOLINFO lpToolInfo) const
  function SIZE (line 2398) | SIZE GetBubbleSize(LPTOOLINFO lpToolInfo) const
  function BOOL (line 2406) | BOOL SetTitle(UINT_PTR uIcon, LPCTSTR lpstrTitle)
  function BOOL (line 2413) | BOOL SetTitle(HICON hIcon, LPCTSTR lpstrTitle)
  function GetTitle (line 2419) | void GetTitle(PTTGETTITLE pTTGetTitle) const
  function SetWindowTheme (line 2425) | void SetWindowTheme(LPCWSTR lpstrTheme)
  function Activate (line 2432) | void Activate(BOOL bActivate)
  function BOOL (line 2438) | BOOL AddTool(LPTOOLINFO lpToolInfo)
  function DelTool (line 2455) | void DelTool(LPTOOLINFO lpToolInfo)
  function BOOL (line 2470) | BOOL HitTest(LPTTHITTESTINFO lpHitTestInfo) const
  function BOOL (line 2476) | BOOL HitTest(HWND hWnd, POINT pt, LPTOOLINFO lpToolInfo) const
  function RelayEvent (line 2495) | void RelayEvent(LPMSG lpMsg)
  function UpdateTipText (line 2501) | void UpdateTipText(LPTOOLINFO lpToolInfo)
  function BOOL (line 2516) | BOOL EnumTools(UINT_PTR nTool, LPTOOLINFO lpToolInfo) const
  function Pop (line 2522) | void Pop()
  function TrackActivate (line 2528) | void TrackActivate(LPTOOLINFO lpToolInfo, BOOL bActivate)
  function TrackActivate (line 2534) | void TrackActivate(HWND hWnd, UINT_PTR nIDTool, BOOL bActivate)
  function TrackPosition (line 2543) | void TrackPosition(int xPos, int yPos)
  function Update (line 2549) | void Update()
  function BOOL (line 2555) | BOOL AdjustRect(LPRECT lpRect, BOOL bLarger /*= TRUE*/)
  function Popup (line 2561) | void Popup()
  type CToolTipCtrlT (line 2568) | typedef CToolTipCtrlT<ATL::CWindow>   CToolTipCtrl;
  function LPCTSTR (line 2596) | static LPCTSTR GetWndClassName()
  function GetItemCount (line 2601) | int GetItemCount() const
  function BOOL (line 2613) | BOOL SetItem(int nIndex, LPHDITEM pHeaderItem)
  function CImageList (line 2619) | CImageList GetImageList() const
  function BOOL (line 2643) | BOOL GetItemRect(int nIndex, LPRECT lpItemRect) const
  function SetHotDivider (line 2649) | int SetHotDivider(BOOL bPos, DWORD dwInputValue)
  function BOOL (line 2655) | BOOL GetUnicodeFormat() const
  function SetBitmapMargin (line 2673) | int SetBitmapMargin(int nWidth)
  function SetFilterChangeTimeout (line 2679) | int SetFilterChangeTimeout(DWORD dwTimeOut)
  function BOOL (line 2686) | BOOL GetItemDropDownRect(int nIndex, LPRECT lpRect) const
  function BOOL (line 2692) | BOOL GetOverflowRect(LPRECT lpRect) const
  function GetFocusedItem (line 2698) | int GetFocusedItem() const
  function InsertItem (line 2712) | int InsertItem(int nIndex, LPHDITEM phdi)
  function AddItem (line 2718) | int AddItem(LPHDITEM phdi)
  function BOOL (line 2723) | BOOL DeleteItem(int nIndex)
  function BOOL (line 2729) | BOOL Layout(HD_LAYOUT* pHeaderLayout)
  function HitTest (line 2735) | int HitTest(LPHDHITTESTINFO lpHitTestInfo) const
  function OrderToIndex (line 2741) | int OrderToIndex(int nOrder)
  function CImageList (line 2747) | CImageList CreateDragImage(int nIndex)
  function EditFilter (line 2753) | int EditFilter(int nColumn, BOOL bDiscardChanges)
  function ClearFilter (line 2759) | int ClearFilter(int nColumn)
  function ClearAllFilters (line 2765) | int ClearAllFilters()
  type CHeaderCtrlT (line 2772) | typedef CHeaderCtrlT<ATL::CWindow>   CHeaderCtrl;
  function LPCTSTR (line 2800) | static LPCTSTR GetWndClassName()
  function COLORREF (line 2805) | COLORREF GetBkColor() const
  function CImageList (line 2817) | CImageList GetImageList(int nImageListType) const
  function CImageList (line 2823) | CImageList SetImageList(HIMAGELIST hImageList, int nImageList)
  function GetItemCount (line 2829) | int GetItemCount() const
  function BOOL (line 2841) | BOOL GetItem(LPLVITEM pItem) const
  function BOOL (line 2847) | BOOL SetItem(const LVITEM* pItem)
  function BOOL (line 2853) | BOOL SetItem(int nItem, int nSubItem, UINT nMask, LPCTSTR lpszItem,
  function UINT (line 2869) | UINT GetItemState(int nItem, UINT nMask) const
  function BOOL (line 2875) | BOOL SetItemState(int nItem, UINT nState, UINT nStateMask)
  function BOOL (line 2884) | BOOL SetItemState(int nItem, LPLVITEM pItem)
  function BOOL (line 2890) | BOOL GetItemText(int nItem, int nSubItem, BSTR& bstrText) const
  function GetItemText (line 2926) | int GetItemText(int nItem, int nSubItem, ATL::CString& strText) const
  function GetItemText (line 2952) | int GetItemText(int nItem, int nSubItem, LPTSTR lpszText, int nLen) const
  function BOOL (line 2962) | BOOL SetItemText(int nItem, int nSubItem, LPCTSTR lpszText)
  function DWORD_PTR (line 2968) | DWORD_PTR GetItemData(int nItem) const
  function BOOL (line 2978) | BOOL SetItemData(int nItem, DWORD_PTR dwData)
  function UINT (line 2984) | UINT GetCallbackMask() const
  function BOOL (line 2996) | BOOL GetItemPosition(int nItem, LPPOINT lpPoint) const
  function BOOL (line 3002) | BOOL SetItemPosition(int nItem, POINT pt)
  function BOOL (line 3009) | BOOL SetItemPosition(int nItem, int x, int y)
  function GetStringWidth (line 3017) | int GetStringWidth(LPCTSTR lpsz) const
  function CEdit (line 3023) | CEdit GetEditControl() const
  function GetColumnWidth (line 3041) | int GetColumnWidth(int nCol) const
  function BOOL (line 3047) | BOOL SetColumnWidth(int nCol, int cx)
  function BOOL (line 3053) | BOOL GetViewRect(LPRECT lpRect) const
  function COLORREF (line 3059) | COLORREF GetTextColor() const
  function COLORREF (line 3071) | COLORREF GetTextBkColor() const
  function GetTopIndex (line 3083) | int GetTopIndex() const
  function BOOL (line 3095) | BOOL GetOrigin(LPPOINT lpPoint) const
  function UINT (line 3101) | UINT GetSelectedCount() const
  function HCURSOR (line 3114) | HCURSOR GetHotCursor() const
  function GetHotItem (line 3126) | int GetHotItem() const
  function BOOL (line 3138) | BOOL GetColumnOrderArray(int nCount, int* lpnArray) const
  function BOOL (line 3144) | BOOL SetColumnOrderArray(int nCount, int* lpnArray)
  function CHeaderCtrl (line 3150) | CHeaderCtrl GetHeader() const
  function GetISearchString (line 3173) | int GetISearchString(LPTSTR lpstr) const
  function GetSelectedIndex (line 3188) | int GetSelectedIndex() const
  function DWORD (line 3207) | DWORD GetExtendedListViewStyle() const
  function BOOL (line 3229) | BOOL SetCheckState(int nItem, BOOL bCheck)
  function DWORD (line 3236) | DWORD GetViewType() const
  function BOOL (line 3252) | BOOL GetBkImage(LPLVBKIMAGE plvbki) const
  function BOOL (line 3258) | BOOL SetBkImage(LPLVBKIMAGE plvbki)
  function GetSelectionMark (line 3264) | int GetSelectionMark() const
  function BOOL (line 3276) | BOOL GetWorkAreas(int nWorkAreas, LPRECT lpRect) const
  function BOOL (line 3282) | BOOL SetWorkAreas(int nWorkAreas, LPRECT lpRect)
  function DWORD (line 3288) | DWORD GetHoverTime() const
  function BOOL (line 3302) | BOOL GetNumberOfWorkAreas(int* pnWorkAreas) const
  function BOOL (line 3308) | BOOL SetItemCountEx(int nItems, DWORD dwFlags)
  function CToolTipCtrl (line 3315) | CToolTipCtrl GetToolTips() const
  function SetSelectedColumn (line 3345) | void SetSelectedColumn(int nColumn)
  function DWORD (line 3351) | DWORD GetView() const
  function BOOL (line 3363) | BOOL IsGroupViewEnabled() const
  function SetGroupInfo (line 3375) | int SetGroupInfo(int nGroupID, PLVGROUP pGroup)
  function GetGroupMetrics (line 3381) | void GetGroupMetrics(PLVGROUPMETRICS pGroupMetrics) const
  function SetGroupMetrics (line 3387) | void SetGroupMetrics(PLVGROUPMETRICS pGroupMetrics)
  function GetTileViewInfo (line 3393) | void GetTileViewInfo(PLVTILEVIEWINFO pTileViewInfo) const
  function BOOL (line 3399) | BOOL SetTileViewInfo(PLVTILEVIEWINFO pTileViewInfo)
  function GetTileInfo (line 3405) | void GetTileInfo(PLVTILEINFO pTileInfo) const
  function BOOL (line 3411) | BOOL SetTileInfo(PLVTILEINFO pTileInfo)
  function BOOL (line 3417) | BOOL GetInsertMark(LPLVINSERTMARK pInsertMark) const
  function BOOL (line 3423) | BOOL SetInsertMark(LPLVINSERTMARK pInsertMark)
  function GetInsertMarkRect (line 3429) | int GetInsertMarkRect(LPRECT lpRect) const
  function COLORREF (line 3435) | COLORREF GetInsertMarkColor() const
  function COLORREF (line 3447) | COLORREF GetOutlineColor() const
  function GetGroupCount (line 3460) | int GetGroupCount() const
  function BOOL (line 3472) | BOOL GetGroupRect(int nGroupID, int nType, LPRECT lpRect) const
  function UINT (line 3481) | UINT GetGroupState(int nGroupID, UINT uMask) const
  function GetFocusedGroup (line 3487) | int GetFocusedGroup() const
  function BOOL (line 3499) | BOOL GetFooterRect(LPRECT lpRect) const
  function BOOL (line 3505) | BOOL GetFooterInfo(LPLVFOOTERINFO lpFooterInfo) const
  function BOOL (line 3511) | BOOL GetFooterItemRect(int nItem, LPRECT lpRect) const
  function BOOL (line 3517) | BOOL GetFooterItem(int nItem, LPLVFOOTERITEM lpFooterItem) const
  function BOOL (line 3523) | BOOL GetItemIndexRect(PLVITEMINDEX pItemIndex, int nSubItem, int nType, ...
  function BOOL (line 3536) | BOOL SetItemIndexState(PLVITEMINDEX pItemIndex, UINT uState, UINT dwMask)
  function BOOL (line 3545) | BOOL GetNextItemIndex(PLVITEMINDEX pItemIndex, WORD wFlags) const
  function InsertColumn (line 3553) | int InsertColumn(int nCol, const LVCOLUMN* pColumn)
  function BOOL (line 3589) | BOOL DeleteColumn(int nCol)
  function InsertItem (line 3595) | int InsertItem(UINT nMask, int nItem, LPCTSTR lpszItem, UINT nState, UIN...
  function InsertItem (line 3610) | int InsertItem(const LVITEM* pItem)
  function InsertItem (line 3616) | int InsertItem(int nItem, LPCTSTR lpszItem)
  function InsertItem (line 3622) | int InsertItem(int nItem, LPCTSTR lpszItem, int nImage)
  function GetNextItem (line 3628) | int GetNextItem(int nItem, int nFlags) const
  function BOOL (line 3634) | BOOL DeleteItem(int nItem)
  function BOOL (line 3640) | BOOL DeleteAllItems()
  function HitTest (line 3661) | int HitTest(LVHITTESTINFO* pHitTestInfo) const
  function HitTest (line 3667) | int HitTest(POINT pt, UINT* pFlags) const
  function BOOL (line 3678) | BOOL EnsureVisible(int nItem, BOOL bPartialOK)
  function BOOL (line 3684) | BOOL Scroll(int cx, int cy)
  function BOOL (line 3690) | BOOL Scroll(SIZE size)
  function BOOL (line 3696) | BOOL RedrawItems(int nFirst, int nLast)
  function BOOL (line 3702) | BOOL Arrange(UINT nCode)
  function CEdit (line 3708) | CEdit EditLabel(int nItem)
  function BOOL (line 3714) | BOOL Update(int nItem)
  function BOOL (line 3720) | BOOL SortItems(PFNLVCOMPARE pfnCompare, LPARAM lParamSort)
  function CImageList (line 3726) | CImageList RemoveImageList(int nImageList)
  function CImageList (line 3732) | CImageList CreateDragImage(int nItem, LPPOINT lpPoint)
  function SubItemHitTest (line 3744) | int SubItemHitTest(LPLVHITTESTINFO lpInfo) const
  function BOOL (line 3784) | BOOL SortItemsEx(PFNLVCOMPARE pfnCompare, LPARAM lParamSort)
  function InsertGroup (line 3790) | int InsertGroup(int nItem, PLVGROUP pGroup)
  function AddGroup (line 3796) | int AddGroup(PLVGROUP pGroup)
  function RemoveGroup (line 3801) | int RemoveGroup(int nGroupID)
  function MoveGroup (line 3807) | void MoveGroup(int nGroupID, int nItem)
  function MoveItemToGroup (line 3813) | void MoveItemToGroup(int nItem, int nGroupID)
  function EnableGroupView (line 3819) | int EnableGroupView(BOOL bEnable)
  function InsertGroupSorted (line 3831) | void InsertGroupSorted(PLVINSERTGROUPSORTED pInsertGroupSorted)
  function RemoveAllGroups (line 3837) | void RemoveAllGroups()
  function BOOL (line 3843) | BOOL HasGroup(int nGroupID)
  function BOOL (line 3849) | BOOL InsertMarkHitTest(LPPOINT lpPoint, LPLVINSERTMARK pInsertMark) const
  function BOOL (line 3855) | BOOL SetInfoTip(PLVSETINFOTIP pSetInfoTip)
  function CancelEditLabel (line 3861) | void CancelEditLabel()
  function UINT (line 3867) | UINT MapIndexToID(int nIndex) const
  function MapIDToIndex (line 3873) | int MapIDToIndex(UINT uID) const
  function BOOL (line 3879) | BOOL IsItemVisible(int nItem) const
  function HitTestEx (line 3886) | int HitTestEx(LPLVHITTESTINFO lpHitTestInfo) const
  function HitTestEx (line 3892) | int HitTestEx(POINT pt, UINT* pFlags) const
  function SubItemHitTestEx (line 3903) | int SubItemHitTestEx(LPLVHITTESTINFO lpHitTestInfo) const
  function BOOL (line 3911) | BOOL SelectItem(int nIndex)   // -1 to select none
  type CListViewCtrlT (line 3947) | typedef CListViewCtrlT<ATL::CWindow>   CListViewCtrl;
  function LPCTSTR (line 3975) | static LPCTSTR GetWndClassName()
  function UINT (line 3980) | UINT GetCount() const
  function SetIndent (line 3992) | void SetIndent(UINT nIndent)
  function CImageList (line 3998) | CImageList GetImageList(int nImageListType = TVSIL_NORMAL) const
  function CImageList (line 4004) | CImageList SetImageList(HIMAGELIST hImageList, int nImageListType = TVSI...
  function BOOL (line 4010) | BOOL GetItem(LPTVITEM pItem) const
  function BOOL (line 4016) | BOOL SetItem(LPTVITEM pItem)
  function BOOL (line 4022) | BOOL SetItem(HTREEITEM hItem, UINT nMask, LPCTSTR lpszItem, int nImage,
  function BOOL (line 4038) | BOOL GetItemText(HTREEITEM hItem, LPTSTR lpstrText, int nLen) const
  function BOOL (line 4052) | BOOL GetItemText(HTREEITEM hItem, BSTR& bstrText) const
  function BOOL (line 4089) | BOOL GetItemText(HTREEITEM hItem, ATL::CString& strText) const
  function BOOL (line 4116) | BOOL SetItemText(HTREEITEM hItem, LPCTSTR lpszItem)
  function BOOL (line 4122) | BOOL GetItemImage(HTREEITEM hItem, int& nImage, int& nSelectedImage) const
  function BOOL (line 4137) | BOOL SetItemImage(HTREEITEM hItem, int nImage, int nSelectedImage)
  function UINT (line 4143) | UINT GetItemState(HTREEITEM hItem, UINT nStateMask) const
  function BOOL (line 4149) | BOOL SetItemState(HTREEITEM hItem, UINT nState, UINT nStateMask)
  function DWORD_PTR (line 4155) | DWORD_PTR GetItemData(HTREEITEM hItem) const
  function BOOL (line 4165) | BOOL SetItemData(HTREEITEM hItem, DWORD_PTR dwData)
  function CEdit (line 4171) | CEdit GetEditControl() const
  function BOOL (line 4190) | BOOL ItemHasChildren(HTREEITEM hItem) const
  function CToolTipCtrl (line 4200) | CToolTipCtrl GetToolTips() const
  function BOOL (line 4219) | BOOL GetCheckState(HTREEITEM hItem) const
  function BOOL (line 4227) | BOOL SetCheckState(HTREEITEM hItem, BOOL bCheck)
  function UINT (line 4236) | UINT GetCheckStateEx(HTREEITEM hItem) const
  function BOOL (line 4244) | BOOL SetCheckStateEx(HTREEITEM hItem, UINT uCheckState)
  function COLORREF (line 4252) | COLORREF GetBkColor() const
  function COLORREF (line 4264) | COLORREF GetInsertMarkColor() const
  function GetItemHeight (line 4276) | int GetItemHeight() const
  function GetScrollTime (line 4288) | int GetScrollTime() const
  function COLORREF (line 4300) | COLORREF GetTextColor() const
  function BOOL (line 4312) | BOOL GetUnicodeFormat() const
  function COLORREF (line 4330) | COLORREF SetLineColor(COLORREF clrNew /*= CLR_DEFAULT*/)
  function BOOL (line 4336) | BOOL GetItem(LPTVITEMEX pItem) const
  function BOOL (line 4342) | BOOL SetItem(LPTVITEMEX pItem)
  function DWORD (line 4348) | DWORD GetExtendedStyle() const
  function BOOL (line 4361) | BOOL SetAutoScrollInfo(UINT uPixPerSec, UINT uUpdateTime)
  function DWORD (line 4367) | DWORD GetSelectedCount() const
  function HTREEITEM (line 4382) | HTREEITEM InsertItem(LPTVINSERTSTRUCT lpInsertStruct)
  function HTREEITEM (line 4388) | HTREEITEM InsertItem(LPCTSTR lpszItem, int nImage,
  function HTREEITEM (line 4395) | HTREEITEM InsertItem(LPCTSTR lpszItem, HTREEITEM hParent, HTREEITEM hIns...
  function HTREEITEM (line 4401) | HTREEITEM InsertItem(UINT nMask, LPCTSTR lpszItem, int nImage,
  function BOOL (line 4419) | BOOL DeleteItem(HTREEITEM hItem)
  function BOOL (line 4425) | BOOL DeleteAllItems()
  function BOOL (line 4431) | BOOL Expand(HTREEITEM hItem, UINT nCode = TVE_EXPAND)
  function HTREEITEM (line 4437) | HTREEITEM GetNextItem(HTREEITEM hItem, UINT nCode) const
  function HTREEITEM (line 4443) | HTREEITEM GetChildItem(HTREEITEM hItem) const
  function HTREEITEM (line 4449) | HTREEITEM GetNextSiblingItem(HTREEITEM hItem) const
  function HTREEITEM (line 4455) | HTREEITEM GetPrevSiblingItem(HTREEITEM hItem) const
  function HTREEITEM (line 4461) | HTREEITEM GetParentItem(HTREEITEM hItem) const
  function HTREEITEM (line 4467) | HTREEITEM GetFirstVisibleItem() const
  function HTREEITEM (line 4479) | HTREEITEM GetPrevVisibleItem(HTREEITEM hItem) const
  function HTREEITEM (line 4485) | HTREEITEM GetSelectedItem() const
  function HTREEITEM (line 4497) | HTREEITEM GetRootItem() const
  function HTREEITEM (line 4509) | HTREEITEM GetNextSelectedItem(HTREEITEM hItem) const
  function BOOL (line 4515) | BOOL Select(HTREEITEM hItem, UINT nCode)
  function BOOL (line 4521) | BOOL SelectItem(HTREEITEM hItem)
  function BOOL (line 4527) | BOOL SelectDropTarget(HTREEITEM hItem)
  function BOOL (line 4533) | BOOL SelectSetFirstVisible(HTREEITEM hItem)
  function CEdit (line 4539) | CEdit EditLabel(HTREEITEM hItem)
  function BOOL (line 4545) | BOOL EndEditLabelNow(BOOL bCancel)
  function HTREEITEM (line 4551) | HTREEITEM HitTest(TVHITTESTINFO* pHitTestInfo) const
  function HTREEITEM (line 4557) | HTREEITEM HitTest(POINT pt, UINT* pFlags) const
  function BOOL (line 4574) | BOOL EnsureVisible(HTREEITEM hItem)
  function CImageList (line 4586) | CImageList RemoveImageList(int nImageList)
  function CImageList (line 4592) | CImageList CreateDragImage(HTREEITEM hItem)
  function BOOL (line 4598) | BOOL SetInsertMark(HTREEITEM hTreeItem, BOOL bAfter)
  function BOOL (line 4604) | BOOL RemoveInsertMark()
  function HTREEITEM (line 4610) | HTREEITEM MapAccIDToHTREEITEM(UINT uID) const
  function UINT (line 4616) | UINT MapHTREEITEMToAccID(HTREEITEM hTreeItem) const
  function ShowInfoTip (line 4623) | void ShowInfoTip(HTREEITEM hItem)
  type CTreeViewCtrlT (line 4631) | typedef CTreeViewCtrlT<ATL::CWindow>   CTreeViewCtrl;
  function operator (line 4657) | operator HTREEITEM() { return m_hTreeItem; }
  function GetImageIndex (line 4729) | int GetImageIndex() const;
  function LPCTSTR (line 5258) | static LPCTSTR GetWndClassName()
  function BOOL (line 5263) | BOOL IsButtonEnabled(int nID) const
  function BOOL (line 5269) | BOOL IsButtonChecked(int nID) const
  function BOOL (line 5275) | BOOL IsButtonPressed(int nID) const
  function BOOL (line 5281) | BOOL IsButtonHidden(int nID) const
  function BOOL (line 5287) | BOOL IsButtonIndeterminate(int nID) const
  function GetState (line 5293) | int GetState(int nID) const
  function BOOL (line 5299) | BOOL SetState(int nID, UINT nState)
  function BOOL (line 5305) | BOOL GetButton(int nIndex, LPTBBUTTON lpButton) const
  function GetButtonCount (line 5311) | int GetButtonCount() const
  function BOOL (line 5329) | BOOL SetButtonSize(SIZE size)
  function BOOL (line 5335) | BOOL SetButtonSize(int cx, int cy)
  function BOOL (line 5341) | BOOL SetBitmapSize(SIZE size)
  function BOOL (line 5347) | BOOL SetBitmapSize(int cx, int cy)
  function CToolTipCtrl (line 5353) | CToolTipCtrl GetToolTips() const
  function SetNotifyWnd (line 5365) | void SetNotifyWnd(HWND hWnd)
  function GetRows (line 5371) | int GetRows() const
  function BOOL (line 5383) | BOOL SetCmdID(int nIndex, UINT nID)
  function DWORD (line 5389) | DWORD GetBitmapFlags() const
  function GetButtonText (line 5401) | int GetButtonText(int nID, LPTSTR lpstrText) const
  function SetStyle (line 5455) | void SetStyle(DWORD dwStyle)
  function DWORD (line 5461) | DWORD GetButtonSize() const
  function BOOL (line 5475) | BOOL GetRect(int nID, LPRECT lpRect) const
  function GetTextRows (line 5481) | int GetTextRows() const
  function BOOL (line 5493) | BOOL SetIndent(int nIndent)
  function BOOL (line 5499) | BOOL SetMaxTextRows(int nMaxTextRows)
  function BOOL (line 5505) | BOOL GetAnchorHighlight() const
  function BOOL (line 5523) | BOOL SetButtonInfo(int nID, LPTBBUTTONINFO lptbbi)
  function BOOL (line 5529) | BOOL SetButtonInfo(int nID, DWORD dwMask, BYTE Style, BYTE State, LPCTST...
  function GetHotItem (line 5546) | int GetHotItem() const
  function BOOL (line 5558) | BOOL IsButtonHighlighted(int nButtonID) const
  function DWORD (line 5564) | DWORD SetDrawTextFlags(DWORD dwMask, DWORD dwFlags)
  function BOOL (line 5570) | BOOL GetColorScheme(LPCOLORSCHEME lpcs) const
  function SetColorScheme (line 5576) | void SetColorScheme(LPCOLORSCHEME lpcs)
  function DWORD (line 5582) | DWORD GetExtendedStyle() const
  function GetInsertMark (line 5594) | void GetInsertMark(LPTBINSERTMARK lptbim) const
  function SetInsertMark (line 5600) | void SetInsertMark(LPTBINSERTMARK lptbim)
  function COLORREF (line 5606) | COLORREF GetInsertMarkColor() const
  function BOOL (line 5618) | BOOL GetMaxSize(LPSIZE lpSize) const
  function GetPadding (line 5624) | void GetPadding(LPSIZE lpSizePadding) const
  function GetString (line 5656) | int GetString(int nString, LPTSTR lpstrString, int cchMaxLen) const
  function GetStringBSTR (line 5662) | int GetStringBSTR(int nString, BSTR& bstrString) const
  function GetString (line 5688) | int GetString(int nString, ATL::CString& str) const
  function GetMetrics (line 5705) | void GetMetrics(LPTBMETRICS lptbm) const
  function SetMetrics (line 5711) | void SetMetrics(LPTBMETRICS lptbm)
  function SetWindowTheme (line 5717) | void SetWindowTheme(LPCWSTR lpstrTheme)
  function GetItemDropDownRect (line 5736) | void GetItemDropDownRect(int nIndex, LPRECT lpRect) const
  function AddBitmap (line 5779) | int AddBitmap(int nNumButtons, UINT nBitmapID)
  function AddBitmap (line 5789) | int AddBitmap(int nNumButtons, HBITMAP hBitmap)
  function BOOL (line 5798) | BOOL AddButtons(int nNumButtons, LPCTBBUTTON lpButtons)
  function BOOL (line 5804) | BOOL InsertButton(int nIndex, LPCTBBUTTON lpButton)
  function BOOL (line 5810) | BOOL InsertButton(int nIndex, int iCommand, BYTE Style, BYTE State, int ...
  function BOOL (line 5824) | BOOL InsertButton(int nIndex, int iCommand, BYTE Style, BYTE State, int ...
  function BOOL (line 5830) | BOOL AddButton(LPTBBUTTON lpButton)
  function BOOL (line 5835) | BOOL AddButton(int iCommand, BYTE Style, BYTE State, int iBitmap, INT_PT...
  function BOOL (line 5840) | BOOL AddButton(int iCommand, BYTE Style, BYTE State, int iBitmap, LPCTST...
  function BOOL (line 5845) | BOOL DeleteButton(int nIndex)
  function CommandToIndex (line 5861) | int CommandToIndex(UINT nID) const
  function SaveState (line 5867) | void SaveState(HKEY hKeyRoot, LPCTSTR lpszSubKey, LPCTSTR lpszValueName)
  function RestoreState (line 5877) | void RestoreState(HKEY hKeyRoot, LPCTSTR lpszSubKey, LPCTSTR lpszValueName)
  function Customize (line 5887) | void Customize()
  function AddString (line 5893) | int AddString(UINT nStringID)
  function AddStrings (line 5899) | int AddStrings(LPCTSTR lpszStrings)
  function AutoSize (line 5905) | void AutoSize()
  function BOOL (line 5911) | BOOL ChangeBitmap(int nID, int nBitmap)
  function LoadImages (line 5917) | int LoadImages(int nBitmapID)
  function LoadStdImages (line 5923) | int LoadStdImages(int nBitmapID)
  function BOOL (line 5929) | BOOL ReplaceBitmap(LPTBREPLACEBITMAP ptbrb)
  function HitTest (line 5935) | int HitTest(LPPOINT lpPoint) const
  function BOOL (line 5941) | BOOL InsertMarkHitTest(LPPOINT lpPoint, LPTBINSERTMARK lptbim) const
  function BOOL (line 5947) | BOOL InsertMarkHitTest(int x, int y, LPTBINSERTMARK lptbim) const
  function BOOL (line 5954) | BOOL MapAccelerator(TCHAR chAccel, int& nID) const
  function BOOL (line 5966) | BOOL MoveButton(int nOldPos, int nNewPos)
  function HRESULT (line 5972) | HRESULT GetObject(REFIID iid, LPVOID* ppvObject)
  type CToolBarCtrlT (line 5979) | typedef CToolBarCtrlT<ATL::CWindow>   CToolBarCtrl;
  function LPCTSTR (line 6007) | static LPCTSTR GetWndClassName()
  function GetParts (line 6012) | int GetParts(int nParts, int* pParts) const
  function BOOL (line 6018) | BOOL SetParts(int nParts, int* pWidths)
  function BOOL (line 6089) | BOOL GetRect(int nPane, LPRECT lpRect) const
  function BOOL (line 6096) | BOOL GetBorders(int* pBorders) const
  function BOOL (line 6102) | BOOL GetBorders(int& nHorz, int& nVert, int& nSpacing) const
  function SetMinHeight (line 6116) | void SetMinHeight(int nMin)
  function BOOL (line 6134) | BOOL GetUnicodeFormat() const
  function SetTipText (line 6153) | void SetTipText(int nPane, LPCTSTR lpstrText)
  function COLORREF (line 6160) | COLORREF SetBkColor(COLORREF clrBk)
  function HICON (line 6166) | HICON GetIcon(int nPane) const
  function BOOL (line 6173) | BOOL SetIcon(int nPane, HICON hIcon)
  type CStatusBarCtrlT (line 6181) | typedef CStatusBarCtrlT<ATL::CWindow>   CStatusBarCtrl;
  function LPCTSTR (line 6209) | static LPCTSTR GetWndClassName()
  function CImageList (line 6214) | CImageList GetImageList() const
  function BOOL (line 6232) | BOOL GetItem(int nItem, LPTCITEM pTabCtrlItem) const
  function BOOL (line 6238) | BOOL SetItem(int nItem, LPTCITEM pTabCtrlItem)
  function SetItem (line 6244) | int SetItem(int nItem, UINT mask, LPCTSTR lpszItem, DWORD dwState, DWORD...
  function BOOL (line 6257) | BOOL GetItemRect(int nItem, LPRECT lpRect) const
  function GetCurSel (line 6263) | int GetCurSel() const
  function SIZE (line 6275) | SIZE SetItemSize(SIZE size)
  function SetItemSize (line 6283) | void SetItemSize(int cx, int cy)
  function SetPadding (line 6289) | void SetPadding(SIZE size)
  function GetRowCount (line 6295) | int GetRowCount() const
  function SetToolTips (line 6310) | void SetToolTips(HWND hWndToolTip)
  function SetTooltips (line 6317) | void SetTooltips(HWND hWndToolTip) { SetToolTips(hWndToolTip); }
  function GetCurFocus (line 6319) | int GetCurFocus() const
  function BOOL (line 6331) | BOOL SetItemExtra(int cbExtra)
  function DWORD (line 6350) | DWORD SetExtendedStyle(DWORD dwExMask, DWORD dwExStyle)
  function BOOL (line 6356) | BOOL GetUnicodeFormat() const
  function InsertItem (line 6375) | int InsertItem(int nItem, UINT mask, LPCTSTR lpszItem, int iImage, LPARA...
  function InsertItem (line 6386) | int InsertItem(int nItem, LPCTSTR lpszItem)
  function AddItem (line 6395) | int AddItem(LPTCITEM pTabCtrlItem)
  function AddItem (line 6400) | int AddItem(UINT mask, LPCTSTR lpszItem, int iImage, LPARAM lParam)
  function AddItem (line 6405) | int AddItem(LPCTSTR lpszItem)
  function BOOL (line 6410) | BOOL DeleteItem(int nItem)
  function BOOL (line 6416) | BOOL DeleteAllItems()
  function AdjustRect (line 6422) | void AdjustRect(BOOL bLarger, LPRECT lpRect)
  function RemoveImage (line 6428) | void RemoveImage(int nImage)
  function HitTest (line 6434) | int HitTest(TC_HITTESTINFO* pHitTestInfo) const
  type CTabCtrlT (line 6453) | typedef CTabCtrlT<ATL::CWindow>   CTabCtrl;
  function LPCTSTR (line 6481) | static LPCTSTR GetWndClassName()
  function GetLineSize (line 6486) | int GetLineSize() const
  function GetPageSize (line 6498) | int GetPageSize() const
  function GetRangeMin (line 6510) | int GetRangeMin() const
  function GetRange (line 6534) | void GetRange(int& nMin, int& nMax) const
  function GetSelStart (line 6546) | int GetSelStart() const
  function GetSelection (line 6570) | void GetSelection(int& nMin, int& nMax) const
  function GetChannelRect (line 6582) | void GetChannelRect(LPRECT lprc) const
  function GetThumbRect (line 6588) | void GetThumbRect(LPRECT lprc) const
  function GetPos (line 6594) | int GetPos() const
  function UINT (line 6606) | UINT GetNumTics() const
  function GetTic (line 6618) | int GetTic(int nTic) const
  function BOOL (line 6624) | BOOL SetTic(int nTic)
  function GetTicPos (line 6630) | int GetTicPos(int nTic) const
  function SetTicFreq (line 6636) | void SetTicFreq(int nFreq)
  function GetThumbLength (line 6642) | int GetThumbLength() const
  function SetToolTips (line 6679) | void SetToolTips(HWND hWndTT)
  function SetTipSide (line 6685) | int SetTipSide(int nSide)
  function BOOL (line 6691) | BOOL GetUnicodeFormat() const
  type CTrackBarCtrlT (line 6723) | typedef CTrackBarCtrlT<ATL::CWindow>   CTrackBarCtrl;
  function LPCTSTR (line 6751) | static LPCTSTR GetWndClassName()
  function UINT (line 6756) | UINT GetAccel(int nAccel, UDACCEL* pAccel) const
  function BOOL (line 6762) | BOOL SetAccel(int nAccel, UDACCEL* pAccel)
  function UINT (line 6768) | UINT GetBase() const
  function SetPos (line 6803) | int SetPos(int nPos)
  function DWORD (line 6809) | DWORD GetRange() const
  function SetRange (line 6823) | void SetRange(int nLower, int nUpper)
  function SetRange32 (line 6829) | void SetRange32(int nLower, int nUpper)
  function GetRange32 (line 6835) | void GetRange32(int& nLower, int& nUpper) const
  function BOOL (line 6841) | BOOL GetUnicodeFormat() const
  function SetPos32 (line 6861) | int SetPos32(int nPos)
  type CUpDownCtrlT (line 6868) | typedef CUpDownCtrlT<ATL::CWindow>   CUpDownCtrl;
  function LPCTSTR (line 6896) | static LPCTSTR GetWndClassName()
  function DWORD (line 6901) | DWORD SetRange(int nLower, int nUpper)
  function SetPos (line 6907) | int SetPos(int nPos)
  function OffsetPos (line 6913) | int OffsetPos(int nPos)
  function SetStep (line 6919) | int SetStep(int nStep)
  function UINT (line 6925) | UINT GetPos() const
  function GetRange (line 6938) | void GetRange(int& nLower, int& nUpper) const
  function GetRangeLimit (line 6947) | int GetRangeLimit(BOOL bLowLimit) const
  function DWORD (line 6953) | DWORD SetRange32(int nMin, int nMax)
  function COLORREF (line 6959) | COLORREF SetBarColor(COLORREF clr)
  function COLORREF (line 6965) | COLORREF SetBkColor(COLORREF clr)
  function GetStep (line 6980) | int GetStep() const
  function COLORREF (line 6992) | COLORREF GetBarColor() const
  function SetState (line 7004) | int SetState(int nState)
  function StepIt (line 7012) | int StepIt()
  type CProgressBarCtrlT (line 7019) | typedef CProgressBarCtrlT<ATL::CWindow>   CProgressBarCtrl;
  function LPCTSTR (line 7047) | static LPCTSTR GetWndClassName()
  function DWORD (line 7052) | DWORD GetHotKey() const
  function SetHotKey (line 7066) | void SetHotKey(WORD wVirtualKeyCode, WORD wModifiers)
  function SetRules (line 7072) | void SetRules(WORD wInvalidComb, WORD wModifiers)
  type CHotKeyCtrlT (line 7079) | typedef CHotKeyCtrlT<ATL::CWindow>   CHotKeyCtrl;
  function LPCTSTR (line 7107) | static LPCTSTR GetWndClassName()
  function BOOL (line 7113) | BOOL Open(ATL::_U_STRINGorID FileName)
  function BOOL (line 7119) | BOOL Play(UINT nFrom, UINT nTo, UINT nRep)
  function BOOL (line 7125) | BOOL Stop()
  function BOOL (line 7131) | BOOL Close()
  function BOOL (line 7137) | BOOL Seek(UINT nTo)
  function BOOL (line 7144) | BOOL IsPlaying() const
  function LPCTSTR (line 7184) | static LPCTSTR GetWndClassName()
  function LPCTSTR (line 7193) | static LPCTSTR GetLibraryName()
  function GetLineCount (line 7202) | int GetLineCount() const
  function GetRect (line 7220) | void GetRect(LPRECT lpRect) const
  function DWORD (line 7226) | DWORD GetOptions() const
  function GetLine (line 7239) | int GetLine(int nIndex, LPTSTR lpszBuffer) const
  function GetLine (line 7245) | int GetLine(int nIndex, LPTSTR lpszBuffer, int nMaxLength) const
  function BOOL (line 7252) | BOOL CanUndo() const
  function GetSel (line 7264) | void GetSel(LONG& nStartChar, LONG& nEndChar) const
  function GetSel (line 7273) | void GetSel(CHARRANGE &cr) const
  function SetSel (line 7279) | int SetSel(LONG nStartChar, LONG nEndChar)
  function SetSel (line 7286) | int SetSel(CHARRANGE &cr)
  function SetSelAll (line 7292) | int SetSelAll()
  function SetSelNone (line 7297) | int SetSelNone()
  function DWORD (line 7302) | DWORD GetDefaultCharFormat(CHARFORMAT& cf) const
  function DWORD (line 7309) | DWORD GetSelectionCharFormat(CHARFORMAT& cf) const
  function DWORD (line 7316) | DWORD GetEventMask() const
  function DWORD (line 7328) | DWORD GetParaFormat(PARAFORMAT& pf) const
  function LONG (line 7335) | LONG GetSelText(LPTSTR lpstrBuff) const
  function BOOL (line 7341) | BOOL GetSelTextBSTR(BSTR& bstrText) const
  function LONG (line 7363) | LONG GetSelText(ATL::CString& strText) const
  function WORD (line 7382) | WORD GetSelectionType() const
  function COLORREF (line 7394) | COLORREF SetBackgroundColor()   // sets to system background
  function BOOL (line 7400) | BOOL SetCharFormat(CHARFORMAT& cf, WORD wFlags)
  function BOOL (line 7407) | BOOL SetDefaultCharFormat(CHARFORMAT& cf)
  function BOOL (line 7414) | BOOL SetSelectionCharFormat(CHARFORMAT& cf)
  function BOOL (line 7421) | BOOL SetWordCharFormat(CHARFORMAT& cf)
  function DWORD (line 7428) | DWORD SetEventMask(DWORD dwEventMask)
  function BOOL (line 7434) | BOOL SetParaFormat(PARAFORMAT& pf)
  function BOOL (line 7441) | BOOL SetTargetDevice(HDC hDC, int cxLineWidth)
  function GetTextLength (line 7447) | int GetTextLength() const
  function GetTextRange (line 7465) | int GetTextRange(TEXTRANGE* pTextRange) const
  function GetTextRange (line 7471) | int GetTextRange(LONG nStartChar, LONG nEndChar, LPTSTR lpstrText) const
  function DWORD (line 7481) | DWORD GetDefaultCharFormat(CHARFORMAT2& cf) const
  function BOOL (line 7488) | BOOL SetCharFormat(CHARFORMAT2& cf, WORD wFlags)
  function BOOL (line 7495) | BOOL SetDefaultCharFormat(CHARFORMAT2& cf)
  function DWORD (line 7502) | DWORD GetSelectionCharFormat(CHARFORMAT2& cf) const
  function BOOL (line 7509) | BOOL SetSelectionCharFormat(CHARFORMAT2& cf)
  function BOOL (line 7516) | BOOL SetWordCharFormat(CHARFORMAT2& cf)
  function DWORD (line 7523) | DWORD GetParaFormat(PARAFORMAT2& pf) const
  function BOOL (line 7530) | BOOL SetParaFormat(PARAFORMAT2& pf)
  function TEXTMODE (line 7537) | TEXTMODE GetTextMode() const
  function UNDONAMEID (line 7549) | UNDONAMEID GetUndoName() const
  function BOOL (line 7561) | BOOL CanRedo() const
  function UINT (line 7582) | UINT SetUndoLimit(UINT uUndoLimit)
  function SetPalette (line 7588) | void SetPalette(HPALETTE hPalette)
  function GetTextEx (line 7594) | int GetTextEx(GETTEXTEX* pGetTextEx, LPTSTR lpstrText) const
  function GetTextLengthEx (line 7612) | int GetTextLengthEx(GETTEXTLENGTHEX* pGetTextLengthEx) const
  function GetTextLengthEx (line 7618) | int GetTextLengthEx(DWORD dwFlags = GTL_DEFAULT, UINT uCodePage = CP_ACP...
  function EDITWORDBREAKPROC (line 7627) | EDITWORDBREAKPROC GetWordBreakProc() const
  function SetTextEx (line 7639) | int SetTextEx(SETTEXTEX* pSetTextEx, LPCTSTR lpstrText)
  function SetTextEx (line 7645) | int SetTextEx(LPCTSTR lpstrText, DWORD dwFlags = ST_DEFAULT, UINT uCodeP...
  function GetEditStyle (line 7654) | int GetEditStyle() const
  function GetScrollPos (line 7675) | void GetScrollPos(LPPOINT lpPoint) const
  function SetScrollPos (line 7682) | void SetScrollPos(LPPOINT lpPoint)
  function BOOL (line 7689) | BOOL GetZoom(int& nNum, int& nDen) const
  function BOOL (line 7695) | BOOL SetZoom(int nNum, int nDen)
  function BOOL (line 7703) | BOOL SetZoomOff()
  function WORD (line 7715) | WORD GetTypographyOptions() const
  function LineFromChar (line 7734) | int LineFromChar(LONG nIndex) const
  function POINT (line 7740) | POINT PosFromChar(LONG nChar) const
  function CharFromPos (line 7748) | int CharFromPos(POINT pt) const
  function EmptyUndoBuffer (line 7755) | void EmptyUndoBuffer()
  function BOOL (line 7773) | BOOL LineScroll(int nLines)
  function SetRect (line 7785) | void SetRect(LPCRECT lpRect)
  function BOOL (line 7791) | BOOL DisplayBand(LPRECT pDisplayRect)
  function LONG (line 7797) | LONG FindText(DWORD dwFlags, FINDTEXT& ft) const
  function LONG (line 7807) | LONG FindText(DWORD dwFlags, FINDTEXTEX& ft) const
  function RequestResize (line 7842) | void RequestResize()
  function LONG (line 7848) | LONG StreamIn(UINT uFormat, EDITSTREAM& es)
  function LONG (line 7854) | LONG StreamOut(UINT uFormat, EDITSTREAM& es)
  function DWORD (line 7860) | DWORD FindWordBreak(int nCode, LONG nStartChar)
  function ScrollCaret (line 7867) | void ScrollCaret()
  function BOOL (line 7886) | BOOL Undo()
  function Clear (line 7892) | void Clear()
  function Copy (line 7898) | void Copy()
  function Cut (line 7904) | void Cut()
  function Paste (line 7910) | void Paste()
  function IRichEditOle (line 7917) | IRichEditOle* GetOleInterface() const
  function BOOL (line 7925) | BOOL SetOleCallback(IRichEditOleCallback* pCallback)
  function BOOL (line 7931) | BOOL Redo()
  function StopGroupTyping (line 7937) | void StopGroupTyping()
  function BOOL (line 7949) | BOOL SetTabStops(int nTabStops, LPINT rgTabStops)
  function BOOL (line 7955) | BOOL SetTabStops()
  function BOOL (line 7961) | BOOL SetTabStops(const int& cxEachStop)    // takes an 'int'
  function AutoCorrectProc (line 7968) | AutoCorrectProc GetAutoCorrectProc() const
  function BOOL (line 7980) | BOOL CallAutoCorrectProc(WCHAR ch)
  function DWORD (line 7986) | DWORD GetEditStyleEx() const
  function DWORD (line 7998) | DWORD GetStoryType(int nStoryIndex) const
  function DWORD (line 8004) | DWORD SetStoryType(int nStoryIndex, DWORD dwStoryType)
  function DWORD (line 8010) | DWORD GetEllipsisMode() const
  function BOOL (line 8028) | BOOL GetEllipsisState() const
  function SetTouchOptions (line 8040) | void SetTouchOptions(int nTouchOptions, BOOL bEnable)
  function HRESULT (line 8046) | HRESULT InsertTable(TABLEROWPARMS* pRowParams, TABLECELLPARMS* pCellParams)
  function HRESULT (line 8052) | HRESULT GetTableParams(TABLEROWPARMS* pRowParams, TABLECELLPARMS* pCellP...
  function HRESULT (line 8058) | HRESULT SetTableParams(TABLEROWPARMS* pRowParams, TABLECELLPARMS* pCellP...
  function HRESULT (line 8064) | HRESULT InsertImage(RICHEDIT_IMAGE_PARAMETERS* pParams)
  function BOOL (line 8070) | BOOL SetUiaName(LPCTSTR lpstrName)
  type CRichEditCtrlT (line 8078) | typedef CRichEditCtrlT<ATL::CWindow>   CRichEditCtrl;
  function BOOL (line 8131) | BOOL HasSelection() const
  function DrawInsert (line 8180) | void DrawInsert(int nItem)
  function UINT (line 8186) | static UINT GetDragListMessage()
  type CDragListBoxT (line 8209) | typedef CDragListBoxT<ATL::CWindow>   CDragListBox;
  function OnCancelDrag (line 8254) | void OnCancelDrag(int /*nCtlID*/, HWND /*hWndDragList*/, POINT /*ptCurso...
  function OnDragging (line 8259) | int OnDragging(int /*nCtlID*/, HWND /*hWndDragList*/, POINT /*ptCursor*/)
  function OnDropped (line 8264) | void OnDropped(int /*nCtlID*/, HWND /*hWndDragList*/, POINT /*ptCursor*/)
  function LPCTSTR (line 8296) | static LPCTSTR GetWndClassName()
  function UINT (line 8301) | UINT GetBandCount() const
  function BOOL (line 8313) | BOOL SetBandInfo(int nBand, LPREBARBANDINFO lprbbi)
  function BOOL (line 8319) | BOOL GetBarInfo(LPREBARINFO lprbi) const
  function BOOL (line 8325) | BOOL SetBarInfo(LPREBARINFO lprbi)
  function CImageList (line 8331) | CImageList GetImageList() const
  function COLORREF (line 8363) | COLORREF GetTextColor() const
  function COLORREF (line 8375) | COLORREF GetBkColor() const
  function UINT (line 8387) | UINT GetBarHeight() const
  function CToolTipCtrl (line 8399) | CToolTipCtrl GetToolTips() const
  function GetBandBorders (line 8411) | void GetBandBorders(int nBand, LPRECT lpRect) const
  function BOOL (line 8418) | BOOL GetColorScheme(LPCOLORSCHEME lpColorScheme) const
  function SetColorScheme (line 8425) | void SetColorScheme(LPCOLORSCHEME lpColorScheme)
  function HPALETTE (line 8432) | HPALETTE GetPalette() const
  function BOOL (line 8444) | BOOL GetUnicodeFormat() const
  function SetWindowTheme (line 8466) | void SetWindowTheme(LPCWSTR lpstrTheme)
  function DWORD (line 8472) | DWORD GetExtendedStyle() const
  function BOOL (line 8485) | BOOL InsertBand(int nBand, LPREBARBANDINFO lprbbi)
  function BOOL (line 8491) | BOOL AddBand(LPREBARBANDINFO lprbbi)
  function BOOL (line 8496) | BOOL DeleteBand(int nBand)
  function BeginDrag (line 8508) | void BeginDrag(int nBand, DWORD dwPos)
  function BeginDrag (line 8514) | void BeginDrag(int nBand, int xPos, int yPos)
  function EndDrag (line 8520) | void EndDrag()
  function DragMove (line 8526) | void DragMove(DWORD dwPos)
  function DragMove (line 8532) | void DragMove(int xPos, int yPos)
  function GetDropTarget (line 8538) | void GetDropTarget(IDropTarget** ppDropTarget) const
  function MinimizeBand (line 8550) | void MinimizeBand(int nBand)
  function BOOL (line 8556) | BOOL SizeToRect(LPRECT lpRect)
  function IdToIndex (line 8562) | int IdToIndex(UINT uBandID) const
  function HitTest (line 8568) | int HitTest(LPRBHITTESTINFO lprbht) const
  function BOOL (line 8574) | BOOL ShowBand(int nBand, BOOL bShow)
  function BOOL (line 8580) | BOOL MoveBand(int nBand, int nNewPos)
  function PushChevron (line 8587) | void PushChevron(int nBand, LPARAM lAppValue)
  function LockBands (line 8594) | void LockBands(bool bLock)
  function BOOL (line 8623) | BOOL SetBandWidth(int nBand, int cxWidth)
  type CReBarCtrlT (line 8631) | typedef CReBarCtrlT<ATL::CWindow>   CReBarCtrl;
  function LPCTSTR (line 8659) | static LPCTSTR GetWndClassName()
  function CImageList (line 8664) | CImageList GetImageList() const
  function BOOL (line 8688) | BOOL GetUnicodeFormat() const
  function InsertItem (line 8707) | int InsertItem(const COMBOBOXEXITEM* lpcCBItem)
  function InsertItem (line 8713) | int InsertItem(UINT nMask, int nIndex, LPCTSTR lpszItem, int nImage, int...
  function AddItem (line 8743) | int AddItem(UINT nMask, LPCTSTR lpszItem, int nImage, int nSelImage, int...
  function DeleteItem (line 8753) | int DeleteItem(int nIndex)
  function BOOL (line 8759) | BOOL GetItem(PCOMBOBOXEXITEM pCBItem) const
  function BOOL (line 8765) | BOOL SetItem(const COMBOBOXEXITEM* lpcCBItem)
  function SetItem (line 8771) | int SetItem(int nIndex, UINT nMask, LPCTSTR lpszItem, int nImage, int nS...
  function BOOL (line 8787) | BOOL GetItemText(int nIndex, LPTSTR lpszItem, int nLen) const
  function BOOL (line 8801) | BOOL GetItemText(int nIndex, BSTR& bstrText) const
  function BOOL (line 8839) | BOOL GetItemText(int nIndex, ATL::CString& strText) const
  function BOOL (line 8867) | BOOL SetItemText(int nIndex, LPCTSTR lpszItem)
  function CComboBox (line 8873) | CComboBox GetComboCtrl() const
  function InsertString (line 8898) | int InsertString(int /*nIndex*/, LPCTSTR /*lpszString*/)
  function Dir (line 8904) | int Dir(UINT /*attr*/, LPCTSTR /*lpszWildCard*/)
  function FindString (line 8910) | int FindString(int /*nStartAfter*/, LPCTSTR /*lpszString*/) const
  type CComboBoxExT (line 8917) | typedef CComboBoxExT<ATL::CWindow>   CComboBoxEx;
  function LPCTSTR (line 8945) | static LPCTSTR GetWndClassName()
  function COLORREF (line 8950) | COLORREF GetColor(int nColorType) const
  function COLORREF (line 8956) | COLORREF SetColor(int nColorType, COLORREF clr)
  function BOOL (line 8962) | BOOL GetCurSel(LPSYSTEMTIME lpSysTime) const
  function BOOL (line 8968) | BOOL SetCurSel(LPSYSTEMTIME lpSysTime)
  function BOOL (line 8998) | BOOL SetMaxSelCount(int nMax)
  function GetMonthDelta (line 9004) | int GetMonthDelta() const
  function DWORD (line 9016) | DWORD GetRange(LPSYSTEMTIME lprgSysTimeArray) const
  function BOOL (line 9022) | BOOL SetRange(DWORD dwFlags, LPSYSTEMTIME lprgSysTimeArray)
  function BOOL (line 9028) | BOOL GetSelRange(LPSYSTEMTIME lprgSysTimeArray) const
  function BOOL (line 9034) | BOOL SetSelRange(LPSYSTEMTIME lprgSysTimeArray)
  function BOOL (line 9040) | BOOL GetToday(LPSYSTEMTIME lpSysTime) const
  function SetToday (line 9046) | void SetToday(LPSYSTEMTIME lpSysTime)
  function BOOL (line 9052) | BOOL GetMinReqRect(LPRECT lpRectInfo) const
  function GetMaxTodayWidth (line 9058) | int GetMaxTodayWidth() const
  function DWORD (line 9077) | DWORD GetCurrentView() const
  function DWORD (line 9089) | DWORD GetCalendarCount() const
  function CALID (line 9101) | CALID GetCALID() const
  function GetCalendarBorder (line 9113) | int GetCalendarBorder() const
  function BOOL (line 9133) | BOOL SetDayState(int nMonths, LPMONTHDAYSTATE lpDayStateArray)
  function DWORD (line 9139) | DWORD HitTest(PMCHITTESTINFO pMCHitTest) const
  function SizeRectToMin (line 9146) | void SizeRectToMin(LPRECT lpRect)
  type CMonthCalendarCtrlT (line 9154) | typedef CMonthCalendarCtrlT<ATL::CWindow>   CMonthCalendarCtrl;
  function LPCTSTR (line 9182) | static LPCTSTR GetWndClassName()
  function BOOL (line 9187) | BOOL SetFormat(LPCTSTR lpszFormat)
  function COLORREF (line 9193) | COLORREF GetMonthCalColor(int nColorType) const
  function COLORREF (line 9199) | COLORREF SetMonthCalColor(int nColorType, COLORREF clr)
  function DWORD (line 9205) | DWORD GetRange(LPSYSTEMTIME lpSysTimeArray) const
  function BOOL (line 9211) | BOOL SetRange(DWORD dwFlags, LPSYSTEMTIME lpSysTimeArray)
  function DWORD (line 9217) | DWORD GetSystemTime(LPSYSTEMTIME lpSysTime) const
  function BOOL (line 9223) | BOOL SetSystemTime(DWORD dwFlags, LPSYSTEMTIME lpSysTime)
  function CMonthCalendarCtrl (line 9229) | CMonthCalendarCtrl GetMonthCal() const
  function DWORD (line 9254) | DWORD SetMonthCalStyle(DWORD dwStyle)
  function GetDateTimePickerInfo (line 9260) | void GetDateTimePickerInfo(LPDATETIMEPICKERINFO lpPickerInfo) const
  function BOOL (line 9266) | BOOL GetIdealSize(LPSIZE lpSize) const
  function CloseMonthCal (line 9272) | void CloseMonthCal()
  type CDateTimePickerCtrlT (line 9280) | typedef CDateTimePickerCtrlT<ATL::CWindow>   CDateTimePickerCtrl;
  function HRESULT (line 9298) | HRESULT FlatSB_Uninitialize()
  function BOOL (line 9306) | BOOL FlatSB_GetScrollProp(UINT uIndex, LPINT lpnValue) const
  function FlatSB_GetScrollPos (line 9321) | int FlatSB_GetScrollPos(int nBar) const
  function BOOL (line 9335) | BOOL FlatSB_GetScrollRange(int nBar, LPINT lpMinPos, LPINT lpMaxPos) const
  function BOOL (line 9349) | BOOL FlatSB_GetScrollInfo(int nBar, LPSCROLLINFO lpScrollInfo) const
  function BOOL (line 9371) | BOOL FlatSB_EnableScrollBar(UINT uSBFlags, UINT uArrowFlags = ESB_ENABLE...
  type CFlatScrollBarT (line 9393) | typedef CFlatScrollBarT<ATL::CWindow>   CFlatScrollBar;
  function LPCTSTR (line 9421) | static LPCTSTR GetWndClassName()
  function BOOL (line 9426) | BOOL IsBlank() const
  function SetAddress (line 9438) | void SetAddress(DWORD dwAddress)
  function ClearAddress (line 9444) | void ClearAddress()
  function SetRange (line 9450) | void SetRange(int nField, WORD wRange)
  function SetRange (line 9456) | void SetRange(int nField, BYTE nMin, BYTE nMax)
  function SetFocus (line 9462) | void SetFocus(int nField)
  type CIPAddressCtrlT (line 9469) | typedef CIPAddressCtrlT<ATL::CWindow>   CIPAddressCtrl;
  function LPCTSTR (line 9497) | static LPCTSTR GetWndClassName()
  function GetButtonSize (line 9502) | int GetButtonSize() const
  function DWORD (line 9514) | DWORD GetButtonState(int nButton) const
  function COLORREF (line 9521) | COLORREF GetBkColor() const
  function GetBorder (line 9533) | int GetBorder() const
  function GetPos (line 9545) | int GetPos() const
  function SetChild (line 9558) | void SetChild(HWND hWndChild)
  function RecalcSize (line 9570) | void RecalcSize()
  function GetDropTarget (line 9576) | void GetDropTarget(IDropTarget** ppDropTarget)
  type CPagerCtrlT (line 9584) | typedef CPagerCtrlT<ATL::CWindow>   CPagerCtrl;
  function LPCTSTR (line 9612) | static LPCTSTR GetWndClassName()
  function BOOL (line 9627) | BOOL GetItem(PLITEM pLItem) const
  function BOOL (line 9633) | BOOL SetItem(PLITEM pLItem)
  function BOOL (line 9647) | BOOL HitTest(PLHITTESTINFO pLHitTestInfo) const
  type CLinkCtrlT (line 9654) | typedef CLinkCtrlT<ATL::CWindow>   CLinkCtrl;
  function DWORD (line 9716) | DWORD OnPrePaint(int /*idCtrl*/, LPNMCUSTOMDRAW /*lpNMCustomDraw*/)
  function DWORD (line 9721) | DWORD OnPostPaint(int /*idCtrl*/, LPNMCUSTOMDRAW /*lpNMCustomDraw*/)
  function DWORD (line 9726) | DWORD OnPreErase(int /*idCtrl*/, LPNMCUSTOMDRAW /*lpNMCustomDraw*/)
  function DWORD (line 9731) | DWORD OnPostErase(int /*idCtrl*/, LPNMCUSTOMDRAW /*lpNMCustomDraw*/)
  function DWORD (line 9736) | DWORD OnItemPrePaint(int /*idCtrl*/, LPNMCUSTOMDRAW /*lpNMCustomDraw*/)
  function DWORD (line 9741) | DWORD OnItemPostPaint(int /*idCtrl*/, LPNMCUSTOMDRAW /*lpNMCustomDraw*/)
  function DWORD (line 9746) | DWORD OnItemPreErase(int /*idCtrl*/, LPNMCUSTOMDRAW /*lpNMCustomDraw*/)
  function DWORD (line 9751) | DWORD OnItemPostErase(int /*idCtrl*/, LPNMCUSTOMDRAW /*lpNMCustomDraw*/)
  function DWORD (line 9756) | DWORD OnSubItemPrePaint(int /*idCtrl*/, LPNMCUSTOMDRAW /*lpNMCustomDraw*/)

FILE: Examples/WhisperDesktop/Utils/WTL/atlddx.h
  function namespace (line 27) | namespace WTL
  function BOOL (line 395) | static BOOL _AtlSimpleFloatParse(LPCTSTR lpszText, double& d)
  function DDX_Check (line 523) | void DDX_Check(UINT nID, int& nValue, BOOL bSave)
  function DDX_Check (line 544) | void DDX_Check(UINT nID, bool& bCheck, BOOL bSave)
  function DDX_Radio (line 557) | void DDX_Radio(UINT nID, int& nValue, BOOL bSave)
  function _setSel (line 626) | void _setSel(WTL::CListViewCtrl& tCtrl, INT iSel)
  function OnDataExchangeError (line 648) | void OnDataExchangeError(UINT nCtrlID, BOOL /*bSave*/)
  function OnDataValidateError (line 656) | void OnDataValidateError(UINT nCtrlID, BOOL /*bSave*/, _XData& /*data*/)

FILE: Examples/WhisperDesktop/Utils/WTL/atlgdi.h
  function namespace (line 57) | namespace WTL
  function HPEN (line 158) | HPEN CreatePenIndirect(LPLOGPEN lpLogPen)
  function BOOL (line 165) | BOOL DeleteObject()
  function GetLogPen (line 175) | int GetLogPen(LOGPEN* pLogPen) const
  function GetLogPen (line 181) | bool GetLogPen(LOGPEN& LogPen) const
  type CPenT (line 201) | typedef CPenT<false>   CPenHandle;
  type CPenT (line 202) | typedef CPenT<true>    CPen;
  function Attach (line 231) | void Attach(HBRUSH hBrush)
  function HBRUSH (line 238) | HBRUSH Detach()
  function HBRUSH (line 250) | HBRUSH CreateSolidBrush(COLORREF crColor)
  function HBRUSH (line 257) | HBRUSH CreateHatchBrush(int nIndex, COLORREF crColor)
  function HBRUSH (line 264) | HBRUSH CreateBrushIndirect(const LOGBRUSH* lpLogBrush)
  function HBRUSH (line 271) | HBRUSH CreatePatternBrush(HBITMAP hBitmap)
  function HBRUSH (line 278) | HBRUSH CreateDIBPatternBrush(HGLOBAL hPackedDIB, UINT nUsage)
  function HBRUSH (line 288) | HBRUSH CreateDIBPatternBrush(const void* lpPackedDIB, UINT nUsage)
  function HBRUSH (line 295) | HBRUSH CreateSysColorBrush(int nIndex)
  function BOOL (line 302) | BOOL DeleteObject()
  function GetLogBrush (line 312) | int GetLogBrush(LOGBRUSH* pLogBrush) const
  function GetLogBrush (line 318) | bool GetLogBrush(LOGBRUSH& LogBrush) const
  type CBrushT (line 325) | typedef CBrushT<false>   CBrushHandle;
  type CBrushT (line 326) | typedef CBrushT<true>    CBrush;
  function class (line 332) | class CLogFont : public LOGFONT
  function HFONT (line 351) | HFONT CreateFontIndirect()
  function SetBold (line 356) | void SetBold()
  function MakeLarger (line 371) | void MakeLarger(int iScale)
  function SetCaptionFont (line 427) | void SetCaptionFont()
  function SetMenuFont (line 434) | void SetMenuFont()
  function SetStatusFont (line 441) | void SetStatusFont()
  function SetMessageBoxFont (line 448) | void SetMessageBoxFont()
  function Copy (line 455) | void Copy(const LOGFONT* pLogFont)
  function Attach (line 523) | void Attach(HFONT hFont)
  function HFONT (line 530) | HFONT Detach()
  function HFONT (line 542) | HFONT CreateFontIndirect(const LOGFONT* lpLogFont)
  function HFONT (line 549) | HFONT CreateFontIndirectEx(CONST ENUMLOGFONTEXDV* penumlfex)
  function HFONT (line 556) | HFONT CreateFont(int nHeight, int nWidth, int nEscapement,
  function BOOL (line 604) | BOOL DeleteObject()
  function GetLogFont (line 614) | int GetLogFont(LOGFONT* pLogFont) const
  function GetLogFont (line 620) | bool GetLogFont(LOGFONT& LogFont) const
  type CFontT (line 627) | typedef CFontT<false>   CFontHandle;
  type CFontT (line 628) | typedef CFontT<true>    CFont;
  function Attach (line 657) | void Attach(HBITMAP hBitmap)
  function HBITMAP (line 664) | HBITMAP Detach()
  function HBITMAP (line 676) | HBITMAP LoadBitmap(ATL::_U_STRINGorID bitmap)
  function HBITMAP (line 683) | HBITMAP LoadOEMBitmap(UINT nIDBitmap) // for OBM_/OCR_/OIC_
  function HBITMAP (line 697) | HBITMAP CreateBitmap(int nWidth, int nHeight, UINT nPlanes, UINT nBitsPe...
  function HBITMAP (line 704) | HBITMAP CreateBitmapIndirect(LPBITMAP lpBitmap)
  function HBITMAP (line 711) | HBITMAP CreateCompatibleBitmap(HDC hDC, int nWidth, int nHeight)
  function HBITMAP (line 718) | HBITMAP CreateDiscardableBitmap(HDC hDC, int nWidth, int nHeight)
  function BOOL (line 725) | BOOL DeleteObject()
  function GetBitmap (line 735) | int GetBitmap(BITMAP* pBitMap) const
  function GetBitmap (line 741) | bool GetBitmap(BITMAP& bm) const
  function GetSize (line 747) | bool GetSize(SIZE& size) const
  function DWORD (line 758) | DWORD GetBitmapBits(DWORD dwCount, LPVOID lpBits) const
  function DWORD (line 764) | DWORD SetBitmapBits(DWORD dwCount, const void* lpBits)
  function BOOL (line 770) | BOOL GetBitmapDimension(LPSIZE lpSize) const
  function HBITMAP (line 783) | HBITMAP CreateDIBitmap(HDC hDC, CONST BITMAPINFOHEADER* lpbmih, DWORD dw...
  function HBITMAP (line 790) | HBITMAP CreateDIBSection(HDC hDC, CONST BITMAPINFO* lpbmi, UINT uColorUs...
  function GetDIBits (line 797) | int GetDIBits(HDC hDC, UINT uStartScan, UINT cScanLines,  LPVOID lpvBits...
  function SetDIBits (line 803) | int SetDIBits(HDC hDC, UINT uStartScan, UINT cScanLines, CONST VOID* lpv...
  type CBitmapT (line 810) | typedef CBitmapT<false>   CBitmapHandle;
  type CBitmapT (line 811) | typedef CBitmapT<true>    CBitmap;
  function Attach (line 840) | void Attach(HPALETTE hPalette)
  function HPALETTE (line 847) | HPALETTE Detach()
  function HPALETTE (line 859) | HPALETTE CreatePalette(LPLOGPALETTE lpLogPalette)
  function HPALETTE (line 866) | HPALETTE CreateHalftonePalette(HDC hDC)
  function BOOL (line 874) | BOOL DeleteObject()
  function UINT (line 892) | UINT GetPaletteEntries(UINT nStartIndex, UINT nNumEntries, LPPALETTEENTR...
  function UINT (line 898) | UINT SetPaletteEntries(UINT nStartIndex, UINT nNumEntries, LPPALETTEENTR...
  function AnimatePalette (line 905) | void AnimatePalette(UINT nStartIndex, UINT nNumEntries, LPPALETTEENTRY l...
  function BOOL (line 911) | BOOL ResizePalette(UINT nNumEntries)
  function UINT (line 917) | UINT GetNearestPaletteIndex(COLORREF crColor) const
  type CPaletteT (line 924) | typedef CPaletteT<false>   CPaletteHandle;
  type CPaletteT (line 925) | typedef CPaletteT<true>    CPalette;
  function Attach (line 954) | void Attach(HRGN hRgn)
  function HRGN (line 961) | HRGN Detach()
  function HRGN (line 973) | HRGN CreateRectRgn(int x1, int y1, int x2, int y2)
  function HRGN (line 980) | HRGN CreateRectRgnIndirect(LPCRECT lpRect)
  function HRGN (line 987) | HRGN CreateEllipticRgn(int x1, int y1, int x2, int y2)
  function HRGN (line 994) | HRGN CreateEllipticRgnIndirect(LPCRECT lpRect)
  function HRGN (line 1001) | HRGN CreatePolygonRgn(const POINT* lpPoints, int nCount, int nMode)
  function HRGN (line 1008) | HRGN CreatePolyPolygonRgn(const POINT* lpPoints, const INT* lpPolyCounts...
  function HRGN (line 1015) | HRGN CreateRoundRectRgn(int x1, int y1, int x2, int y2, int x3, int y3)
  function HRGN (line 1022) | HRGN CreateFromPath(HDC hDC)
  function HRGN (line 1030) | HRGN CreateFromData(const XFORM* lpXForm, int nCount, const RGNDATA* pRg...
  function BOOL (line 1037) | BOOL DeleteObject()
  function SetRectRgn (line 1047) | void SetRectRgn(int x1, int y1, int x2, int y2)
  function SetRectRgn (line 1053) | void SetRectRgn(LPCRECT lpRect)
  function CombineRgn (line 1059) | int CombineRgn(HRGN hRgnSrc1, HRGN hRgnSrc2, int nCombineMode)
  function CombineRgn (line 1065) | int CombineRgn(HRGN hRgnSrc, int nCombineMode)
  function CopyRgn (line 1071) | int CopyRgn(HRGN hRgnSrc)
  function BOOL (line 1077) | BOOL EqualRgn(HRGN hRgn) const
  function OffsetRgn (line 1083) | int OffsetRgn(int x, int y)
  function OffsetRgn (line 1089) | int OffsetRgn(POINT point)
  function GetRgnBox (line 1095) | int GetRgnBox(LPRECT lpRect) const
  function BOOL (line 1101) | BOOL PtInRegion(int x, int y) const
  function BOOL (line 1107) | BOOL PtInRegion(POINT point) const
  function BOOL (line 1113) | BOOL RectInRegion(LPCRECT lpRect) const
  function GetRegionData (line 1119) | int GetRegionData(LPRGNDATA lpRgnData, int nDataSize) const
  type CRgnT (line 1126) | typedef CRgnT<false>   CRgnHandle;
  type CRgnT (line 1127) | typedef CRgnT<true>    CRgn;
  type CDCT (line 1135) | typedef CDCT<false>   CDCHandle;
  type CDCT (line 1136) | typedef CDCT<true>    CDC;
  function Attach (line 1162) | void Attach(HDC hDC)
  function HDC (line 1169) | HDC Detach()
  function HDC (line 1217) | HDC CreateDC(LPCTSTR lpszDriverName, LPCTSTR lpszDeviceName, LPCTSTR lps...
  function BOOL (line 1231) | BOOL DeleteDC()
  function SaveDC (line 1242) | int SaveDC()
  function BOOL (line 1248) | BOOL RestoreDC(int nSavedDC)
  function GetDeviceCaps (line 1254) | int GetDeviceCaps(int nIndex) const
  function UINT (line 1260) | UINT SetBoundsRect(LPCRECT lpRectBounds, UINT flags)
  function UINT (line 1266) | UINT GetBoundsRect(LPRECT lpRectBounds, UINT flags) const
  function BOOL (line 1272) | BOOL ResetDC(const DEVMODE* lpDevMode)
  function BOOL (line 1279) | BOOL GetBrushOrg(LPPOINT lpPoint) const
  function EnumObjects (line 1297) | int EnumObjects(int nObjectType, int (CALLBACK* lpfn)(LPVOID, LPARAM), L...
  function HPEN (line 1308) | HPEN SelectPen(HPEN hPen)
  function HBRUSH (line 1315) | HBRUSH SelectBrush(HBRUSH hBrush)
  function HFONT (line 1322) | HFONT SelectFont(HFONT hFont)
  function HBITMAP (line 1329) | HBITMAP SelectBitmap(HBITMAP hBitmap)
  function SelectRgn (line 1336) | int SelectRgn(HRGN hRgn)       // special return for regions
  function HPEN (line 1344) | HPEN SelectStockPen(int nPen)
  function HBRUSH (line 1351) | HBRUSH SelectStockBrush(int nBrush)
  function HFONT (line 1357) | HFONT SelectStockFont(int nFont)
  function HPALETTE (line 1363) | HPALETTE SelectStockPalette(int nPalette, BOOL bForceBackground)
  function COLORREF (line 1370) | COLORREF GetNearestColor(COLORREF crColor) const
  function HPALETTE (line 1376) | HPALETTE SelectPalette(HPALETTE hPalette, BOOL bForceBackground)
  function UINT (line 1383) | UINT RealizePalette()
  function UpdateColors (line 1389) | void UpdateColors()
  function COLORREF (line 1432) | COLORREF SetBkColor(COLORREF crColor)
  function SetBkMode (line 1438) | int SetBkMode(int nBkMode)
  function SetPolyFillMode (line 1444) | int SetPolyFillMode(int nPolyFillMode)
  function SetROP2 (line 1450) | int SetROP2(int nDrawMode)
  function SetStretchBltMode (line 1456) | int SetStretchBltMode(int nStretchMode)
  function COLORREF (line 1462) | COLORREF SetTextColor(COLORREF crColor)
  function BOOL (line 1468) | BOOL GetColorAdjustment(LPCOLORADJUSTMENT lpColorAdjust) const
  function BOOL (line 1474) | BOOL SetColorAdjustment(const COLORADJUSTMENT* lpColorAdjust)
  function BOOL (line 1487) | BOOL GetViewportOrg(LPPOINT lpPoint) const
  function SetMapMode (line 1493) | int SetMapMode(int nMapMode)
  function BOOL (line 1519) | BOOL GetViewportExt(LPSIZE lpSize) const
  function BOOL (line 1544) | BOOL GetWindowOrg(LPPOINT lpPoint) const
  function BOOL (line 1569) | BOOL GetWindowExt(LPSIZE lpSize) const
  function BOOL (line 1600) | BOOL DPtoLP(LPRECT lpRect) const
  function BOOL (line 1606) | BOOL DPtoLP(LPSIZE lpSize) const
  function BOOL (line 1625) | BOOL LPtoDP(LPRECT lpRect) const
  function BOOL (line 1631) | BOOL LPtoDP(LPSIZE lpSize) const
  function DPtoHIMETRIC (line 1647) | void DPtoHIMETRIC(LPSIZE lpSize)
  function HIMETRICtoDP (line 1669) | void HIMETRICtoDP(LPSIZE lpSize)
  function LPtoHIMETRIC (line 1691) | void LPtoHIMETRIC(LPSIZE lpSize)
  function HIMETRICtoLP (line 1697) | void HIMETRICtoLP(LPSIZE lpSize)
  function BOOL (line 1704) | BOOL FillRgn(HRGN hRgn, HBRUSH hBrush)
  function BOOL (line 1710) | BOOL FrameRgn(HRGN hRgn, HBRUSH hBrush, int nWidth, int nHeight)
  function BOOL (line 1716) | BOOL InvertRgn(HRGN hRgn)
  function BOOL (line 1722) | BOOL PaintRgn(HRGN hRgn)
  function GetClipBox (line 1729) | int GetClipBox(LPRECT lpRect) const
  function GetClipRgn (line 1735) | int GetClipRgn(CRgn& region) const
  function BOOL (line 1748) | BOOL PtVisible(int x, int y) const
  function BOOL (line 1754) | BOOL PtVisible(POINT point) const
  function BOOL (line 1760) | BOOL RectVisible(LPCRECT lpRect) const
  function SelectClipRgn (line 1766) | int SelectClipRgn(HRGN hRgn)
  function ExcludeClipRect (line 1772) | int ExcludeClipRect(int x1, int y1, int x2, int y2)
  function ExcludeClipRect (line 1778) | int ExcludeClipRect(LPCRECT lpRect)
  function ExcludeUpdateRgn (line 1784) | int ExcludeUpdateRgn(HWND hWnd)
  function IntersectClipRect (line 1790) | int IntersectClipRect(int x1, int y1, int x2, int y2)
  function IntersectClipRect (line 1796) | int IntersectClipRect(LPCRECT lpRect)
  function OffsetClipRgn (line 1802) | int OffsetClipRgn(int x, int y)
  function OffsetClipRgn (line 1808) | int OffsetClipRgn(SIZE size)
  function SelectClipRgn (line 1814) | int SelectClipRgn(HRGN hRgn, int nMode)
  function BOOL (line 1821) | BOOL GetCurrentPosition(LPPOINT lpPoint) const
  function BOOL (line 1839) | BOOL LineTo(int x, int y)
  function BOOL (line 1845) | BOOL LineTo(POINT point)
  function BOOL (line 1851) | BOOL Arc(int x1, int y1, int x2, int y2, int x3, int y3, int x4, int y4)
  function BOOL (line 1857) | BOOL Arc(LPCRECT lpRect, POINT ptStart, POINT ptEnd)
  function BOOL (line 1865) | BOOL Polyline(const POINT* lpPoints, int nCount)
  function BOOL (line 1871) | BOOL AngleArc(int x, int y, int nRadius, float fStartAngle, float fSweep...
  function BOOL (line 1877) | BOOL ArcTo(int x1, int y1, int x2, int y2, int x3, int y3, int x4, int y4)
  function BOOL (line 1883) | BOOL ArcTo(LPCRECT lpRect, POINT ptStart, POINT ptEnd)
  function SetArcDirection (line 1896) | int SetArcDirection(int nArcDirection)
  function BOOL (line 1902) | BOOL PolyDraw(const POINT* lpPoints, const BYTE* lpTypes, int nCount)
  function BOOL (line 1908) | BOOL PolylineTo(const POINT* lpPoints, int nCount)
  function BOOL (line 1914) | BOOL PolyPolyline(const POINT* lpPoints,
  function BOOL (line 1921) | BOOL PolyBezier(const POINT* lpPoints, int nCount)
  function BOOL (line 1927) | BOOL PolyBezierTo(const POINT* lpPoints, int nCount)
  function BOOL (line 1934) | BOOL FillRect(LPCRECT lpRect, HBRUSH hBrush)
  function BOOL (line 1940) | BOOL FillRect(LPCRECT lpRect, int nColorIndex)
  function BOOL (line 1946) | BOOL FrameRect(LPCRECT lpRect, HBRUSH hBrush)
  function BOOL (line 1952) | BOOL InvertRect(LPCRECT lpRect)
  function BOOL (line 1958) | BOOL DrawIcon(int x, int y, HICON hIcon)
  function BOOL (line 1964) | BOOL DrawIcon(POINT point, HICON hIcon)
  function BOOL (line 2007) | BOOL Chord(int x1, int y1, int x2, int y2, int x3, int y3, int x4, int y4)
  function BOOL (line 2013) | BOOL Chord(LPCRECT lpRect, POINT ptStart, POINT ptEnd)
  function DrawFocusRect (line 2019) | void DrawFocusRect(LPCRECT lpRect)
  function BOOL (line 2025) | BOOL Ellipse(int x1, int y1, int x2, int y2)
  function BOOL (line 2031) | BOOL Ellipse(LPCRECT lpRect)
  function BOOL (line 2037) | BOOL Pie(int x1, int y1, int x2, int y2, int x3, int y3, int x4, int y4)
  function BOOL (line 2043) | BOOL Pie(LPCRECT lpRect, POINT ptStart, POINT ptEnd)
  function BOOL (line 2049) | BOOL Polygon(const POINT* lpPoints, int nCount)
  function BOOL (line 2055) | BOOL PolyPolygon(const POINT* lpPoints, const INT* lpPolyCounts, int nCo...
  function BOOL (line 2061) | BOOL Rectangle(int x1, int y1, int x2, int y2)
  function BOOL (line 2067) | BOOL Rectangle(LPCRECT lpRect)
  function BOOL (line 2073) | BOOL RoundRect(int x1, int y1, int x2, int y2, int x3, int y3)
  function BOOL (line 2079) | BOOL RoundRect(LPCRECT lpRect, POINT point)
  function BOOL (line 2086) | BOOL PatBlt(int x, int y, int nWidth, int nHeight, DWORD dwRop)
  function BOOL (line 2092) | BOOL BitBlt(int x, int y, int nWidth, int nHeight, HDC hSrcDC,
  function BOOL (line 2099) | BOOL StretchBlt(int x, int y, int nWidth, int nHeight, HDC hSrcDC, int x...
  function COLORREF (line 2105) | COLORREF GetPixel(int x, int y) const
  function COLORREF (line 2111) | COLORREF GetPixel(POINT point) const
  function COLORREF (line 2117) | COLORREF SetPixel(int x, int y, COLORREF crColor)
  function COLORREF (line 2123) | COLORREF SetPixel(POINT point, COLORREF crColor)
  function BOOL (line 2129) | BOOL FloodFill(int x, int y, COLORREF crColor)
  function BOOL (line 2135) | BOOL ExtFloodFill(int x, int y, COLORREF crColor, UINT nFillType)
  function BOOL (line 2141) | BOOL MaskBlt(int x, int y, int nWidth, int nHeight, HDC hSrcDC, int xSrc...
  function BOOL (line 2147) | BOOL PlgBlt(LPPOINT lpPoint, HDC hSrcDC, int xSrc, int ySrc, int nWidth,...
  function BOOL (line 2153) | BOOL SetPixelV(int x, int y, COLORREF crColor)
  function BOOL (line 2159) | BOOL SetPixelV(POINT point, COLORREF crColor)
  function BOOL (line 2165) | BOOL TransparentBlt(int x, int y, int nWidth, int nHeight, HDC hSrcDC, i...
  function BOOL (line 2171) | BOOL GradientFill(const PTRIVERTEX pVertices, DWORD nVertices, void* pMe...
  function BOOL (line 2177) | BOOL GradientFillRect(RECT& rect, COLORREF clr1, COLORREF clr2, bool bHo...
  function BOOL (line 2202) | BOOL AlphaBlend(int x, int y, int nWidth, int nHeight, HDC hSrcDC, int x...
  function BOOL (line 2212) | BOOL DitherBlt(int x, int y, int nWidth, int nHeight, HDC hSrcDC, HBITMA...
  function DrawText (line 2348) | int DrawText(LPCTSTR lpstrText, int cchText, LPRECT lpRect, UINT uFormat)
  function DrawText (line 2355) | int DrawText(LPTSTR lpstrText, int cchText, LPRECT lpRect, UINT uFormat)
  function DrawShadowText (line 2368) | int DrawShadowText(LPCWSTR lpstrText, int cchText, LPRECT lpRect, DWORD ...
  function BOOL (line 2375) | BOOL GetTextExtent(LPCTSTR lpszString, int nCount, LPSIZE lpSize) const
  function BOOL (line 2397) | BOOL GrayString(HBRUSH hBrush, BOOL (CALLBACK* lpfnOutput)(HDC, LPARAM, ...
  function UINT (line 2409) | UINT SetTextAlign(UINT nFlags)
  function GetTextFace (line 2415) | int GetTextFace(LPTSTR lpszFacename, int nCount) const
  function BOOL (line 2428) | BOOL GetTextFace(BSTR& bstrFace) const
  function GetTextFace (line 2452) | int GetTextFace(ATL::CString& strFace) const
  function BOOL (line 2469) | BOOL GetTextMetrics(LPTEXTMETRIC lpMetrics) const
  function SetTextJustification (line 2475) | int SetTextJustification(int nBreakExtra, int nBreakCount)
  function SetTextCharacterExtra (line 2487) | int SetTextCharacterExtra(int nCharExtra)
  function BOOL (line 2494) | BOOL DrawEdge(LPRECT lpRect, UINT nEdge, UINT nFlags)
  function BOOL (line 2500) | BOOL DrawFrameControl(LPRECT lpRect, UINT nType, UINT nState)
  function BOOL (line 2507) | BOOL ScrollDC(int dx, int dy, LPCRECT lpRectScroll, LPCRECT lpRectClip, ...
  function BOOL (line 2514) | BOOL GetCharWidth(UINT nFirstChar, UINT nLastChar, LPINT lpBuffer) const
  function BOOL (line 2521) | BOOL GetCharWidth32(UINT nFirstChar, UINT nLastChar, LPINT lpBuffer) const
  function DWORD (line 2527) | DWORD SetMapperFlags(DWORD dwFlag)
  function BOOL (line 2533) | BOOL GetAspectRatioFilter(LPSIZE lpSize) const
  function BOOL (line 2539) | BOOL GetCharABCWidths(UINT nFirstChar, UINT nLastChar, LPABC lpabc) const
  function DWORD (line 2545) | DWORD GetFontData(DWORD dwTable, DWORD dwOffset, LPVOID lpData, DWORD cb...
  function GetKerningPairs (line 2551) | int GetKerningPairs(int nPairs, LPKERNINGPAIR lpkrnpair) const
  function UINT (line 2557) | UINT GetOutlineTextMetrics(UINT cbData, LPOUTLINETEXTMETRIC lpotm) const
  function DWORD (line 2563) | DWORD GetGlyphOutline(UINT nChar, UINT nFormat, LPGLYPHMETRICS lpgm, DWO...
  function BOOL (line 2569) | BOOL GetCharABCWidths(UINT nFirstChar, UINT nLastChar, LPABCFLOAT lpABCF...
  function BOOL (line 2575) | BOOL GetCharWidth(UINT nFirstChar, UINT nLastChar, float* lpFloatBuffer)...
  function Escape (line 2582) | int Escape(int nEscape, int nCount, LPCSTR lpszInData, LPVOID lpOutData)
  function Escape (line 2588) | int Escape(int nEscape, int nInputSize, LPCSTR lpszInputData,
  function DrawEscape (line 2595) | int DrawEscape(int nEscape, int nInputSize, LPCSTR lpszInputData)
  function StartDoc (line 2602) | int StartDoc(LPCTSTR lpszDocName)  // old Win3.0 version
  function StartDoc (line 2610) | int StartDoc(LPDOCINFO lpDocInfo)
  function StartPage (line 2616) | int StartPage()
  function EndPage (line 2622) | int EndPage()
  function SetAbortProc (line 2628) | int SetAbortProc(BOOL (CALLBACK* lpfn)(HDC, int))
  function AbortDoc (line 2634) | int AbortDoc()
  function EndDoc (line 2640) | int EndDoc()
  function BOOL (line 2647) | BOOL PlayMetaFile(HMETAFILE hMF)
  function BOOL (line 2660) | BOOL PlayMetaFile(HENHMETAFILE hEnhMetaFile, LPCRECT lpBounds)
  function BOOL (line 2666) | BOOL AddMetaFileComment(UINT nDataSize, const BYTE* pCommentData) // can...
  function EnumMetaFileProc (line 2673) | static int CALLBACK EnumMetaFileProc(HDC hDC, HANDLETABLE* pHandleTable,...
  function BOOL (line 2761) | BOOL AbortPath()
  function BOOL (line 2767) | BOOL BeginPath()
  function BOOL (line 2773) | BOOL CloseFigure()
  function BOOL (line 2779) | BOOL EndPath()
  function BOOL (line 2785) | BOOL FillPath()
  function BOOL (line 2791) | BOOL FlattenPath()
  function BOOL (line 2797) | BOOL StrokeAndFillPath()
  function BOOL (line 2803) | BOOL StrokePath()
  function BOOL (line 2809) | BOOL WidenPath()
  function BOOL (line 2815) | BOOL GetMiterLimit(PFLOAT pfMiterLimit) const
  function BOOL (line 2821) | BOOL SetMiterLimit(float fMiterLimit)
  function GetPath (line 2827) | int GetPath(LPPOINT lpPoints, LPBYTE lpTypes, int nCount) const
  function BOOL (line 2833) | BOOL SelectClipPath(int nMode)
  function CBrushHandle (line 2840) | static CBrushHandle PASCAL GetHalftoneBrush()
  function FillSolidRect (line 2919) | void FillSolidRect(LPCRECT lpRect, COLORREF clr)
  function FillSolidRect (line 2932) | void FillSolidRect(int x, int y, int cx, int cy, COLORREF clr)
  function Draw3dRect (line 2940) | void Draw3dRect(LPCRECT lpRect, COLORREF clrTopLeft, COLORREF clrBottomR...
  function Draw3dRect (line 2946) | void Draw3dRect(int x, int y, int cx, int cy, COLORREF clrTopLeft, COLOR...
  function SetDIBitsToDevice (line 2955) | int SetDIBitsToDevice(int x, int y, DWORD dwWidth, DWORD dwHeight, int x...
  function StretchDIBits (line 2961) | int StretchDIBits(int x, int y, int nWidth, int nHeight, int xSrc, int y...
  function UINT (line 2967) | UINT GetDIBColorTable(UINT uStartIndex, UINT cEntries, RGBQUAD* pColors)...
  function UINT (line 2973) | UINT SetDIBColorTable(UINT uStartIndex, UINT cEntries, CONST RGBQUAD* pC...
  function ChoosePixelFormat (line 2981) | int ChoosePixelFormat(CONST PIXELFORMATDESCRIPTOR* ppfd)
  function DescribePixelFormat (line 2987) | int DescribePixelFormat(int iPixelFormat, UINT nBytes, LPPIXELFORMATDESC...
  function BOOL (line 2999) | BOOL SetPixelFormat(int iPixelFormat, CONST PIXELFORMATDESCRIPTOR* ppfd)
  function BOOL (line 3005) | BOOL SwapBuffers()
  function HGLRC (line 3011) | HGLRC wglCreateContext()
  function HGLRC (line 3017) | HGLRC wglCreateLayerContext(int iLayerPlane)
  function BOOL (line 3023) | BOOL wglMakeCurrent(HGLRC hglrc)
  function BOOL (line 3029) | BOOL wglUseFontBitmaps(DWORD dwFirst, DWORD dwCount, DWORD listBase)
  function BOOL (line 3035) | BOOL wglUseFontOutlines(DWORD dwFirst, DWORD dwCount, DWORD listBase, FL...
  function BOOL (line 3041) | BOOL wglDescribeLayerPlane(int iPixelFormat, int iLayerPlane, UINT nByte...
  function wglSetLayerPaletteEntries (line 3047) | int wglSetLayerPaletteEntries(int iLayerPlane, int iStart, int cEntries,...
  function wglGetLayerPaletteEntries (line 3053) | int wglGetLayerPaletteEntries(int iLayerPlane, int iStart, int cEntries,...
  function BOOL (line 3059) | BOOL wglRealizeLayerPalette(int iLayerPlane, BOOL bRealize)
  function BOOL (line 3065) | BOOL wglSwapLayerBuffers(UINT uPlanes)
  function COLORREF (line 3078) | COLORREF SetDCPenColor(COLORREF clr)
  function COLORREF (line 3090) | COLORREF SetDCBrushColor(COLORREF clr)
  function DWORD (line 3096) | DWORD GetFontUnicodeRanges(LPGLYPHSET lpgs) const
  function DWORD (line 3102) | DWORD GetGlyphIndices(LPCTSTR lpstr, int cch, LPWORD pgi, DWORD dwFlags)...
  function BOOL (line 3108) | BOOL GetTextExtentPointI(LPWORD pgiIn, int cgi, LPSIZE lpSize) const
  function BOOL (line 3114) | BOOL GetTextExtentExPointI(LPWORD pgiIn, int cgi, int nMaxExtent, LPINT ...
  function BOOL (line 3120) | BOOL GetCharWidthI(UINT giFirst, UINT cgi, LPWORD pgi, LPINT lpBuffer) c...
  function BOOL (line 3126) | BOOL GetCharABCWidthsI(UINT giFirst, UINT cgi, LPWORD pgi, LPABC lpabc) ...
  function BOOL (line 3132) | BOOL ColorCorrectPalette(HPALETTE hPalette, DWORD dwFirstEntry, DWORD dw...
  function class (line 3143) | class CPaintDC : public CDC
  function class (line 3167) | class CClientDC : public CDC
  function class (line 3188) | class CWindowDC : public CDC
  function class (line 3209) | class CMemoryDC : public CDC
  function class (line 3241) | class CEnhMetaFileInfo

FILE: Examples/WhisperDesktop/Utils/WTL/atluser.h
  function namespace (line 49) | namespace WTL
  function _FixTrackMenuPopupX (line 238) | static int _FixTrackMenuPopupX(int x, int y)
  function BOOL (line 261) | BOOL GetMenuInfo(LPMENUINFO lpMenuInfo) const
  function BOOL (line 267) | BOOL SetMenuInfo(LPCMENUINFO lpMenuInfo)
  function BOOL (line 280) | BOOL AppendMenu(UINT nFlags, HMENU hSubMenu, LPCTSTR lpszNewItem)
  function BOOL (line 287) | BOOL AppendMenu(UINT nFlags, UINT_PTR nIDNewItem, HBITMAP hBmp)
  function BOOL (line 293) | BOOL AppendMenu(UINT nFlags, HMENU hSubMenu, HBITMAP hBmp)
  function UINT (line 300) | UINT CheckMenuItem(UINT nIDCheckItem, UINT nCheck)
  function UINT (line 306) | UINT EnableMenuItem(UINT nIDEnableItem, UINT nEnable)
  function BOOL (line 312) | BOOL HiliteMenuItem(HWND hWnd, UINT uIDHiliteItem, UINT uHilite)
  function GetMenuItemCount (line 318) | int GetMenuItemCount() const
  function UINT (line 330) | UINT GetMenuState(UINT nID, UINT nFlags) const
  function GetMenuString (line 336) | int GetMenuString(UINT nIDItem, LPTSTR lpString, int nMaxCount, UINT nFl...
  function GetMenuStringLen (line 342) | int GetMenuStringLen(UINT nIDItem, UINT nFlags) const
  function BOOL (line 348) | BOOL GetMenuString(UINT nIDItem, BSTR& bstrText, UINT nFlags) const
  function GetMenuString (line 375) | int GetMenuString(UINT nIDItem, ATL::CString& strText, UINT nFlags) const
  function CMenuHandle (line 393) | CMenuHandle GetSubMenu(int nPos) const
  function BOOL (line 405) | BOOL InsertMenu(UINT nPosition, UINT nFlags, HMENU hSubMenu, LPCTSTR lps...
  function BOOL (line 412) | BOOL InsertMenu(UINT nPosition, UINT nFlags, UINT_PTR nIDNewItem, HBITMA...
  function BOOL (line 418) | BOOL InsertMenu(UINT nPosition, UINT nFlags, HMENU hSubMenu, HBITMAP hBmp)
  function BOOL (line 431) | BOOL ModifyMenu(UINT nPosition, UINT nFlags, HMENU hSubMenu, LPCTSTR lps...
  function BOOL (line 438) | BOOL ModifyMenu(UINT nPosition, UINT nFlags, UINT_PTR nIDNewItem, HBITMA...
  function BOOL (line 444) | BOOL ModifyMenu(UINT nPosition, UINT nFlags, HMENU hSubMenu, HBITMAP hBmp)
  function BOOL (line 451) | BOOL RemoveMenu(UINT nPosition, UINT nFlags)
  function BOOL (line 457) | BOOL SetMenuItemBitmaps(UINT nPosition, UINT nFlags, HBITMAP hBmpUncheck...
  function BOOL (line 463) | BOOL CheckMenuRadioItem(UINT nIDFirst, UINT nIDLast, UINT nIDItem, UINT ...
  function BOOL (line 469) | BOOL GetMenuItemInfo(UINT uItem, BOOL bByPosition, LPMENUITEMINFO lpmii)...
  function BOOL (line 475) | BOOL SetMenuItemInfo(UINT uItem, BOOL bByPosition, LPMENUITEMINFO lpmii)
  function BOOL (line 481) | BOOL InsertMenuItem(UINT uItem, BOOL bByPosition, LPMENUITEMINFO lpmii)
  function BOOL (line 499) | BOOL GetMenuItemRect(HWND hWnd, UINT uItem, LPRECT lprcItem) const
  function MenuItemFromPoint (line 505) | int MenuItemFromPoint(HWND hWnd, POINT point) const
  function BOOL (line 512) | BOOL SetMenuContextHelpId(DWORD dwContextHelpId)
  function DWORD (line 518) | DWORD GetMenuContextHelpId() const
  function Attach (line 551) | void Attach(HACCEL hAccel)
  function HACCEL (line 558) | HACCEL Detach()
  function HACCEL (line 570) | HACCEL LoadAccelerators(ATL::_U_STRINGorID accel)
  function HACCEL (line 577) | HACCEL CreateAcceleratorTable(LPACCEL pAccel, int cEntries)
  function DestroyObject (line 585) | void DestroyObject()
  function CopyAcceleratorTable (line 595) | int CopyAcceleratorTable(LPACCEL lpAccelDst, int cEntries)
  function BOOL (line 608) | BOOL TranslateAccelerator(HWND hWnd, LPMSG pMsg)
  type CAcceleratorT (line 617) | typedef CAcceleratorT<false>   CAcceleratorHandle;
  type CAcceleratorT (line 618) | typedef CAcceleratorT<true>    CAccelerator;
  function Attach (line 646) | void Attach(HICON hIcon)
  function HICON (line 653) | HICON Detach()
  function HICON (line 665) | HICON LoadIcon(ATL::_U_STRINGorID icon)
  function HICON (line 679) | HICON LoadOEMIcon(LPCTSTR lpstrIconName)
  function HICON (line 687) | HICON CreateIcon(int nWidth, int nHeight, BYTE cPlanes, BYTE cBitsPixel,...
  function HICON (line 713) | HICON CreateIconIndirect(PICONINFO pIconInfo)
  function HICON (line 721) | HICON ExtractIcon(LPCTSTR lpszExeFileName, UINT nIconIndex)
  function HICON (line 729) | HICON ExtractAssociatedIcon(HINSTANCE hInst, LPTSTR lpIconPath, LPWORD l...
  function BOOL (line 738) | BOOL DestroyIcon()
  function HICON (line 748) | HICON CopyIcon()
  function HICON (line 754) | HICON DuplicateIcon()
  function BOOL (line 760) | BOOL DrawIcon(HDC hDC, int x, int y)
  function BOOL (line 766) | BOOL DrawIcon(HDC hDC, POINT pt)
  function BOOL (line 784) | BOOL GetIconInfo(PICONINFO pIconInfo) const
  function BOOL (line 792) | BOOL GetIconInfoEx(PICONINFOEX pIconInfo) const
  function HRESULT (line 801) | HRESULT LoadIconMetric(ATL::_U_STRINGorID icon, int lims)
  function HRESULT (line 808) | HRESULT LoadIconWithScaleDown(ATL::_U_STRINGorID icon, int cx, int cy)
  function HRESULT (line 815) | HRESULT LoadOEMIconMetric(LPCTSTR lpstrIconName, int lims)
  function HRESULT (line 822) | HRESULT LoadOEMIconWithScaleDown(LPCTSTR lpstrIconName, int cx, int cy)
  function IsOEMIcon (line 832) | static bool IsOEMIcon(LPCTSTR lpstrIconName)
  type CIconT (line 845) | typedef CIconT<false>   CIconHandle;
  type CIconT (line 846) | typedef CIconT<true>    CIcon;
  function Attach (line 879) | void Attach(HCURSOR hCursor)
  function HCURSOR (line 886) | HCURSOR Detach()
  function HCURSOR (line 898) | HCURSOR LoadCursor(ATL::_U_STRINGorID cursor)
  function HCURSOR (line 905) | HCURSOR LoadSysCursor(LPCTSTR lpstrCursorName)
  function HCURSOR (line 919) | HCURSOR LoadOEMCursor(LPCTSTR lpstrCursorName)
  function HCURSOR (line 931) | HCURSOR LoadCursorFromFile(LPCTSTR pstrFilename)
  function HCURSOR (line 939) | HCURSOR CreateCursor(int xHotSpot, int yHotSpot, int nWidth, int nHeight...
  function BOOL (line 963) | BOOL DestroyCursor()
  function HCURSOR (line 973) | HCURSOR CopyCursor()
  function BOOL (line 979) | BOOL GetCursorInfo(LPCURSORINFO pCursorInfo)
  type CCursorT (line 987) | typedef CCursorT<false>   CCursorHandle;
  type CCursorT (line 988) | typedef CCursorT<true>    CCursor;
  function class (line 996) | class CResource
  type _AtlToolBarData (line 1081) | struct _AtlToolBarData
  function HACCEL (line 1096) | inline HACCEL AtlLoadAccelerators(ATL::_U_STRINGorID table)
  function HMENU (line 1101) | inline HMENU AtlLoadMenu(ATL::_U_STRINGorID menu)
  function HBITMAP (line 1106) | inline HBITMAP AtlLoadBitmap(ATL::_U_STRINGorID bitmap)
  function HBITMAP (line 1112) | inline HBITMAP AtlLoadSysBitmap(ATL::_U_STRINGorID bitmap)
  function HCURSOR (line 1122) | inline HCURSOR AtlLoadCursor(ATL::_U_STRINGorID cursor)
  function HCURSOR (line 1127) | inline HCURSOR AtlLoadSysCursor(LPCTSTR lpCursorName)
  function HICON (line 1138) | inline HICON AtlLoadIcon(ATL::_U_STRINGorID icon)
  function HICON (line 1143) | inline HICON AtlLoadSysIcon(LPCTSTR lpIconName)
  function HBITMAP (line 1156) | inline HBITMAP AtlLoadBitmapImage(ATL::_U_STRINGorID bitmap, UINT fuLoad...
  function HBITMAP (line 1172) | inline HBITMAP AtlLoadSysBitmapImage(WORD wBitmapID, UINT fuLoad = LR_DE...
  function AtlLoadString (line 1200) | inline bool AtlLoadString(UINT uID, BSTR& bstrText)

FILE: Examples/WhisperDesktop/Utils/WTL/atlwinx.h
  function namespace (line 87) | namespace WTL
  function LONG_PTR (line 354) | inline LONG_PTR SetClassLongPtrA(HWND hWnd, int nIndex, LONG_PTR dwNewLong)
  function LONG_PTR (line 362) | inline LONG_PTR SetClassLongPtrW(HWND hWnd, int nIndex, LONG_PTR dwNewLong)
  function LONG_PTR (line 370) | inline LONG_PTR GetClassLongPtrA(HWND hWnd, int nIndex)
  function LONG_PTR (line 378) | inline LONG_PTR GetClassLongPtrW(HWND hWnd, int nIndex)
  function namespace (line 388) | namespace WTL
  function BOOL (line 415) | BOOL DragDetect(POINT pt)
  function BOOL (line 421) | BOOL DragDetect()
  function CWindowEx (line 430) | CWindowEx GetAncestor(UINT uFlags) const
  function BOOL (line 455) | BOOL StopFlashWindowEx()
  function DWORD (line 466) | DWORD GetClassLong(int nIndex) const
  function DWORD (line 472) | DWORD SetClassLong(int nIndex, LONG dwNewLong)
  function ULONG_PTR (line 478) | ULONG_PTR GetClassLongPtr(int nIndex) const
  function ULONG_PTR (line 484) | ULONG_PTR SetClassLongPtr(int nIndex, LONG_PTR dwNewLong)
  function BOOL (line 491) | BOOL SetLayeredWindowAttributes(COLORREF crlKey, BYTE byteAlpha, DWORD d...
  function BOOL (line 499) | BOOL UpdateLayeredWindow(HDC hdcDst, LPPOINT pptDst, LPSIZE psize, HDC h...
  function BOOL (line 515) | BOOL GetLayeredWindowAttributes(COLORREF* pcrlKey, BYTE* pbyteAlpha, DWO...
  function BOOL (line 524) | BOOL StartTrackMouseLeave()
  function BOOL (line 535) | BOOL StartTrackMouse(DWORD dwFlags, DWORD dwHoverTime = HOVER_DEFAULT)
  function BOOL (line 547) | BOOL CancelTrackMouse(DWORD dwType)
  function GetWindowText (line 560) | int GetWindowText(ATL::CString& strText) const
  function UINT (line 570) | UINT GetDlgItemText(int nID, ATL::CString& strText) const
  function UINT (line 594) | UINT DlgGetDefID() const
  function DlgReposition (line 613) | void DlgReposition()

FILE: Examples/WhisperDesktop/Utils/logger.cpp
  function colorIndex (line 17) | static int colorIndex( const sToken& tok )
  function printTime (line 27) | void printTime( CStringA& rdi, Whisper::sTimeSpan time, bool comma )
  function HRESULT (line 40) | HRESULT logNewSegments( const iTranscribeResult* results, size_t newSegm...

FILE: Examples/WhisperDesktop/Utils/logger.h
  function logError (line 13) | inline void logError( const char8_t* pszFormat, ... )
  function logWarning (line 17) | inline void logWarning( const char8_t* pszFormat, ... )
  function logInfo (line 21) | inline void logInfo( const char8_t* pszFormat, ... )
  function logDebug (line 25) | inline void logDebug( const char8_t* pszFormat, ... )

FILE: Examples/WhisperDesktop/Utils/miscUtils.cpp
  function wchar_t (line 6) | wchar_t* formatMessage( HRESULT hr )
  function CString (line 21) | CString formatErrorMessage( HRESULT hr )
  function reportFatalError (line 37) | void reportFatalError( const char* what, HRESULT hr )
  type sImplString (line 49) | struct sImplString
  function HRESULT (line 62) | HRESULT implParse( const CString& s, eModelImplementation& rdi )
  function LPCTSTR (line 74) | LPCTSTR implString( eModelImplementation i )
  function implPopulateCombobox (line 82) | void implPopulateCombobox( CComboBox& cb, Whisper::eModelImplementation ...
  function implGetValue (line 96) | Whisper::eModelImplementation implGetValue( CComboBox& cb )
  function HRESULT (line 119) | HRESULT ThreadPoolWork::create( iThreadPoolCallback* cb )
  function HRESULT (line 133) | HRESULT ThreadPoolWork::post()
  function makeUtf16 (line 141) | void makeUtf16( CString& rdi, const char* utf8 )
  function makeUtf8 (line 150) | void makeUtf8( CStringA& rdi, const CString& utf16 )
  function getOpenFileName (line 160) | bool getOpenFileName( HWND owner, LPCTSTR title, LPCTSTR filter, CString...
  function getSaveFileName (line 190) | bool getSaveFileName( HWND owner, LPCTSTR title, LPCTSTR filter, CString...
  function reportError (line 221) | void reportError( HWND owner, LPCTSTR text, LPCTSTR title, HRESULT hr )
  function HRESULT (line 237) | HRESULT writeUtf8Bom( CAtlFile& file )
  function isInvalidTranslate (line 243) | bool isInvalidTranslate( HWND owner, uint32_t lang, bool translate )

FILE: Examples/WhisperDesktop/Utils/miscUtils.h
  function __interface (line 19) | __interface iThreadPoolCallback
  function class (line 24) | class ThreadPoolWork
  function wchar_t (line 55) | inline const wchar_t* cstr( const CString& s ) { return s; }
  function HRESULT (line 58) | inline HRESULT getLastHr()
  function flipRgb (line 66) | inline uint32_t flipRgb( uint32_t val )
  function isChecked (line 74) | inline bool isChecked( CButton& btn )

FILE: Examples/WhisperDesktop/WhisperDesktop.cpp
  function HRESULT (line 8) | static HRESULT dialogLoadModel( AppState& appState )
  function HRESULT (line 21) | static HRESULT dialogTranscribe( AppState& appState )
  function HRESULT (line 27) | static HRESULT dialogCapture( AppState& appState )
  function wWinMain (line 42) | int __stdcall wWinMain( HINSTANCE hInstance, HINSTANCE hPrevInstance, LP...

FILE: Examples/main/main.cpp
  function HRESULT (line 13) | static HRESULT loadWhisperModel( const wchar_t* path, const std::wstring...
  function to_timestamp (line 33) | std::string to_timestamp( sTimeSpan ts, bool comma = false )
  function colorIndex (line 46) | static int colorIndex( const sToken& tok )
  function HRESULT (line 55) | HRESULT __cdecl newSegmentCallback( iContext* context, uint32_t n_new, v...
  function HRESULT (line 141) | HRESULT __cdecl beginSegmentCallback( iContext* context, void* user_data...
  function HRESULT (line 148) | HRESULT setupConsoleColors()
  function setPrompt (line 167) | static void __stdcall setPrompt( const int* ptr, int length, void* pv )
  function wmain (line 174) | int wmain( int argc, wchar_t* argv[] )

FILE: Examples/main/miscUtils.cpp
  function utf8 (line 5) | std::string utf8( const std::wstring& utf16 )
  function utf16 (line 13) | std::wstring utf16( const std::string& u8 )
  function wchar_t (line 23) | wchar_t* formatMessage( HRESULT hr )
  function printError (line 38) | void printError( const char* what, HRESULT hr )

FILE: Examples/main/params.cpp
  function whisper_print_usage (line 24) | void whisper_print_usage( int argc, wchar_t** argv, const whisper_params...
  function pfnListAdapter (line 58) | static void __stdcall pfnListAdapter( const wchar_t* name, void* )
  function listGpus (line 63) | static void listGpus()

FILE: Examples/main/textWriter.cpp
  function HRESULT (line 12) | HRESULT replaceExtension( CString& path, LPCTSTR inputPath, LPCTSTR ext )
  class Writer (line 24) | class Writer
    method HRESULT (line 31) | HRESULT write( Whisper::iContext* context, LPCTSTR audioPath, LPCTSTR ...
  function HRESULT (line 51) | HRESULT writeUtf8Bom( CAtlFile& file )
  function printTime (line 57) | void printTime( CStringA& rdi, Whisper::sTimeSpan time, bool comma = fal...
  function HRESULT (line 86) | HRESULT writeString( CAtlFile& file, const CStringA& line )
  class TextWriter (line 94) | class TextWriter : public Writer
    method HRESULT (line 98) | HRESULT impl( const Whisper::sSegment* const segments, const size_t le...
    method TextWriter (line 126) | TextWriter( bool tt ) : timestamps( tt ) { }
  class SubRipWriter (line 130) | class SubRipWriter : public Writer
    method HRESULT (line 132) | HRESULT impl( const Whisper::sSegment* const segments, const size_t le...
  class VttWriter (line 156) | class VttWriter : public Writer
    method HRESULT (line 158) | HRESULT impl( const Whisper::sSegment* const segments, const size_t le...
  function HRESULT (line 185) | HRESULT writeText( Whisper::iContext* context, LPCTSTR audioPath, bool t...
  function HRESULT (line 191) | HRESULT writeSubRip( Whisper::iContext* context, LPCTSTR audioPath )
  function HRESULT (line 197) | HRESULT writeWebVTT( Whisper::iContext* context, LPCTSTR audioPath )

FILE: Tools/CompressShaders/Cabinet.cs
  class Cabinet (line 13) | [Obsolete]
    type eCompressionAlgorithm (line 18) | enum eCompressionAlgorithm: uint
    method CreateCompressor (line 28) | [DllImport( "Cabinet.dll", SetLastError = true )]
    method CloseCompressor (line 31) | [DllImport( "Cabinet.dll", SetLastError = true )]
    method Compress (line 34) | [DllImport( "Cabinet.dll", SetLastError = true )]
    method compressBuffer (line 39) | public static byte[] compressBuffer( byte[] src )

FILE: Tools/CompressShaders/CompressShaders.cs
  type sShaderBinary (line 4) | record struct sShaderBinary
  class FoundShaders (line 19) | sealed class FoundShaders
    method FoundShaders (line 25) | public FoundShaders( IEnumerable<sShaderBinary> found )
    method findIndex (line 57) | int findIndex( string name, bool wave64 )
  class Program (line 71) | class Program
    method getSolutionRoot (line 73) | static string getSolutionRoot( [CallerFilePath] string? path = null )
    method shadersBinDir (line 87) | static string shadersBinDir( string root )
    method readShaders (line 92) | static IEnumerable<sShaderBinary> readShaders( string root )
    method writeHeader (line 99) | static void writeHeader( string root, IEnumerable<string> names )
    method writeCpp (line 124) | static void writeCpp( string root, IEnumerable<string> names )
    method writePayloadIDs (line 130) | static void writePayloadIDs( StreamWriter stream, string varName, int[...
    method writePayload (line 147) | static void writePayload( string root, FoundShaders shaders, out int c...
    method mainImpl (line 218) | static void mainImpl()
    method Main (line 231) | static int Main( string[] args )

FILE: Tools/CompressShaders/DetectFp64.cs
  class DetectFp64 (line 6) | static class DetectFp64
    type DXBCHeader (line 8) | struct DXBCHeader
    method usesFp64 (line 20) | public static bool usesFp64( ReadOnlySpan<byte> dxbc )

FILE: Tools/CompressShaders/LZ4.cs
  class LZ4 (line 7) | static class LZ4
    method compressBuffer (line 14) | public static byte[] compressBuffer( byte[] src )

FILE: Tools/CompressShaders/LanguageCodes.cs
  class LanguageCodes (line 6) | static class LanguageCodes
    type Row (line 8) | record struct Row
    method makeKey (line 16) | static uint makeKey( string str )
    method load (line 33) | static IEnumerable<Row> load( string path )
    method writeCpp (line 54) | static void writeCpp( string inl, Row[] data )
    method titleCase (line 64) | static string titleCase( this string name ) =>
    method writeCs (line 67) | static void writeCs( string cs, Row[] data )
    method produce (line 90) | static void produce( string tsv, string inl, string cs )
    method produce (line 97) | public static void produce( string solutionRoot )

FILE: Tools/CompressShaders/ShaderNames.cs
  class ShaderNames (line 1) | static class ShaderNames
    method write (line 3) | public static void write( string path, IEnumerable<string> names )

FILE: Tools/CompressTables/CompressTables.cs
  class Program (line 7) | internal class Program
    method getSolutionRoot (line 9) | static string getSolutionRoot( [CallerFilePath] string? path = null )
    method writeArray (line 17) | static void writeArray( byte[] compressed, string path )
    method Main (line 35) | static void Main( string[] args )

FILE: Tools/PerfSummary/LogParser.cs
  type eInputClip (line 6) | enum eInputClip: byte
  type eWhisperModel (line 11) | enum eWhisperModel: byte
  type LogName (line 17) | struct LogName
    method ToString (line 23) | public override string ToString() => $"{clip}-{model}-{gpu}";
    method tryParse (line 25) | public static LogName? tryParse( string path )
    method LogName (line 39) | LogName( string[] fields )
  type LogData (line 47) | record class LogData
  class LogParser (line 59) | static class LogParser
    method parse (line 61) | public static IEnumerable<LogData> parse( string folder )
    type eSection (line 72) | enum eSection: byte
    method tryParseSection (line 84) | static bool tryParseSection( ref eSection? section, string line )
    method tryParseTime (line 97) | static bool tryParseTime( ref double? val, string key, string line )
    method parseMemory (line 130) | static double parseMemory( Match m, int iv )
    method tryParseMemory (line 148) | static bool tryParseMemory( ref double? ram, ref double? vram, string ...
    method parseFile (line 158) | static LogData parseFile( in LogName name, string path )

FILE: Tools/PerfSummary/PerfSummary.cs
  class Program (line 5) | internal class Program
    method getSolutionRoot (line 7) | static string getSolutionRoot( [CallerFilePath] string? path = null )
    method Main (line 15) | static void Main( string[] args )

FILE: Tools/PerfSummary/Summary.cs
  class Summary (line 5) | static class Summary
    method print (line 7) | public static void print( LogData[] logs, string folder )
    method makeFields (line 25) | static IEnumerable<string> makeFields( this LogData log )
    method print (line 38) | static string print( this double v ) =>
    method relative (line 41) | static string relative( this LogData log )
    method clip (line 60) | static string clip( this LogData log )
    method gpu (line 70) | static string gpu( this LogData log )
    method fields (line 82) | static void fields( this StreamWriter writer, IEnumerable<string> fiel...

FILE: Tools/compareTraces/CommandLineArgs.cpp
  function printUsage (line 5) | static bool printUsage()

FILE: Tools/compareTraces/CommandLineArgs.h
  type CommandLineArgs (line 3) | struct CommandLineArgs

FILE: Tools/compareTraces/TraceReader.cpp
  function sTraceItem (line 5) | const sTraceItem& TraceReader::operator[]( size_t idx ) const
  function CStringA (line 12) | CStringA TraceReader::getName( const sTraceItem& item ) const
  function HRESULT (line 23) | HRESULT TraceReader::open( LPCTSTR path )

FILE: Tools/compareTraces/TraceReader.h
  function namespace (line 6) | namespace Tracing

FILE: Tools/compareTraces/compare.cpp
  function tensorDims (line 22) | inline int tensorDims( __m128i vec )
  function printSize (line 33) | int printSize( __m128i vec )
  class Comparer (line 59) | class Comparer
    method diffBuffers (line 64) | bool diffBuffers( size_t i, const sTraceItem& a, const sTraceItem& b, ...
    method diffTensors (line 87) | bool diffTensors( size_t i, const sTraceItem& a, const sTraceItem& b, ...
    method Comparer (line 134) | Comparer( TraceReader& t1, TraceReader& t2 ) :
    method compare (line 137) | bool compare( size_t i )
  class PrintSummary (line 169) | class PrintSummary : public Comparer
    method PrintSummary (line 175) | PrintSummary( TraceReader& a, TraceReader& b ) : Comparer( a, b ) { }
  class PrintDiff (line 196) | class PrintDiff : public Comparer
    method PrintDiff (line 201) | PrintDiff( TraceReader& a, TraceReader& b ) : Comparer( a, b ) { }
  function storeSize (line 220) | std::array<uint32_t, 4> storeSize( __m128i v )
  function storeStrides (line 227) | std::array<size_t, 4> storeStrides( __m128i v )
  function HRESULT (line 302) | HRESULT compareTraces( const CommandLineArgs& arguments )

FILE: Tools/compareTraces/compareTraces.cpp
  function wmain (line 6) | int wmain( int argc, wchar_t* argv[] )

FILE: Tools/compareTraces/stdafx.cpp
  function wchar_t (line 5) | wchar_t* formatMessage( HRESULT hr )
  function printError (line 20) | void printError( HRESULT hr )

FILE: Tools/compareTraces/stdafx.h
  function __m128i (line 18) | inline __m128i load16( const int* rsi )
  function __m128i (line 22) | inline __m128i load16( const uint32_t* rsi )
  function __m128i (line 26) | inline __m128i load( const std::array<uint32_t, 4>& arr )
  function vectorEqual (line 31) | inline bool vectorEqual( __m128i a, __m128i b )
  function wchar_t (line 40) | inline const wchar_t* cstr( const CString& s ) { return s; }

FILE: Tools/compareTraces/testUtils.cpp
  function __m256 (line 10) | __forceinline __m256 load( const float* rsi )
  function __m256 (line 15) | __forceinline __m256 load( const uint16_t* rsi )
  function loadPartial (line 21) | __forceinline void loadPartial( const uint16_t* x, const uint16_t* y, si...
  function __m128 (line 73) | inline __m128 loadFloat2( const float* rsi )
  function __m128 (line 77) | inline __m128 loadFloat3( const float* rsi )
  function loadPartial (line 83) | __forceinline void loadPartial( const float* x, const float* y, size_t c...
  function horizontalMaximum (line 133) | __forceinline float horizontalMaximum( __m256 v )
  function horizontalSum (line 142) | __forceinline double horizontalSum( __m256 v )
  function __m256 (line 153) | __m256 maskInfNan( __m256 diff, __m256 a, __m256 b )
  class DiffAcc (line 162) | class DiffAcc
    method add (line 169) | __forceinline void add( __m256 a, __m256 b )
    method sTensorDiff (line 179) | __forceinline sTensorDiff reduce( size_t count )
  function sTensorDiff (line 190) | static sTensorDiff __declspec( noinline ) diffVectors( const E* a, const...
  function sTensorDiff (line 211) | sTensorDiff DirectCompute::computeDiff( const float* a, const float* b, ...
  function sTensorDiff (line 216) | sTensorDiff DirectCompute::computeDiff( const uint16_t* a, const uint16_...

FILE: Whisper/API/MfStructs.h
  function namespace (line 3) | namespace Whisper

FILE: Whisper/API/SpecialTokens.h
  function namespace (line 3) | namespace Whisper

FILE: Whisper/API/TranscribeStructs.h
  function namespace (line 5) | namespace Whisper
  type sTimeInterval (line 50) | struct sTimeInterval
  type sSegment (line 56) | struct sSegment
  type eTokenFlags (line 66) | enum eTokenFlags : uint32_t
  type sToken (line 77) | struct sToken
  type sTranscribeLength (line 98) | struct sTranscribeLength
  function eResultFlags (line 103) | enum struct eResultFlags : uint32_t
  function eSpeakerChannel (line 128) | enum struct eSpeakerChannel : uint8_t

FILE: Whisper/API/iContext.cl.h
  function namespace (line 10) | namespace Whisper

FILE: Whisper/API/iContext.h
  function namespace (line 9) | namespace Whisper

FILE: Whisper/API/iMediaFoundation.cl.h
  type IMFSourceReader (line 5) | struct IMFSourceReader
  function namespace (line 7) | namespace Whisper

FILE: Whisper/API/iMediaFoundation.h
  type IMFSourceReader (line 4) | struct IMFSourceReader
  function namespace (line 6) | namespace Whisper

FILE: Whisper/API/iTranscribeResult.cl.h
  function namespace (line 5) | namespace Whisper

FILE: Whisper/API/iTranscribeResult.h
  function namespace (line 4) | namespace Whisper

FILE: Whisper/API/loggerApi.h
  type struct (line 7) | enum struct
  function eLoggerFlags (line 14) | enum struct eLoggerFlags : uint8_t

FILE: Whisper/API/sFullParams.h
  function namespace (line 5) | namespace Whisper

FILE: Whisper/API/sLanguageList.h
  function namespace (line 4) | namespace Whisper

FILE: Whisper/API/sLoadModelCallbacks.h
  function namespace (line 3) | namespace Whisper

FILE: Whisper/API/sModelSetup.h
  type struct (line 6) | enum struct
  function eGpuModelFlags (line 20) | enum struct eGpuModelFlags : uint32_t

FILE: Whisper/CPU/BufferAllocator.cpp
  function HRESULT (line 7) | HRESULT BufferAllocator::create( size_t cb )
  function roundUpAlloc (line 19) | __forceinline size_t roundUpAlloc( size_t cb )
  function roundUpVirtualAlloc (line 55) | __forceinline size_t roundUpVirtualAlloc( size_t cb )
  function HRESULT (line 63) | HRESULT VirtualAllocator::create( size_t cb )

FILE: Whisper/CPU/BufferAllocator.h
  function namespace (line 5) | namespace CpuCompute

FILE: Whisper/CPU/DecoderTensors.cpp
  class CompatContext (line 8) | class CompatContext
    method CompatContext (line 14) | CompatContext( std::vector<ggml_tensor>& dest, size_t layers ) :
    method add (line 23) | void add( const Tensor& rsi, ggml_tensor*& res )
    method add2 (line 31) | void add2( const TensorPair& rsi, ggml_tensor*& w, ggml_tensor*& b )
    method isComplete (line 37) | bool isComplete() const

FILE: Whisper/CPU/DecoderTensors.h
  function namespace (line 9) | namespace CpuCompute

FILE: Whisper/CPU/HybridLoader.cpp
  function populateDecodeTensorsMap (line 6) | static void populateDecodeTensorsMap( CAtlMap<CStringA, Tensor*>& map, i...
  function HRESULT (line 61) | HRESULT HybridLoader::setupTensor( const CStringA& name, int n_dims, int...
  function HRESULT (line 105) | HRESULT HybridLoader::completeLoad( ComLight::iReadStream* stream, iLoad...

FILE: Whisper/CPU/HybridLoader.h
  function namespace (line 7) | namespace CpuCompute

FILE: Whisper/CPU/KvTensors.h
  function namespace (line 6) | namespace CpuCompute

FILE: Whisper/CPU/KvTensorsCpu.cpp
  function HRESULT (line 6) | HRESULT KvTensors::create( const Whisper::sModelParams& mp )

FILE: Whisper/CPU/LargeBuffer.cpp
  function HRESULT (line 13) | HRESULT LargeBuffer::allocate( size_t cb )
  function HRESULT (line 23) | HRESULT LargeBuffer::setReadOnly( size_t cb )

FILE: Whisper/CPU/LargeBuffer.h
  function namespace (line 3) | namespace CpuCompute

FILE: Whisper/CPU/MlContext.h
  function namespace (line 5) | namespace CpuCompute

FILE: Whisper/CPU/MlContextCpu.cpp
  function Tensor (line 11) | Tensor MlContext::createTensor( eDataType type, const std::array<uint32_...
  function Tensor (line 18) | Tensor MlContext::createTensor( eDataType type, std::initializer_list<ui...
  function Tensor (line 41) | Tensor MlContext::addRows( const Tensor& d_te, const Tensor& d_pe, const...
  class DispatchHelper3 (line 66) | class DispatchHelper3
    method DispatchHelper3 (line 71) | DispatchHelper3() = default;
    method DispatchHelper3 (line 72) | DispatchHelper3( uint32_t x, uint32_t y, uint32_t z )
    method groupsCount (line 79) | size_t groupsCount() const
    method unpack (line 86) | std::array<uint32_t, 3> unpack( size_t idx ) const
    method next (line 96) | void next( std::array<uint32_t, 3>& i ) const
  type NormContext (line 119) | struct NormContext : public iComputeRange
    method HRESULT (line 127) | HRESULT __stdcall compute( size_t i, size_t end ) const override final
  function Tensor (line 143) | Tensor MlContext::norm( const Tensor& arg )
  function Tensor (line 192) | Tensor MlContext::mulMat( const Tensor& a, const Tensor& b )
  type SoftMaxContext (line 305) | struct SoftMaxContext : public iComputeRange
    method HRESULT (line 311) | HRESULT __stdcall compute( size_t i, size_t end ) const override final
  function copyElement (line 333) | __forceinline void copyElement( R* rdi, const S* rsi )
  function copyRow (line 354) | __forceinline void copyRow( R* rdi, const S* rsi, size_t length )
  function copyImpl (line 371) | static void __declspec( noinline ) copyImpl( R* rdi, const S* rsi, const...
  function HRESULT (line 406) | HRESULT MlContext::copyImpl( Tensor& result, const Tensor& source )
  function Tensor (line 460) | Tensor MlContext::copy( const Tensor& a, eDataType type, std::initialize...
  function Tensor (line 495) | Tensor MlContext::permute( const Tensor& a, uint8_t axis0, uint8_t axis1...
  function Tensor (line 560) | Tensor MlContext::add( const Tensor& a, const Tensor& b )

FILE: Whisper/CPU/ParallelForRunner.cpp
  function HRESULT (line 20) | HRESULT ParallelForRunner::setThreadsCount( int threads )
  function HRESULT (line 115) | HRESULT ParallelForRunner::parallelFor( iComputeRange& compute, size_t l...

FILE: Whisper/CPU/ParallelForRunner.h
  function namespace (line 4) | namespace CpuCompute

FILE: Whisper/CPU/Tensor.h
  function namespace (line 15) | namespace CpuCompute

FILE: Whisper/CPU/TensorCpu.cpp
  class sTensorMemoryHeader (line 10) | class alignas( 32 ) sTensorMemoryHeader
    method reset (line 15) | void reset( ptrdiff_t rc )
    method increment (line 20) | void increment()
    method decrement (line 25) | bool decrement()
  function sTensorMemoryHeader (line 33) | inline sTensorMemoryHeader* getMemBlockHeader( void* pv )
    method reset (line 15) | void reset( ptrdiff_t rc )
    method increment (line 20) | void increment()
    method decrement (line 25) | bool decrement()
  function releaseBlock (line 41) | inline void releaseBlock( sTensorMemoryHeader* pointer )
  function HRESULT (line 149) | HRESULT Tensor::create( eDataType type, const std::array<uint32_t, 4>& s...
  function HRESULT (line 189) | static HRESULT arrayFromList( std::array<uint32_t, 4>& arr, std::initial...
  function HRESULT (line 210) | HRESULT Tensor::create( eDataType type, std::initializer_list<uint32_t> ...
  function Tensor (line 248) | Tensor Tensor::fromData( void* pointer, eDataType type, uint32_t length )
  function HRESULT (line 263) | HRESULT Tensor::attach( void* pointer, eDataType type, std::initializer_...
  function Tensor (line 285) | Tensor Tensor::reshape3d( uint32_t ne0, uint32_t ne1, uint32_t ne2 ) const
  function isAlignedProperly (line 302) | bool isAlignedProperly( __m128i r0, __m128i r1, __m128i mask )
  function ggml_tensor (line 349) | ggml_tensor Tensor::ggml() const

FILE: Whisper/CPU/mulMat.cpp
  function HRESULT (line 9) | static HRESULT mulMatImpl( Tensor& result, const Tensor& a, const Tensor...
  function HRESULT (line 16) | HRESULT CpuCompute::mulMat( Tensor& result, const Tensor& a, const Tenso...

FILE: Whisper/CPU/mulMat.h
  function namespace (line 5) | namespace CpuCompute

FILE: Whisper/CPU/mulMat.kernel.hpp
  type ResultTile (line 7) | struct ResultTile
    method fmadd (line 13) | __forceinline void fmadd( __m256 a, __m256 b )
    method kernelPartial (line 18) | __forceinline void kernelPartial( const std::array<__m256, panelHeight...
  function setZero (line 26) | __forceinline void setZero( std::array<__m256, 1>& dest )
  function setZero (line 30) | __forceinline void setZero( std::array<__m256, 2>& dest )
  function setZero (line 35) | __forceinline void setZero( std::array<__m256, 3>& dest )
  function setZero (line 41) | __forceinline void setZero( std::array<__m256, 4>& dest )
  function setZero (line 48) | __forceinline void setZero( std::array<__m256, 6>& dest )
  function setZero (line 57) | __forceinline void setZero( std::array<__m256, 8>& dest )
  function __m256 (line 239) | __forceinline __m256 loadUpcasted( const uint16_t* rsi )
  function loadPanel (line 248) | __forceinline void loadPanel( const uint16_t* rsi, std::array<__m256, 1>...
  function loadPanel (line 252) | __forceinline void loadPanel( const uint16_t* rsi, std::array<__m256, 2>...
  function loadPanel (line 257) | __forceinline void loadPanel( const uint16_t* rsi, std::array<__m256, 3>...
  function loadPanel (line 263) | __forceinline void loadPanel( const uint16_t* rsi, std::array<__m256, 4>...

FILE: Whisper/CPU/mulMatImpl.avx2.cpp
  function __m256i (line 14) | __forceinline __m256i load( const void* rsi )
  function storeLow (line 48) | __forceinline void storeLow( void* rdi, __m256i v )
  function storeHigh (line 67) | __forceinline void storeHigh( void* rdi, __m256i v )
  function prefetch (line 86) | __forceinline void prefetch( const uint8_t* p )
  function transpose8Avx2 (line 91) | __forceinline void transpose8Avx2( uint16_t* rdiWords, size_t w, const u...
  function transpose8PartialAvx2 (line 197) | __forceinline void transpose8PartialAvx2( uint16_t* rdiWords, size_t w, ...
  function HRESULT (line 317) | HRESULT MulMatBase::transposePanelAvx2( uint16_t* rdi, size_t i, size_t ...

FILE: Whisper/CPU/mulMatImpl.cpp
  function checkAvx2Support (line 17) | bool checkAvx2Support()
  function divRoundUp (line 25) | inline uint32_t divRoundUp( uint32_t a, uint32_t b )
  function HRESULT (line 96) | HRESULT MulMatBase::run( ParallelForRunner& pfor )
  function HRESULT (line 112) | HRESULT __stdcall MulMatImpl<panelHeightRegs, tileWidthFloats>::compute(...
  class MulMatImpl<4, 1> (line 206) | class MulMatImpl<4, 1>
  class MulMatImpl<1, 1> (line 207) | class MulMatImpl<1, 1>
  class MulMatImpl<4, 2> (line 208) | class MulMatImpl<4, 2>
  class MulMatImpl<1, 2> (line 209) | class MulMatImpl<1, 2>
  class MulMatImpl<2, 3> (line 210) | class MulMatImpl<2, 3>
  class MulMatImpl<1, 3> (line 211) | class MulMatImpl<1, 3>
  class MulMatImpl<2, 4> (line 212) | class MulMatImpl<2, 4>
  class MulMatImpl<1, 4> (line 213) | class MulMatImpl<1, 4>

FILE: Whisper/CPU/mulMatImpl.h
  function namespace (line 10) | namespace CpuCompute

FILE: Whisper/CPU/mulMatImpl.panel.cpp
  function HRESULT (line 8) | HRESULT MulMatBase::transposePanel( uint16_t* rdi, size_t i, size_t m2, ...
  function HRESULT (line 64) | HRESULT MulMatBase::copyPanelColumnMajor8( uint16_t* rdi, size_t i, size...
  function __m128i (line 90) | __forceinline __m128i load8Partial( const uint16_t* x, size_t len )
  function __m256i (line 126) | __forceinline __m256i load16Partial( const uint16_t* rsi, size_t len )
  function HRESULT (line 148) | HRESULT MulMatBase::copyPanelColumnMajor16( uint16_t* rdi, size_t i, siz...
  function HRESULT (line 183) | HRESULT MulMatBase::copyPanelColumnMajor32( uint16_t* rdi, size_t i, siz...
  function HRESULT (line 240) | HRESULT MulMatBase::gatherPanel( uint16_t* rdi, size_t i, size_t m2, siz...

FILE: Whisper/CPU/mulMatUtils.hpp
  function __m128i (line 6) | __forceinline __m128i f16Load( const uint16_t* rsi )
  function transpose8 (line 13) | __forceinline void transpose8( uint16_t* rdi, size_t w, const uint16_t* ...
  function transpose8Partial (line 93) | inline void transpose8Partial( uint16_t* rdi, size_t w, size_t h, const ...
  function copyColumnMajor (line 199) | __forceinline void copyColumnMajor( uint16_t* rdi, size_t w, const uint1...
  function __m128i (line 230) | __forceinline __m128i loadPartial( const uint16_t* x, size_t count )
  function copyColumnMajorPartial (line 268) | inline void copyColumnMajorPartial( uint16_t* rdi, size_t w, size_t h, c...
  function zeroAlignedMemory (line 285) | __forceinline void zeroAlignedMemory( void* pv, size_t cb )

FILE: Whisper/CPU/simdUtils.cpp
  function __m256 (line 11) | __forceinline __m256 load8( const uint16_t* rsi )
  function loadPartial (line 17) | __forceinline void loadPartial( const uint16_t* x, const uint16_t* y, si...
  function __m256 (line 71) | __forceinline __m256 loadPartial( const uint16_t* x, size_t count )
  function __m128 (line 109) | __forceinline __m128 loadFloat2( const float* rsi )
  function __m128 (line 113) | __forceinline __m128 loadFloat3( const float* rsi )
  function __m256 (line 120) | __forceinline __m256 loadPartial( const float* rsi, size_t count )
  function storeFloat2 (line 155) | __forceinline void storeFloat2( float* rdi, __m128 vec )
  function storePartial (line 160) | __forceinline void storePartial( float* rdi, __m256 vec, size_t count )
  function addF16to32 (line 191) | void addF16to32( float* rdi, const uint16_t* a, const uint16_t* b, size_...
  function addF16to32 (line 213) | void addF16to32( float* rdi, const uint16_t* a, const float* b, size_t l...
  function horizontalSum (line 243) | __forceinline float horizontalSum( __m256 vec )
  function norm (line 253) | void norm( float* rdi, float* temp, const float* rsi, size_t length )
  function fmaRepeatRow (line 319) | void fmaRepeatRow( float* rdi, size_t len, const float* w, const float* ...
  function addRepeatScaleRow (line 369) | void __vectorcall addRepeatScaleRow( float* rdi, size_t len, const float...
  function addRepeatRow (line 422) | void addRepeatRow( float* rdi, size_t len, const float* b, size_t lenPat...
  function __m256 (line 473) | __forceinline __m256 gelu( __m256 x, const DirectCompute::LookupTablesDa...
  function addRepeatGeluRow (line 485) | void addRepeatGeluRow( float* rdi, size_t len, const float* b, size_t le...
  function scaleRow (line 538) | void __vectorcall scaleRow( float* rdi, size_t len, const __m256 scale )
  function horizontalMax (line 561) | __forceinline float horizontalMax( __m256 vec )
  function _cvtsh_ss (line 570) | __forceinline float _cvtsh_ss( uint16_t f16 )
  function _cvtss_sh (line 577) | __forceinline uint16_t _cvtss_sh( float f, int rounding )
  function LookupTablesData (line 586) | const LookupTablesData& getLookupTables()
  function softMax (line 592) | void softMax( float* rdi, size_t length, const float inputScale )
  function floatsUpcast (line 655) | void floatsUpcast( float* rdi, const uint16_t* rsi, size_t length )
  function floatsDowncast (line 670) | void floatsDowncast( uint16_t* rdi, const float* rsi, size_t length )
  function addRowInPlace (line 694) | void addRowInPlace( float* rdi, const float* rsi, size_t length )
  function addRow (line 717) | void addRow( float* rdi, const float* a, const float* b, size_t length )

FILE: Whisper/CPU/simdUtils.h
  function class (line 7) | class AlignedSpan
  function tempBufferForFloats (line 23) | inline size_t tempBufferForFloats( size_t count )
  function namespace (line 42) | namespace DirectCompute
  function __m256 (line 55) | __forceinline __m256 loadTailMaskFloats( size_t remainder )

FILE: Whisper/D3D/Binder.h
  function namespace (line 4) | namespace DirectCompute

FILE: Whisper/D3D/MappedResource.cpp
  function HRESULT (line 13) | HRESULT MappedResource::map( ID3D11Resource* res, bool reading )

FILE: Whisper/D3D/MappedResource.h
  function namespace (line 5) | namespace DirectCompute

FILE: Whisper/D3D/RenderDoc/renderDoc.cpp
  function isKeyPressed (line 35) | inline bool isKeyPressed( int vKey )

FILE: Whisper/D3D/RenderDoc/renderDoc.h
  function namespace (line 3) | namespace DirectCompute

FILE: Whisper/D3D/RenderDoc/renderdoc_app.h
  type RENDERDOC_CaptureOption (line 75) | typedef enum RENDERDOC_CaptureOption {
  type RENDERDOC_InputButton (line 236) | typedef enum RENDERDOC_InputButton {
  type RENDERDOC_OverlayBits (line 324) | typedef enum RENDERDOC_OverlayBits {
  type pRENDERDOC_RemoveHooks (line 363) | typedef pRENDERDOC_RemoveHooks pRENDERDOC_Shutdown;
  type pRENDERDOC_SetCaptureFilePathTemplate (line 395) | typedef pRENDERDOC_SetCaptureFilePathTemplate pRENDERDOC_SetLogFilePathT...
  type pRENDERDOC_GetCaptureFilePathTemplate (line 396) | typedef pRENDERDOC_GetCaptureFilePathTemplate pRENDERDOC_GetLogFilePathT...
  type pRENDERDOC_IsTargetControlConnected (line 436) | typedef pRENDERDOC_IsTargetControlConnected pRENDERDOC_IsRemoteAccessCon...
  type RENDERDOC_Version (line 560) | typedef enum RENDERDOC_Version {
  type RENDERDOC_API_1_6_0 (line 605) | typedef struct RENDERDOC_API_1_6_0
  type RENDERDOC_API_1_6_0 (line 685) | typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_0_0;
  type RENDERDOC_API_1_6_0 (line 686) | typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_0_1;
  type RENDERDOC_API_1_6_0 (line 687) | typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_0_2;
  type RENDERDOC_API_1_6_0 (line 688) | typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_1_0;
  type RENDERDOC_API_1_6_0 (line 689) | typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_1_1;
  type RENDERDOC_API_1_6_0 (line 690) | typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_1_2;
  type RENDERDOC_API_1_6_0 (line 691) | typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_2_0;
  type RENDERDOC_API_1_6_0 (line 692) | typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_3_0;
  type RENDERDOC_API_1_6_0 (line 693) | typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_4_0;
  type RENDERDOC_API_1_6_0 (line 694) | typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_4_1;
  type RENDERDOC_API_1_6_0 (line 695) | typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_4_2;
  type RENDERDOC_API_1_6_0 (line 696) | typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_5_0;

FILE: Whisper/D3D/createBuffer.cpp
  function HRESULT (line 6) | HRESULT DirectCompute::createBuffer( eBufferUse use, size_t totalBytes, ...

FILE: Whisper/D3D/createBuffer.h
  function namespace (line 5) | namespace DirectCompute

FILE: Whisper/D3D/createDevice.cpp
  function HRESULT (line 8) | HRESULT DirectCompute::createDevice( const std::wstring& adapterName, ID...
  function HRESULT (line 38) | HRESULT DirectCompute::cloneDevice( ID3D11Device* source, ID3D11Device**...
  function merge3 (line 63) | inline bool merge3( uint32_t flags, eGpuModelFlags enabled, eGpuModelFla...
  function HRESULT (line 73) | HRESULT DirectCompute::validateFlags( uint32_t flags )
  function HRESULT (line 91) | HRESULT DirectCompute::queryDeviceInfo( sGpuInfo& rdi, ID3D11Device* dev...

FILE: Whisper/D3D/createDevice.h
  function namespace (line 6) | namespace DirectCompute

FILE: Whisper/D3D/device.h
  function namespace (line 6) | namespace DirectCompute

FILE: Whisper/D3D/downloadBuffer.cpp
  type BufferInfo (line 8) | struct BufferInfo
    method HRESULT (line 14) | HRESULT create( ID3D11ShaderResourceView* srv )
    method HRESULT (line 28) | HRESULT download( void* rdi )
  function dxgiSizeof (line 46) | size_t dxgiSizeof( DXGI_FORMAT fmt )
  function HRESULT (line 58) | HRESULT DirectCompute::downloadBuffer( ID3D11ShaderResourceView* srv, st...

FILE: Whisper/D3D/downloadBuffer.h
  function namespace (line 3) | namespace DirectCompute

FILE: Whisper/D3D/enums.cpp
  function DXGI_FORMAT (line 6) | DXGI_FORMAT DirectCompute::viewFormat( eDataType dt )

FILE: Whisper/D3D/enums.h
  type struct (line 7) | enum struct
  function elementSize (line 14) | inline size_t elementSize( eDataType dt )
  function eBufferUse (line 23) | enum struct eBufferUse : uint8_t

FILE: Whisper/D3D/listGPUs.cpp
  type DirectCompute (line 7) | namespace DirectCompute
    function HRESULT (line 9) | static HRESULT createFactory( CComPtr<IDXGIFactory1>& rdi )
    function setName (line 17) | inline void setName( std::wstring& rdi, const DXGI_ADAPTER_DESC1& desc )
    function parseGpuIndex (line 25) | std::optional<uint32_t> parseGpuIndex( const std::wstring& requestedNa...
    function selectAdapter (line 48) | CComPtr<IDXGIAdapter1> selectAdapter( const std::wstring& requestedName )
  function HRESULT (line 110) | HRESULT COMLIGHTCALL Whisper::listGPUs( pfnListAdapters pfn, void* pv )

FILE: Whisper/D3D/listGPUs.h
  function namespace (line 4) | namespace DirectCompute

FILE: Whisper/D3D/sGpuInfo.h
  function namespace (line 4) | namespace DirectCompute

FILE: Whisper/D3D/shaderNames.h
  function namespace (line 5) | namespace DirectCompute

FILE: Whisper/D3D/shaders.cpp
  function HRESULT (line 17) | HRESULT DirectCompute::createComputeShaders( std::vector<CComPtr<ID3D11C...

FILE: Whisper/D3D/shaders.h
  function namespace (line 4) | namespace DirectCompute

FILE: Whisper/DllMain.cpp
  function BOOL (line 3) | BOOL __stdcall DllMain( HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvR...

FILE: Whisper/Hybrid/HybridContext.cpp
  function threadsCount (line 14) | int threadsCount( int t )
  type eModelType (line 42) | enum struct eModelType : uint8_t
  function HRESULT (line 51) | static HRESULT detectModelType( const Whisper::sModelParams& modelParams...
  type RamMB (line 75) | struct alignas( 2 ) RamMB
    method RamMB (line 78) | constexpr RamMB( uint8_t d, uint8_t dl ) : dec( d ), decLayer( dl ) { }
    method __m128i (line 80) | __m128i loadBytes() const
  function HRESULT (line 103) | HRESULT HybridContext::create()
  class HybridContext::SetAllocatorRaii (line 124) | class HybridContext::SetAllocatorRaii
    method SetAllocatorRaii (line 131) | SetAllocatorRaii( HybridContext* owner, CpuCompute::iArenaAllocator& a...
  function HRESULT (line 144) | HRESULT HybridContext::decode( const int* tokens, const int n_tokens, co...

FILE: Whisper/Hybrid/HybridContext.h
  function class (line 9) | class HybridContext

FILE: Whisper/Hybrid/KeyValueDownloader.cpp
  function HRESULT (line 4) | HRESULT KeyValueDownloader::create( const Whisper::sModelParams& mp )
  function HRESULT (line 19) | HRESULT KeyValueDownloader::download( const DirectCompute::KeyValueBuffe...

FILE: Whisper/Hybrid/KeyValueDownloader.h
  function class (line 7) | class KeyValueDownloader

FILE: Whisper/MF/AudioBuffer.h
  function namespace (line 4) | namespace Whisper

FILE: Whisper/MF/AudioCapture.cpp
  type Strings (line 13) | struct Strings
  function HRESULT (line 18) | HRESULT getAllocString( IMFActivate* activate, const GUID& id, CString& ...
  function HRESULT (line 29) | HRESULT getInfo( IMFActivate* activate, Strings& rdi )
  function HRESULT (line 36) | HRESULT __stdcall supplyDevices( Whisper::pfnFoundCaptureDevices pfn, vo...
  function HRESULT (line 74) | HRESULT __stdcall Whisper::captureDeviceList( pfnFoundCaptureDevices pfn...
  class Capture (line 102) | class Capture : public ComLight::ObjectRoot<iAudioCapture>
    method getReader (line 108) | getReader( IMFSourceReader** pp ) const noexcept override final
    method getParams (line 116) | getParams() const noexcept override final
  function HRESULT (line 124) | HRESULT Capture::open( iMediaFoundation* owner, const wchar_t* endpoint,...
  function HRESULT (line 156) | HRESULT __stdcall Whisper::captureOpen( iMediaFoundation* owner, const w...

FILE: Whisper/MF/AudioCapture.h
  function namespace (line 4) | namespace Whisper

FILE: Whisper/MF/MediaFoundation.cpp
  type Whisper (line 13) | namespace Whisper
    class AudioReader (line 15) | class AudioReader : public ComLight::ObjectRoot<iAudioReader>
      method getReader (line 22) | getReader( IMFSourceReader** pp ) const noexcept override final
      method requestedStereo (line 30) | requestedStereo() const noexcept override final
      method getDuration (line 34) | getDuration( int64_t& rdi ) const noexcept override final
      method HRESULT (line 50) | HRESULT open( iMediaFoundation* owner, LPCTSTR path, bool stereo )
      method HRESULT (line 64) | HRESULT open( iMediaFoundation* owner, IMFByteStream* stream, bool s...
      method setPreciseSamplesCount (line 77) | void setPreciseSamplesCount( int64_t count ) const
    function setPreciseSamplesCount (line 83) | void setPreciseSamplesCount( const iAudioReader* ar, int64_t count )
    class MediaFoundation (line 89) | class MediaFoundation : public ComLight::ObjectRoot<iMediaFoundation>
      method loadAudioFile (line 94) | loadAudioFile( LPCTSTR path, bool stereo, iAudioBuffer** pp ) const ...
      method openAudioFile (line 98) | openAudioFile( LPCTSTR path, bool stereo, iAudioReader** pp ) noexce...
      method loadAudioFileData (line 110) | loadAudioFileData( const void* data, uint64_t size, bool stereo, iAu...
      method listCaptureDevices (line 139) | listCaptureDevices( pfnFoundCaptureDevices pfn, void* pv ) noexcept ...
      method openCaptureDevice (line 143) | openCaptureDevice( LPCTSTR endpoint, const sCaptureParams& capturePa...
      method HRESULT (line 149) | HRESULT FinalConstruct()
  function HRESULT (line 165) | HRESULT COMLIGHTCALL Whisper::initMediaFoundation( iMediaFoundation** pp )

FILE: Whisper/MF/PcmReader.cpp
  type Whisper (line 7) | namespace Whisper
    function __interface (line 9) | __interface iSampleHandler
  function copyMono (line 23) | __forceinline void copyMono( PcmMonoChunk* rdi, const AudioBuffer& rsi, ...
  function copyStereo (line 31) | __forceinline void copyStereo( PcmStereoChunk* rdi, const AudioBuffer& r...
  type HandlerMono (line 38) | struct HandlerMono : iSampleHandler
    method appendPcm (line 40) | void appendPcm( AudioBuffer& rdi, const float* rsi, size_t countFloats...
    method copyChunk (line 44) | void copyChunk( PcmMonoChunk* pMono, const AudioBuffer& rsi, size_t so...
    method copyChunk (line 48) | void copyChunk( PcmMonoChunk* pMono, const AudioBuffer& rsi, size_t so...
    method moveBufferData (line 52) | void moveBufferData( AudioBuffer& rdi, size_t amount ) const override ...
    method readerChannelsCount (line 65) | uint32_t readerChannelsCount() const override { return 1; }
  type HandlerDownmixedStereo (line 67) | struct HandlerDownmixedStereo : HandlerMono
    method appendPcm (line 69) | void appendPcm( AudioBuffer& rdi, const float* rsi, size_t countFloats...
    method readerChannelsCount (line 73) | uint32_t readerChannelsCount() const override final { return 2; }
  type HandlerStereo (line 75) | struct HandlerStereo : iSampleHandler
    method appendPcm (line 77) | void appendPcm( AudioBuffer& rdi, const float* rsi, size_t countFloats...
    method copyChunk (line 81) | void copyChunk( PcmMonoChunk* pMono, const AudioBuffer& rsi, size_t so...
    method copyChunk (line 86) | void copyChunk( PcmMonoChunk* pMono, const AudioBuffer& rsi, size_t so...
    method moveBufferData (line 91) | void moveBufferData( AudioBuffer& rdi, size_t amount ) const override ...
    method readerChannelsCount (line 109) | uint32_t readerChannelsCount() const override final { return 2; }
  function __m128i (line 115) | __forceinline __m128i load( const GUID& guid )
  function HRESULT (line 121) | HRESULT getDecoderInputSubtype( IMFSourceReader* reader, __m128i& rdi )
  function HRESULT (line 154) | HRESULT isMp3Decoder( IMFSourceReader* reader )
  function HRESULT (line 165) | HRESULT getPreciseDuration( IMFSourceReader* reader, size_t& length, boo...
  function HRESULT (line 247) | HRESULT getDuration( IMFSourceReader* reader, size_t& length, bool mono,...
  function HRESULT (line 313) | HRESULT PcmReader::readNextSample()
  function HRESULT (line 393) | HRESULT PcmReader::readChunk( PcmMonoChunk& mono, PcmStereoChunk* stereo )

FILE: Whisper/MF/PcmReader.h
  function namespace (line 8) | namespace Whisper

FILE: Whisper/MF/loadAudioFile.cpp
  type Whisper (line 12) | namespace Whisper
    class MediaFileBuffer (line 14) | class MediaFileBuffer : public ComLight::ObjectRoot<iAudioBuffer>
      method countSamples (line 19) | countSamples() const noexcept override final
      method getPcmMono (line 23) | getPcmMono() const noexcept override final
      method getPcmStereo (line 29) | getPcmStereo() const noexcept override final
      method getTime (line 35) | getTime( int64_t& rdi ) const noexcept override final
    function HRESULT (line 44) | HRESULT MediaFileBuffer::load( LPCTSTR path, bool stereo )
  function HRESULT (line 141) | HRESULT COMLIGHTCALL Whisper::loadAudioFile( LPCTSTR path, bool stereo, ...

FILE: Whisper/MF/loadAudioFile.h
  function namespace (line 4) | namespace Whisper

FILE: Whisper/MF/mfStartup.cpp
  type sCoInitStatus (line 9) | struct sCoInitStatus
  function HRESULT (line 21) | static HRESULT coInitialize()
  function coUninitialize (line 51) | static void coUninitialize()
  function HRESULT (line 93) | HRESULT MfStartupRaii::startup()

FILE: Whisper/MF/mfStartup.h
  function namespace (line 3) | namespace Whisper

FILE: Whisper/MF/mfUtils.cpp
  function HRESULT (line 5) | HRESULT Whisper::createMediaType( bool stereo, IMFMediaType** pp )
  function HRESULT (line 28) | HRESULT Whisper::getStreamDuration( IMFSourceReader* reader, int64_t& du...
  function HRESULT (line 45) | HRESULT Whisper::validateCurrentMediaType( IMFSourceReader* reader, uint...

FILE: Whisper/MF/mfUtils.h
  function namespace (line 8) | namespace Whisper

FILE: Whisper/ML/ConstantBuffer.cpp
  function HRESULT (line 6) | HRESULT ConstantBuffer::create()
  function copy32 (line 18) | __forceinline void copy32( __m128i* rdi, const TensorShape& ts )
  function HRESULT (line 25) | HRESULT ConstantBuffer::update( const TensorShape& t0 )
  function HRESULT (line 35) | HRESULT ConstantBuffer::update( const TensorShape& t0, const TensorShape...
  function HRESULT (line 46) | HRESULT ConstantBuffer::update( const TensorShape& t0, const TensorShape...

FILE: Whisper/ML/ConstantBuffer.h
  function namespace (line 5) | namespace DirectCompute

FILE: Whisper/ML/Context.ops.cpp
  function Tensor (line 6) | Tensor MlContext::createTensor( eDataType type, const std::array<uint32_...
  function Tensor (line 13) | Tensor MlContext::createTensor( eDataType type, std::initializer_list<ui...
  function Tensor (line 26) | Tensor MlContext::conv_1d_1s( const Tensor& a, const Tensor& b )
  function Tensor (line 38) | Tensor MlContext::conv_1d_2s( const Tensor& a, const Tensor& b )
  function canRepeat (line 55) | inline bool canRepeat( const TensorShape& t0, const TensorShape& t1 )
  function Tensor (line 64) | Tensor MlContext::cwiseBinary( const Tensor& a, const Tensor& b, eComput...
  function Tensor (line 72) | Tensor __declspec( noinline ) MlContext::view2d( const Tensor& a, uint32...
  function Tensor (line 85) | Tensor MlContext::transpose( const Tensor& a )
  function Tensor (line 104) | Tensor MlContext::norm( const Tensor& a )
  function Tensor (line 111) | Tensor MlContext::mulMat( const Tensor& a, const Tensor& b )
  function Tensor (line 134) | Tensor MlContext::mulMatEx( const Tensor& a, const Tensor& b, const char...
  function Tensor (line 167) | Tensor MlContext::permute( const Tensor& a, uint8_t axis0, uint8_t axis1...
  function Tensor (line 194) | Tensor MlContext::flashAttention( const Tensor& q, const Tensor& k, cons...
  function Tensor (line 229) | Tensor MlContext::copy( const Tensor& a, eDataType type, std::initialize...

FILE: Whisper/ML/DbgNanTest.cpp
  function HRESULT (line 6) | HRESULT DbgNanTest::create()

FILE: Whisper/ML/DbgNanTest.h
  function namespace (line 3) | namespace DirectCompute

FILE: Whisper/ML/Device.cpp
  function HRESULT (line 11) | HRESULT Device::create( uint32_t flags, const std::wstring& adapter )
  function HRESULT (line 32) | HRESULT Device::createClone( const Device& source )
  function __m128i (line 63) | __m128i __declspec( noinline ) DirectCompute::bufferMemoryUsage( ID3D11B...
  function __m128i (line 78) | __m128i __declspec( noinline ) DirectCompute::resourceMemoryUsage( ID3D1...
  function ID3D11Device (line 94) | ID3D11Device* DirectCompute::device()
  function ID3D11DeviceContext (line 102) | ID3D11DeviceContext* DirectCompute::context()
  function sGpuInfo (line 110) | const sGpuInfo& DirectCompute::gpuInfo()
  function LookupTables (line 118) | const LookupTables& DirectCompute::lookupTables()
  function DbgNanTest (line 154) | const DbgNanTest& DirectCompute::getNanTestBuffers()

FILE: Whisper/ML/Device.h
  function namespace (line 7) | namespace DirectCompute

FILE: Whisper/ML/LookupTables.cpp
  function HRESULT (line 10) | HRESULT uploadLookupTable( const std::array<uint16_t, 0x10000>& rsi, CCo...
  function HRESULT (line 31) | HRESULT LookupTables::create()
  function HRESULT (line 49) | HRESULT LookupTables::createClone( const LookupTables& source )
  function __m128i (line 62) | __m128i LookupTables::getMemoryUsage() const

FILE: Whisper/ML/LookupTables.h
  function namespace (line 4) | namespace DirectCompute

FILE: Whisper/ML/LookupTablesData.cpp
  function fp32 (line 10) | inline float fp32( uint16_t f16 )
  function fp16 (line 17) | inline uint16_t fp16( float fp32 )
  function computeGelu (line 28) | inline float computeGelu( float x )

FILE: Whisper/ML/LookupTablesData.h
  function namespace (line 5) | namespace DirectCompute

FILE: Whisper/ML/MlContext.cpp
  type sFlashAttentionConstants (line 163) | struct sFlashAttentionConstants
  type sFlashAttnDispatchInfo (line 172) | struct sFlashAttnDispatchInfo
  function sFlashAttnDispatchInfo (line 178) | sFlashAttnDispatchInfo makeFlashAttentionConstants( CComPtr<ID3D11Buffer...
  function roundUp32 (line 250) | inline uint32_t roundUp32( uint32_t x )
  function Tensor (line 373) | Tensor MlContext::add( const Tensor& a, const Tensor& b )
  function bitcast (line 413) | uint32_t bitcast( float val )
  function canAddRows (line 578) | inline bool canAddRows( const Tensor& tokenEmbedding, const Tensor& posi...
  function Tensor (line 588) | Tensor MlContext::addRows( const Tensor& tokenEmbedding, const Tensor& p...
  function Tensor (line 620) | Tensor MlContext::reshapePanels( const Tensor& a )
  function Tensor (line 677) | Tensor MlContext::mulMatTiledEx( const Tensor& a, const Tensor& b )
  function Tensor (line 703) | Tensor MlContext::mulMatByRowTiledEx( const Tensor& a, const Tensor& b )
  function __m128i (line 741) | __m128i MlContext::getMemoryUse() const

FILE: Whisper/ML/MlContext.dbg.cpp
  function dbgPrintSizeDiff (line 9) | static void dbgPrintSizeDiff( const char* what, __m128i ref, __m128i gpu )

FILE: Whisper/ML/MlContext.h
  function namespace (line 9) | namespace DirectCompute

FILE: Whisper/ML/Reshaper.cpp
  type Constants (line 11) | struct Constants
  function HRESULT (line 23) | HRESULT DirectCompute::Reshaper::createConstants()
  function HRESULT (line 30) | HRESULT DirectCompute::Reshaper::makePanels( Tensor& tensor, eDataType d...

FILE: Whisper/ML/Reshaper.h
  function namespace (line 4) | namespace DirectCompute

FILE: Whisper/ML/TempBuffers.cpp
  function HRESULT (line 10) | HRESULT TempBuffers::Buffer::resize( DXGI_FORMAT format, size_t elements...
  function TensorGpuViews (line 28) | const TensorGpuViews& TempBuffers::fp16( size_t countElements, bool zero...
  function TensorGpuViews (line 36) | const TensorGpuViews& TempBuffers::fp16_2( size_t countElements, bool ze...
  function TensorGpuViews (line 44) | const TensorGpuViews& TempBuffers::fp32( size_t countElements, bool zero...
  function __m128i (line 52) | __m128i TempBuffers::getMemoryUse() const

FILE: Whisper/ML/TempBuffers.h
  function namespace (line 4) | namespace DirectCompute

FILE: Whisper/ML/Tensor.cpp
  function Tensor (line 30) | Tensor& Tensor::operator=( const Tensor& that )
  function Tensor (line 42) | Tensor& Tensor::operator=( Tensor&& that ) noexcept
  function HRESULT (line 68) | HRESULT Tensor::create( const ggml_tensor& ggml, eBufferUse usage, bool ...
  function HRESULT (line 124) | HRESULT Tensor::createImmutable( eDataType type, const std::array<int, 4...
  function HRESULT (line 157) | HRESULT Tensor::create( eDataType type, std::initializer_list<uint32_t> ...
  function HRESULT (line 214) | HRESULT Tensor::create( eDataType type, std::initializer_list<uint32_t> ...
  function HRESULT (line 220) | HRESULT Tensor::create( eDataType type, const std::array<uint32_t, 4>& s...
  function eDataType (line 226) | eDataType Tensor::getType() const
  function Tensor (line 329) | Tensor Tensor::reshape3d( uint32_t ne0, uint32_t ne1, uint32_t ne2 ) const

FILE: Whisper/ML/Tensor.h
  function namespace (line 6) | namespace DirectCompute

FILE: Whisper/ML/TensorEx.cpp
  function HRESULT (line 8) | HRESULT TensorEx::create( const ggml_tensor& ggml, eBufferUse usage, boo...
  function HRESULT (line 46) | HRESULT TensorEx::create( eDataType type, eBufferUse usage, const std::a...
  function HRESULT (line 57) | HRESULT TensorEx::getViewSize( uint32_t& cbElement, uint32_t& countEleme...
  function HRESULT (line 75) | HRESULT TensorEx::download( void* rdi, size_t cb ) const
  function HRESULT (line 90) | HRESULT TensorEx::download( void* rdi ) const

FILE: Whisper/ML/TensorEx.h
  function namespace (line 4) | namespace DirectCompute

FILE: Whisper/ML/TensorGpuViews.cpp
  function HRESULT (line 5) | HRESULT TensorGpuViews::create( ID3D11Buffer* gpuBuffer, DXGI_FORMAT for...

FILE: Whisper/ML/TensorGpuViews.h
  function namespace (line 5) | namespace DirectCompute

FILE: Whisper/ML/TensorShape.cpp
  function HRESULT (line 23) | HRESULT TensorShape::create( const ggml_tensor& ggml )

FILE: Whisper/ML/TensorShape.h
  type ggml_tensor (line 6) | struct ggml_tensor
  function namespace (line 9) | namespace DirectCompute
  function isSameShape (line 97) | inline bool isSameShape( const TensorShape& t0, const TensorShape& t1 )
  function isSameShapeAndLayout (line 105) | inline bool isSameShapeAndLayout( const TensorShape& t0, const TensorSha...

FILE: Whisper/ML/TensorsArena.cpp
  function roundUpPower2 (line 6) | static inline uint32_t roundUpPower2( uint32_t x )
  function Tensor (line 51) | Tensor PooledTensor::tensor( eDataType type, const std::array<uint32_t, ...
  function Tensor (line 82) | Tensor TensorsArena::ArenaImpl::tensor( const std::array<uint32_t, 4>& ne )
  function Tensor (line 104) | Tensor TensorsArena::tensor( eDataType type, const std::array<uint32_t, ...
  function __m128i (line 122) | __m128i TensorsArena::ArenaImpl::getMemoryUse() const
  function __m128i (line 134) | __m128i TensorsArena::getMemoryUse() const
  function HRESULT (line 142) | HRESULT PooledTensor::zeroMemory()
  function HRESULT (line 157) | HRESULT TensorsArena::ArenaImpl::zeroMemory()
  function HRESULT (line 164) | HRESULT TensorsArena::zeroMemory()

FILE: Whisper/ML/TensorsArena.h
  function namespace (line 4) | namespace DirectCompute

FILE: Whisper/ML/mlUtils.cpp
  type DirectCompute (line 9) | namespace DirectCompute
    function zeroMemory (line 11) | void zeroMemory( ID3D11UnorderedAccessView* uav, uint32_t length, bool...
    function fillTensorWithNaN (line 29) | void fillTensorWithNaN( ID3D11UnorderedAccessView* uav )
    function scanTensorForNaN (line 38) | bool scanTensorForNaN( ID3D11ShaderResourceView* tensor, uint32_t leng...
    function HRESULT (line 76) | HRESULT cloneResourceView( ID3D11ShaderResourceView* rsi, ID3D11Shader...

FILE: Whisper/ML/mlUtils.h
  function namespace (line 3) | namespace DirectCompute

FILE: Whisper/ML/reshapedMultiply.h
  function namespace (line 4) | namespace DirectCompute

FILE: Whisper/ML/tensorOpsTests.h
  function namespace (line 4) | namespace DirectCompute

FILE: Whisper/ML/testUtils.cpp
  function __m256 (line 11) | __forceinline __m256 load( const float* rsi )
  function __m256 (line 16) | __forceinline __m256 load( const uint16_t* rsi )
  function loadPartial (line 22) | __forceinline void loadPartial( const uint16_t* x, const uint16_t* y, si...
  function __m128 (line 74) | inline __m128 loadFloat2( const float* rsi )
  function __m128 (line 78) | inline __m128 loadFloat3( const float* rsi )
  function loadPartial (line 84) | __forceinline void loadPartial( const float* x, const float* y, size_t c...
  function horizontalMaximum (line 134) | __forceinline float horizontalMaximum( __m256 v )
  function horizontalSum (line 143) | __forceinline double horizontalSum( __m256 v )
  function __m256 (line 154) | __m256 maskInfNan( __m256 diff, __m256 a, __m256 b )
  class DiffAcc (line 163) | class DiffAcc
    method add (line 170) | __forceinline void add( __m256 a, __m256 b )
    method sTensorDiff (line 180) | __forceinline sTensorDiff reduce( size_t count )
  function sTensorDiff (line 191) | static sTensorDiff __declspec( noinline ) diffVectors( const E* a, const...
  function sTensorDiff (line 212) | sTensorDiff DirectCompute::computeDiff( const float* a, const float* b, ...
  function sTensorDiff (line 217) | sTensorDiff DirectCompute::computeDiff( const uint16_t* a, const uint16_...
  function HRESULT (line 231) | HRESULT DirectCompute::dbgWriteBinaryFile( LPCTSTR fileName, const void*...
  function sTensorDiff (line 246) | sTensorDiff DirectCompute::computeDiff( const Tensor& a, const Tensor& b )
  function printUniqueTensorSize (line 328) | void printUniqueTensorSize( const char* name, const int* lhs, const int*...

FILE: Whisper/ML/testUtils.h
  function namespace (line 8) | namespace std

FILE: Whisper/Utils/CpuProfiler.cpp
  function qpcNow (line 8) | inline int64_t qpcNow()
  class CpuTimescale (line 15) | class CpuTimescale
    method CpuTimescale (line 25) | CpuTimescale() :
    method computeTicks (line 30) | inline uint64_t computeTicks( uint64_t tsc )

FILE: Whisper/Utils/CpuProfiler.h
  function namespace (line 3) | namespace Whisper

FILE: Whisper/Utils/DelayExecution.cpp
  function HRESULT (line 10) | inline HRESULT sleepImpl( HANDLE timer )

FILE: Whisper/Utils/DelayExecution.h
  function class (line 5) | class DelayExecution

FILE: Whisper/Utils/GpuProfiler.cpp
  function HRESULT (line 31) | HRESULT GpuProfiler::Queue::create()
  function getTimestamp (line 48) | static uint64_t getTimestamp( ID3D11Query* query, const DelayExecution& ...
  function D3D11_QUERY_DATA_TIMESTAMP_DISJOINT (line 63) | static D3D11_QUERY_DATA_TIMESTAMP_DISJOINT waitForDisjointData( ID3D11Qu...
  function makeTagKey (line 126) | static inline uint32_t makeTagKey( uint16_t cs, uint16_t tag )
  function HRESULT (line 205) | HRESULT GpuProfiler::create( size_t maxDepth )
  function HRESULT (line 325) | HRESULT GpuProfilerSimple::create()
  function HRESULT (line 341) | HRESULT GpuProfilerSimple::time( uint64_t& rdi ) const

FILE: Whisper/Utils/GpuProfiler.h
  type struct (line 8) | enum struct
  type struct (line 19) | enum struct
  function class (line 21) | class GpuProfiler

FILE: Whisper/Utils/GpuProfilerSimple.h
  function namespace (line 5) | namespace DirectCompute

FILE: Whisper/Utils/LZ4/lz4.c
  function LZ4_isAligned (line 290) | static int LZ4_isAligned(const void* ptr, size_t alignment)
  type BYTE (line 302) | typedef  uint8_t BYTE;
  type U16 (line 303) | typedef uint16_t U16;
  type U32 (line 304) | typedef uint32_t U32;
  type S32 (line 305) | typedef  int32_t S32;
  type U64 (line 306) | typedef uint64_t U64;
  type uptrval (line 307) 
Condensed preview — 485 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (3,726K chars).
[
  {
    "path": ".gitignore",
    "chars": 459,
    "preview": ".vs/\nComLightLib/x64/\nWhisper/x64/\nx64/\nTools/CompressShaders/bin/\nTools/CompressShaders/obj/\nWhisper/D3D/shaderData-Deb"
  },
  {
    "path": "ComLightLib/ComLightLib.vcxproj",
    "chars": 5490,
    "preview": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<Project DefaultTargets=\"Build\" ToolsVersion=\"15.0\" xmlns=\"http://schemas.microso"
  },
  {
    "path": "ComLightLib/ComLightLib.vcxproj.filters",
    "chars": 1097,
    "preview": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<Project ToolsVersion=\"4.0\" xmlns=\"http://schemas.microsoft.com/developer/msbuil"
  },
  {
    "path": "ComLightLib/Exception.hpp",
    "chars": 880,
    "preview": "#pragma once\n\nnamespace ComLight\n{\n\tclass Exception : public std::runtime_error\n\t{\n\t\t// I don't like C++ exceptions too "
  },
  {
    "path": "ComLightLib/Readme.txt",
    "chars": 123,
    "preview": "Copy-pasted from there:\nhttps://github.com/Const-me/ComLightInterop/tree/master/ComLightLib\nWith only a few minor chang"
  },
  {
    "path": "ComLightLib/client/CComPtr.hpp",
    "chars": 1900,
    "preview": "#pragma once\n\nnamespace ComLight\n{\n\t// COM smart pointer, very comparable to CComPtr from ATL\n\ttemplate <class I>\n\tclass"
  },
  {
    "path": "ComLightLib/comLightClient.h",
    "chars": 551,
    "preview": "#pragma once\n#include \"comLightCommon.h\"\n#include \"client/CComPtr.hpp\"\n#include \"utils/typeTraits.hpp\"\n\nnamespace ComLig"
  },
  {
    "path": "ComLightLib/comLightCommon.h",
    "chars": 159,
    "preview": "#pragma once\n#include \"hresult.h\"\n\n#ifdef _MSC_VER\n#include <guiddef.h>\n#else\n#include \"pal/guiddef.h\"\nusing LPCTSTR = c"
  },
  {
    "path": "ComLightLib/comLightServer.h",
    "chars": 480,
    "preview": "#pragma once\n#include \"comLightCommon.h\"\n#include \"client/CComPtr.hpp\"\n\n#include \"server/ObjectRoot.hpp\"\n#include \"serve"
  },
  {
    "path": "ComLightLib/hresult.h",
    "chars": 725,
    "preview": "#pragma once\n#include <stdint.h>\n#ifdef _MSC_VER\n#include <winerror.h>\n#include <OleCtl.h>\n#else\n#include \"pal/hresult.h"
  },
  {
    "path": "ComLightLib/pal/guiddef.h",
    "chars": 393,
    "preview": "#pragma once\n#include <stdint.h>\n#include <array>\n#ifndef GUID_DEFINED\n#define GUID_DEFINED\n#endif\n\nstruct GUID\n{\n\tuint3"
  },
  {
    "path": "ComLightLib/pal/hresult.h",
    "chars": 5162,
    "preview": "#pragma once\n#include <stdint.h>\nusing HRESULT = int32_t;\n#define _HRESULT_TYPEDEF_(_sc) ((HRESULT)_sc)\n#define SEVERITY"
  },
  {
    "path": "ComLightLib/server/Object.hpp",
    "chars": 3468,
    "preview": "#pragma once\n#include <type_traits>\n#include \"../comLightClient.h\"\n#include \"../utils/typeTraits.hpp\"\n#include \"../Excep"
  },
  {
    "path": "ComLightLib/server/ObjectRoot.hpp",
    "chars": 1196,
    "preview": "#pragma once\n#include \"RefCounter.hpp\"\n#include \"../comLightCommon.h\"\n#include \"../utils/typeTraits.hpp\"\n\nnamespace ComL"
  },
  {
    "path": "ComLightLib/server/RefCounter.hpp",
    "chars": 922,
    "preview": "#pragma once\n#include <atomic>\n#include <assert.h>\n#include <limits.h>\n\nnamespace ComLight\n{\n\t// Very base class of obje"
  },
  {
    "path": "ComLightLib/server/freeThreadedMarshaller.cpp",
    "chars": 576,
    "preview": "#include \"freeThreadedMarshaller.h\"\n#ifdef _MSC_VER\n#include <combaseapi.h>\n\nHRESULT ComLight::details::createFreeThread"
  },
  {
    "path": "ComLightLib/server/freeThreadedMarshaller.h",
    "chars": 1504,
    "preview": "#pragma once\n#ifdef _MSC_VER\n#include \"../comLightCommon.h\"\n\nnamespace ComLight\n{\n\tnamespace details\n\t{\n\t\tHRESULT create"
  },
  {
    "path": "ComLightLib/server/interfaceMap.h",
    "chars": 1135,
    "preview": "#pragma once\n#include \"../utils/typeTraits.hpp\"\n\n// Unlike ATL, the interface map is optional for ComLight.\n// If you wo"
  },
  {
    "path": "ComLightLib/streams.h",
    "chars": 1725,
    "preview": "#pragma once\n#include <vector>\n#include \"comLightCommon.h\"\n\n// COM interfaces to marshal streams across the interop.\nnam"
  },
  {
    "path": "ComLightLib/unknwn.h",
    "chars": 1013,
    "preview": "#pragma once\n#include <type_traits>\n\n// Calling conventions\n#ifdef _MSC_VER\n#define COMLIGHTCALL __stdcall\n#define DECLS"
  },
  {
    "path": "ComLightLib/utils/guid_parse.hpp",
    "chars": 3190,
    "preview": "// https://github.com/tobias-loew/constexpr-GUID-cpp-11\n\n//-------------------------------------------------------------"
  },
  {
    "path": "ComLightLib/utils/typeTraits.hpp",
    "chars": 2481,
    "preview": "#pragma once\n#include <type_traits>\n\nnamespace ComLight\n{\n\tnamespace details\n\t{\n\t\ttemplate<class TResult, class TValue>\n"
  },
  {
    "path": "ComputeShaders/ComputeShaders.cpp",
    "chars": 27,
    "preview": "void fnComputeShaders()\n{\n}"
  },
  {
    "path": "ComputeShaders/ComputeShaders.vcxproj",
    "chars": 9755,
    "preview": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<Project DefaultTargets=\"Build\" xmlns=\"http://schemas.microsoft.com/developer/msb"
  },
  {
    "path": "ComputeShaders/ComputeShaders.vcxproj.filters",
    "chars": 2831,
    "preview": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<Project ToolsVersion=\"4.0\" xmlns=\"http://schemas.microsoft.com/developer/msbuil"
  },
  {
    "path": "ComputeShaders/Readme.txt",
    "chars": 755,
    "preview": "This project compiles all the compute shaders which implement the model.\n\nMany shaders come in 2 versions, something.hl"
  },
  {
    "path": "ComputeShaders/add.hlsl",
    "chars": 99,
    "preview": "inline float compute( float a, float b )\n{\n\treturn a + b;\n}\n\n#include \"componentwiseBinaryOp.hlsli\""
  },
  {
    "path": "ComputeShaders/addInPlace.hlsl",
    "chars": 929,
    "preview": "#ifndef THREADS\n#define THREADS 512\n#endif\n\nBuffer<float> arg0: register( t0 );\nRWBuffer<float> result: register( u0 );\n"
  },
  {
    "path": "ComputeShaders/addRepeat.hlsl",
    "chars": 1979,
    "preview": "// Compute tensor = tensor + repeat( pattern, tensor ) in 1 shot, without VRAM allocations\n// Dispatch [ nb[ 1 ], nb[ 2 "
  },
  {
    "path": "ComputeShaders/addRepeat64.hlsl",
    "chars": 44,
    "preview": "#define THREADS 64\n#include \"addRepeat.hlsl\""
  },
  {
    "path": "ComputeShaders/addRepeatEx.hlsl",
    "chars": 2391,
    "preview": "// An equivalent of \"addRepeat.hlsl\" followed by \"addInPlace.hlsl\".\n// Merging into a single shader saves some global me"
  },
  {
    "path": "ComputeShaders/addRepeatGelu.hlsl",
    "chars": 2525,
    "preview": "// Compute tensor = GELU( tensor + repeat( pattern, tensor ) ) in 1 shot, without VRAM allocations\n// Dispatch [ nb[ 1 ]"
  },
  {
    "path": "ComputeShaders/addRepeatGelu64.hlsl",
    "chars": 48,
    "preview": "#define THREADS 64\n#include \"addRepeatGelu.hlsl\""
  },
  {
    "path": "ComputeShaders/addRepeatScale.hlsl",
    "chars": 2069,
    "preview": "// Compute tensor = ( tensor + repeat( pattern, tensor ) ) * scale in 1 shot, without VRAM allocations\n// Dispatch [ nb["
  },
  {
    "path": "ComputeShaders/addRows.hlsl",
    "chars": 1240,
    "preview": "#ifndef THREADS\n#define THREADS 256\n#endif\n\n// dec.tokenEmbedding tensor\nBuffer<float> tokenEmbedding: register( t0 );\n/"
  },
  {
    "path": "ComputeShaders/componentwiseBinaryOp.hlsli",
    "chars": 1093,
    "preview": "Buffer<float> arg0: register( t0 );\nBuffer<float> arg1: register( t1 );\nRWBuffer<float> result: register( u0 );\n\ncbuffer"
  },
  {
    "path": "ComputeShaders/convolutionMain.hlsl",
    "chars": 1838,
    "preview": "// ggml_compute_forward_conv_1d_1s_f16_f32, GGML_TASK_COMPUTE implementation\n// Dispatch [ ne10, ne02, 1 ] thread groups"
  },
  {
    "path": "ComputeShaders/convolutionMain2.hlsl",
    "chars": 1690,
    "preview": "// ggml_compute_forward_conv_1d_2s_f16_f32, GGML_TASK_COMPUTE implementation\n// Dispatch [ ne10 / 2, ne02, 1 ] thread gr"
  },
  {
    "path": "ComputeShaders/convolutionMain2Fixed.hlsl",
    "chars": 3289,
    "preview": "// Optimized version of convolutionMain2.hlsl for kernel size = 3\n// Dispatch [ ( ( ne10 / 2 ) + TILE_Y - 1 ) / TILE_Y, "
  },
  {
    "path": "ComputeShaders/convolutionPrep1.hlsl",
    "chars": 978,
    "preview": "// ggml_compute_forward_conv_1d_1s_f16_f32, prepare kernel data (src0)\n// Dispatch [ ne01, ne02, 1 ] thread groups\nBuffe"
  },
  {
    "path": "ComputeShaders/convolutionPrep2.hlsl",
    "chars": 1020,
    "preview": "// ggml_compute_forward_conv_1d_1s_f16_f32, prepare source data (src1)\n// Dispatch [ ne11, 1, 1 ] thread groups\nBuffer<f"
  },
  {
    "path": "ComputeShaders/copyConvert.hlsl",
    "chars": 1337,
    "preview": "// ggml_compute_forward_dup_f32 when we only need to convert types, but not reshape the tensor\n// Dispatch [ ne01, ne02,"
  },
  {
    "path": "ComputeShaders/copyTranspose.hlsl",
    "chars": 1674,
    "preview": "// ggml_compute_forward_dup_f32 when we actually need to reshape the tensor\n// Dispatch [ ne01, ne02, ne03 ] thread grou"
  },
  {
    "path": "ComputeShaders/dbgFindNaN.hlsl",
    "chars": 2058,
    "preview": "// When reset = TRUE, write zero to the output buffer\n// When reset = FALSE, test input tensor for NaN, when found at le"
  },
  {
    "path": "ComputeShaders/diagMaskInf.hlsl",
    "chars": 806,
    "preview": "// ggml_compute_forward_diag_mask_inf_f32\nRWBuffer<float> result: register( u0 );\n\ncbuffer Constants: register( b0 )\n{\n\t"
  },
  {
    "path": "ComputeShaders/flashAttention.hlsl",
    "chars": 4234,
    "preview": "// Ported from ggml_compute_forward_flash_attn_f16\n// Dispatch with [ neq1*neq2*neq3, 1, 1 ] thread groups\n\n#include \"fl"
  },
  {
    "path": "ComputeShaders/flashAttentionCommon.hlsli",
    "chars": 1877,
    "preview": "// Ported from ggml_compute_forward_flash_attn_f16\n// Dispatch with [ neq1*neq2*neq3, 1, 1 ] thread groups\nBuffer<float>"
  },
  {
    "path": "ComputeShaders/flashAttentionCompat1.hlsl",
    "chars": 3032,
    "preview": "// Dispatch with [ neq1*neq2*neq3, 1, 1 ] thread groups\n#include \"flashAttentionCommon.hlsli\"\n#include \"groupReduce.hlsl"
  },
  {
    "path": "ComputeShaders/flashAttentionCompat2.hlsl",
    "chars": 2836,
    "preview": "// Dispatch with [ ( neq1*neq2*neq3 + 31 ) / 32, 1, 1 ] thread groups\n#include \"flashAttentionCommon.hlsli\"\nBuffer<uint>"
  },
  {
    "path": "ComputeShaders/flashAttentionCompat3.hlsl",
    "chars": 3005,
    "preview": "// Dispatch with [ neq1*neq2*neq3, 1, 1 ] thread groups\n#include \"flashAttentionCommon.hlsli\"\n#include \"groupReduce.hlsl"
  },
  {
    "path": "ComputeShaders/fmaRepeat1.hlsl",
    "chars": 2230,
    "preview": "// Implementation of fmaRepeat() when both source arguments have same size and strides\n// Dispatch [ nb[ 1 ], nb[ 2 ], n"
  },
  {
    "path": "ComputeShaders/fmaRepeat164.hlsl",
    "chars": 45,
    "preview": "#define THREADS 64\n#include \"fmaRepeat1.hlsl\""
  },
  {
    "path": "ComputeShaders/fmaRepeat2.hlsl",
    "chars": 1509,
    "preview": "// Implementation of fmaRepeat() when source arguments have different shape or VRAM layout\n// Dispatch [ nb[ 1 ], nb[ 2 "
  },
  {
    "path": "ComputeShaders/fp64Utils.hlsli",
    "chars": 894,
    "preview": "// TODO: compile another version of these shader, and use it on GPUs with ExtendedDoublesShaderInstructions flag, will b"
  },
  {
    "path": "ComputeShaders/groupReduce.hlsli",
    "chars": 3778,
    "preview": "groupshared float sharedAccumulators[ 32 ];\n\n// Compute horisontal sum of the numbers. The result is only correct on the"
  },
  {
    "path": "ComputeShaders/groupReduce64.hlsli",
    "chars": 1284,
    "preview": "groupshared float sharedAccumulators[ 64 ];\n\n// Compute horisontal sum of the numbers. The result is only correct on the"
  },
  {
    "path": "ComputeShaders/matReshapePanels.hlsl",
    "chars": 3206,
    "preview": "// This shader reshapes a matrix into the shape expected by mulMatTiledEx.hlsl and mulMatByRowTiledEx.hlsl compute shade"
  },
  {
    "path": "ComputeShaders/miscUtils.hlsli",
    "chars": 2218,
    "preview": "// When GPUs are converting FP32 to FP16, they always truncate towards 0, documented there:\n// https://learn.microsoft.c"
  },
  {
    "path": "ComputeShaders/mulMatByRow.hlsl",
    "chars": 1299,
    "preview": "// Matrix * row product, like [ E0, E1, E2, E3 ] * [ E0, 1, E2, E3 ] = [ E1, 1, E2, E3 ]\n// Dispatch [ E1, E2, E3 ] grou"
  },
  {
    "path": "ComputeShaders/mulMatByRow64.hlsl",
    "chars": 2652,
    "preview": "// Matrix * row product, like [ E0, E1, E2, E3 ] * [ E0, 1, E2, E3 ] = [ E1, 1, E2, E3 ]\n// Dispatch [ E1, E2, E3 ] grou"
  },
  {
    "path": "ComputeShaders/mulMatByRowTiled.hlsl",
    "chars": 5429,
    "preview": "// Matrix * row product, like [ E0, E1, E2, E3 ] * [ E0, 1, E2, E3 ] = [ E1, 1, E2, E3 ]\n// Dispatch [ ( E1 + TILE_Y - 1"
  },
  {
    "path": "ComputeShaders/mulMatByRowTiledEx.hlsl",
    "chars": 3794,
    "preview": "// matrix*row vector product, needs first argument reshaped into a sequence of horizontal column major panels\n#ifndef TI"
  },
  {
    "path": "ComputeShaders/mulMatByScalar.hlsl",
    "chars": 1086,
    "preview": "// Matrix * scalar product, like [ 1, E1, E2, E3 ] * [ 1, 1, E2, E3 ] = [ E1, 1, E2, E3 ]\n// Dispatch [ E2, E3, 1 ] thre"
  },
  {
    "path": "ComputeShaders/mulMatDotMain.hlsl",
    "chars": 2697,
    "preview": "// GGML_TASK_COMPUTE step for matrix*matrix product, where nb01 >= nb00;\n// Dispatch with [ ne11, ne01*ne02*ne03 ] threa"
  },
  {
    "path": "ComputeShaders/mulMatDotReshape.hlsl",
    "chars": 933,
    "preview": "// GGML_TASK_INIT step for matrix*matrix product, where nb01 >= nb00;\n// Dispatch with [ ne11, ne12 ] groups\nBuffer<floa"
  },
  {
    "path": "ComputeShaders/mulMatMadMain.hlsl",
    "chars": 4265,
    "preview": "// GGML_TASK_COMPUTE step for matrix*matrix product, where nb01 < nb00\nBuffer<float> arg0: register( t0 );\nBuffer<float>"
  },
  {
    "path": "ComputeShaders/mulMatTiled.hlsl",
    "chars": 9009,
    "preview": "// This compute shader implements matrix*matrix product, using tiling and many other tricks to improve the performance\n/"
  },
  {
    "path": "ComputeShaders/mulMatTiledEx.hlsl",
    "chars": 6823,
    "preview": "// This compute shader implements yet another version of matrix*matrix product\n// For optimal VRAM access pattern, it re"
  },
  {
    "path": "ComputeShaders/norm.hlsl",
    "chars": 2124,
    "preview": "// Ported from ggml_compute_forward_norm_f32\n// Dispatch [ ne01, ne02, ne03 ] thread groups of this shader\nBuffer<float>"
  },
  {
    "path": "ComputeShaders/normCompat.hlsl",
    "chars": 2021,
    "preview": "// Ported from ggml_compute_forward_norm_f32\n// Dispatch [ ( ne01 + 31 ) / 32, ne02, ne03 ] thread groups of this shader"
  },
  {
    "path": "ComputeShaders/normFixed.hlsl",
    "chars": 3323,
    "preview": "// Ported from ggml_compute_forward_norm_f32\n// Dispatch [ ne01, ne02, ne03 ] thread groups of this shader\nBuffer<float>"
  },
  {
    "path": "ComputeShaders/normFixed64.hlsl",
    "chars": 44,
    "preview": "#define THREADS 64\n#include \"normFixed.hlsl\""
  },
  {
    "path": "ComputeShaders/repeatUtils.hlsli",
    "chars": 811,
    "preview": "inline uint rowOffset( uint3 idx, uint4 strides )\n{\n\treturn idx[ 0 ] * strides[ 1 ] + idx[ 1 ] * strides[ 2 ] + idx[ 2 ]"
  },
  {
    "path": "ComputeShaders/scaleInPlace.hlsl",
    "chars": 531,
    "preview": "RWBuffer<float> buffer: register( u0 );\n\ncbuffer Constants: register( b0 )\n{\n\tuint4 src0_elements: packoffset( c0 );\n\tui"
  },
  {
    "path": "ComputeShaders/softMax.hlsl",
    "chars": 2419,
    "preview": "// Dispatch [ nr, 1, 1 ] thread groups of this shader\nRWBuffer<float> result: register( u0 );\n\ncbuffer Constants: regist"
  },
  {
    "path": "ComputeShaders/softMax64.hlsl",
    "chars": 42,
    "preview": "#define THREADS 64\n#include \"softMax.hlsl\""
  },
  {
    "path": "ComputeShaders/softMaxCompat.hlsl",
    "chars": 1185,
    "preview": "// ggml_compute_forward_soft_max_f32\n// Dispatch [ ( nr + 31 ) / 32, 1, 1 ] thread groups of this shader\nRWBuffer<float>"
  },
  {
    "path": "ComputeShaders/softMaxFixed.hlsl",
    "chars": 2102,
    "preview": "// Special softMax shader for matrices with rows of 1500 elements.\n// Uses group shared buffer of that length to save gl"
  },
  {
    "path": "ComputeShaders/softMaxLong.hlsl",
    "chars": 302,
    "preview": "// This version is for the \"dec.probs\" shader tag\n// The input tensor has a size [ 51865, 3 ], a very long tensor with j"
  },
  {
    "path": "ComputeShaders/zeroMemory.hlsl",
    "chars": 926,
    "preview": "RWBuffer<float> result: register( u0 );\n\ncbuffer Constants: register( b0 )\n{\n\tuint elements: packoffset( c0.x );\n\tbool w"
  },
  {
    "path": "Examples/MicrophoneCS/CaptureThread.cs",
    "chars": 1391,
    "preview": "using System.Runtime.ExceptionServices;\nusing Whisper;\n\nnamespace MicrophoneCS\n{\n\tsealed class CaptureThread: CaptureCa"
  },
  {
    "path": "Examples/MicrophoneCS/CommandLineArgs.cs",
    "chars": 6874,
    "preview": "using System.Globalization;\nusing System.Reflection;\nusing Whisper;\n\nnamespace MicrophoneCS\n{\n\tsealed record class Comm"
  },
  {
    "path": "Examples/MicrophoneCS/MicrophoneCS.cs",
    "chars": 1668,
    "preview": "using Whisper;\n\nnamespace MicrophoneCS\n{\n\tstatic class Program\n\t{\n\t\tstatic int Main( string[] args )\n\t\t{\n\t\t\ttry\n\t\t\t{\n\t\t"
  },
  {
    "path": "Examples/MicrophoneCS/MicrophoneCS.csproj",
    "chars": 791,
    "preview": "<Project Sdk=\"Microsoft.NET.Sdk\">\n\n\t<PropertyGroup>\n\t\t<OutputType>Exe</OutputType>\n\t\t<TargetFramework>net6.0-windows</T"
  },
  {
    "path": "Examples/MicrophoneCS/Readme.txt",
    "chars": 109,
    "preview": "This example builds .NET 6 console application which shows how to use audio capture API of the .NET wrapper."
  },
  {
    "path": "Examples/MicrophoneCS/TranscribeCallbacks.cs",
    "chars": 2992,
    "preview": "using System.Globalization;\nusing Whisper;\n\nnamespace MicrophoneCS\n{\n\t/// <summary>Implementation of Callbacks abstract"
  },
  {
    "path": "Examples/OldMain/OldMain.vcxproj",
    "chars": 5035,
    "preview": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<Project DefaultTargets=\"Build\" xmlns=\"http://schemas.microsoft.com/developer/msb"
  },
  {
    "path": "Examples/OldMain/OldMain.vcxproj.filters",
    "chars": 721,
    "preview": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<Project ToolsVersion=\"4.0\" xmlns=\"http://schemas.microsoft.com/developer/msbuil"
  },
  {
    "path": "Examples/OldMain/Readme.txt",
    "chars": 65,
    "preview": "This project builds the original whisper.cpp command-line sample"
  },
  {
    "path": "Examples/OldMain/Utils/Logger.cpp",
    "chars": 822,
    "preview": "#include <stdint.h>\n#include <vector>\n#include <cstdarg>\n#include \"Logger.h\"\n\nnamespace\n{\n\tvoid logMessage( const char* "
  },
  {
    "path": "Examples/OldMain/Utils/Logger.h",
    "chars": 742,
    "preview": "#pragma once\n\n#ifdef  __cplusplus\nextern \"C\" {\n#endif\n\nstruct ggml_tensor;\n\nvoid logError( const char8_t* pszFormat, ..."
  },
  {
    "path": "Examples/OldMain/dr_wav.h",
    "chars": 241358,
    "preview": "/*\nWAV audio loader and writer. Choice of public domain or MIT-0. See license statements at the end of this file.\ndr_wav"
  },
  {
    "path": "Examples/OldMain/main.cpp",
    "chars": 28768,
    "preview": "#include \"whisper.h\"\n\n// third-party utilities\n// use your favorite implementations\n#define DR_WAV_IMPLEMENTATION\n#inclu"
  },
  {
    "path": "Examples/TranscribeCS/AnsiCodes.cs",
    "chars": 1757,
    "preview": "using System.Runtime.InteropServices;\n\n/// <summary>Utility class to setup console coloring with ANSI codes.</summary>\n"
  },
  {
    "path": "Examples/TranscribeCS/CommandLineArgs.cs",
    "chars": 7131,
    "preview": "using System.Globalization;\nusing System.Reflection;\nusing Whisper;\n\nnamespace TranscribeCS\n{\n\tsealed record class Comm"
  },
  {
    "path": "Examples/TranscribeCS/Readme.txt",
    "chars": 125,
    "preview": "This example builds .NET 6 console application which shows how to transcribe or translate audio files with the .NET wra"
  },
  {
    "path": "Examples/TranscribeCS/Transcribe.cs",
    "chars": 2974,
    "preview": "using System.Globalization;\nusing Whisper;\n\nnamespace TranscribeCS\n{\n\t/// <summary>Implementation of Callbacks abstract"
  },
  {
    "path": "Examples/TranscribeCS/TranscribeCS.cs",
    "chars": 4153,
    "preview": "namespace TranscribeCS;\nusing Whisper;\n\nenum eFileOpenMode: byte\n{\n\t/// <summary>Decode chunks of audio directly from t"
  },
  {
    "path": "Examples/TranscribeCS/TranscribeCS.csproj",
    "chars": 684,
    "preview": "<Project Sdk=\"Microsoft.NET.Sdk\">\n\t<PropertyGroup>\n\t\t<OutputType>Exe</OutputType>\n\t\t<TargetFramework>net6.0-windows</Tar"
  },
  {
    "path": "Examples/WhisperDesktop/AppState.cpp",
    "chars": 4873,
    "preview": "#include \"stdafx.h\"\n#include \"AppState.h\"\n#include \"Utils/miscUtils.h\"\n#include <commctrl.h>\n#pragma comment(lib, \"Comct"
  },
  {
    "path": "Examples/WhisperDesktop/AppState.h",
    "chars": 1207,
    "preview": "#pragma once\n#include \"Utils/DebugConsole.h\"\n\nclass AppState\n{\n\tbool coInit = false;\n\tCRegKey registryKey;\n\tCIcon appIco"
  },
  {
    "path": "Examples/WhisperDesktop/CaptureDlg.cpp",
    "chars": 13692,
    "preview": "#include \"stdafx.h\"\n#include \"CaptureDlg.h\"\n\nHRESULT CaptureDlg::show()\n{\n\tauto res = DoModal( nullptr );\n\tif( res == -1"
  },
  {
    "path": "Examples/WhisperDesktop/CaptureDlg.h",
    "chars": 4137,
    "preview": "#pragma once\n#include \"AppState.h\"\n#include \"Utils/WTL/atlddx.h\"\n#include \"Utils/miscUtils.h\"\n#include \"Utils/LanguageDr"
  },
  {
    "path": "Examples/WhisperDesktop/CircleIndicator.cpp",
    "chars": 2822,
    "preview": "#include \"stdafx.h\"\n#include \"CircleIndicator.h\"\n#include <atltypes.h>\n#include \"AppState.h\"\n\nnamespace\n{\n\tstatic const"
  },
  {
    "path": "Examples/WhisperDesktop/CircleIndicator.h",
    "chars": 761,
    "preview": "#pragma once\n#include \"Utils/miscUtils.h\"\n#include \"Utils/WTL/atlcrack.h\"\n\n// This control renders a black circle, and i"
  },
  {
    "path": "Examples/WhisperDesktop/LoadModelDlg.cpp",
    "chars": 5972,
    "preview": "#include \"stdafx.h\"\n#include \"LoadModelDlg.h\"\n#include \"Utils/miscUtils.h\"\n#include \"Utils/logger.h\"\n#include \"ModelAdva"
  },
  {
    "path": "Examples/WhisperDesktop/LoadModelDlg.h",
    "chars": 2085,
    "preview": "#pragma once\n#include \"AppState.h\"\n#include \"Utils/WTL/atlddx.h\"\n#include \"Utils/miscUtils.h\"\n\nclass LoadModelDlg:\n\tpubl"
  },
  {
    "path": "Examples/WhisperDesktop/ModelAdvancedDlg.cpp",
    "chars": 2448,
    "preview": "#include \"stdafx.h\"\n#include \"ModelAdvancedDlg.h\"\nusing Whisper::eGpuModelFlags;\n\nstatic void __stdcall addGpu( const w"
  },
  {
    "path": "Examples/WhisperDesktop/ModelAdvancedDlg.h",
    "chars": 702,
    "preview": "#pragma once\n#include \"AppState.h\"\n#include \"Utils/WTL/atlddx.h\"\n#include \"Utils/miscUtils.h\"\n\nclass ModelAdvancedDlg :\n"
  },
  {
    "path": "Examples/WhisperDesktop/Readme.txt",
    "chars": 122,
    "preview": "This example shows how to consume the DLL from a C++ GUI application.\n\nThe GUI is implemented with ATL and WTL librarie"
  },
  {
    "path": "Examples/WhisperDesktop/Resource.h",
    "chars": 2764,
    "preview": "//{{NO_DEPENDENCIES}}\n// Microsoft Visual C++ generated include file.\n// Used by WhisperDesktop.rc\n//\n#define IDC_MYICON"
  },
  {
    "path": "Examples/WhisperDesktop/TranscribeDlg.cpp",
    "chars": 17200,
    "preview": "#include \"stdafx.h\"\n#include \"TranscribeDlg.h\"\n#include \"Utils/logger.h\"\n\nHRESULT TranscribeDlg::show()\n{\n\tauto res = Do"
  },
  {
    "path": "Examples/WhisperDesktop/TranscribeDlg.h",
    "chars": 4528,
    "preview": "#pragma once\n#include \"AppState.h\"\n#include \"Utils/WTL/atlddx.h\"\n#include \"Utils/WTL/atlcrack.h\"\n#include \"Utils/miscUti"
  },
  {
    "path": "Examples/WhisperDesktop/Utils/DebugConsole.cpp",
    "chars": 6883,
    "preview": "// https://github.com/Const-me/vis_avs_dx/blob/master/avs_dx/DxVisuals/Interop/ConsoleLogger.cpp\n#include \"stdafx.h\"\n#in"
  },
  {
    "path": "Examples/WhisperDesktop/Utils/DebugConsole.h",
    "chars": 1396,
    "preview": "#pragma once\n#include <whisperWindows.h>\n#include <deque>\n#include <unordered_set>\n\nclass AppState;\nclass DebugConsole\n{"
  },
  {
    "path": "Examples/WhisperDesktop/Utils/LanguageDropdown.cpp",
    "chars": 1886,
    "preview": "#include \"stdafx.h\"\n#include \"LanguageDropdown.h\"\n#include \"miscUtils.h\"\n\nnamespace\n{\n\tinline wchar_t toUpper( wchar_t c"
  },
  {
    "path": "Examples/WhisperDesktop/Utils/LanguageDropdown.h",
    "chars": 732,
    "preview": "#pragma once\n#include \"../AppState.h\"\n\n// Dropdown list which implements language selector control\nclass LanguageDropdow"
  },
  {
    "path": "Examples/WhisperDesktop/Utils/PendingState.cpp",
    "chars": 853,
    "preview": "#include \"stdafx.h\"\n#include \"PendingState.h\"\n\nvoid PendingState::initialize( std::initializer_list<HWND> editors, std::"
  },
  {
    "path": "Examples/WhisperDesktop/Utils/PendingState.h",
    "chars": 361,
    "preview": "#pragma once\n\n// Utility class to switch visual state of dialog controls between idle and pending\nclass PendingState\n{\n\t"
  },
  {
    "path": "Examples/WhisperDesktop/Utils/TranslateCheckbox.cpp",
    "chars": 628,
    "preview": "#include \"stdafx.h\"\n#include \"TranslateCheckbox.h\"\n\nstatic const LPCTSTR regValTranslate = L\"translate\";\n\nvoid Translate"
  },
  {
    "path": "Examples/WhisperDesktop/Utils/TranslateCheckbox.h",
    "chars": 263,
    "preview": "#pragma once\n#include \"../AppState.h\"\n\nclass TranslateCheckbox\n{\n\tHWND m_hWnd = nullptr;\npublic:\n\toperator HWND() const\n"
  },
  {
    "path": "Examples/WhisperDesktop/Utils/WTL/MS-PL.txt",
    "chars": 2619,
    "preview": "Microsoft Public License (MS-PL)\n\nThis license governs use of the accompanying software. If you use the software, you\nac"
  },
  {
    "path": "Examples/WhisperDesktop/Utils/WTL/ReadMe.html",
    "chars": 95938,
    "preview": "<html>\n\n<head>\n\t<meta http-equiv=\"Content-Language\" content=\"en-us\">\n\t<meta http-equiv=\"Content-Type\" content=\"text/html"
  },
  {
    "path": "Examples/WhisperDesktop/Utils/WTL/atlapp.h",
    "chars": 31056,
    "preview": "// Windows Template Library - WTL version 10.0\n// Copyright (C) Microsoft Corporation, WTL Team. All rights reserved.\n//"
  },
  {
    "path": "Examples/WhisperDesktop/Utils/WTL/atlcrack.h",
    "chars": 64515,
    "preview": "// Windows Template Library - WTL version 10.0\n// Copyright (C) Microsoft Corporation, WTL Team. All rights reserved.\n//"
  },
  {
    "path": "Examples/WhisperDesktop/Utils/WTL/atlctrls.h",
    "chars": 265023,
    "preview": "// Windows Template Library - WTL version 10.0\n// Copyright (C) Microsoft Corporation, WTL Team. All rights reserved.\n//"
  },
  {
    "path": "Examples/WhisperDesktop/Utils/WTL/atlddx.h",
    "chars": 15927,
    "preview": "// Windows Template Library - WTL version 10.0\n// Copyright (C) Microsoft Corporation, WTL Team. All rights reserved.\n//"
  },
  {
    "path": "Examples/WhisperDesktop/Utils/WTL/atlgdi.h",
    "chars": 85141,
    "preview": "// Windows Template Library - WTL version 10.0\n// Copyright (C) Microsoft Corporation, WTL Team. All rights reserved.\n//"
  },
  {
    "path": "Examples/WhisperDesktop/Utils/WTL/atlres.h",
    "chars": 9477,
    "preview": "// Windows Template Library - WTL version 10.0\n// Copyright (C) Microsoft Corporation, WTL Team. All rights reserved.\n//"
  },
  {
    "path": "Examples/WhisperDesktop/Utils/WTL/atluser.h",
    "chars": 32564,
    "preview": "// Windows Template Library - WTL version 10.0\n// Copyright (C) Microsoft Corporation, WTL Team. All rights reserved.\n//"
  },
  {
    "path": "Examples/WhisperDesktop/Utils/WTL/atlwinx.h",
    "chars": 15953,
    "preview": "// Windows Template Library - WTL version 10.0\n// Copyright (C) Microsoft Corporation, WTL Team. All rights reserved.\n//"
  },
  {
    "path": "Examples/WhisperDesktop/Utils/logger.cpp",
    "chars": 1955,
    "preview": "#include \"stdafx.h\"\n#include \"logger.h\"\n#include \"miscUtils.h\"\n\nnamespace\n{\n\tusing namespace Whisper;\n\n\t// Terminal colo"
  },
  {
    "path": "Examples/WhisperDesktop/Utils/logger.h",
    "chars": 1054,
    "preview": "#pragma once\n#include <whisperWindows.h>\n#include <cstdarg>\n\nvoid logMessage( Whisper::eLogLevel lvl, const char8_t* psz"
  },
  {
    "path": "Examples/WhisperDesktop/Utils/miscUtils.cpp",
    "chars": 5886,
    "preview": "#include \"stdafx.h\"\n#include \"miscUtils.h\"\n\nnamespace\n{\n\twchar_t* formatMessage( HRESULT hr )\n\t{\n\t\twchar_t* err;\n\t\tif( "
  },
  {
    "path": "Examples/WhisperDesktop/Utils/miscUtils.h",
    "chars": 2048,
    "preview": "#pragma once\n#include <iContext.h>\n#include \"logger.h\"\n\nCString formatErrorMessage( HRESULT hr );\n\nvoid reportFatalError"
  },
  {
    "path": "Examples/WhisperDesktop/WhisperDesktop.cpp",
    "chars": 1352,
    "preview": "#include \"stdafx.h\"\n#include \"AppState.h\"\n#include \"Utils/miscUtils.h\"\n#include \"LoadModelDlg.h\"\n#include \"TranscribeDlg"
  },
  {
    "path": "Examples/WhisperDesktop/WhisperDesktop.manifest",
    "chars": 923,
    "preview": "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n<assembly xmlns=\"urn:schemas-microsoft-com:asm.v1\" manifestVers"
  },
  {
    "path": "Examples/WhisperDesktop/WhisperDesktop.vcxproj",
    "chars": 7302,
    "preview": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<Project DefaultTargets=\"Build\" xmlns=\"http://schemas.microsoft.com/developer/msb"
  },
  {
    "path": "Examples/WhisperDesktop/WhisperDesktop.vcxproj.filters",
    "chars": 5084,
    "preview": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<Project ToolsVersion=\"4.0\" xmlns=\"http://schemas.microsoft.com/developer/msbuil"
  },
  {
    "path": "Examples/WhisperDesktop/framework.h",
    "chars": 474,
    "preview": "#pragma once\n#define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers\n#define NOMINMAX\n// Windows H"
  },
  {
    "path": "Examples/WhisperDesktop/stdafx.cpp",
    "chars": 19,
    "preview": "#include \"stdafx.h\""
  },
  {
    "path": "Examples/WhisperDesktop/stdafx.h",
    "chars": 149,
    "preview": "#pragma once\n#include \"framework.h\"\n\n#include <whisperWindows.h>\n\n#include \"resource.h\"\n#include \"Utils/WTL/atlapp.h\"\n#i"
  },
  {
    "path": "Examples/WhisperDesktop/targetver.h",
    "chars": 204,
    "preview": "#pragma once\n// Setup Windows SDK to only enable features available since Windows 8.0\n#include <WinSDKVer.h>\n#define _WI"
  },
  {
    "path": "Examples/WhisperDesktop/useDiscreteGpu.c",
    "chars": 122,
    "preview": "__declspec( dllexport ) int NvOptimusEnablement = 1;\n__declspec( dllexport ) int AmdPowerXpressRequestHighPerformance = "
  },
  {
    "path": "Examples/main/Readme.txt",
    "chars": 162,
    "preview": "This example shows how to consume the DLL from a C++ console application.\n\nThe command-line interface matches the corre"
  },
  {
    "path": "Examples/main/main.cpp",
    "chars": 10351,
    "preview": "#include \"params.h\"\n#include \"../../Whisper/API/iContext.cl.h\"\n#include \"../../Whisper/API/iMediaFoundation.cl.h\"\n#inclu"
  },
  {
    "path": "Examples/main/main.vcxproj",
    "chars": 4648,
    "preview": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<Project DefaultTargets=\"Build\" xmlns=\"http://schemas.microsoft.com/developer/msb"
  },
  {
    "path": "Examples/main/main.vcxproj.filters",
    "chars": 602,
    "preview": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<Project ToolsVersion=\"4.0\" xmlns=\"http://schemas.microsoft.com/developer/msbuil"
  },
  {
    "path": "Examples/main/miscUtils.cpp",
    "chars": 1197,
    "preview": "#include \"miscUtils.h\"\n#define WIN32_LEAN_AND_MEAN\n#include <windows.h>\n\nstd::string utf8( const std::wstring& utf16 )\n{"
  },
  {
    "path": "Examples/main/miscUtils.h",
    "chars": 196,
    "preview": "#pragma once\n#include <string>\n\nstd::string utf8( const std::wstring& utf16 );\n\nstd::wstring utf16( const std::string& u"
  },
  {
    "path": "Examples/main/params.cpp",
    "chars": 6205,
    "preview": "#include \"params.h\"\n#include <algorithm>\n#include <thread>\n#include \"miscUtils.h\"\n#include \"../../Whisper/API/iContext.c"
  },
  {
    "path": "Examples/main/params.h",
    "chars": 884,
    "preview": "#pragma once\n#include <vector>\n#include <string>\n\n// command-line parameters\nstruct whisper_params\n{\n\tuint32_t n_threads"
  },
  {
    "path": "Examples/main/textWriter.cpp",
    "chars": 4877,
    "preview": "#include \"textWriter.h\"\n#include \"../../ComLightLib/comLightClient.h\"\n#include <array>\n#define WIN32_LEAN_AND_MEAN\n#incl"
  },
  {
    "path": "Examples/main/textWriter.h",
    "chars": 357,
    "preview": "#pragma once\n#include \"../../Whisper/API/iContext.cl.h\"\n\n// These functions print output segments into text files of var"
  },
  {
    "path": "LICENSE",
    "chars": 16725,
    "preview": "Mozilla Public License Version 2.0\n==================================\n\n1. Definitions\n--------------\n\n1.1. \"Contributor\""
  },
  {
    "path": "Readme.md",
    "chars": 10419,
    "preview": "This project is a Windows port of the [whisper.cpp](https://github.com/ggerganov/whisper.cpp) implementation.<br/>\nWhic"
  },
  {
    "path": "SampleClips/Readme.txt",
    "chars": 1677,
    "preview": "This folder contains 2 sample speech audio clips, `jfk.wav` and `columbia.wma`\n\njfk.wav is from whisper.cpp repository."
  },
  {
    "path": "SampleClips/columbia-large-1080ti.txt",
    "chars": 2719,
    "preview": "    CPU Tasks\nLoadModel       950.578 milliseconds\nRunComplete     27.5329 seconds\nRun     27.434 seconds\nCallbacks    "
  },
  {
    "path": "SampleClips/columbia-large-1650.txt",
    "chars": 2652,
    "preview": "    CPU Tasks\nLoadModel       7.95251 seconds\nRunComplete     109.423 seconds\nRun     109.351 seconds\nCallbacks       12"
  },
  {
    "path": "SampleClips/columbia-large-vega7.txt",
    "chars": 2912,
    "preview": "    CPU Tasks\nLoadModel       2.88964 seconds\nRunComplete     140.747 seconds\nRun     140.661 seconds\nCallbacks       20"
  },
  {
    "path": "SampleClips/columbia-large-vega8.txt",
    "chars": 2928,
    "preview": "    CPU Tasks\nLoadModel       1.49776 seconds\nRunComplete     110.474 seconds\nRun     110.407 seconds\nCallbacks       1"
  },
  {
    "path": "SampleClips/columbia-medium-1080ti.txt",
    "chars": 2756,
    "preview": "    CPU Tasks\nLoadModel       600.5 milliseconds\nRunComplete     14.9475 seconds\nRun     14.8676 seconds\nCallbacks     "
  },
  {
    "path": "SampleClips/columbia-medium-1650.txt",
    "chars": 2710,
    "preview": "    CPU Tasks\nLoadModel       939.886 milliseconds\nRunComplete     48.7479 seconds\nRun     48.6305 seconds\nCallbacks    "
  },
  {
    "path": "SampleClips/columbia-medium-vega7.txt",
    "chars": 2969,
    "preview": "    CPU Tasks\nLoadModel       1.99675 seconds\nRunComplete     81.256 seconds\nRun     81.1666 seconds\nCallbacks       17."
  },
  {
    "path": "SampleClips/columbia-medium-vega8.txt",
    "chars": 2975,
    "preview": "    CPU Tasks\nLoadModel       841.605 milliseconds\nRunComplete     62.1145 seconds\nRun     62.0268 seconds\nCallbacks   "
  },
  {
    "path": "SampleClips/jfk-large-1080ti.txt",
    "chars": 2461,
    "preview": "    CPU Tasks\nLoadModel       945.134 milliseconds\nRunComplete     2.19628 seconds\nRun     2.08991 seconds\nCallbacks   "
  },
  {
    "path": "SampleClips/jfk-large-1650.txt",
    "chars": 2420,
    "preview": "    CPU Tasks\nLoadModel       7.92578 seconds\nRunComplete     8.33686 seconds\nRun     8.25683 seconds\nCallbacks       33"
  },
  {
    "path": "SampleClips/jfk-large-vega7.txt",
    "chars": 2677,
    "preview": "    CPU Tasks\nLoadModel       3.22847 seconds\nRunComplete     14.2729 seconds\nRun     14.186 seconds\nCallbacks       674"
  },
  {
    "path": "SampleClips/jfk-large-vega8.txt",
    "chars": 2682,
    "preview": "    CPU Tasks\nLoadModel       1.57347 seconds\nRunComplete     9.46787 seconds\nRun     9.40671 seconds\nCallbacks       2"
  },
  {
    "path": "SampleClips/jfk-medium-1080ti.txt",
    "chars": 2476,
    "preview": "    CPU Tasks\nLoadModel       593.861 milliseconds\nRunComplete     1.13909 seconds\nRun     1.06578 seconds\nCallbacks   "
  },
  {
    "path": "SampleClips/jfk-medium-1650.txt",
    "chars": 2447,
    "preview": "    CPU Tasks\nLoadModel       2.20693 seconds\nRunComplete     3.16174 seconds\nRun     3.07912 seconds\nCallbacks       38"
  },
  {
    "path": "SampleClips/jfk-medium-vega7.txt",
    "chars": 2686,
    "preview": "    CPU Tasks\nLoadModel       1.79203 seconds\nRunComplete     8.79853 seconds\nRun     8.71884 seconds\nCallbacks       62"
  },
  {
    "path": "SampleClips/jfk-medium-vega8.txt",
    "chars": 2695,
    "preview": "    CPU Tasks\nLoadModel       827.449 milliseconds\nRunComplete     4.95485 seconds\nRun     4.90459 seconds\nCallbacks   "
  },
  {
    "path": "SampleClips/summary.tsv",
    "chars": 1393,
    "preview": "Audio Clip\tModel\tGPU\tTotal, sec\tRelative speed\tEncode, sec\tDecode, sec\tRAM, MB\tVRAM, MB\njfk.wav\tmedium\tGeForce 1080Ti\t1."
  },
  {
    "path": "Tools/CompressShaders/Cabinet.cs",
    "chars": 2758,
    "preview": "using System.ComponentModel;\nusing System.Runtime.InteropServices;\n\nnamespace CompressShaders\n{\n\t/// <summary>Lossless "
  },
  {
    "path": "Tools/CompressShaders/CompressShaders.cs",
    "chars": 6152,
    "preview": "using System.Runtime.CompilerServices;\nnamespace CompressShaders;\n\nrecord struct sShaderBinary\n{\n\tpublic string name;\n\t"
  },
  {
    "path": "Tools/CompressShaders/CompressShaders.csproj",
    "chars": 465,
    "preview": "<Project Sdk=\"Microsoft.NET.Sdk\">\n\t<PropertyGroup>\n\t\t<OutputType>Exe</OutputType>\n\t\t<TargetFramework>net6.0</TargetFrame"
  },
  {
    "path": "Tools/CompressShaders/DetectFp64.cs",
    "chars": 1500,
    "preview": "#pragma warning disable CS0649\nusing System.Runtime.InteropServices;\n\nnamespace CompressShaders\n{\n\tstatic class DetectF"
  },
  {
    "path": "Tools/CompressShaders/LZ4.cs",
    "chars": 990,
    "preview": "namespace CompressShaders;\nusing K4os.Compression.LZ4;\n\n/// <summary>Lossless data compressor which uses LZ4-HC compres"
  },
  {
    "path": "Tools/CompressShaders/LanguageCodes.cs",
    "chars": 2950,
    "preview": "using System.Globalization;\nusing System.Text.RegularExpressions;\n\nnamespace CompressShaders\n{\n\tstatic class LanguageCo"
  },
  {
    "path": "Tools/CompressShaders/Readme.txt",
    "chars": 578,
    "preview": "This project builds a C# console app which serves as a code generator for a few pieces of Whisper.dll and WhisperNet.dl"
  },
  {
    "path": "Tools/CompressShaders/ShaderNames.cs",
    "chars": 710,
    "preview": "static class ShaderNames\n{\n\tpublic static void write( string path, IEnumerable<string> names )\n\t{\n\t\tstring[] arr = name"
  },
  {
    "path": "Tools/CompressTables/CompressTables.cs",
    "chars": 1304,
    "preview": "namespace CompressTables;\nusing CompressShaders;\nusing System.IO;\nusing System.Runtime.CompilerServices;\n\n/// <summary>"
  },
  {
    "path": "Tools/CompressTables/CompressTables.csproj",
    "chars": 557,
    "preview": "<Project Sdk=\"Microsoft.NET.Sdk\">\n\t<PropertyGroup>\n\t\t<OutputType>Exe</OutputType>\n\t\t<TargetFramework>net7.0</TargetFrame"
  },
  {
    "path": "Tools/PerfSummary/LogParser.cs",
    "chars": 4837,
    "preview": "using System.Globalization;\nusing System.Text.RegularExpressions;\n\nnamespace PerfSummary\n{\n\tenum eInputClip: byte\n\t{\n\t\t"
  },
  {
    "path": "Tools/PerfSummary/PerfSummary.cs",
    "chars": 672,
    "preview": "using System.Runtime.CompilerServices;\n\nnamespace PerfSummary\n{\n\tinternal class Program\n\t{\n\t\tstatic string getSolutionR"
  },
  {
    "path": "Tools/PerfSummary/PerfSummary.csproj",
    "chars": 367,
    "preview": "<Project Sdk=\"Microsoft.NET.Sdk\">\n\t<PropertyGroup>\n\t\t<OutputType>Exe</OutputType>\n\t\t<TargetFramework>net6.0</TargetFrame"
  },
  {
    "path": "Tools/PerfSummary/Summary.cs",
    "chars": 2043,
    "preview": "using System.Globalization;\n\nnamespace PerfSummary\n{\n\tstatic class Summary\n\t{\n\t\tpublic static void print( LogData[] log"
  },
  {
    "path": "Tools/compareTraces/CommandLineArgs.cpp",
    "chars": 964,
    "preview": "#include \"stdafx.h\"\n#include \"CommandLineArgs.h\"\n#include <charconv>\n\nstatic bool printUsage()\n{\n\tfprintf( stderr, \"Usag"
  },
  {
    "path": "Tools/compareTraces/CommandLineArgs.h",
    "chars": 141,
    "preview": "#pragma once\n\nstruct CommandLineArgs\n{\n\tint64_t printDiff = -1;\n\tstd::array<CString, 2> inputs;\n\n\tbool parse( int argc, "
  },
  {
    "path": "Tools/compareTraces/Readme.txt",
    "chars": 570,
    "preview": "This project builds a C++ console tool which compares debug traces of the model.\n\nTracing files easily exceed 1GB, and "
  },
  {
    "path": "Tools/compareTraces/TraceReader.cpp",
    "chars": 1192,
    "preview": "#include \"stdafx.h\"\n#include \"TraceReader.h\"\nusing namespace Tracing;\n\nconst sTraceItem& TraceReader::operator[]( size_t"
  },
  {
    "path": "Tools/compareTraces/TraceReader.h",
    "chars": 778,
    "preview": "#pragma once\n#include \"../../Whisper/Utils/Trace/TraceStructures.h\"\n#include <atlstr.h>\n#include <atlfile.h>\n\nnamespace "
  },
  {
    "path": "Tools/compareTraces/compare.cpp",
    "chars": 10262,
    "preview": "#include \"stdafx.h\"\n#include \"../../Whisper/API/iContext.cl.h\"\n#include \"TraceReader.h\"\n#include \"../../Whisper/ML/testU"
  },
  {
    "path": "Tools/compareTraces/compare.h",
    "chars": 101,
    "preview": "#pragma once\n#include \"CommandLineArgs.h\"\n\nHRESULT compareTraces( const CommandLineArgs& arguments );"
  },
  {
    "path": "Tools/compareTraces/compareTraces.cpp",
    "chars": 282,
    "preview": "#include \"stdafx.h\"\n#include <stdio.h>\n#include \"compare.h\"\n#include \"CommandLineArgs.h\"\n\nint wmain( int argc, wchar_t* "
  },
  {
    "path": "Tools/compareTraces/compareTraces.vcxproj",
    "chars": 4817,
    "preview": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<Project DefaultTargets=\"Build\" xmlns=\"http://schemas.microsoft.com/developer/msb"
  },
  {
    "path": "Tools/compareTraces/compareTraces.vcxproj.filters",
    "chars": 682,
    "preview": "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<Project ToolsVersion=\"4.0\" xmlns=\"http://schemas.microsoft.com/developer/msbuil"
  },
  {
    "path": "Tools/compareTraces/stdafx.cpp",
    "chars": 541,
    "preview": "#include \"stdafx.h\"\n\nnamespace\n{\n\twchar_t* formatMessage( HRESULT hr )\n\t{\n\t\twchar_t* err;\n\t\tif( FormatMessage( FORMAT_ME"
  },
  {
    "path": "Tools/compareTraces/stdafx.h",
    "chars": 898,
    "preview": "#pragma once\n#include <stdint.h>\n#include <assert.h>\n\n#define WIN32_LEAN_AND_MEAN\n#define NOMINMAX\n#include <windows.h>\n"
  }
]

// ... and 285 more files (download for full content)

About this extraction

This page contains the full source code of the Const-me/Whisper GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 485 files (3.3 MB), approximately 899.8k tokens, and a symbol index with 3982 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Copied to clipboard!