Repository: ggerganov/kbd-audio
Branch: master
Commit: a9182070debe
Files: 63
Total size: 11.7 MB
Directory structure:
gitextract_n3kfgpod/
├── .github/
│ └── workflows/
│ └── build.yml
├── .gitignore
├── .gitmodules
├── CMakeLists.txt
├── LICENSE
├── README.md
├── audio-logger.cpp
├── audio-logger.h
├── build-vars.h.in
├── cmake/
│ ├── BuildTypes.cmake
│ ├── FindFFTW.cmake
│ ├── GitVars.cmake
│ └── sdl2/
│ └── FindSDL2.cmake
├── common-gui.cpp
├── common-gui.h
├── common.cpp
├── common.h
├── compress-n-grams.cpp
├── constants.h
├── data/
│ ├── english_quadgrams.txt
│ ├── english_quintgrams.txt
│ ├── english_trigrams.txt
│ ├── ggwords-3-gram.dat.binary
│ ├── ggwords-4-gram.dat.binary
│ ├── ggwords-5-gram.dat.binary
│ ├── ggwords-6-gram.dat.binary
│ └── sample_quadgrams.txt
├── dr_wav.h
├── generate-clusters.cpp
├── guess-qp.cpp
├── guess-qp2.cpp
├── imconfig-vtx32.h
├── index-keytap2-gui-tmpl.html
├── index-keytap3-app-tmpl.html
├── index-keytap3-gui-tmpl.html
├── key-average-gui.cpp
├── key-detector.cpp
├── keytap-gui.cpp
├── keytap.cpp
├── keytap2-gui-old.cpp
├── keytap2-gui.cpp
├── keytap2.cpp
├── keytap3-app.cpp
├── keytap3-gui.cpp
├── keytap3-multi.cpp
├── keytap3.cpp
├── non-exact-subbreak.cpp
├── non-exact-subbreak2.cpp
├── play-full.cpp
├── play.cpp
├── record-full.cpp
├── record.cpp
├── scale.cpp
├── style.css
├── subbreak.cpp
├── subbreak.h
├── subbreak2.cpp
├── subbreak2.h
├── subbreak3.cpp
├── subbreak3.h
├── test-subbreak3.cpp
├── view-full-gui.cpp
└── view-gui.cpp
================================================
FILE CONTENTS
================================================
================================================
FILE: .github/workflows/build.yml
================================================
# Continuous integration: configure and build the project in Debug and Release
# on Linux (gcc and clang) and macOS for every push.
name: CI

on: [push]

jobs:
  # NOTE: the job ids still carry the historical "18_04" suffix so that any
  # status checks referring to them keep working; the retired ubuntu-18.04
  # runner image has been replaced by ubuntu-latest.
  ubuntu-18_04-gcc:
    runs-on: ubuntu-latest

    strategy:
      matrix:
        build: [Debug, Release]

    steps:
      - name: Clone
        uses: actions/checkout@v4
        with:
          submodules: true

      - name: Dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y build-essential xorg-dev libglu1-mesa-dev
          sudo apt-get install -y cmake
          sudo apt-get install -y libsdl2-dev

      - name: Configure
        run: cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }}

      - name: Build
        run: make

  ubuntu-18_04-clang:
    runs-on: ubuntu-latest

    strategy:
      matrix:
        build: [Debug, Release]

    steps:
      - name: Clone
        uses: actions/checkout@v4
        with:
          submodules: true

      - name: Dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y build-essential xorg-dev libglu1-mesa-dev
          sudo apt-get install -y clang
          sudo apt-get install -y cmake
          sudo apt-get install -y libsdl2-dev

      - name: Configure
        run: cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang

      - name: Build
        run: make

  macOS-latest:
    runs-on: macOS-latest

    strategy:
      matrix:
        build: [Debug, Release]

    steps:
      - name: Clone
        uses: actions/checkout@v4
        with:
          submodules: true

      - name: Dependencies
        run: |
          brew update
          brew install sdl2

      - name: Configure
        run: cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }}

      - name: Build
        run: make
================================================
FILE: .gitignore
================================================
build*/
build-vars.h
data_original/*
chal*
.cache
.DS_Store
.ycm_extra_conf.py
.clangd
compile_commands.json
================================================
FILE: .gitmodules
================================================
[submodule "imgui"]
path = imgui
url = https://github.com/ocornut/imgui
================================================
FILE: CMakeLists.txt
================================================
# Top-level build script of the kbd-audio tools.
#
# CMAKE_CXX_STANDARD 17 (set below) is only understood by CMake >= 3.8, and recent
# CMake releases refuse projects that request compatibility with versions older
# than 3.5, so state the real requirement. The "...3.10" upper bound opts in to the
# policies up to 3.10 on newer CMake versions and is ignored by CMake < 3.12.
cmake_minimum_required(VERSION 3.8...3.10)
project(KBD-Audio)

# generate compile_commands.json for editor / clangd integration
set(CMAKE_EXPORT_COMPILE_COMMANDS "on")

# options

option(BUILD_EXPERIMENTAL "Build experimental tools" OFF)
option(USE_FINDSDL2 "Use the FindSDL2.cmake script" OFF)

option(KBD_AUDIO_SANITIZE_THREAD    "kbd-audio: enable thread sanitizer"    OFF)
option(KBD_AUDIO_SANITIZE_ADDRESS   "kbd-audio: enable address sanitizer"   OFF)
option(KBD_AUDIO_SANITIZE_UNDEFINED "kbd-audio: enable undefined sanitizer" OFF)

# sanitizers

if (KBD_AUDIO_SANITIZE_THREAD)
    set(CMAKE_C_FLAGS   "${CMAKE_C_FLAGS}   -fsanitize=thread")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=thread")
endif()

if (KBD_AUDIO_SANITIZE_ADDRESS)
    set(CMAKE_C_FLAGS   "${CMAKE_C_FLAGS}   -fsanitize=address -fno-omit-frame-pointer")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address -fno-omit-frame-pointer")
endif()

if (KBD_AUDIO_SANITIZE_UNDEFINED)
    set(CMAKE_C_FLAGS   "${CMAKE_C_FLAGS}   -fsanitize=undefined")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined")
endif()

# sdl2

# find modules shipped with the project (FindFFTW.cmake, ...)
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)

if (USE_FINDSDL2)
    # additionally expose the bundled FindSDL2.cmake script
    list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/sdl2")
endif()

# build types

include(cmake/GitVars.cmake)
include(cmake/BuildTypes.cmake)

# build-vars.h is generated next to the sources (it is listed in .gitignore)
configure_file(${CMAKE_SOURCE_DIR}/build-vars.h.in ${CMAKE_SOURCE_DIR}/build-vars.h @ONLY)

# warnings

if (CMAKE_COMPILER_IS_GNUCC OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -pedantic")
endif()

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
# Locate the third-party dependencies for the current platform and expose their
# include directories to every target defined below.
if (EMSCRIPTEN)
    # NOTE: both assignments replace (they do not extend) the flags accumulated so far.
    set (CMAKE_CXX_FLAGS " \
    -s USE_SDL=2 \
    -s USE_PTHREADS=1 \
    ")

    set(CMAKE_EXE_LINKER_FLAGS " \
    --bind \
    --use-preload-cache \
    -s ASSERTIONS=1 \
    -s NO_EXIT_RUNTIME=0 \
    -s PTHREAD_POOL_SIZE=16 \
    -s INITIAL_MEMORY=536870912 \
    ")
elseif(MINGW)
    find_package(PkgConfig REQUIRED)
    pkg_search_module(SDL2 REQUIRED sdl2)

    set(OPENGL_INCLUDE_DIR "")
    set(OPENGL_LIBRARIES "opengl32")
else()
    find_package(OpenGL REQUIRED)
    # SDL2 and FFTW are not REQUIRED here: their absence is reported below
    find_package(SDL2)
    find_package(FFTW)
endif()

find_package(Threads REQUIRED)

# SDL2 is mandatory everywhere except Emscripten (where it comes from -s USE_SDL=2)
if (NOT EMSCRIPTEN AND NOT SDL2_FOUND)
    if (USE_FINDSDL2)
        # Previously this case passed silently and only surfaced later as a compile or
        # link failure. A warning (not an error) is used in case the find script
        # populated the SDL2 variables without setting SDL2_FOUND.
        message(WARNING "SDL2 was not reported as found even though USE_FINDSDL2 is enabled."
            " Make sure that SDL2 is installed.")
    else()
        message(WARNING "Unable to find SDL2 library. It is either not installed or CMake cannot find it."
            " In the latter case, setting the USE_FINDSDL2 variable might help:\n"
            "   $ cmake -DUSE_FINDSDL2=ON .."
            )

        message(FATAL_ERROR "Aborting")
    endif()
endif()

if (NOT FFTW_FOUND)
    message(WARNING "FFTW library not available. Some targets will not be built")
endif()

# some SDL2 config scripts leave surrounding whitespace, which target_link_libraries rejects
string(STRIP "${SDL2_LIBRARIES}" SDL2_LIBRARIES)

if (APPLE)
    find_library(COREGRAPHICS_LIBRARY CoreGraphics)
    find_library(COREFOUNDATION_LIBRARY CoreFoundation)
else()
    unset(COREGRAPHICS_LIBRARY)
    unset(COREFOUNDATION_LIBRARY)
endif()

include_directories(imgui)
include_directories(imgui/examples)
include_directories(imgui/examples/libs/gl3w)
include_directories(${SDL2_INCLUDE_DIRS})
include_directories(${OPENGL_INCLUDE_DIR})
# Core : audio capture and shared helpers used by every tool
add_library(Core STATIC
    common.cpp
    audio-logger.cpp
    )

target_link_libraries(Core PRIVATE
    Threads::Threads
    ${SDL2_LIBRARIES}
    ${COREFOUNDATION_LIBRARY}
    )

# todo : this is ugly, what's the proper way?
if (MINGW)
    target_link_libraries(Core PRIVATE stdc++)
endif()

# Gui : Dear ImGui with the SDL2 + OpenGL3 backends, plus the shared GUI helpers.
# The source list is common to all platforms; only the native (non-Emscripten)
# build additionally needs the gl3w OpenGL loader and explicit link libraries.
add_library(Gui STATIC
    common-gui.cpp
    imgui/imgui.cpp
    imgui/imgui_draw.cpp
    imgui/imgui_demo.cpp
    imgui/imgui_widgets.cpp
    imgui/examples/imgui_impl_sdl.cpp
    imgui/examples/imgui_impl_opengl3.cpp
    )

target_compile_definitions(Gui PUBLIC
    IMGUI_USER_CONFIG="../imconfig-vtx32.h"
    )

if (NOT EMSCRIPTEN)
    target_sources(Gui PRIVATE
        ${CMAKE_CURRENT_SOURCE_DIR}/imgui/examples/libs/gl3w/GL/gl3w.c
        )

    # force GL3W loader
    target_compile_definitions(Gui PUBLIC
        IMGUI_IMPL_OPENGL_LOADER_GL3W=1
        )

    target_link_libraries(Gui PRIVATE
        ${CMAKE_DL_LIBS}
        ${SDL2_LIBRARIES}
        ${OPENGL_LIBRARIES}
        ${COREFOUNDATION_LIBRARY}
        )
endif()
# Tools that are built for all platforms, including the Emscripten (web) build

add_executable(keytap-gui keytap-gui.cpp)
target_link_libraries(keytap-gui PRIVATE Core Gui)

add_executable(keytap2-gui keytap2-gui.cpp subbreak2.cpp)
target_link_libraries(keytap2-gui PRIVATE Core Gui)

add_executable(keytap3-gui keytap3-gui.cpp subbreak3.cpp)
target_link_libraries(keytap3-gui PRIVATE Core Gui)

add_executable(keytap3-app keytap3-app.cpp subbreak3.cpp)
target_link_libraries(keytap3-app PRIVATE Core)

add_executable(view-full-gui view-full-gui.cpp)
target_link_libraries(view-full-gui PRIVATE Core Gui)

if (EMSCRIPTEN)
    # Per-target Emscripten link flags and, where a template exists, the web page
    # (index.html + style.css) generated into ${CMAKE_BINARY_DIR}/<target>.
    #
    # NOTE(review): CMAKE_RUNTIME_OUTPUT_DIRECTORY only initializes the output
    # directory of targets created after it is set. The targets above already
    # exist, so here it appears to act only as the destination of the generated
    # web page files - confirm that this is intended.

    # view-full-gui

    set(TARGET view-full-gui)
    set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/${TARGET})

    set_target_properties(${TARGET} PROPERTIES LINK_FLAGS " \
    -s TOTAL_MEMORY=536870912 \
    -s LZ4=1 \
    --preload-file ${PROJECT_SOURCE_DIR}/chal/record@/chal/record/ \
    ")

    # keytap-gui

    set(TARGET keytap-gui)
    set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/${TARGET})

    set_target_properties(${TARGET} PROPERTIES LINK_FLAGS " \
    -s TOTAL_MEMORY=536870912 \
    -s FORCE_FILESYSTEM=1 \
    ")

    # keytap2-gui

    set(TARGET keytap2-gui)
    set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/${TARGET})

    set_target_properties(${TARGET} PROPERTIES LINK_FLAGS " \
    -s TOTAL_MEMORY=536870912 \
    -s FORCE_FILESYSTEM=1 \
    -s LZ4=1 \
    --preload-file ${PROJECT_SOURCE_DIR}/data/@/data/ \
    ")

    configure_file(${PROJECT_SOURCE_DIR}/index-${TARGET}-tmpl.html ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/index.html @ONLY)
    configure_file(${PROJECT_SOURCE_DIR}/style.css ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/style.css @ONLY)

    # keytap3-app

    set(TARGET keytap3-app)
    set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/${TARGET})

    set_target_properties(${TARGET} PROPERTIES LINK_FLAGS " \
    -s FORCE_FILESYSTEM=1 \
    -s LZ4=1 \
    --preload-file ${PROJECT_SOURCE_DIR}/data/ggwords-6-gram.dat.binary@/data/ \
    ")

    configure_file(${PROJECT_SOURCE_DIR}/index-${TARGET}-tmpl.html ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/index.html @ONLY)
    configure_file(${PROJECT_SOURCE_DIR}/style.css ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/style.css @ONLY)

    # keytap3-gui

    set(TARGET keytap3-gui)
    set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/${TARGET})

    set_target_properties(${TARGET} PROPERTIES LINK_FLAGS " \
    -s TOTAL_MEMORY=536870912 \
    -s FORCE_FILESYSTEM=1 \
    -s LZ4=1 \
    --preload-file ${PROJECT_SOURCE_DIR}/data/ggwords-6-gram.dat.binary@/data/ \
    ")

    configure_file(${PROJECT_SOURCE_DIR}/index-${TARGET}-tmpl.html ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/index.html @ONLY)
    configure_file(${PROJECT_SOURCE_DIR}/style.css ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/style.css @ONLY)
endif()
if (NOT EMSCRIPTEN)
    # kbd_audio_add_tool(<name> <source>...)
    #
    # Declares the executable <name> built from the given sources and links it
    # against the Core library. Extra libraries are added by the caller.
    function(kbd_audio_add_tool name)
        add_executable(${name} ${ARGN})
        target_link_libraries(${name} PRIVATE Core)
    endfunction()

    kbd_audio_add_tool(key-detector key-detector.cpp)
    kbd_audio_add_tool(keytap       keytap.cpp)
    kbd_audio_add_tool(keytap2      keytap2.cpp)
    kbd_audio_add_tool(keytap3      keytap3.cpp subbreak3.cpp)
    kbd_audio_add_tool(play         play.cpp)
    kbd_audio_add_tool(play-full    play-full.cpp)

    if (NOT MINGW)
        # todo : need to find termios.h alternative for Windows
        kbd_audio_add_tool(record record.cpp)
    endif()

    kbd_audio_add_tool(record-full record-full.cpp)

    kbd_audio_add_tool(view-gui view-gui.cpp)
    target_link_libraries(view-gui PRIVATE Gui)

    kbd_audio_add_tool(compress-n-grams compress-n-grams.cpp subbreak3.cpp)

    #
    ## Experimental stuff

    if (BUILD_EXPERIMENTAL)
        if (FFTW_FOUND)
            kbd_audio_add_tool(key-average-gui key-average-gui.cpp)
            target_include_directories(key-average-gui PRIVATE ${FFTW_INCLUDE_DIRS})
            target_link_libraries(key-average-gui PRIVATE Gui ${FFTW_LIBRARIES})
        else()
            message(WARNING "Skipping 'key-average-gui' target because FFTW is not available")
        endif()

        kbd_audio_add_tool(keytap3-multi       keytap3-multi.cpp subbreak3.cpp)
        kbd_audio_add_tool(guess-qp            guess-qp.cpp)
        kbd_audio_add_tool(guess-qp2           guess-qp2.cpp)
        kbd_audio_add_tool(scale               scale.cpp)
        kbd_audio_add_tool(subbreak            subbreak.cpp)
        kbd_audio_add_tool(non-exact-subbreak  non-exact-subbreak.cpp subbreak2.cpp)
        kbd_audio_add_tool(non-exact-subbreak2 non-exact-subbreak2.cpp subbreak2.cpp)
        kbd_audio_add_tool(generate-clusters   generate-clusters.cpp subbreak2.cpp)
        kbd_audio_add_tool(test-subbreak3      test-subbreak3.cpp subbreak3.cpp)
    endif()
endif()
================================================
FILE: LICENSE
================================================
MIT License
Copyright (c) 2018 Georgi Gerganov
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: README.md
================================================
kbd-audio
=====
[![Actions Status](https://github.com/ggerganov/kbd-audio/workflows/CI/badge.svg)](https://github.com/ggerganov/kbd-audio/actions)
This is a collection of command-line and GUI tools for capturing and analyzing audio data.
### Keytap
The most interesting tool is called **keytap** - it can guess pressed keyboard keys only by analyzing the audio captured from the computer's microphone.
Check this blog post for more details:
[Keytap: description and some random thoughts](https://ggerganov.github.io/jekyll/update/2018/11/30/keytap-description-and-thoughts.html)
[Video: short demo of Keytap in action](https://www.youtube.com/watch?v=2OjzI9m7W10)
[Try it online:](https://ggerganov.github.io/keytap)
### Keytap2
The **keytap2** tool is another interesting tool for recovering text from audio. It does not require training data - instead it uses statistical information about the frequencies of the letters and n-grams in the English language.
A more detailed description of the tool is available here: [Keytap2 discussion](https://github.com/ggerganov/kbd-audio/discussions/31)
[Video: short demo of Keytap2 in action](https://www.youtube.com/watch?v=jNtw17S6SR0)
[CTF: can you guess the text being typed?](https://ggerganov.github.io/keytap-challenge/)
[Try it online:](https://keytap2.ggerganov.com)
### Keytap3
This version introduces significant algorithm improvements and better n-gram statistics compared to keytap2. The attack is now fully
automated and does not require any manual intervention during the text recovery process.
[Video: short demo of using Keytap3](https://youtu.be/5aphvxpSt3o)
[Video: another example of using Keytap3](https://youtu.be/kCOrxrR-4ak)
[GUI for Keytap3](https://keytap3-gui.ggerganov.com)
[Check if your keyboard is vulnerable to Keytap:](https://keytap3.ggerganov.com)
### What people say about Keytap
*"This works incredibly well.\
I hope you realize what you've created (and made available to every person in the world)."* -- ffpip
*"I just tried it and it works incredibly well. It kind of makes me want to stop using a mechanical keyboard."* -- Karawebnetwork
*"This attack and Van Eck phreaking are why Edward Snowden, while typing passwords and other sensitive information, would pull a blanket over himself and his laptop."* -- aarchi
*"This is what mechanical keyboard users deserve"* -- super guy
*"fuck.."* -- Lluis Franco
## Build instructions
Dependencies:
- **SDL2** - used to capture audio and to open GUI windows [libsdl](https://www.libsdl.org)
[Ubuntu]
$ sudo apt install libsdl2-dev
[Mac OS with brew]
$ brew install sdl2
[MSYS2]
$ pacman -S git cmake make mingw-w64-x86_64-dlfcn mingw-w64-x86_64-gcc mingw-w64-x86_64-SDL2
- **FFTW3** *(optional)* - some of the helper tools perform Fourier transformations [fftw](http://www.fftw.org)
**Linux, FreeBSD, Mac OS, Windows (MSYS2 + MinGW)**
git clone https://github.com/ggerganov/kbd-audio
cd kbd-audio
git submodule update --init
mkdir build && cd build
cmake ..
make
## Tools
Short summary of the available tools. If the status of the tool is not **stable**, expect problems and non-optimal results.
| Name | Type | Status |
| --- | --- | --- |
| **record** | text | **stable** |
| **record-full** | text | **stable** |
| **play** | text | **stable** |
| **play-full** | text | **stable** |
| **view-gui** | gui | **stable** |
| **view-full-gui** | gui | **stable** |
| **key-detector** | text | **stable** |
| **keytap** | text | **stable** |
| **keytap-gui** | gui | **stable** |
| **keytap2-gui** | gui | **stable** |
| **keytap3** | text | **stable** |
| **keytap3-gui** | gui | **stable** |
| - | *extra* | - |
| **guess-qp** | text | experiment |
| **guess-qp2** | text | experiment |
| **keytap3-multi** | text | experiment |
| **scale** | text | experiment |
| **subbreak** | text | experiment |
| **key-average-gui** | gui | experiment |
| **keytap2** | text | experiment |
## Tool details
* **record-full**
Record audio to a raw binary file on disk
./record-full output.kbd [-cN]
---
* **play-full**
Playback a recording captured via the **record-full** tool
./play-full input.kbd [-pN]
---
* **record**
Record audio only while typing. Useful for collecting training data for **keytap**
./record output.kbd [-cN] [-CN]
---
* **play**
Playback a recording created via the **record** tool
./play input.kbd [-pN]
---
* **keytap**
Detect pressed keys via microphone audio capture in real-time. Uses training data captured via the **record** tool.
./keytap input0.kbd [input1.kbd] [input2.kbd] ... [-cN] [-CN] [-pF] [-tF]
---
* **keytap-gui**
Detect pressed keys via microphone audio capture in real-time. Uses training data captured via the **record** tool. GUI version.
./keytap-gui input0.kbd [input1.kbd] [input2.kbd] ... [-cN] [-CN]
Online demo: https://keytap.ggerganov.com
---
* **keytap2-gui** record.kbd n-gram-dir [-pN] [-cN] [-CN]
Detect pressed keys via microphone audio capture. Uses statistical information (n-gram frequencies) about the language. **No training data is required**. The *'record.kbd'* input file has to be generated via the **record-full** tool and contains the audio data that will be analyzed. The *'n-gram-dir'* folder has to contain n-gram probability files for the corresponding language.
./keytap2-gui record.kbd ../data
Online demo: https://keytap2.ggerganov.com
---
* **keytap3**
Fully automated recovery of unknown text from audio recordings.
./keytap3 input.kbd ../data [-cN] [-CN] [-pF] [-tF] [-FN] [-fN]
Online demo: https://keytap3.ggerganov.com
---
* **keytap3-gui**
GUI version of the **keytap3** tool.
./keytap3-gui input.kbd ../data [-cN] [-CN] [-pF] [-tF] [-FN] [-fN]
Online demo: https://keytap3-gui.ggerganov.com
---
* **view-full-gui**
Visualize waveforms recorded with the **record-full** tool. Can also playback the audio data.
./view-full-gui input.kbd [-pN]

---
* **view-gui**
Visualize training data recorded with the **record** tool. Can also playback the audio data.
./view-gui input.kbd [-pN]

---
## Feedback
Any feedback about the performance of the tools is highly appreciated. Please drop a comment [here](https://github.com/ggerganov/kbd-audio/issues/3).
================================================
FILE: audio-logger.cpp
================================================
/*! \file audio-logger.cpp
* \brief Implementation of the AudioLogger audio capture helper (SDL2 capture backend).
* \author Georgi Gerganov
*/
#include "audio-logger.h"
#include
#include
#include
#include
#include
namespace {
void cbAudioReady(void * userData, uint8_t * stream, int32_t /*nbytes*/) {
AudioLogger * logger = (AudioLogger *)(userData);
logger->addFrame((AudioLogger::Sample *)(stream));
}
}
// Private implementation state of AudioLogger (owned through AudioLogger::data_).
struct AudioLogger::Data {
// Zero-fills every frame of the frame buffer, empties all record slots and resets
// the per-slot frame counters. The logger starts in the "not ready" state.
Data() : isReady(false) {
for (auto & frame : buffer) {
frame.fill(0);
}
for (auto & record : records) {
record.clear();
}
nFramesToRecord.fill(0);
}
// id of the opened SDL capture device; 0 until install() succeeds in opening one
SDL_AudioDeviceID deviceIdIn = 0;
//SDL_AudioDeviceID deviceIdOut = 0;
// size of a single sample in bytes; set to 4 by install() for 32-bit float samples
int32_t sampleSize_bytes = -1;
// index of the frame in `buffer` that the next addFrame() call writes to;
// wraps around to 0 when it reaches the end of the buffer
int32_t bufferId = 0;
// most recently captured frames, used by record() to prepend already-captured frames
std::array buffer;
// number of active record requests - the first nRecords entries of the two arrays below
int32_t nRecords = 0;
// per record slot: number of frames that still have to be appended
std::array nFramesToRecord;
// per record slot: frames collected so far
std::array records;
// copy of the parameters passed to install(), with nChannels replaced by the obtained value
Parameters parameters;
// coefficients of the selected high-pass filter, computed in install()
TFilterCoefficients filterCoefficients;
// locked by addFrame() and record() while they touch the record slots
std::mutex mutex;
// set at the end of a successful install(); addFrame() ignores input until then
std::atomic_bool isReady;
};
// Creates the logger together with its private state. No audio device is opened
// until install() is called.
AudioLogger::AudioLogger()
    : data_(new Data()) {
}

// The destructor has no body of its own: it does not pause or close the capture
// device - call terminate() for that.
AudioLogger::~AudioLogger() = default;
// Validates `parameters`, opens the selected SDL capture device and starts capturing.
//
// The capture device is requested with 32-bit float samples, kSamplesPerFrame samples
// per callback and the requested sample rate; only the number of channels is allowed
// to differ from the request. On success the obtained channel count is stored, the
// filter coefficients are computed, the logger is marked as ready and true is returned.
// On any failure a message is printed and false is returned.
bool AudioLogger::install(Parameters && parameters) {
    auto & data = getData();

    if (parameters.captureId < 0) {
        fprintf(stderr, "error : invalid captureId = %d\n", parameters.captureId);
        return false;
    }

    if (parameters.nChannels < 0) {
        fprintf(stderr, "error : invalid nChannels = %d\n", parameters.nChannels);
        return false;
    }

    if (parameters.sampleRate <= 0) {
        fprintf(stderr, "error : invalid sampleRate = %d\n", (int) parameters.sampleRate);
        return false;
    }

    if (parameters.freqCutoff_Hz <= 0) {
        fprintf(stderr, "error : invalid freqCutoff_Hz = %g\n", parameters.freqCutoff_Hz);
        return false;
    }

    // the SDL audio subsystem is initialized once and shared by all loggers
    static bool isInitialized = false;

    if (!isInitialized && SDL_Init(SDL_INIT_AUDIO) < 0) {
        SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "Couldn't initialize SDL: %s\n", SDL_GetError());
        return false;
    }

    isInitialized = true;

    int nDevices = SDL_GetNumAudioDevices(SDL_TRUE);
    printf("Found %d capture devices:\n", nDevices);
    for (int i = 0; i < nDevices; i++) {
        printf(" - Capture device #%d: '%s'\n", i, SDL_GetAudioDeviceName(i, SDL_TRUE));
    }

    if (parameters.captureId < 0 || parameters.captureId >= nDevices) {
        fprintf(stderr, "error : invalid capture device id selected - %d\n", parameters.captureId);
        return false;
    }

    SDL_AudioSpec captureSpec;
    SDL_zero(captureSpec);

    captureSpec.freq = parameters.sampleRate;
    captureSpec.format = AUDIO_F32SYS;
    captureSpec.channels = parameters.nChannels;
    captureSpec.samples = kSamplesPerFrame;
    captureSpec.callback = ::cbAudioReady;
    captureSpec.userdata = this;

    SDL_AudioSpec obtainedSpec;
    SDL_zero(obtainedSpec);

    printf("Attempt to open capture device %d : '%s' ...\n", parameters.captureId, SDL_GetAudioDeviceName(parameters.captureId, SDL_TRUE));
    data.deviceIdIn = SDL_OpenAudioDevice(SDL_GetAudioDeviceName(parameters.captureId, SDL_TRUE), SDL_TRUE, &captureSpec, &obtainedSpec, SDL_AUDIO_ALLOW_CHANNELS_CHANGE);
    if (!data.deviceIdIn) {
        SDL_LogError(SDL_LOG_CATEGORY_APPLICATION, "Couldn't open an audio device for capture: %s!\n", SDL_GetError());
        SDL_Quit();

        // SDL has just been shut down - the next install() has to initialize it again
        isInitialized = false;

        return false;
    }

    // addFrame() interprets the captured data as 32-bit floats, so any other sample
    // format cannot be processed. Do not leave the device open in that case.
    if (obtainedSpec.format != AUDIO_F32SYS) {
        fprintf(stderr, "error : unsupported sample format %d\n", obtainedSpec.format);

        SDL_CloseAudioDevice(data.deviceIdIn);
        data.deviceIdIn = 0;

        return false;
    }

    data.sampleSize_bytes = 4;

    printf("Opened capture device succesfully!\n");
    printf(" DeviceId: %d\n", data.deviceIdIn);
    printf(" Frequency: %d\n", obtainedSpec.freq);
    printf(" Format: %d (%d bytes)\n", obtainedSpec.format, data.sampleSize_bytes);
    printf(" Channels: %d\n", obtainedSpec.channels);
    printf(" Samples: %d\n", obtainedSpec.samples);

    if (obtainedSpec.channels != parameters.nChannels && parameters.nChannels != 0) {
        printf("Warning: obtained number of channels (%d) does not match requested (%d)\n",
               obtainedSpec.channels, parameters.nChannels);
    }

    // start capturing; addFrame() drops incoming data until isReady is set below
    SDL_PauseAudioDevice(data.deviceIdIn, 0);

    // from here on work with the number of channels that was actually obtained
    parameters.nChannels = obtainedSpec.channels;

    switch (parameters.filter) {
        case EAudioFilter::None:
            {
            }
            break;
        case EAudioFilter::FirstOrderHighPass:
            {
                data.filterCoefficients = ::calculateCoefficientsFirstOrderHighPass(parameters.freqCutoff_Hz, parameters.sampleRate);
            }
            break;
        case EAudioFilter::SecondOrderButterworthHighPass:
            {
                data.filterCoefficients = ::calculateCoefficientsSecondOrderButterworthHighPass(parameters.freqCutoff_Hz, parameters.sampleRate);
            }
            break;
    }

    data.parameters = parameters;
    data.isReady = true;

    // print filter parameters
    printf(" Audio Filter: %d\n", (int) parameters.filter);
    printf(" Cutoff frequency: %g Hz\n", parameters.freqCutoff_Hz);

    printf("Capturing audio ..\n");

    return true;
}
// Pauses the capture device and then closes it. Always returns true.
bool AudioLogger::terminate() {
    const auto deviceId = getData().deviceIdIn;

    SDL_PauseAudioDevice(deviceId, 1);
    SDL_CloseAudioDevice(deviceId);

    return true;
}
// Consumes one captured frame (kSamplesPerFrame samples per channel, interleaved).
//
// The channels are averaged into a mono frame, the configured high-pass filter is
// applied and the result is stored in the frame buffer. The frame is then appended
// to every active record. A record that has received all of its frames is passed to
// the user callback and removed from the list of active records.
//
// Returns false (and does nothing) until install() has completed successfully.
bool AudioLogger::addFrame(const Sample * stream) {
    auto & data = getData();

    if (data.isReady == false) return false;

    const auto queuedSize_bytes = SDL_GetQueuedAudioSize(data.deviceIdIn);
    if (queuedSize_bytes > 32*sizeof(float)*kSamplesPerFrame) {
        printf("Queue size: %d\n", (int) queuedSize_bytes);
        SDL_ClearQueuedAudio(data.deviceIdIn);
    }

    // down-mix the interleaved channels to mono
    const float norm = 1.0/data.parameters.nChannels;

    auto & curFrame = data.buffer[data.bufferId];

    for (int i = 0; i < kSamplesPerFrame; ++i) {
        Sample x = 0;
        for (int j = 0; j < data.parameters.nChannels; ++j) {
            x += stream[i*data.parameters.nChannels + j];
        }
        curFrame[i] = x*norm;
    }

    switch (data.parameters.filter) {
        case EAudioFilter::None:
            {
            }
            break;
        case EAudioFilter::FirstOrderHighPass:
            {
                for (auto & s : curFrame) {
                    s = ::filterFirstOrderHighPass(data.filterCoefficients, s);
                }
            }
            break;
        case EAudioFilter::SecondOrderButterworthHighPass:
            {
                for (auto & s : curFrame) {
                    s = ::filterSecondOrderButterworthHighPass(data.filterCoefficients, s);
                }
            }
            break;
    }

    std::lock_guard<std::mutex> lock(data.mutex);

    int r = 0;
    while (r < data.nRecords) {
        auto & record = data.records[r];
        auto & nFramesToRecord = data.nFramesToRecord[r];

        bool isDone = false;

        if (nFramesToRecord > 0) {
            record.push_back(curFrame);
            if (--nFramesToRecord == 0) {
                // the record is complete - deliver it
                if (data.parameters.callback) data.parameters.callback(record);
                isDone = true;
            }
        } else {
            // record() always starts a slot with a positive counter, so a zero counter
            // means the request was cancelled by pause(). Drop it without a callback -
            // otherwise the slot would stay in the active list forever.
            isDone = true;
        }

        if (isDone == false) {
            ++r;
            continue;
        }

        // remove slot r by shifting the remaining active slots down by one;
        // r is not advanced, because it now refers to the next slot
        record.clear();
        for (int k = r + 1; k < data.nRecords; ++k) {
            data.records[k - 1] = std::move(data.records[k]);
            data.nFramesToRecord[k - 1] = data.nFramesToRecord[k];
        }
        --data.nRecords;

        // make sure the vacated (moved-from) slot is really empty for its next use
        data.records[data.nRecords].clear();
        data.nFramesToRecord[data.nRecords] = 0;
    }

    // advance the write position of the frame buffer, wrapping around at the end
    if (++data.bufferId >= (int) data.buffer.size()) {
        data.bufferId = 0;
    }

    return true;
}
// Requests a new record with a total length of `bufferSize_s` seconds.
//
// The first `nPrevFrames` frames of the record are copied from the frames captured
// just before this call; the remaining frames are appended by addFrame() as they
// arrive. When the record is complete it is passed to the callback provided in the
// install() parameters.
//
// Returns false if the buffer size is invalid, if nPrevFrames is negative or not
// smaller than the record length in frames, or if kMaxRecords records are already
// in progress.
bool AudioLogger::record(float bufferSize_s, int32_t nPrevFrames) {
    auto & data = getData();

    if (isValidBufferSize(bufferSize_s) == false) {
        return false;
    }

    auto bufferSize_frames = getBufferSize_frames(data.parameters.sampleRate, bufferSize_s);

    // a negative value would silently extend the record beyond the requested length
    if (nPrevFrames < 0 || nPrevFrames >= bufferSize_frames) {
        fprintf(stderr, "warning : invalid previous frames in record requested - %d. max allowed is %d frames\n", nPrevFrames, bufferSize_frames - 1);
        return false;
    }

    std::lock_guard<std::mutex> lock(data.mutex);

    if (data.nRecords == kMaxRecords) {
        fprintf(stderr, "warning : max number of simultaneous records %d reached\n", kMaxRecords);
        return false;
    }

    auto & record = data.records[data.nRecords];

    if (record.size() == 0) {
        // prepend the nPrevFrames frames that precede the current write position
        int fStart = data.bufferId - nPrevFrames;
        if (fStart < 0) fStart += data.buffer.size();
        for (int i = 0; i < nPrevFrames; ++i) {
            record.push_back(data.buffer[(fStart + i)%data.buffer.size()]);
        }
    } else {
        fprintf(stderr, "warning : new record requested before last has been processed. should never happen\n");
    }

    data.nFramesToRecord[data.nRecords] = bufferSize_frames - nPrevFrames;

    ++data.nRecords;

    return true;
}
// Pauses the capture device and zeroes the remaining-frame counters of all record
// slots. Always returns true.
bool AudioLogger::pause() {
    auto & state = getData();

    SDL_PauseAudioDevice(state.deviceIdIn, 1);

    state.nFramesToRecord.fill(0);

    return true;
}
// Unpauses the capture device, so the capture callback is invoked again.
// Always returns true.
bool AudioLogger::resume() {
    SDL_PauseAudioDevice(getData().deviceIdIn, 0);

    return true;
}
// Checks that a record length given in seconds is positive and does not exceed
// kMaxBufferSize_s. Prints an error and returns false otherwise.
bool AudioLogger::isValidBufferSize(float bufferSize_s) const {
    bool isValid = true;

    if (bufferSize_s <= 0) {
        fprintf(stderr, "error : invalid bufferSize_s = %g\n", bufferSize_s);
        isValid = false;
    } else if (bufferSize_s > kMaxBufferSize_s) {
        fprintf(stderr, "error : invalid record size requested - %g s. max allowed is %g s\n", bufferSize_s, kMaxBufferSize_s);
        isValid = false;
    }

    return isValid;
}
================================================
FILE: audio-logger.h
================================================
/*! \file audio-logger.h
* \brief Audio capture helper class
*
* Used by all kbd-audio tools.
* It provides a stream of captured audio via a provided callback.
*
* \author Georgi Gerganov
*/
#pragma once
#include "constants.h"
#include "common.h"
#include
#include
#include
#include
// Captures audio from an SDL capture device and delivers fixed-length records of
// captured frames to a user-provided callback.
//
// Typical use: install() to open the device, record() to request a record,
// pause() / resume() to temporarily stop capturing, terminate() to close the device.
class AudioLogger {
public:
// a single audio sample
using Sample = TSampleF;
// a block of samples - the unit in which captured audio is processed
using Frame = std::array;
// a sequence of frames
using Record = std::vector;
// invoked with a record once all of its frames have been captured
using Callback = std::function;
// Configuration passed to install(). The -1 defaults of captureId and sampleRate
// are rejected by install(), so these two have to be set explicitly.
struct Parameters {
// receives the completed records; may be left empty
Callback callback;
// index of the capture device, as enumerated by SDL
int32_t captureId = -1;
// requested number of channels; install() replaces it with the obtained number
int32_t nChannels = -1;
// requested sample rate
int64_t sampleRate = -1;
// Sample Type
// todo : support for other sample types
enum ESampleType {
F32SYS,
};
ESampleType sampleType = F32SYS;
// Audio Filter
// filter applied to every captured frame, and its cutoff frequency
EAudioFilter filter = FirstOrderHighPass;
float freqCutoff_Hz = 1000.0f;
};
AudioLogger();
~AudioLogger();
// Validates the parameters, opens the capture device and starts capturing.
// Returns false on failure.
bool install(Parameters && parameters);
// Pauses and closes the capture device.
bool terminate();
// Processes one captured frame. Called from the SDL capture callback.
// Returns false if install() has not completed yet.
bool addFrame(const Sample * stream);
// Requests a record of bufferSize_s seconds whose first nPrevFrames frames are
// taken from the frames captured just before the call. Returns false if the
// request is invalid or too many records are already in progress.
bool record(float bufferSize_s, int32_t nPrevFrames);
// Pauses capturing and zeroes the remaining-frame counters of all record slots.
bool pause();
// Resumes capturing after pause().
bool resume();
// Returns true if bufferSize_s is positive and not larger than kMaxBufferSize_s.
bool isValidBufferSize(float bufferSize_s) const;
private:
// implementation details are hidden in the .cpp file
struct Data;
std::unique_ptr data_;
Data & getData() { return *data_; }
const Data & getData() const { return *data_; }
};
================================================
FILE: build-vars.h.in
================================================
#pragma once
// Build metadata describing the git commit the binaries were built from.
// This file is a template: CMake's configure_file() replaces the placeholders
// between the at-signs and writes the result to build-vars.h.
// NOTE(review): these are non-const pointers defined in a header; including the
// generated file from more than one source file of the same program would
// presumably cause duplicate symbol errors at link time - confirm usage.

// abbreviated hash of the commit
const char * kGIT_SHA1 = "@GIT_SHA1@";
// author date of the commit
const char * kGIT_DATE = "@GIT_DATE@";
// subject line of the commit message
const char * kGIT_COMMIT_SUBJECT = "@GIT_COMMIT_SUBJECT@";
================================================
FILE: cmake/BuildTypes.cmake
================================================
# Add new build types

# kbd_audio_define_build_type(<CONFIG> <flags> <description>)
#
# Registers the cache variables of a custom build configuration:
#   <CONFIG>      - upper-case configuration name, e.g. RELEASEGG
#   <flags>       - C and C++ compiler flags of the configuration
#   <description> - text used to complete the cache variable help strings
#
# FORCE is kept on purpose.
# NOTE(review): when such a configuration is selected on the command line, CMake
# appears to pre-create the matching flag variables with empty values during
# project(); without FORCE those empty values would win - confirm before removing.
function(kbd_audio_define_build_type config flags description)
    set(CMAKE_CXX_FLAGS_${config}
        "${flags}"
        CACHE STRING "Flags used by the c++ compiler during ${description}."
        FORCE)
    set(CMAKE_C_FLAGS_${config}
        "${flags}"
        CACHE STRING "Flags used by the compiler during ${description}."
        FORCE)
    set(CMAKE_EXE_LINKER_FLAGS_${config}
        ""
        CACHE STRING "Flags used for linking binaries during ${description}."
        FORCE)
    set(CMAKE_SHARED_LINKER_FLAGS_${config}
        ""
        CACHE STRING "Flags used by the shared libraries linker during ${description}."
        FORCE)

    mark_as_advanced(
        CMAKE_CXX_FLAGS_${config}
        CMAKE_C_FLAGS_${config}
        CMAKE_EXE_LINKER_FLAGS_${config}
        CMAKE_SHARED_LINKER_FLAGS_${config})
endfunction()

# ReleaseGG - Release with enabled asserts
kbd_audio_define_build_type(RELEASEGG
    "-O3"
    "release builds with enabled asserts")

# RelWithDebInfoGG - RelWithDebInfo with enabled asserts
kbd_audio_define_build_type(RELWITHDEBINFOGG
    "-O2 -g"
    "release builds with debug symbols and enabled asserts")

# default to RelWithDebInfoGG when no build type was requested
if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE RelWithDebInfoGG CACHE STRING "Build type" FORCE)
    set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo" "ReleaseGG" "RelWithDebInfoGG")
endif()
================================================
FILE: cmake/FindFFTW.cmake
================================================
# - Find FFTW
# Find the native FFTW includes and library
#
#  FFTW_INCLUDE_DIRS - where to find fftw3.h
#  FFTW_LIBRARIES    - List of libraries when using FFTW.
#  FFTWF_LIBRARIES   - List of libraries when using FFTW single precision.
#  FFTW_FOUND        - True if FFTW found.
#
# FFTW is reported as found only when the header and both the double precision
# (fftw3) and the single precision (fftw3f) libraries are available.

if (FFTW_INCLUDE_DIRS)
    # Already in cache, be silent
    set (FFTW_FIND_QUIETLY TRUE)
endif()

find_path (FFTW_INCLUDE_DIRS fftw3.h)

# Each library is searched into its own cache variable. Combining a found and a
# "-NOTFOUND" entry in a single list used to defeat the found/not-found check below
# and could put a bogus "-NOTFOUND" entry on the link line.
find_library (FFTW_LIBRARY NAMES fftw3)
find_library (FFTWF_LIBRARIES NAMES fftw3f)

# handle the QUIETLY and REQUIRED arguments and set FFTW_FOUND to TRUE if
# all listed variables are TRUE
include (FindPackageHandleStandardArgs)
find_package_handle_standard_args (FFTW DEFAULT_MSG FFTW_LIBRARY FFTWF_LIBRARIES FFTW_INCLUDE_DIRS)

if (FFTW_FOUND)
    set (FFTW_LIBRARIES ${FFTW_LIBRARY} ${FFTWF_LIBRARIES})
else()
    # never expose a partially resolved list
    set (FFTW_LIBRARIES "")
endif()

mark_as_advanced (FFTW_LIBRARY FFTWF_LIBRARIES FFTW_INCLUDE_DIRS)
================================================
FILE: cmake/GitVars.cmake
================================================
# Collects information about the current git commit at configure time.
#
# Sets (each is an empty string when git is unavailable or the command fails):
#   GIT_SHA1           - abbreviated (8 characters) hash of the current commit
#   GIT_DATE           - author date of the current commit, in local time
#   GIT_COMMIT_SUBJECT - subject line of the current commit
find_package(Git)

set(GIT_SHA1 "")
set(GIT_DATE "")
set(GIT_COMMIT_SUBJECT "")

# Only invoke git when it was actually located; otherwise GIT_EXECUTABLE holds a
# "-NOTFOUND" placeholder and there is nothing meaningful to execute.
if(GIT_FOUND)
    # the commit's SHA1
    # (--match=NeVeRmAtCh is a pattern no tag is expected to match, so together
    # with --always `git describe` prints just the abbreviated hash)
    execute_process(COMMAND
        "${GIT_EXECUTABLE}" describe --match=NeVeRmAtCh --always --abbrev=8
        WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
        OUTPUT_VARIABLE GIT_SHA1
        ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)

    # the date of the commit
    execute_process(COMMAND
        "${GIT_EXECUTABLE}" log -1 --format=%ad --date=local
        WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
        OUTPUT_VARIABLE GIT_DATE
        ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)

    # the subject of the commit
    execute_process(COMMAND
        "${GIT_EXECUTABLE}" log -1 --format=%s
        WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
        OUTPUT_VARIABLE GIT_COMMIT_SUBJECT
        ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
endif()
================================================
FILE: cmake/sdl2/FindSDL2.cmake
================================================
# Distributed under the OSI-approved BSD 3-Clause License. See accompanying
# file Copyright.txt or https://cmake.org/licensing for details.
#.rst:
# FindSDL2
# -------
#
# Locate SDL2 library
#
# This module defines
#
# ::
#
# SDL2_LIBRARY, the name of the library to link against
# SDL2_FOUND, if false, do not try to link to SDL
# SDL2_INCLUDE_DIR, where to find SDL.h
# SDL2_VERSION_STRING, human-readable string containing the version of SDL
#
#
#
# This module responds to the flag:
#
# ::
#
# SDL2_BUILDING_LIBRARY
# If this is defined, then no SDL2_main will be linked in because
# only applications need main().
# Otherwise, it is assumed you are building an application and this
# module will attempt to locate and set the proper link flags
# as part of the returned SDL2_LIBRARY variable.
#
#
#
# Don't forget to include SDLmain.h and SDLmain.m in your project for the
# OS X framework based version. (Other versions link to -lSDLmain which
# this module will try to find on your behalf.) Also for OS X, this
# module will automatically add the -framework Cocoa on your behalf.
#
#
#
# Additional Note: If you see an empty SDL2_LIBRARY_TEMP in your
# configuration and no SDL2_LIBRARY, it means CMake did not find your SDL
# library (SDL.dll, libsdl.so, SDL.framework, etc). Set
# SDL2_LIBRARY_TEMP to point to your SDL library, and configure again.
# Similarly, if you see an empty SDLMAIN_LIBRARY, you should set this
# value as appropriate. These values are used to generate the final
# SDL2_LIBRARY variable, but when these values are unset, SDL2_LIBRARY
# does not get created.
#
#
#
# $SDLDIR is an environment variable that would correspond to the
# ./configure --prefix=$SDLDIR used in building SDL. l.e.galup 9-20-02
#
# Modified by Eric Wing. Added code to assist with automated building
# by using environmental variables and providing a more
# controlled/consistent search behavior. Added new modifications to
# recognize OS X frameworks and additional Unix paths (FreeBSD, etc).
# Also corrected the header search path to follow "proper" SDL
# guidelines. Added a search for SDLmain which is needed by some
# platforms. Added a search for threads which is needed by some
# platforms. Added needed compile switches for MinGW.
#
# On OSX, this will prefer the Framework version (if found) over others.
# People will have to manually change the cache values of SDL2_LIBRARY to
# override this selection or set the CMake environment
# CMAKE_INCLUDE_PATH to modify the search paths.
#
# Note that the header path has changed from SDL/SDL.h to just SDL.h
# This needed to change because "proper" SDL convention is #include
# "SDL.h", not <SDL/SDL.h>. This is done for portability reasons
# because not all systems place things in SDL/ (see FreeBSD).
# SDL2_DIR is a user-settable hint pointing at an SDL2 installation; create the
# cache entry (empty) when no value has been provided.
if(NOT SDL2_DIR)
  set(SDL2_DIR "" CACHE PATH "SDL2 directory")
endif()

# Header directory, identified by the presence of SDL_scancode.h. The hints are
# the SDLDIR environment variable and SDL2_DIR, each tried with the listed
# sub-directories.
find_path(SDL2_INCLUDE_DIR
  NAMES SDL_scancode.h
  HINTS
    ENV SDLDIR
    ${SDL2_DIR}
  PATH_SUFFIXES
    SDL2
    include/SDL2
    include
)

# Architecture-specific library sub-directory: lib/x64 when pointers are
# 8 bytes wide, lib/x86 otherwise.
set(VC_LIB_PATH_SUFFIX lib/x86)
if(CMAKE_SIZEOF_VOID_P EQUAL 8)
  set(VC_LIB_PATH_SUFFIX lib/x64)
endif()

# The SDL2 library itself, searched with the same hints as the headers.
find_library(SDL2_LIBRARY_TEMP
  NAMES SDL2
  HINTS
    ENV SDLDIR
    ${SDL2_DIR}
  PATH_SUFFIXES
    lib
    ${VC_LIB_PATH_SUFFIX}
)

# SDL2_LIBRARY_TEMP is an implementation detail, so mark the cache entry as
# INTERNAL to keep it out of the user-facing cache view. The variable meant for
# users is SDL2_LIBRARY, which is derived from SDL2_LIBRARY_TEMP further below.
set_property(CACHE SDL2_LIBRARY_TEMP PROPERTY TYPE INTERNAL)
# SDL2main is only searched for when an application is being built
# (SDL2_BUILDING_LIBRARY is not set) and the headers did not come from an
# OS X framework. Non-framework versions expect you to also link to SDL2main;
# this is mainly for Windows and OS X, while other (Unix) platforms seem to
# provide it for compatibility even though they don't necessarily need it.
if(NOT SDL2_BUILDING_LIBRARY AND NOT SDL2_INCLUDE_DIR MATCHES ".framework")
  find_library(SDL2MAIN_LIBRARY
    NAMES SDL2main
    HINTS
      ENV SDLDIR
      ${SDL2_DIR}
    PATH_SUFFIXES
      lib
      ${VC_LIB_PATH_SUFFIX}
    PATHS
      /sw
      /opt/local
      /opt/csw
      /opt
  )
endif()

# SDL may require threads on your system. The Apple build may not need an
# explicit flag because one of the frameworks may already provide it, so the
# CMake Threads package is only consulted on non-Apple systems.
if(NOT APPLE)
  find_package(Threads)
endif()

# MinGW needs an additional link flag, -mwindows.
# Its total link flags should look like -lmingw32 -lSDLmain -lSDL -mwindows
if(MINGW)
  set(MINGW32_LIBRARY mingw32 "-mwindows" CACHE STRING "link flags for MinGW")
endif()
# Assemble the final SDL2_LIBRARY value from the pieces located above. All
# additions are made on the temporary variable and the cache entry is written
# once at the end, so that the GUI reflects the final state.
if(SDL2_LIBRARY_TEMP)
  # Put SDL2main in front of the list unless it is already part of it.
  if(SDL2MAIN_LIBRARY AND NOT SDL2_BUILDING_LIBRARY)
    list(FIND SDL2_LIBRARY_TEMP "${SDL2MAIN_LIBRARY}" _SDL2_MAIN_INDEX)
    if(_SDL2_MAIN_INDEX EQUAL -1)
      list(INSERT SDL2_LIBRARY_TEMP 0 "${SDL2MAIN_LIBRARY}")
    endif()
    unset(_SDL2_MAIN_INDEX)
  endif()

  if(APPLE)
    # On OS X, SDL uses Cocoa as a backend so it must link to Cocoa.
    list(APPEND SDL2_LIBRARY_TEMP "-framework Cocoa")
  else()
    # Everywhere else, append whatever the Threads package reported. Apple is
    # skipped on purpose: it doesn't need this.
    list(APPEND SDL2_LIBRARY_TEMP ${CMAKE_THREAD_LIBS_INIT})
  endif()

  # MinGW: its extra link flags go in front of everything else.
  if(MINGW)
    set(SDL2_LIBRARY_TEMP ${MINGW32_LIBRARY} ${SDL2_LIBRARY_TEMP})
  endif()

  # Publish the final value.
  set(SDL2_LIBRARY ${SDL2_LIBRARY_TEMP} CACHE STRING "Where the SDL Library can be found")
endif()
# Extract the SDL2 version from the headers. SDL2 keeps the SDL 1.x file and
# macro names: the header is SDL_version.h and the macros are
# SDL_MAJOR_VERSION / SDL_MINOR_VERSION / SDL_PATCHLEVEL (there is no
# "SDL2_version.h"), so those are the names that must be searched for.
# Result: SDL2_VERSION_STRING = "<major>.<minor>.<patch>" (left unset when the
# header is not available).
if(SDL2_INCLUDE_DIR AND EXISTS "${SDL2_INCLUDE_DIR}/SDL_version.h")
  file(STRINGS "${SDL2_INCLUDE_DIR}/SDL_version.h" SDL2_VERSION_MAJOR_LINE REGEX "^#define[ \t]+SDL_MAJOR_VERSION[ \t]+[0-9]+$")
  file(STRINGS "${SDL2_INCLUDE_DIR}/SDL_version.h" SDL2_VERSION_MINOR_LINE REGEX "^#define[ \t]+SDL_MINOR_VERSION[ \t]+[0-9]+$")
  file(STRINGS "${SDL2_INCLUDE_DIR}/SDL_version.h" SDL2_VERSION_PATCH_LINE REGEX "^#define[ \t]+SDL_PATCHLEVEL[ \t]+[0-9]+$")
  string(REGEX REPLACE "^#define[ \t]+SDL_MAJOR_VERSION[ \t]+([0-9]+)$" "\\1" SDL2_VERSION_MAJOR "${SDL2_VERSION_MAJOR_LINE}")
  string(REGEX REPLACE "^#define[ \t]+SDL_MINOR_VERSION[ \t]+([0-9]+)$" "\\1" SDL2_VERSION_MINOR "${SDL2_VERSION_MINOR_LINE}")
  string(REGEX REPLACE "^#define[ \t]+SDL_PATCHLEVEL[ \t]+([0-9]+)$" "\\1" SDL2_VERSION_PATCH "${SDL2_VERSION_PATCH_LINE}")
  set(SDL2_VERSION_STRING "${SDL2_VERSION_MAJOR}.${SDL2_VERSION_MINOR}.${SDL2_VERSION_PATCH}")
  # Drop the helper variables so they do not leak into the including scope.
  unset(SDL2_VERSION_MAJOR_LINE)
  unset(SDL2_VERSION_MINOR_LINE)
  unset(SDL2_VERSION_PATCH_LINE)
  unset(SDL2_VERSION_MAJOR)
  unset(SDL2_VERSION_MINOR)
  unset(SDL2_VERSION_PATCH)
endif()
# Plural aliases following the usual find-module naming convention.
set(SDL2_LIBRARIES ${SDL2_LIBRARY})
set(SDL2_INCLUDE_DIRS ${SDL2_INCLUDE_DIR})

# The package name passed here must be the one used in find_package(SDL2 ...):
# it determines which <name>_FOUND variable is set and which
# <name>_FIND_REQUIRED / <name>_FIND_QUIETLY / version request is honoured.
# Include the helper module explicitly instead of relying on some other find
# module having loaded it earlier.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(SDL2
                                  REQUIRED_VARS SDL2_LIBRARIES SDL2_INCLUDE_DIRS
                                  VERSION_VAR SDL2_VERSION_STRING)

# Earlier revisions of this module reported the result through SDL_FOUND; keep
# that variable in sync for callers that still test it.
set(SDL_FOUND ${SDL2_FOUND})

mark_as_advanced(SDL2_LIBRARY SDL2_INCLUDE_DIR)
================================================
FILE: common-gui.cpp
================================================
/*! \file common-gui.cpp
* \brief Enter description here.
*/
#include "common-gui.h"
#ifdef __EMSCRIPTEN__
#include "emscripten.h"
#define IMGUI_IMPL_OPENGL_LOADER_GLEW
#endif
#include "imgui.h"
#include "imgui_impl_sdl.h"
#include "imgui_impl_opengl3.h"
#if defined(IMGUI_IMPL_OPENGL_LOADER_GL3W)
#include // Initialize with gl3wInit()
#elif defined(IMGUI_IMPL_OPENGL_LOADER_GLEW)
#include // Initialize with glewInit()
#elif defined(IMGUI_IMPL_OPENGL_LOADER_GLAD)
#include // Initialize with gladLoadGL()
#else
#include IMGUI_IMPL_OPENGL_LOADER_CUSTOM
#endif
namespace Gui {
bool init(const char * windowTitle, int windowSizeX, int windowSizeY, Objects & objects) {
#if __APPLE__
// GL 3.2 Core + GLSL 150
const char* glsl_version = "#version 150";
SDL_GL_SetAttribute(SDL_GL_CONTEXT_FLAGS, SDL_GL_CONTEXT_FORWARD_COMPATIBLE_FLAG);
SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE);
SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 3);
SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 2);
#elif __EMSCRIPTEN__
const char* glsl_version = "#version 100";
//const char* glsl_version = "#version 300 es";
SDL_GL_SetAttribute(SDL_GL_CONTEXT_FLAGS, 0);
SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_ES);
SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 2);
SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 0);
#else
// GL 3.0 + GLSL 130
const char* glsl_version = "#version 130";
SDL_GL_SetAttribute(SDL_GL_CONTEXT_FLAGS, 0);
SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE);
SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 3);
SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 0);
#endif
// Create window with graphics context
SDL_GL_SetAttribute(SDL_GL_DOUBLEBUFFER, 1);
SDL_GL_SetAttribute(SDL_GL_DEPTH_SIZE, 24);
SDL_GL_SetAttribute(SDL_GL_STENCIL_SIZE, 8);
SDL_DisplayMode current;
SDL_GetCurrentDisplayMode(0, ¤t);
#ifdef __EMSCRIPTEN__
SDL_Renderer *renderer;
SDL_CreateWindowAndRenderer(windowSizeX, windowSizeY, SDL_WINDOW_OPENGL, &objects.window, &renderer);
#else
objects.window = SDL_CreateWindow(windowTitle, SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, windowSizeX, windowSizeY, SDL_WINDOW_OPENGL|SDL_WINDOW_RESIZABLE|SDL_WINDOW_ALLOW_HIGHDPI);
#endif
objects.gl_context = SDL_GL_CreateContext(objects.window);
SDL_GL_MakeCurrent(objects.window, objects.gl_context);
SDL_GL_SetSwapInterval(1); // Enable vsync
// Initialize OpenGL loader
#if defined(IMGUI_IMPL_OPENGL_LOADER_GL3W)
bool err = gl3wInit() != 0;
#elif defined(IMGUI_IMPL_OPENGL_LOADER_GLEW)
bool err = glewInit() != GLEW_OK;
#elif defined(IMGUI_IMPL_OPENGL_LOADER_GLAD)
bool err = gladLoadGL() == 0;
#else
bool err = false; // If you use IMGUI_IMPL_OPENGL_LOADER_CUSTOM, your loader is likely to requires some form of initialization.
#endif
if (err) {
fprintf(stderr, "Failed to initialize OpenGL loader!\n");
return false;
}
// Setup Dear ImGui binding
IMGUI_CHECKVERSION();
ImGui::CreateContext();
ImGuiIO& io = ImGui::GetIO(); (void)io;
//io.ConfigFlags |= ImGuiConfigFlags_NavEnableKeyboard; // Enable Keyboard Controls
ImGuiStyle & style = ImGui::GetStyle();
style.AntiAliasedFill = false;
style.AntiAliasedLines = false;
style.WindowRounding = 0.0f;
style.WindowPadding = ImVec2(8, 8);
style.WindowRounding = 0.0f;
style.FramePadding = ImVec2(4, 3);
style.FrameRounding = 0.0f;
style.ItemSpacing = ImVec2(8, 4);
style.ItemInnerSpacing = ImVec2(4, 4);
style.IndentSpacing = 21.0f;
style.ScrollbarSize = 16.0f;
style.ScrollbarRounding = 9.0f;
style.GrabMinSize = 10.0f;
style.GrabRounding = 3.0f;
style.Colors[ImGuiCol_Text] = ImVec4(1.00f, 1.00f, 1.00f, 1.00f);
style.Colors[ImGuiCol_TextDisabled] = ImVec4(0.24f, 0.41f, 0.41f, 1.00f);
style.Colors[ImGuiCol_WindowBg] = ImVec4(0.08f, 0.08f, 0.08f, 0.94f);
//style.Colors[ImGuiCol_ChildWindowBg] = ImVec4(0.07f, 0.07f, 0.09f, 1.00f);
style.Colors[ImGuiCol_PopupBg] = ImVec4(0.07f, 0.07f, 0.09f, 1.00f);
style.Colors[ImGuiCol_Border] = ImVec4(0.31f, 0.31f, 0.31f, 0.71f);
style.Colors[ImGuiCol_BorderShadow] = ImVec4(0.00f, 0.00f, 0.00f, 0.00f);
style.Colors[ImGuiCol_FrameBg] = ImVec4(0.00f, 0.39f, 0.39f, 0.39f);
style.Colors[ImGuiCol_FrameBgHovered] = ImVec4(0.26f, 1.00f, 1.00f, 0.39f);
style.Colors[ImGuiCol_FrameBgActive] = ImVec4(0.00f, 0.78f, 0.00f, 1.00f);
style.Colors[ImGuiCol_TitleBg] = ImVec4(0.00f, 0.50f, 0.50f, 0.70f);
style.Colors[ImGuiCol_TitleBgCollapsed] = ImVec4(0.00f, 0.50f, 0.50f, 1.00f);
style.Colors[ImGuiCol_TitleBgActive] = ImVec4(0.00f, 0.70f, 0.70f, 1.00f);
style.Colors[ImGuiCol_MenuBarBg] = ImVec4(0.00f, 0.70f, 0.70f, 1.00f);
style.Colors[ImGuiCol_ScrollbarBg] = ImVec4(0.10f, 0.27f, 0.27f, 1.00f);
style.Colors[ImGuiCol_ScrollbarGrab] = ImVec4(0.80f, 0.80f, 0.83f, 0.31f);
style.Colors[ImGuiCol_ScrollbarGrabHovered] = ImVec4(0.26f, 1.00f, 1.00f, 0.39f);
style.Colors[ImGuiCol_ScrollbarGrabActive] = ImVec4(0.00f, 0.78f, 0.00f, 1.00f);
//style.Colors[ImGuiCol_ComboBg] = ImVec4(0.00f, 0.39f, 0.39f, 1.00f);
style.Colors[ImGuiCol_CheckMark] = ImVec4(0.80f, 0.80f, 0.83f, 0.39f);
style.Colors[ImGuiCol_SliderGrab] = ImVec4(0.80f, 0.80f, 0.83f, 0.39f);
style.Colors[ImGuiCol_SliderGrabActive] = ImVec4(0.00f, 0.78f, 0.00f, 1.00f);
style.Colors[ImGuiCol_Button] = ImVec4(0.13f, 0.55f, 0.55f, 1.00f);
style.Colors[ImGuiCol_ButtonHovered] = ImVec4(0.61f, 1.00f, 0.00f, 0.51f);
style.Colors[ImGuiCol_ButtonActive] = ImVec4(0.00f, 0.78f, 0.00f, 1.00f);
style.Colors[ImGuiCol_Header] = ImVec4(0.79f, 0.51f, 0.00f, 0.51f);
style.Colors[ImGuiCol_HeaderHovered] = ImVec4(0.79f, 0.51f, 0.00f, 0.67f);
style.Colors[ImGuiCol_HeaderActive] = ImVec4(0.79f, 0.51f, 0.00f, 0.67f);
//style.Colors[ImGuiCol_Column] = ImVec4(0.79f, 0.51f, 0.00f, 0.67f);
//style.Colors[ImGuiCol_ColumnHovered] = ImVec4(0.25f, 1.00f, 0.00f, 1.00f);
//style.Colors[ImGuiCol_ColumnActive] = ImVec4(0.79f, 0.51f, 0.00f, 0.67f);
style.Colors[ImGuiCol_ResizeGrip] = ImVec4(0.00f, 0.00f, 0.00f, 0.00f);
style.Colors[ImGuiCol_ResizeGripHovered] = ImVec4(0.26f, 1.00f, 1.00f, 0.39f);
style.Colors[ImGuiCol_ResizeGripActive] = ImVec4(0.00f, 0.78f, 0.00f, 1.00f);
//style.Colors[ImGuiCol_CloseButton] = ImVec4(0.40f, 0.39f, 0.38f, 0.16f);
//style.Colors[ImGuiCol_CloseButtonHovered] = ImVec4(0.26f, 1.00f, 1.00f, 0.39f);
//style.Colors[ImGuiCol_CloseButtonActive] = ImVec4(0.79f, 0.51f, 0.00f, 0.67f);
style.Colors[ImGuiCol_PlotLines] = ImVec4(1.00f, 0.65f, 0.38f, 0.67f);
style.Colors[ImGuiCol_PlotLinesHovered] = ImVec4(0.25f, 1.00f, 0.00f, 1.00f);
style.Colors[ImGuiCol_PlotHistogram] = ImVec4(1.00f, 0.65f, 0.38f, 0.67f);
style.Colors[ImGuiCol_PlotHistogramHovered] = ImVec4(0.25f, 1.00f, 0.00f, 1.00f);
style.Colors[ImGuiCol_TextSelectedBg] = ImVec4(0.25f, 1.00f, 0.00f, 0.43f);
style.Colors[ImGuiCol_ModalWindowDarkening] = ImVec4(1.00f, 0.98f, 0.95f, 0.78f);
ImGui_ImplSDL2_InitForOpenGL(objects.window, objects.gl_context);
ImGui_ImplOpenGL3_Init(glsl_version);
return true;
}
bool render(const Objects & objects) {
ImGui::Render();
SDL_GL_MakeCurrent(objects.window, objects.gl_context);
glViewport(0, 0, (int) ImGui::GetIO().DisplaySize.x, (int) ImGui::GetIO().DisplaySize.y);
glClearColor(0.0f, 0.0f, 0.0f, 1.0f);
glClear(GL_COLOR_BUFFER_BIT);
ImGui_ImplOpenGL3_RenderDrawData(ImGui::GetDrawData());
SDL_GL_SwapWindow(objects.window);
return true;
}
// Tears down everything created by init(): first the ImGui backends and
// context, then the GL context and window, and finally SDL itself.
// Always returns true.
bool free(const Objects & objects) {
    // ImGui: renderer backend, platform backend, then the context.
    ImGui_ImplOpenGL3_Shutdown();
    ImGui_ImplSDL2_Shutdown();
    ImGui::DestroyContext();

    // SDL: GL context before the window it belongs to.
    SDL_GL_DeleteContext(objects.gl_context);
    SDL_DestroyWindow(objects.window);
    SDL_Quit();

    return true;
}
}
================================================
FILE: common-gui.h
================================================
/*! \file common-gui.h
* \brief Enter description here.
*/
#pragma once
#include "common.h"
#include
// Small helper layer around the SDL window / OpenGL context / Dear ImGui setup.
namespace Gui {
// Handles owned by the GUI layer; both are null until init() fills them in.
struct Objects {
// The application window.
SDL_Window * window = nullptr;
// The OpenGL context created for `window`.
SDL_GLContext gl_context = nullptr;
};
// Creates the window and GL context (stored in `objects`) and initializes ImGui.
// Returns false on failure.
bool init(const char * windowTitle, int windowSizeX, int windowSizeY, Objects & objects);
// Renders the current ImGui frame into the window and swaps buffers.
bool render(const Objects & objects);
// Shuts down ImGui and destroys the GL context and window held in `objects`.
bool free(const Objects & objects);
}
================================================
FILE: common.cpp
================================================
/*! \file common.cpp
* \brief Enter description here.
*/
#include "common.h"
#include "constants.h"
#include
#include
#include
#include
#include
#include
#include
#ifndef pi
#define pi 3.1415926535897932384626433832795
#endif
#ifndef sqrt2
#define sqrt2 (2.0 * 0.707106781186547524401)
#endif
#ifndef sqrt2over2
#define sqrt2over2 0.707106781186547524401
#endif
namespace {
// Reads `size` bytes of samples from `fin` and stores them in `res` starting at
// element index `offset`; `res` is grown to hold them.
//
// NOTE(review): the template parameter list and the std::is_same<...> /
// std::vector<...> / std::numeric_limits<...> arguments are not visible in this
// copy of the file. From the names used, the parameters appear to be the
// on-disk sample type (TSampleInput) and the in-memory sample type (TSample) -
// confirm against the original source.
//
// Returns false when the sample type combination is not handled, true otherwise.
// NOTE(review): the result of fin.read() is not checked, so a short read is
// not reported to the caller.
template
bool readWaveform(std::ifstream & fin, TWaveformT & res, int32_t offset, std::streamsize size) {
if (std::is_same::value) {
// Converting branch: read into a temporary buffer, then normalize by the
// buffer's absolute maximum and scale to the numeric_limits maximum.
std::vector buf(size/sizeof(TSampleInput));
res.resize(offset + size/sizeof(TSampleInput));
fin.read((char *)(buf.data()), size);
double amax = calcAbsMax(buf);
// Avoid dividing by zero for an all-zero buffer.
double iamax = amax != 0.0 ? 1.0/amax : 1.0;
for (auto i = 0; i < (int) buf.size(); ++i) res[offset + i] = std::round(std::numeric_limits::max()*(buf[i]*iamax));
} else if (std::is_same::value) {
// Pass-through branch: the bytes are read straight into the destination.
res.resize(offset + size/sizeof(TSample));
fin.read((char *)(res.data() + offset), size);
} else {
return false;
}
return true;
}
}
// Reciprocal of RAND_MAX, used to map rand() onto [0, 1].
constexpr float iRAND_MAX = 1.0f/float(RAND_MAX);

// Uniform pseudo-random float in the closed interval [0, 1], based on rand().
float frand() { return ((float)rand())*iRAND_MAX; }

// Normally distributed pseudo-random float with mean `mu` and standard
// deviation `sigma`, generated with the Box-Muller transform.
//
// Each transform yields two values; the second one is cached per thread and
// returned by the next call, so only every other call draws new uniforms.
float frandGaussian(float mu, float sigma) {
    static const float two_pi = 2.0*3.14159265358979323846;

    // Per-thread state: the cached second value and the "draw new pair" toggle.
    thread_local float z1;
    thread_local bool generate;
    generate = !generate;

    if (!generate)
        return z1 * sigma + mu;

    // frand() may return exactly 1.0f, which would give log(1 - u1) = log(0) = -inf
    // and a non-finite result. Redraw in that (rare) case.
    float u1 = frand();
    while (u1 >= 1.0f) {
        u1 = frand();
    }
    float u2 = frand();

    float t = sqrt(-2.0f * log(1.0f - u1));

    float z0 = t*cos(two_pi*u2);
    z1 = t*sin(two_pi*u2);

    return z0 * sigma + mu;
}
// Returns the time elapsed since the system_clock epoch as an integer count.
// NOTE(review): the duration_cast target type is not visible in this copy of
// the file; the function name suggests milliseconds - confirm.
uint64_t t_ms() {
return std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()).count(); // duh ..
}
// Parses single-letter command line options of the form "-kVALUE".
//
// For every argument (argv[0] is skipped) that starts with '-' and has at
// least one more character, the character right after the '-' becomes the key
// and the remainder of that same argument becomes the value (an empty string
// when nothing follows). Arguments not starting with '-' are ignored, so a
// value given as a separate argument ("-k VALUE") is not captured.
// If the same key is given more than once, the last occurrence wins.
std::map parseCmdArguments(int argc, char ** argv) {
int last = argc;
std::map res;
for (int i = 1; i < last; ++i) {
if (argv[i][0] == '-') {
// A lone "-" carries no key and is skipped.
if (strlen(argv[i]) > 1) {
res[std::string(1, argv[i][1])] = strlen(argv[i]) > 2 ? argv[i] + 2 : "";
}
}
}
return res;
}
// Converts waveform `src` into `dst` (resized to src.size()): every sample is
// divided by the absolute maximum of `src`, multiplied by a
// std::numeric_limits<...>::max() value and rounded. Always returns true.
//
// NOTE(review): the template parameter list and the types checked by the
// static_asserts are not visible in this copy of the file; per the assert
// messages the source and destination sample types must differ and each must
// be one of the supported types.
template
bool convert(const TWaveformT & src, TWaveformT & dst) {
static_assert(std::is_same::value == false, "Required different sample types");
static_assert(std::is_same::value, "Source sample type not supported");
static_assert(std::is_same::value, "Destination sample type not supported");
dst.resize(src.size());
double amax = calcAbsMax(src);
// Avoid dividing by zero for an all-zero waveform.
double iamax = amax != 0.0 ? 1.0/amax : 1.0;
for (auto i = 0; i < (int) src.size(); ++i) dst[i] = std::round(std::numeric_limits::max()*(src[i]*iamax));
return true;
}
// Explicit instantiation (template arguments not visible in this copy).
template bool convert(const TWaveformT & src, TWaveformT & dst);
// Applies the selected high-pass filter to `waveform` in place.
//
//   filterId      - which filter to run; EAudioFilter::None leaves the data untouched
//   freqCutoff_Hz - cutoff frequency handed to the coefficient calculation
//   sampleRate    - sample rate handed to the coefficient calculation
//
// The coefficient object also carries the filter state, so the samples are
// processed in order with one coefficient object per call.
// NOTE(review): the coefficient functions in this file take `int fc, int fs`,
// so the float cutoff and the int64_t sample rate are converted to int at the
// call sites below.
template
void filter(TWaveformT & waveform, EAudioFilter filterId, float freqCutoff_Hz, int64_t sampleRate) {
switch (filterId) {
case EAudioFilter::None:
{
return;
}
break;
case EAudioFilter::FirstOrderHighPass:
{
auto filterCoefficients = ::calculateCoefficientsFirstOrderHighPass(freqCutoff_Hz, sampleRate);
for (auto & s : waveform) {
s = ::filterFirstOrderHighPass(filterCoefficients, s);
}
return;
}
break;
case EAudioFilter::SecondOrderButterworthHighPass:
{
auto filterCoefficients = ::calculateCoefficientsSecondOrderButterworthHighPass(freqCutoff_Hz, sampleRate);
for (auto & s : waveform) {
s = ::filterSecondOrderButterworthHighPass(filterCoefficients, s);
}
return;
}
break;
}
// Only reached when filterId matches none of the cases above.
// NOTE(review): filterId is passed to a %d conversion as-is; if EAudioFilter
// is a scoped enum this needs an explicit cast - confirm.
fprintf(stderr, "Unknown filter type: %d\n", filterId);
}
// Explicit instantiation (template arguments not visible in this copy).
template void filter(TWaveformT & waveform, EAudioFilter filterId, float freqCutoff_Hz, int64_t sampleRate);
// Returns the largest absolute sample value in `waveform` as a double
// (0.0 for an empty waveform).
template
double calcAbsMax(const TWaveformT & waveform) {
double amax = 0.0f;
for (auto i = 0; i < (int) waveform.size(); ++i) if (std::abs(waveform[i]) > amax) amax = std::abs(waveform[i]);
return amax;
}
// Explicit instantiation (template arguments not visible in this copy).
template double calcAbsMax(const TWaveformT & waveform);
// Writes the raw sample bytes of `waveform` to the binary file `fname` and
// prints the amount of data written (in MB) to stdout.
// Returns false when the output stream is not in a good state after opening,
// true otherwise.
// NOTE(review): the stream state after write() is not checked.
template
bool saveToFile(const std::string & fname, TWaveformT & waveform) {
static_assert(std::is_same::value, "Sample type not supported");
std::ofstream fout(fname, std::ios::binary);
if (fout.good() == false) {
return false;
}
auto totalSize_bytes = sizeof(TSample)*waveform.size();
fout.write((char *)(waveform.data()), totalSize_bytes);
fout.close();
printf("Total data saved: %g MB\n", ((float)(totalSize_bytes)/1024.0f/1024.0f));
return true;
}
// Explicit instantiation (template arguments not visible in this copy).
template bool saveToFile(const std::string & fname, TWaveformT & waveform);
// Reads the whole binary file `fname` as one block of samples into `res`
// (via readWaveform, starting at offset 0).
// Returns false when the stream is not in a good state after opening or when
// readWaveform does not support the sample type combination.
template
bool readFromFile(const std::string & fname, TWaveformT & res) {
// Open positioned at the end so that tellg() yields the file size.
std::ifstream fin(fname, std::ios::binary | std::ios::ate);
if (fin.good() == false) {
return false;
}
{
std::streamsize size = fin.tellg();
fin.seekg(0, std::ios::beg);
static_assert(std::is_same::value, "TSampleInput not supported");
static_assert(std::is_same::value ||
std::is_same::value, "TSample not supported");
if (readWaveform(fin, res, 0, size) == false) {
return false;
}
}
fin.close();
return true;
}
// Explicit instantiations (template arguments not visible in this copy).
template bool readFromFile(const std::string & fname, TWaveformT & res);
template bool readFromFile(const std::string & fname, TWaveformT & res);
// Reads a recording that consists of a header followed by (key, samples) records.
//
// File layout as consumed below:
//   - bufferSize_frames, read as sizeof(bufferSize_frames) raw bytes
//   - repeated until end of file:
//       - one TKey value (the key belonging to the record)
//       - bufferSize_frames*kSamplesPerFrame samples of type TSampleInput
//
// The keys are collected in `trainKeys` (cleared first) and the samples of all
// records are stored back to back in `res`.
// Returns false when the stream is not in a good state after opening or when
// readWaveform does not support the sample type combination.
template
bool readFromFile(const std::string & fname, TWaveformT & res, TTrainKeys & trainKeys, int32_t & bufferSize_frames) {
trainKeys.clear();
std::ifstream fin(fname, std::ios::binary);
if (fin.good() == false) {
return false;
}
// Value kept if the header read below does not overwrite it.
bufferSize_frames = 1;
fin.read((char *)(&bufferSize_frames), sizeof(bufferSize_frames));
{
static_assert(std::is_same::value, "TSampleInput not supported");
static_assert(std::is_same::value ||
std::is_same::value, "TSample not supported");
// Index in `res` where the next record's samples start.
int32_t offset = 0;
// Size in bytes of the sample payload of one record.
std::streamsize size = bufferSize_frames*kSamplesPerFrame*sizeof(TSampleInput);
while (true) {
TKey keyPressed = 0;
fin.read((char *)(&keyPressed), sizeof(keyPressed));
// EOF while reading the key means there are no more records.
if (fin.eof()) break;
trainKeys.push_back(keyPressed);
if (readWaveform(fin, res, offset, size) == false) {
return false;
}
offset += size/sizeof(TSampleInput);
if (fin.eof()) break;
}
}
fin.close();
return true;
}
// Explicit instantiation (template arguments not visible in this copy).
template bool readFromFile(const std::string & fname, TWaveformT & res, TTrainKeys & trainKeys, int32_t & bufferSize_frames);
//
// filters
//
// Computes the coefficients of a first-order high-pass filter.
//
//   fc - cutoff frequency
//   fs - sample rate (same unit as fc)
//
// The second-order terms (a2, b2) are zero for this filter.
TFilterCoefficients calculateCoefficientsFirstOrderHighPass(int fc, int fs) {
    // Normalized angular cutoff frequency and the pole parameter derived from it.
    float theta = 2.0 * pi * fc / fs;
    float gamma = cos(theta) / (1.0 + sin(theta));

    // Feed-forward gain shared (with opposite sign) by a0 and a1.
    const auto gain = (1.0 + gamma) / 2.0;

    TFilterCoefficients coeffs;
    coeffs.a0 = gain;
    coeffs.a1 = -gain;
    coeffs.a2 = 0.0;
    coeffs.b1 = -gamma;
    coeffs.b2 = 0.0;

    return coeffs;
}
// Computes the coefficients of a second-order Butterworth high-pass filter.
//
//   fc - cutoff frequency
//   fs - sample rate (same unit as fc)
TFilterCoefficients calculateCoefficientsSecondOrderButterworthHighPass(int fc, int fs) {
    float c = tan(pi*fc / fs);

    // Terms that appear in several coefficients.
    const auto cSquared = pow(c, 2.0);
    const auto sqrt2c = sqrt2*c;

    TFilterCoefficients coeffs;
    coeffs.a0 = 1.0 / (1.0 + sqrt2c + cSquared);
    coeffs.a1 = -2.0 * coeffs.a0;
    coeffs.a2 = coeffs.a0;
    coeffs.b1 = 2.0 * coeffs.a0*(cSquared - 1.0);
    coeffs.b2 = coeffs.a0 * (1.0 - sqrt2c + cSquared);

    return coeffs;
}
// Runs one sample through the filter described by `coefficients` and returns
// the filtered sample. The previous two inputs (xnz1, xnz2) and outputs
// (ynz1, ynz2) are kept inside `coefficients` and updated on every call, so
// the same object must be used for consecutive samples of one signal.
TSampleF filterFirstOrderHighPass(TFilterCoefficients & coefficients, TSampleF sample) {
    auto & c = coefficients;

    // Difference equation: feed-forward terms minus feedback terms.
    const TSampleF input = sample;
    const TSampleF output =
        c.a0*input + c.a1*c.xnz1 + c.a2*c.xnz2 -
        c.b1*c.ynz1 - c.b2*c.ynz2;

    // Shift the input history by one sample.
    c.xnz2 = c.xnz1;
    c.xnz1 = input;

    // Shift the output history by one sample.
    c.ynz2 = c.ynz1;
    c.ynz1 = output;

    return output;
}
// Runs one sample through the second-order filter described by `coefficients`
// and returns the filtered sample. The previous two inputs (xnz1, xnz2) and
// outputs (ynz1, ynz2) are kept inside `coefficients` and updated on every
// call, so the same object must be used for consecutive samples of one signal.
TSampleF filterSecondOrderButterworthHighPass(TFilterCoefficients & coefficients, TSampleF sample) {
    auto & c = coefficients;

    // Difference equation: feed-forward terms minus feedback terms.
    const TSampleF input = sample;
    const TSampleF output =
        c.a0*input + c.a1*c.xnz1 + c.a2*c.xnz2 -
        c.b1*c.ynz1 - c.b2*c.ynz2;

    // Shift the input history by one sample.
    c.xnz2 = c.xnz1;
    c.xnz1 = input;

    // Shift the output history by one sample.
    c.ynz2 = c.ynz1;
    c.ynz1 = output;

    return output;
}
//
// calcSum
//
// Returns (sum, sum of squares) of the samples waveform[is0] .. waveform[is1 - 1],
// accumulated in double precision.
// No bounds checking is done here: the caller must pass a range that lies
// inside `waveform`.
std::tuple calcSum(const TKeyWaveformF & waveform, int is0, int is1) {
double sum = 0.0f;
double sum2 = 0.0f;
for (int is = is0; is < is1; ++is) {
auto a0 = waveform[is];
sum += a0;
sum2 += a0*a0;
}
return std::tuple(sum, sum2);
}
// Specialization for multi-valued samples: returns (sum, sum of squares) over
// all TSampleMI16::N components of all `waveform.n` samples in the view,
// accumulated in 64-bit integers.
// NOTE(review): the template arguments of this specialization are not visible
// in this copy of the file.
template<>
std::tuple calcSum(const TWaveformViewT & waveform) {
int64_t sum = 0;
int64_t sum2 = 0;
auto samples = waveform.samples;
auto n = waveform.n;
for (int is = 0; is < n; ++is) {
for (int j = 0; j < TSampleMI16::N; j++) {
// The square below is computed in 32-bit arithmetic before it is added.
int32_t a0 = samples[is][j];
sum += a0;
sum2 += a0*a0;
}
}
return std::tuple(sum, sum2);
}
// Generic version: returns (sum, sum of squares) over the `waveform.n` samples
// of the view, accumulated in 64-bit integers.
template
std::tuple calcSum(const TWaveformViewT & waveform) {
int64_t sum = 0;
int64_t sum2 = 0;
auto samples = waveform.samples;
auto n = waveform.n;
for (int is = 0; is < n; ++is) {
// The square below is computed in 32-bit arithmetic before it is added.
int32_t a0 = samples[is];
sum += a0;
sum2 += a0*a0;
}
return std::tuple(sum, sum2);
}
// calcSum : explicit instantiation (template arguments not visible in this copy)
template std::tuple calcSum(const TWaveformViewT & waveform);
//
// calcCC
//
// Computes the correlation coefficient between a window of `waveform0` and a
// window of `waveform1`:
//
//   cc = (n*S01 - S0*S1) / sqrt((n*S00 - S0*S0) * (n*S11 - S1*S1))
//
// where n = is1 - is0, S0/S00 are the sum and sum of squares of the waveform0
// window (passed in as `sum0` / `sum02`, precomputed by the caller) and
// S1/S11/S01 are accumulated here.
//
//   is00     - first sample of the window in waveform0
//   is0, is1 - window [is0, is1) in waveform1
//
// No bounds checks are done unless MY_DEBUG is defined (then violations are
// only reported). The result is not finite when a window has zero variance.
TValueCC calcCC(
        const TKeyWaveformF & waveform0,
        const TKeyWaveformF & waveform1,
        double sum0, double sum02,
        int is00, int is0, int is1) {
    const int ncc = is1 - is0;

    double sum1 = 0.0f;
    double sum12 = 0.0f;
    double sum01 = 0.0f;

    for (int k = 0; k < ncc; ++k) {
        auto a0 = waveform0[is00 + k];
        auto a1 = waveform1[is0 + k];

#ifdef MY_DEBUG
        if (is00 + k < 0 || is00 + k >= waveform0.size()) printf("BUG 0\n");
        if (is0 + k < 0 || is0 + k >= waveform1.size()) {
            printf("BUG 1\n");
            printf("%d %d %d\n", is0, k, (int) waveform1.size());
        }
#endif

        sum1 += a1;
        sum12 += a1*a1;
        sum01 += a0*a1;
    }

    const double nom = sum01*ncc - sum0*sum1;
    const double den2a = sum02*ncc - sum0*sum0;
    const double den2b = sum12*ncc - sum1*sum1;

    const TValueCC cc = (nom)/(sqrt(den2a*den2b));
    return cc;
}
// Specialization for multi-valued samples: correlation coefficient between two
// waveform views, taken over all TSampleMI16::N components of the first
// min(n0, n1) samples.
//
//   sum0, sum02 - sum and sum of squares of `waveform0` (precomputed by the caller)
//
// Computes (n*S01 - S0*S1) / sqrt((n*S00 - S0*S0) * (n*S11 - S1*S1)), with n
// being the number of components that were accumulated. The products and
// differences are evaluated in 64-bit integer arithmetic before being
// converted to double. The result is not finite when a view has zero variance.
// NOTE(review): the template arguments of this specialization are not visible
// in this copy of the file.
template<>
TValueCC calcCC(
const TWaveformViewT & waveform0,
const TWaveformViewT & waveform1,
int64_t sum0, int64_t sum02) {
TValueCC cc = -1.0f;
int64_t sum1 = 0;
int64_t sum12 = 0;
int64_t sum01 = 0;
auto samples0 = waveform0.samples;
auto n0 = waveform0.n;
auto samples1 = waveform1.samples;
auto n1 = waveform1.n;
#ifdef MY_DEBUG
// Both views are expected to have the same length.
if (n0 != n1) {
printf("BUG 234f8273\n");
}
#endif
auto n = std::min(n0, n1);
for (int64_t is = 0; is < n; ++is) {
for (int j = 0; j < TSampleMI16::N; j++) {
int32_t a0 = samples0[is][j];
int32_t a1 = samples1[is][j];
sum1 += a1;
sum12 += a1*a1;
sum01 += a0*a1;
}
}
// From here on n counts individual components rather than samples.
n *= TSampleMI16::N;
{
double nom = sum01*n - sum0*sum1;
double den2a = sum02*n - sum0*sum0;
double den2b = sum12*n - sum1*sum1;
cc = (nom)/(sqrt(den2a*den2b));
}
return cc;
}
// Generic version: correlation coefficient between two waveform views, taken
// over the first min(n0, n1) samples.
//
//   sum0, sum02 - sum and sum of squares of `waveform0` (precomputed by the caller)
//
// Computes (n*S01 - S0*S1) / sqrt((n*S00 - S0*S0) * (n*S11 - S1*S1)). The
// products and differences are evaluated in 64-bit integer arithmetic before
// being converted to double. The result is not finite when a view has zero
// variance.
template
TValueCC calcCC(
const TWaveformViewT & waveform0,
const TWaveformViewT & waveform1,
int64_t sum0, int64_t sum02) {
TValueCC cc = -1.0f;
int64_t sum1 = 0;
int64_t sum12 = 0;
int64_t sum01 = 0;
auto samples0 = waveform0.samples;
auto n0 = waveform0.n;
auto samples1 = waveform1.samples;
auto n1 = waveform1.n;
#ifdef MY_DEBUG
// Both views are expected to have the same length.
if (n0 != n1) {
printf("BUG 234f8273\n");
}
#endif
auto n = std::min(n0, n1);
for (int64_t is = 0; is < n; ++is) {
int32_t a0 = samples0[is];
int32_t a1 = samples1[is];
sum1 += a1;
sum12 += a1*a1;
sum01 += a0*a1;
}
{
double nom = sum01*n - sum0*sum1;
double den2a = sum02*n - sum0*sum0;
double den2b = sum12*n - sum1*sum1;
cc = (nom)/(sqrt(den2a*den2b));
}
return cc;
}
// calcCC : explicit instantiation (template arguments not visible in this copy)
template TValueCC calcCC(
const TWaveformViewT & waveform0,
const TWaveformViewT & waveform1,
int64_t sum0, int64_t sum02);
//
// findBestCC
//
// Searches for the alignment offset that maximizes the correlation coefficient
// between the two waveforms.
//
// The reference window is taken from the middle of `waveform0` and has length
// is1 - is0. It is compared against the windows [is0 + o, is1 + o) of
// `waveform1` for every offset o in [-alignWindow, alignWindow].
//
// Returns (best correlation coefficient, offset at which it was found);
// (-1.0, -1) when no offset produced a coefficient above -1.0.
//
// The offsets are distributed round-robin over the worker threads; each worker
// tracks its local best and merges it into the shared result under a mutex.
//
// NOTE(review): in the __EMSCRIPTEN__ build only nWorkers is computed and no
// offsets are evaluated in the code visible here, so the initial (-1.0, -1) is
// returned - confirm against the original source.
// NOTE(review): std::thread::hardware_concurrency() may return 0, in which
// case no workers are started and the initial (-1.0, -1) is returned.
std::tuple findBestCC(
const TKeyWaveformF & waveform0,
const TKeyWaveformF & waveform1,
int is0, int is1,
int alignWindow) {
TOffset besto = -1;
TValueCC bestcc = -1.0f;
// Start of the reference window, centered in waveform0.
int is00 = waveform0.size()/2 - (is1 - is0)/2;
// Sum and sum of squares of the reference window are offset independent, so
// they are computed once up front.
auto ret = calcSum(waveform0, is00, is00 + is1 - is0);
auto sum0 = std::get<0>(ret);
auto sum02 = std::get<1>(ret);
#ifdef __EMSCRIPTEN__
int nWorkers = std::min(4, std::max(1, int(std::thread::hardware_concurrency()) - 2));
#else
// At most 4 worker threads.
int nWorkers = std::min(4u, std::thread::hardware_concurrency());
std::mutex mutex;
std::vector workers(nWorkers);
for (int i = 0; i < (int) workers.size(); ++i) {
auto & worker = workers[i];
worker = std::thread([&, sum0 = sum0, sum02 = sum02, i]() {
TOffset cbesto = -1;
TValueCC cbestcc = -1.0f;
// Worker i handles offsets -alignWindow + i, -alignWindow + i + nWorkers, ...
for (int o = -alignWindow + i; o <= alignWindow; o += nWorkers) {
auto cc = calcCC(waveform0, waveform1, sum0, sum02, is00, is0 + o, is1 + o);
if (cc > cbestcc) {
cbesto = o;
cbestcc = cc;
}
}
{
// Merge the local best into the shared result.
std::lock_guard lock(mutex);
if (cbestcc > bestcc) {
bestcc = cbestcc;
besto = cbesto;
}
}
});
}
for (auto & worker : workers) worker.join();
#endif
return std::tuple(bestcc, besto);
}
// Searches for the alignment offset that maximizes the correlation coefficient
// between `waveform0` and a sliding window of `waveform1`.
//
// `waveform1` is expected to be 2*alignWindow samples longer than `waveform0`
// (checked only when MY_DEBUG is defined). The window of length waveform0.n is
// moved over waveform1 in steps of one sample, from position 0 to
// 2*alignWindow.
//
// Returns (best correlation coefficient, offset), where the offset is the
// window position relative to the centered position, i.e. in
// [-alignWindow, alignWindow]; (-1.0, -1) when no position produced a
// coefficient above -1.0.
template
std::tuple findBestCC(
const TWaveformViewT & waveform0,
const TWaveformViewT & waveform1,
int64_t alignWindow) {
TValueCC bestcc = -1.0;
TOffset besto = -1;
//auto samples0 = waveform0.samples;
auto n0 = waveform0.n;
auto samples1 = waveform1.samples;
#ifdef MY_DEBUG
auto n1 = waveform1.n;
if (n0 + 2*alignWindow != n1) {
printf("BUG 924830jm92, n0 = %d, a = %d\n", (int) n0, (int) alignWindow);
}
#endif
// Sum and sum of squares of waveform0 do not depend on the offset.
auto ret = calcSum(waveform0);
auto sum0 = std::get<0>(ret);
auto sum02 = std::get<1>(ret);
for (int o = 0; o <= 2*alignWindow; ++o) {
auto cc = calcCC(waveform0, { samples1 + o, n0 }, sum0, sum02);
if (cc > bestcc) {
// Convert the window position into an offset relative to the center.
besto = o - alignWindow;
bestcc = cc;
}
}
return std::tuple(bestcc, besto);
}
// findBestCC : explicit instantiation (template arguments not visible in this copy)
template std::tuple findBestCC(
const TWaveformViewT & waveform0,
const TWaveformViewT & waveform1,
int64_t alignWindow);
//
// calculateSimilarityMap
//
// Specialization (template arguments not visible in this copy of the file).
//
// Fills `res` with an nPresses x nPresses matrix holding, for every pair of
// key presses, the best correlation coefficient and the alignment offset at
// which it was found.
//
//   keyPressWidth_samples  - half width w of the compared window (2*w samples)
//   alignWindow_samples    - maximum alignment shift a tried in each direction
//   offsetFromPeak_samples - shift of the window center relative to the key press position
//   keyPresses             - key presses; their ccAvg field is updated
//   res                    - output matrix (cleared and resized here)
//
// Only the pairs (i, j) with j > i are computed; the result is mirrored into
// (j, i) with the offset negated. The diagonal is set to cc = 1, offset = 0.
// Rows are distributed round-robin over the worker threads. Always returns true.
//
// NOTE(review): ccAvg of key press i only accumulates the pairs with j > i but
// is divided by nPresses - 1 - confirm that this is intended.
// NOTE(review): for nPresses == 1 the division below has a zero divisor.
// NOTE(review): std::thread::hardware_concurrency() may return 0 in the
// non-Emscripten build; no workers are started then and `res` stays
// default-initialized.
template<>
bool calculateSimilartyMap(
const int32_t keyPressWidth_samples,
const int32_t alignWindow_samples,
const int32_t offsetFromPeak_samples,
TKeyPressCollectionT & keyPresses,
TSimilarityMap & res) {
int nPresses = keyPresses.size();
int w = keyPressWidth_samples;
int a = alignWindow_samples;
res.clear();
res.resize(nPresses);
for (auto & x : res) x.resize(nPresses);
#ifdef __EMSCRIPTEN__
int nWorkers = std::min(kMaxThreads, std::max(1, int(std::thread::hardware_concurrency()) - 2));
#else
int nWorkers = std::thread::hardware_concurrency();
#endif
std::vector workers(nWorkers);
for (int iw = 0; iw < (int) workers.size(); ++iw) {
auto & worker = workers[iw];
worker = std::thread([&](int ith) {
// Worker ith handles rows ith, ith + nWorkers, ...
for (int i = ith; i < nPresses; i += nWorkers) {
res[i][i].cc = 1.0f;
res[i][i].offset = 0;
const auto & waveform0 = keyPresses[i].waveform;
const auto & pos0 = keyPresses[i].pos;
auto & avgcc = keyPresses[i].ccAvg;
const auto samples0 = waveform0.samples;
for (int j = i + 1; j < nPresses; ++j) {
if (i == j) continue;
const auto waveform1 = keyPresses[j].waveform;
const auto pos1 = keyPresses[j].pos;
const auto samples1 = waveform1.samples;
// Window of 2*w samples around press i versus a window around press j
// that is widened by a samples on each side for the alignment search.
const auto ret = findBestCC(TWaveformViewT { samples0 + pos0 + offsetFromPeak_samples - w, 2*w },
TWaveformViewT { samples1 + pos1 + offsetFromPeak_samples - w - a, 2*w + 2*a }, a);
const auto bestcc = std::get<0>(ret);
const auto bestoffset = std::get<1>(ret);
res[i][j].cc = bestcc;
res[i][j].offset = bestoffset;
// Mirror into the lower triangle with the opposite offset.
res[j][i].cc = bestcc;
res[j][i].offset = -bestoffset;
avgcc += bestcc;
}
avgcc /= (nPresses - 1);
}
}, iw);
}
for (auto & worker : workers) worker.join();
return true;
}
// Generic version.
//
// Fills `res` with an nPresses x nPresses matrix holding, for every pair of
// key presses, the best correlation coefficient and the alignment offset at
// which it was found.
//
//   keyPressWidth_samples  - half width w of the compared window (2*w samples)
//   alignWindow_samples    - maximum alignment shift a tried in each direction
//   offsetFromPeak_samples - shift of the window center relative to the key press position
//   keyPresses             - key presses; their ccAvg field is updated
//   res                    - output matrix (cleared and resized here)
//
// Only the pairs (i, j) with j > i are computed; the result is mirrored into
// (j, i) with the offset negated. The diagonal is set to cc = 1, offset = 0.
// Rows are distributed round-robin over the worker threads. Always returns true.
//
// NOTE(review): ccAvg of key press i only accumulates the pairs with j > i but
// is divided by nPresses - 1 - confirm that this is intended.
// NOTE(review): for nPresses == 1 the division below has a zero divisor.
// NOTE(review): std::thread::hardware_concurrency() may return 0 in the
// non-Emscripten build; no workers are started then and `res` stays
// default-initialized.
template
bool calculateSimilartyMap(
const int32_t keyPressWidth_samples,
const int32_t alignWindow_samples,
const int32_t offsetFromPeak_samples,
TKeyPressCollectionT & keyPresses,
TSimilarityMap & res) {
int nPresses = keyPresses.size();
int w = keyPressWidth_samples;
int a = alignWindow_samples;
res.clear();
res.resize(nPresses);
for (auto & x : res) x.resize(nPresses);
#ifdef __EMSCRIPTEN__
int nWorkers = std::min(kMaxThreads, std::max(1, int(std::thread::hardware_concurrency()) - 2));
#else
int nWorkers = std::thread::hardware_concurrency();
#endif
std::vector workers(nWorkers);
for (int iw = 0; iw < (int) workers.size(); ++iw) {
auto & worker = workers[iw];
worker = std::thread([&](int ith) {
// Worker ith handles rows ith, ith + nWorkers, ...
for (int i = ith; i < nPresses; i += nWorkers) {
res[i][i].cc = 1.0f;
res[i][i].offset = 0;
const auto & waveform0 = keyPresses[i].waveform;
const auto & pos0 = keyPresses[i].pos;
auto & avgcc = keyPresses[i].ccAvg;
const auto samples0 = waveform0.samples;
for (int j = i + 1; j < nPresses; ++j) {
if (i == j) continue;
const auto waveform1 = keyPresses[j].waveform;
const auto pos1 = keyPresses[j].pos;
const auto samples1 = waveform1.samples;
// Window of 2*w samples around press i versus a window around press j
// that is widened by a samples on each side for the alignment search.
const auto ret = findBestCC(TWaveformViewT { samples0 + pos0 + offsetFromPeak_samples - w, 2*w },
TWaveformViewT { samples1 + pos1 + offsetFromPeak_samples - w - a, 2*w + 2*a }, a);
const auto bestcc = std::get<0>(ret);
const auto bestoffset = std::get<1>(ret);
res[i][j].cc = bestcc;
res[i][j].offset = bestoffset;
// Mirror into the lower triangle with the opposite offset.
res[j][i].cc = bestcc;
res[j][i].offset = -bestoffset;
avgcc += bestcc;
}
avgcc /= (nPresses - 1);
}
}, iw);
}
for (auto & worker : workers) worker.join();
return true;
}
// Explicit instantiation (template arguments not visible in this copy).
template bool calculateSimilartyMap(
const int32_t keyPressWidth_samples,
const int32_t alignWindow_samples,
const int32_t offsetFromPeak_samples,
TKeyPressCollectionT & keyPresses,
TSimilarityMap & res);
//
// findKeyPresses
//
// Specialization of findKeyPresses for waveforms whose samples are accessed per component
// (samples[i][0]). Only component 0 is read and written by the amplitude computations below.
//
// Detects key press candidates as positions where the absolute amplitude is the maximum of a
// sliding window of `historySize` samples and exceeds `thresholdBackground` times a running
// average of the absolute amplitude.
//
// Parameters:
//   waveform            - input view; `samples` and `n` are read
//   res                 - output collection of detected key presses (cleared first)
//   waveformThreshold   - output, resized to waveform.n; receives thresholdBackground*rbAverage
//                         at every evaluated position
//   waveformMax         - output, resized to waveform.n; receives the sliding-window maximum
//   thresholdBackground - multiplier applied to the running average to form the threshold
//   historySize         - sliding window length k; the running average spans 8*historySize samples
//   historySizeReset    - distance (in samples) used by the final de-duplication pass
//   removeLowPower      - when true, weak detections are pruned iteratively
//
// Always returns true.
template<>
bool findKeyPresses(
const TWaveformViewT & waveform,
TKeyPressCollectionT & res,
TWaveformT & waveformThreshold,
TWaveformT & waveformMax,
double thresholdBackground,
int historySize,
int historySizeReset,
bool removeLowPower) {
res.clear();
waveformThreshold.resize(waveform.n);
waveformMax.resize(waveform.n);
// ring buffer with the last 8*historySize absolute amplitudes and their mean (rbAverage)
int rbBegin = 0;
double rbAverage = 0.0;
std::vector rbSamples(8*historySize, 0.0);
int k = historySize;
// Indices into waveformAbs; the loops below keep the front at the maximum of the current window.
// NOTE(review): que(k) looks like it creates k value-initialized entries instead of an empty
// deque; they would all be popped at i == 0 by the ">=" comparison - confirm.
std::deque que(k);
auto samples = waveform.samples;
auto n = waveform.n;
TWaveformT waveformAbs(n);
// absolute amplitude of component 0
for (int64_t i = 0; i < n; ++i) {
waveformAbs[i][0] = std::abs(samples[i][0]);
}
for (int64_t i = 0; i < n; ++i) {
{
// update the running average once i has reached k/2: replace the oldest ring buffer
// entry with the current absolute amplitude
int64_t ii = i - k/2;
if (ii >= 0) {
rbAverage *= rbSamples.size();
rbAverage -= rbSamples[rbBegin];
double acur = waveformAbs[i][0];
rbSamples[rbBegin] = acur;
rbAverage += acur;
rbAverage /= rbSamples.size();
if (++rbBegin >= (int) rbSamples.size()) {
rbBegin = 0;
}
}
}
if (i < k) {
// the first k samples only fill the window, nothing is evaluated yet
while((!que.empty()) && waveformAbs[i] >= waveformAbs[que.back()]) {
que.pop_back();
}
que.push_back(i);
} else {
// drop indices that left the window (i - k, i]
while((!que.empty()) && que.front() <= i - k) {
que.pop_front();
}
// drop indices that are not larger than the new sample
while((!que.empty()) && waveformAbs[i] >= waveformAbs[que.back()]) {
que.pop_back();
}
que.push_back(i);
// itest is the position k/2 samples behind the newest one
int64_t itest = i - k/2;
// report itest when it is the window maximum, lies at least 2*k samples away from both
// ends of the waveform and is louder than the background threshold
if (itest >= 2*k && itest < n - 2*k && que.front() == itest) {
double acur = waveformAbs[itest][0];
if (acur > thresholdBackground*rbAverage) {
// waveform is a const reference, so std::move() results in a copy here
res.emplace_back(TKeyPressDataT { std::move(waveform), itest, 0.0, -1, -1, '?' });
}
}
waveformThreshold[itest][0] = thresholdBackground*rbAverage;
waveformMax[itest] = waveformAbs[que.front()];
}
}
if (removeLowPower) {
// repeatedly drop detections that are not louder than 0.3 times the average amplitude of
// the current detections, until a pass removes nothing
while (true) {
auto oldn = res.size();
double avgPower = 0.0;
for (const auto & kp : res) {
avgPower += waveformAbs[kp.pos][0];
}
avgPower /= res.size();
auto tmp = std::move(res);
for (const auto & kp : tmp) {
if (waveformAbs[kp.pos][0] > 0.3*avgPower) {
res.push_back(kp);
}
}
if (res.size() == oldn) break;
}
}
if (res.size() > 1) {
// keep a detection only if it is more than historySizeReset samples after the previously
// kept one, or if its window maximum is larger than that of the previously kept one
TKeyPressCollectionT res2;
res2.push_back(res.front());
for (int i = 1; i < (int) res.size(); ++i) {
if (res[i].pos - res2.back().pos > historySizeReset || waveformMax[res[i].pos] > waveformMax[res2.back().pos]) {
res2.push_back(res[i]);
}
}
std::swap(res, res2);
}
return true;
}
// Generic findKeyPresses for waveforms with scalar samples.
//
// Detects key press candidates as positions where the absolute amplitude is the maximum of a
// sliding window of `historySize` samples and exceeds `thresholdBackground` times a running
// average of the absolute amplitude.
//
// Parameters:
//   waveform            - input view; `samples` and `n` are read
//   res                 - output collection of detected key presses (cleared first)
//   waveformThreshold   - output, resized to waveform.n; receives thresholdBackground*rbAverage
//                         at every evaluated position
//   waveformMax         - output, resized to waveform.n; receives the sliding-window maximum
//   thresholdBackground - multiplier applied to the running average to form the threshold
//   historySize         - sliding window length k; the running average spans 8*historySize samples
//   historySizeReset    - distance (in samples) used by the final de-duplication pass
//   removeLowPower      - when true, weak detections are pruned iteratively
//
// Always returns true.
template
bool findKeyPresses(
const TWaveformViewT & waveform,
TKeyPressCollectionT & res,
TWaveformT & waveformThreshold,
TWaveformT & waveformMax,
double thresholdBackground,
int historySize,
int historySizeReset,
bool removeLowPower) {
res.clear();
waveformThreshold.resize(waveform.n);
waveformMax.resize(waveform.n);
// ring buffer with the last 8*historySize absolute amplitudes and their mean (rbAverage)
int rbBegin = 0;
double rbAverage = 0.0;
std::vector rbSamples(8*historySize, 0.0);
int k = historySize;
// Indices into waveformAbs; the loops below keep the front at the maximum of the current window.
// NOTE(review): que(k) looks like it creates k value-initialized entries instead of an empty
// deque; they would all be popped at i == 0 by the ">=" comparison - confirm.
std::deque que(k);
auto samples = waveform.samples;
auto n = waveform.n;
TWaveformT waveformAbs(n);
// absolute amplitude of every sample
for (int64_t i = 0; i < n; ++i) {
waveformAbs[i] = std::abs(samples[i]);
}
for (int64_t i = 0; i < n; ++i) {
{
// update the running average once i has reached k/2: replace the oldest ring buffer
// entry with the current absolute amplitude
int64_t ii = i - k/2;
if (ii >= 0) {
rbAverage *= rbSamples.size();
rbAverage -= rbSamples[rbBegin];
double acur = waveformAbs[i];
rbSamples[rbBegin] = acur;
rbAverage += acur;
rbAverage /= rbSamples.size();
if (++rbBegin >= (int) rbSamples.size()) {
rbBegin = 0;
}
}
}
if (i < k) {
// the first k samples only fill the window, nothing is evaluated yet
while((!que.empty()) && waveformAbs[i] >= waveformAbs[que.back()]) {
que.pop_back();
}
que.push_back(i);
} else {
// drop indices that left the window (i - k, i]
while((!que.empty()) && que.front() <= i - k) {
que.pop_front();
}
// drop indices that are not larger than the new sample
while((!que.empty()) && waveformAbs[i] >= waveformAbs[que.back()]) {
que.pop_back();
}
que.push_back(i);
// itest is the position k/2 samples behind the newest one
int64_t itest = i - k/2;
// report itest when it is the window maximum, lies at least 2*k samples away from both
// ends of the waveform and is louder than the background threshold
if (itest >= 2*k && itest < n - 2*k && que.front() == itest) {
double acur = waveformAbs[itest];
if (acur > thresholdBackground*rbAverage) {
// waveform is a const reference, so std::move() results in a copy here
res.emplace_back(TKeyPressDataT { std::move(waveform), itest, 0.0, -1, -1, '?' });
}
}
waveformThreshold[itest] = thresholdBackground*rbAverage;
waveformMax[itest] = waveformAbs[que.front()];
}
}
if (removeLowPower) {
// repeatedly drop detections that are not louder than 0.3 times the average amplitude of
// the current detections, until a pass removes nothing
while (true) {
auto oldn = res.size();
double avgPower = 0.0;
for (const auto & kp : res) {
avgPower += waveformAbs[kp.pos];
}
avgPower /= res.size();
auto tmp = std::move(res);
for (const auto & kp : tmp) {
if (waveformAbs[kp.pos] > 0.3*avgPower) {
res.push_back(kp);
}
}
if (res.size() == oldn) break;
}
}
if (res.size() > 1) {
// keep a detection only if it is more than historySizeReset samples after the previously
// kept one, or if its window maximum is larger than that of the previously kept one
TKeyPressCollectionT res2;
res2.push_back(res.front());
for (int i = 1; i < (int) res.size(); ++i) {
if (res[i].pos - res2.back().pos > historySizeReset || waveformMax[res[i].pos] > waveformMax[res2.back().pos]) {
res2.push_back(res[i]);
}
}
std::swap(res, res2);
}
return true;
}
// explicit instantiation of the template defined above
template bool findKeyPresses(
const TWaveformViewT & waveform,
TKeyPressCollectionT & res,
TWaveformT & waveformThreshold,
TWaveformT & waveformMax,
double thresholdBackground,
int historySize,
int historySizeReset,
bool removeLowPower);
// Writes the positions of `keyPresses` to the binary file `fname`.
//
// File layout (native byte order): an `int` with the number of key presses, followed by the
// raw bytes of the `pos` member of every key press.
//
// Returns true when the file was opened and everything was written successfully,
// false otherwise.
template
bool saveKeyPresses(const std::string & fname, const TKeyPressCollectionT & keyPresses) {
    std::ofstream fout(fname, std::ios::binary);
    if (!fout.is_open()) {
        // nothing can be written - report it instead of pretending success
        return false;
    }

    const int n = keyPresses.size();
    fout.write((const char *)(&n), sizeof(n));
    for (int i = 0; i < n; ++i) {
        fout.write((const char *)(&keyPresses[i].pos), sizeof(keyPresses[i].pos));
    }
    fout.close();

    // failbit is set if any write or the final flush in close() failed
    return !fout.fail();
}

// explicit instantiation of the template defined above
template bool saveKeyPresses(const std::string & fname, const TKeyPressCollectionT & keyPresses);
// Reads key press positions from the binary file `fname` (the layout written by saveKeyPresses:
// an `int` count followed by the raw `pos` value of every key press).
//
// `keyPresses` is cleared first. Every loaded element gets `waveform` assigned to its
// `waveform` member and the stored value assigned to `pos`.
//
// Returns true when the whole file was read successfully. Returns false when the file cannot
// be opened, the count is missing or negative, or the data ends prematurely; in the last case
// `keyPresses` holds only the entries that were read completely.
template
bool loadKeyPresses(const std::string & fname, const TWaveformViewT & waveform, TKeyPressCollectionT & keyPresses) {
    keyPresses.clear();

    std::ifstream fin(fname, std::ios::binary);
    if (!fin.is_open()) {
        return false;
    }

    int n = 0;
    fin.read((char *)(&n), sizeof(n));
    if (!fin || n < 0) {
        // missing header or corrupted count
        return false;
    }

    // Grow the collection one entry at a time instead of trusting `n` for an up-front resize:
    // a corrupted count must not trigger a huge allocation.
    for (int i = 0; i < n; ++i) {
        keyPresses.emplace_back();
        auto & keyPress = keyPresses.back();
        keyPress.waveform = waveform;
        fin.read((char *)(&keyPress.pos), sizeof(keyPress.pos));
        if (!fin) {
            // truncated file - drop the partially read entry
            keyPresses.pop_back();
            return false;
        }
    }
    fin.close();

    return true;
}

// explicit instantiation of the template defined above
template bool loadKeyPresses(const std::string & fname, const TWaveformViewT & waveform, TKeyPressCollectionT & keyPresses);
// Writes a plain-text listing of the key presses in `data` to `fname`:
// one line per key press consisting of its `pos` followed by " 1".
// Always returns true.
template
bool dumpKeyPresses(const std::string & fname, const TKeyPressCollectionT & data) {
    std::ofstream out(fname);

    for (auto it = data.begin(); it != data.end(); ++it) {
        out << it->pos << " 1" << std::endl;
    }

    out.close();
    return true;
}

// explicit instantiation of the template defined above
template bool dumpKeyPresses(const std::string & fname, const TKeyPressCollectionT & data);
// Audio playback callback: fills `stream` (`len` bytes) with samples taken from the waveform
// referenced by the TPlaybackDataT object passed through `userData`.
//
//  - When data->playing is false the whole buffer is filled with zero samples.
//  - Otherwise up to kSamples/slowDown source samples are copied, starting at data->idx.
//    With slowDown == 2 every source sample is followed by the average of itself and the next
//    source sample, so two output samples are produced per source sample.
//  - Whatever remains of the buffer is filled with zero samples.
//
// data->idx is advanced by the number of consumed source samples plus the number of padding
// samples (unchanged from the previous behavior).
template
void cbPlayback(void * userData, uint8_t * stream, int len) {
    TPlaybackDataT * data = (TPlaybackDataT *)(userData);

    if (data->playing == false) {
        int offset = 0;
        TSample a = 0;
        while (len > 0) {
            memcpy(stream + offset*sizeof(a), &a, sizeof(a));
            len -= sizeof(a);
            ++offset;
        }
        return;
    }

    auto end = std::min(data->idx + TPlaybackDataT::kSamples/data->slowDown, data->waveform.n);
    auto idx = data->idx;
    auto sidx = 0; // number of samples written to `stream` so far

    for (; idx < end; ++idx) {
        TSample a = data->waveform.samples[idx];
        memcpy(stream + (sidx)*sizeof(a), &a, sizeof(a));
        len -= sizeof(a);
        ++sidx;

        if (data->slowDown == 2) {
            // The last sample of the waveform has no successor: reuse the sample itself
            // instead of reading one element past the end of the waveform.
            TSample a2 = a;
            if (idx + 1 < data->waveform.n) {
                a2 = data->waveform.samples[idx + 1];
            }
            a = 0.5*a + 0.5*a2;
            memcpy(stream + (sidx)*sizeof(a), &a, sizeof(a));
            len -= sizeof(a);
            ++sidx;
        }
    }

    // Pad the rest of the buffer with silence. The write position must follow the number of
    // samples already emitted (sidx): with slowDown == 2 it differs from idx - data->idx, which
    // previously made the padding overwrite valid audio and leave the tail uninitialized.
    while (len > 0) {
        TSample a = 0;
        memcpy(stream + (sidx)*sizeof(a), &a, sizeof(a));
        len -= sizeof(a);
        ++sidx;
        ++idx;
    }

    data->idx = idx;
}

// explicit instantiation of the template defined above
template void cbPlayback(void * userData, uint8_t * stream, int len);
// Computes a low resolution envelope of `waveform`.
//
// waveformLowRes is resized to waveform.n. For every i >= nWindow, the element at index
// i - nWindow/2 receives the maximum absolute sample value found in the window (i - nWindow, i].
// Elements that are never reached by that index keep whatever value resize() left in them.
//
// Always returns true.
template
bool generateLowResWaveform(const TWaveformViewT & waveform, TWaveformT & waveformLowRes, int nWindow) {
    waveformLowRes.resize(waveform.n);

    int k = nWindow;

    // indices into `magnitude`; the front always refers to the largest value of the window
    std::deque que(k);

    auto src   = waveform.samples;
    auto count = waveform.n;

    TWaveformT magnitude(count);
    for (int64_t i = 0; i < count; ++i) {
        magnitude[i] = std::abs(src[i]);
    }

    for (int64_t i = 0; i < count; ++i) {
        const bool windowFull = i >= k;

        if (windowFull) {
            // discard indices that slid out of the window
            while (!que.empty() && que.front() <= i - k) {
                que.pop_front();
            }
        }

        // discard indices whose value cannot be the maximum anymore
        while (!que.empty() && magnitude[i] >= magnitude[que.back()]) {
            que.pop_back();
        }
        que.push_back(i);

        if (windowFull) {
            int64_t itest = i - k/2;
            waveformLowRes[itest] = magnitude[que.front()];
        }
    }

    return true;
}

// explicit instantiation of the template defined above
template bool generateLowResWaveform(const TWaveformViewT & waveform, TWaveformT & waveformLowRes, int nWindow);
// Shifts key press positions by the offsets stored in the similarity map `sim`.
//
// All pairs (i, j) with i < j are visited in order of decreasing sim[i][j].cc. A pair is
// skipped when both of its key presses have already been handled. Otherwise the key press that
// has not been handled yet is moved by the pair's offset (the second one is preferred when
// neither has been handled), the offset is reset to 0 in both sim[i][j] and sim[j][i], and both
// key presses are marked as handled. The loop stops once every key press has been handled.
//
// Returns true if at least one applied offset was non-zero, false otherwise.
template
bool adjustKeyPresses(TKeyPressCollectionT & keyPresses, TSimilarityMap & sim) {
    struct Pair {
        int i = -1;
        int j = -1;
        TValueCC cc = -1.0;

        // reversed on purpose: sorting puts the highest cc first
        bool operator < (const Pair & a) const { return cc > a.cc; }
    };

    const int n = keyPresses.size();

    // collect every unordered pair together with its similarity
    std::vector ranked;
    for (int i = 0; i < n - 1; ++i) {
        for (int j = i + 1; j < n; ++j) {
            ranked.emplace_back(Pair{i, j, sim[i][j].cc});
        }
    }
    std::sort(ranked.begin(), ranked.end());

    std::vector handled(n, false);
    int nHandled = 0;
    bool changed = false;

    for (const auto & best : ranked) {
        const int k0 = best.i;
        const int k1 = best.j;

        if (handled[k0] && handled[k1]) {
            continue;
        }

        const auto shift = sim[k0][k1].offset;
        if (shift != 0) {
            changed = true;
        }

        if (handled[k1]) {
            // k1 is already fixed, so k0 has to move towards it
            keyPresses[k0].pos -= shift;
        } else {
            keyPresses[k1].pos += shift;
        }

        sim[k0][k1].offset = 0;
        sim[k1][k0].offset = 0;

        if (!handled[k0]) {
            handled[k0] = true;
            ++nHandled;
        }
        if (!handled[k1]) {
            handled[k1] = true;
            ++nHandled;
        }

        if (nHandled == n) {
            break;
        }
    }

    return changed;
}

// explicit instantiation of the template defined above
template bool adjustKeyPresses(TKeyPressCollectionT & keyPresses, TSimilarityMap & sim);
// Removes the key presses that are not similar enough to any other key press.
//
// A key press is kept when it takes part in at least one pair (i, j), i != j, with
// sim[i][j].cc > threshold that is found by the scan below. All other key presses are removed
// from `keyPresses`, and the matching rows and columns are removed from `sim`.
//
// Returns false (after printing a message to stderr) when the size of `sim` does not match
// the number of key presses, true otherwise.
template
bool removeLowSimilarityKeys(TKeyPressCollectionT & keyPresses, TSimilarityMap & sim, double threshold) {
    const int n = keyPresses.size();

    if ((int) sim.size() != n) {
        fprintf(stderr, "removeLowSimilarityKeys: n != sim.size()\n");
        return false;
    }

    // keep[i] becomes true as soon as key press i is found in a sufficiently similar pair
    std::vector keep(n, false);
    for (int i = 0; i < n; ++i) {
        if (keep[i]) {
            continue;
        }
        for (int j = 0; j < n; ++j) {
            if (j != i && sim[i][j].cc > threshold) {
                keep[i] = true;
                keep[j] = true;
                break;
            }
        }
    }

    // nothing to do when every key press is kept
    bool anyRemoved = false;
    for (int i = 0; i < n && !anyRemoved; ++i) {
        anyRemoved = !keep[i];
    }
    if (!anyRemoved) {
        return true;
    }

    // rebuild the key press collection from the kept entries
    const auto keyPressesOld = keyPresses;
    keyPresses.clear();
    for (int i = 0; i < n; ++i) {
        if (keep[i]) {
            keyPresses.push_back(keyPressesOld[i]);
        }
    }

    // rebuild the similarity map from the kept rows and, within them, the kept columns
    const auto simOld = sim;
    sim.clear();
    for (int i = 0; i < n; ++i) {
        if (!keep[i]) {
            continue;
        }
        auto row = simOld[i];
        row.clear();
        for (int j = 0; j < n; ++j) {
            if (keep[j]) {
                row.push_back(simOld[i][j]);
            }
        }
        sim.push_back(row);
    }

    return true;
}

// explicit instantiation of the template defined above
template bool removeLowSimilarityKeys(TKeyPressCollectionT & keyPresses, TSimilarityMap & sim, double threshold);
================================================
FILE: common.h
================================================
/*! \file common.h
* \brief Common types and functions
* \author Georgi Gerganov
*/
#pragma once
#include