Repository: rhasspy/rhasspy3
Branch: master
Commit: 11e8d3016d32
Files: 258
Total size: 454.7 KB

Directory structure:
gitextract_7danshym/

├── .gitignore
├── .gitmodules
├── .isort.cfg
├── LICENSE.md
├── README.md
├── bin/
│   ├── asr_adapter_raw2text.py
│   ├── asr_adapter_wav2text.py
│   ├── asr_transcribe.py
│   ├── asr_transcribe_stream.py
│   ├── asr_transcribe_wav.py
│   ├── client_unix_socket.py
│   ├── config_print.py
│   ├── handle_adapter_json.py
│   ├── handle_adapter_text.py
│   ├── handle_intent.py
│   ├── handle_text.py
│   ├── intent_recognize.py
│   ├── mic_adapter_raw.py
│   ├── mic_record_sample.py
│   ├── mic_test_energy.py
│   ├── pipeline_run.py
│   ├── program_download.py
│   ├── program_install.py
│   ├── satellite_run.py
│   ├── server_run.py
│   ├── snd_adapter_raw.py
│   ├── snd_play.py
│   ├── tts_adapter_http.py
│   ├── tts_adapter_text2wav.py
│   ├── tts_speak.py
│   ├── tts_synthesize.py
│   ├── vad_adapter_raw.py
│   ├── vad_segment_wav.py
│   ├── wake_adapter_raw.py
│   └── wake_detect.py
├── docs/
│   ├── README.md
│   ├── adapters.md
│   ├── domains.md
│   ├── home_assistant.md
│   ├── satellite.md
│   ├── tutorial.md
│   └── wyoming.md
├── examples/
│   └── satellite/
│       └── configuration.yaml
├── mypy.ini
├── programs/
│   ├── asr/
│   │   ├── coqui-stt/
│   │   │   ├── README.md
│   │   │   ├── bin/
│   │   │   │   ├── coqui_stt_raw2text.py
│   │   │   │   ├── coqui_stt_server.py
│   │   │   │   └── coqui_stt_wav2text.py
│   │   │   ├── requirements.txt
│   │   │   └── script/
│   │   │       ├── download.py
│   │   │       ├── raw2text
│   │   │       ├── server
│   │   │       ├── setup
│   │   │       └── wav2text
│   │   ├── faster-whisper/
│   │   │   ├── README.md
│   │   │   ├── bin/
│   │   │   │   ├── faster_whisper_server.py
│   │   │   │   └── faster_whisper_wav2text.py
│   │   │   ├── script/
│   │   │   │   ├── download.py
│   │   │   │   ├── server
│   │   │   │   ├── setup
│   │   │   │   └── wav2text
│   │   │   └── src/
│   │   │       ├── LICENSE
│   │   │       ├── README.md
│   │   │       ├── faster_whisper/
│   │   │       │   ├── __init__.py
│   │   │       │   ├── audio.py
│   │   │       │   ├── feature_extractor.py
│   │   │       │   └── transcribe.py
│   │   │       ├── requirements.conversion.txt
│   │   │       ├── requirements.txt
│   │   │       └── setup.py
│   │   ├── pocketsphinx/
│   │   │   ├── README.md
│   │   │   ├── bin/
│   │   │   │   ├── pocketsphinx_raw2text.py
│   │   │   │   ├── pocketsphinx_server.py
│   │   │   │   └── pocketsphinx_wav2text.py
│   │   │   ├── requirements.txt
│   │   │   └── script/
│   │   │       ├── download.py
│   │   │       ├── raw2text
│   │   │       ├── server
│   │   │       ├── setup
│   │   │       └── wav2text
│   │   ├── vosk/
│   │   │   ├── README.md
│   │   │   ├── bin/
│   │   │   │   ├── vosk_raw2text.py
│   │   │   │   ├── vosk_server.py
│   │   │   │   └── vosk_wav2text.py
│   │   │   ├── requirements.txt
│   │   │   └── script/
│   │   │       ├── download.py
│   │   │       ├── raw2text
│   │   │       ├── server
│   │   │       ├── setup
│   │   │       └── wav2text
│   │   ├── whisper/
│   │   │   ├── README.md
│   │   │   ├── bin/
│   │   │   │   ├── whisper_server.py
│   │   │   │   └── whisper_wav2text.py
│   │   │   ├── requirements.txt
│   │   │   └── script/
│   │   │       ├── server
│   │   │       ├── setup
│   │   │       └── wav2text
│   │   └── whisper-cpp/
│   │       ├── .gitignore
│   │       ├── Dockerfile.libwhisper
│   │       ├── Dockerfile.libwhisper.dockerignore
│   │       ├── README.md
│   │       ├── bin/
│   │       │   ├── whisper_cpp_server.py
│   │       │   └── whisper_cpp_wav2text.py
│   │       ├── lib/
│   │       │   ├── Makefile
│   │       │   └── whisper_cpp.py
│   │       ├── requirements.txt
│   │       └── script/
│   │           ├── build_libwhisper
│   │           ├── download.py
│   │           ├── server
│   │           ├── setup
│   │           ├── setup.py
│   │           └── wav2text
│   ├── handle/
│   │   ├── date_time/
│   │   │   └── bin/
│   │   │       └── date_time.py
│   │   └── home_assistant/
│   │       └── bin/
│   │           └── converse.py
│   ├── intent/
│   │   └── regex/
│   │       └── bin/
│   │           └── regex.py
│   ├── mic/
│   │   ├── pyaudio/
│   │   │   ├── README.md
│   │   │   ├── bin/
│   │   │   │   ├── pyaudio_events.py
│   │   │   │   ├── pyaudio_list_mics.py
│   │   │   │   ├── pyaudio_raw.py
│   │   │   │   └── pyaudio_shared.py
│   │   │   ├── requirements.txt
│   │   │   └── script/
│   │   │       ├── events
│   │   │       ├── list_mics
│   │   │       ├── raw
│   │   │       └── setup
│   │   ├── sounddevice/
│   │   │   ├── README.md
│   │   │   ├── bin/
│   │   │   │   ├── sounddevice_events.py
│   │   │   │   ├── sounddevice_list_mics.py
│   │   │   │   ├── sounddevice_raw.py
│   │   │   │   └── sounddevice_shared.py
│   │   │   ├── requirements.txt
│   │   │   └── script/
│   │   │       ├── events
│   │   │       ├── list_mics
│   │   │       ├── raw
│   │   │       └── setup
│   │   └── udp_raw/
│   │       └── bin/
│   │           └── udp_raw.py
│   ├── remote/
│   │   └── websocket/
│   │       ├── bin/
│   │       │   └── stream2stream.py
│   │       ├── requirements.txt
│   │       └── script/
│   │           ├── run
│   │           └── setup
│   ├── snd/
│   │   └── udp_raw/
│   │       └── bin/
│   │           └── udp_raw.py
│   ├── tts/
│   │   ├── coqui-tts/
│   │   │   ├── README.md
│   │   │   ├── requirements.txt
│   │   │   └── script/
│   │   │       ├── list_models
│   │   │       ├── server
│   │   │       └── setup
│   │   ├── flite/
│   │   │   └── script/
│   │   │       ├── download.py
│   │   │       └── setup
│   │   ├── larynx/
│   │   │   ├── README.md
│   │   │   ├── bin/
│   │   │   │   └── larynx_client.py
│   │   │   ├── requirements.txt
│   │   │   └── script/
│   │   │       ├── list_models
│   │   │       ├── server
│   │   │       └── setup
│   │   ├── marytts/
│   │   │   └── bin/
│   │   │       └── marytts.py
│   │   ├── mimic3/
│   │   │   ├── README.md
│   │   │   ├── bin/
│   │   │   │   └── mimic3_server.py
│   │   │   ├── requirements.txt
│   │   │   └── script/
│   │   │       ├── server
│   │   │       └── setup
│   │   └── piper/
│   │       ├── README.md
│   │       ├── bin/
│   │       │   └── piper_server.py
│   │       └── script/
│   │           ├── download.py
│   │           ├── server
│   │           └── setup.py
│   ├── vad/
│   │   ├── energy/
│   │   │   └── bin/
│   │   │       └── energy_speech_prob.py
│   │   ├── silero/
│   │   │   ├── README.md
│   │   │   ├── bin/
│   │   │   │   └── silero_speech_prob.py
│   │   │   ├── requirements.txt
│   │   │   ├── script/
│   │   │   │   ├── setup
│   │   │   │   └── speech_prob
│   │   │   └── share/
│   │   │       └── silero_vad.onnx
│   │   └── webrtcvad/
│   │       ├── README.md
│   │       ├── bin/
│   │       │   └── webrtcvad_speech_prob.py
│   │       ├── requirements.txt
│   │       └── script/
│   │           ├── setup
│   │           └── speech_prob
│   └── wake/
│       ├── porcupine1/
│       │   ├── bin/
│       │   │   ├── list_models.py
│       │   │   ├── porcupine_raw_text.py
│       │   │   ├── porcupine_shared.py
│       │   │   └── porcupine_stream.py
│       │   ├── requirements.txt
│       │   └── script/
│       │       ├── download.py
│       │       ├── list_models
│       │       ├── raw2text
│       │       └── setup
│       ├── precise-lite/
│       │   ├── bin/
│       │   │   └── precise.py
│       │   ├── requirements.txt
│       │   ├── script/
│       │   │   └── setup
│       │   └── share/
│       │       └── hey_mycroft.tflite
│       └── snowboy/
│           ├── bin/
│           │   └── snowboy_raw_text.py
│           ├── requirements.txt
│           ├── script/
│           │   └── setup
│           └── share/
│               ├── hey_extreme.umdl
│               ├── jarvis.umdl
│               ├── neoya.umdl
│               ├── smart_mirror.umdl
│               ├── snowboy.umdl
│               ├── subex.umdl
│               └── view_glass.umdl
├── pylintrc
├── requirements_dev.txt
├── requirements_http_api.txt
├── rhasspy3/
│   ├── VERSION
│   ├── __init__.py
│   ├── asr.py
│   ├── audio.py
│   ├── config.py
│   ├── configuration.yaml
│   ├── core.py
│   ├── event.py
│   ├── handle.py
│   ├── intent.py
│   ├── mic.py
│   ├── pipeline.py
│   ├── program.py
│   ├── py.typed
│   ├── remote.py
│   ├── snd.py
│   ├── tts.py
│   ├── util/
│   │   ├── __init__.py
│   │   ├── dataclasses_json.py
│   │   └── jaml.py
│   ├── vad.py
│   └── wake.py
├── rhasspy3_http_api/
│   ├── __init__.py
│   ├── __main__.py
│   ├── asr.py
│   ├── css/
│   │   └── main.css
│   ├── handle.py
│   ├── intent.py
│   ├── js/
│   │   ├── main.js
│   │   └── recorder.worklet.js
│   ├── pipeline.py
│   ├── snd.py
│   ├── templates/
│   │   ├── asr.html
│   │   ├── index.html
│   │   ├── layout.html
│   │   ├── pipeline.html
│   │   ├── satellite.html
│   │   └── tts.html
│   ├── tts.py
│   └── wake.py
├── script/
│   ├── format
│   ├── http_server
│   ├── lint
│   ├── run
│   ├── setup
│   ├── setup_http_server
│   └── test
├── setup.cfg
├── setup.py
├── tests/
│   ├── test_dataclasses_json.py
│   └── test_jaml.py
└── tools/
    └── websocket-client/
        ├── bin/
        │   └── websocket_client.py
        ├── requirements.txt
        └── script/
            ├── run
            └── setup

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
.DS_Store
.idea
*.log
tmp/

*.py[cod]
*.egg
/build
htmlcov

.projectile
.venv/
venv/
.mypy_cache/
*.egg-info/

/local/


================================================
FILE: .gitmodules
================================================
[submodule "programs/asr/whisper.cpp/build/whisper.cpp"]
	path = programs/asr/whisper.cpp/build/whisper.cpp
	url = https://github.com/ggerganov/whisper.cpp


================================================
FILE: .isort.cfg
================================================
[settings]
multi_line_output=3
include_trailing_comma=True
force_grid_wrap=0
use_parentheses=True
line_length=88


================================================
FILE: LICENSE.md
================================================
MIT License

Copyright (c) 2022 Michael Hansen

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


================================================
FILE: README.md
================================================
![Rhasspy 3](img/banner.png)

**NOTE: This is a very early developer preview!**

An open source toolkit for building voice assistants.

![Voice assistant pipeline](img/pipeline.png)

Rhasspy focuses on:

* Privacy - no data leaves your computer unless you want it to
* Broad language support - more than just English
* Customization - everything can be changed

## Getting Started

* Check out the [tutorial](docs/tutorial.md)
* Connect Rhasspy to [Home Assistant](docs/home_assistant.md)
   * Install the [Rhasspy 3 add-on](https://github.com/rhasspy/hassio-addons)
* Run one or more [satellites](docs/satellite.md)
* Join [the community](https://community.rhasspy.org/)


## Missing Pieces

This is a developer preview, so there are lots of things missing:

* A user friendly web UI
* An automated method for installing programs/services and downloading models
* Support for custom speech to text grammars
* Intent systems besides Home Assistant
* The ability to accumulate context within a pipeline


## Core Concepts

### Domains

Rhasspy is organized by [domain](docs/domains.md):

* mic - audio input
* wake - wake word detection
* asr - speech to text
* vad - voice activity detection
* intent - intent recognition from text
* handle - intent or text input handling
* tts - text to speech
* snd - audio output


### Programs

Rhasspy talks to external programs using the [Wyoming protocol](docs/wyoming.md). You can add your own programs by implementing the protocol or using an [adapter](#adapters).


### Adapters

[Small scripts](docs/adapters.md) that live in `bin/` and bridge existing programs into the [Wyoming protocol](docs/wyoming.md).

For example, a speech to text program (`asr`) that accepts a WAV file and outputs text can use `asr_adapter_wav2text.py`


### Pipelines

Complete voice loop from microphone input (mic) to speaker output (snd). Stages are:

1. detect (optional)
    * Wait until wake word is detected in mic
2. transcribe
    * Listen until vad detects silence, then convert audio to text
3. recognize (optional)
    * Recognize an intent from text
4. handle
    * Handle an intent or text, producing a text response
5. speak
    * Convert handle output text to speech, and speak through snd

### Servers

Some programs take a while to load, so it's best to leave them running as a server. Use `bin/server_run.py` or add `--server <domain> <name>` when running the HTTP server.

See `servers` section of `configuration.yaml` file.

---


## Supported Programs

* mic
    * [arecord](https://alsa-project.org/wiki/Main_Page)
    * [gstreamer_udp](https://gstreamer.freedesktop.org/)
    * [sounddevice](https://python-sounddevice.readthedocs.io)
    * [pyaudio](https://people.csail.mit.edu/hubert/pyaudio/docs/)
* wake 
    * [porcupine1](https://github.com/Picovoice/porcupine)
    * [precise-lite](https://github.com/mycroftAI/mycroft-precise)
    * [snowboy](https://github.com/Kitt-AI/snowboy)
* vad
    * [silero](https://github.com/snakers4/silero-vad)
    * [webrtcvad](https://pypi.org/project/webrtcvad/)
* asr 
    * [whisper](https://github.com/openai/whisper)
    * [whisper-cpp](https://github.com/ggerganov/whisper.cpp/)
    * [faster-whisper](https://github.com/guillaumekln/faster-whisper/)
    * [vosk](https://alphacephei.com/vosk/)
    * [coqui-stt](https://stt.readthedocs.io)
    * [pocketsphinx](https://github.com/cmusphinx/pocketsphinx)
* handle
    * [home_assistant_conversation](https://www.home-assistant.io/docs/assist)
* tts 
    * [piper](https://github.com/rhasspy/piper/)
    * [mimic3](https://github.com/mycroftAI/mimic3)
    * [larynx](https://github.com/rhasspy/larynx/)
    * [coqui-tts](https://tts.readthedocs.io)
    * [marytts](http://mary.dfki.de/)
    * [flite](http://www.festvox.org/flite/)
    * [festival](http://www.cstr.ed.ac.uk/projects/festival/)
    * [espeak-ng](https://github.com/espeak-ng/espeak-ng/)
* snd
    * [aplay](https://alsa-project.org/wiki/Main_Page)
    * [gstreamer_udp](https://gstreamer.freedesktop.org/)
    
    
---


## HTTP API

`http://localhost:13331/<endpoint>`

Unless overridden, the pipeline named "default" is used.

* `/pipeline/run`
    * Runs a full pipeline from mic to snd
    * Produces JSON
    * Override `pipeline` or:
        * `wake_program`
        * `asr_program`
        * `intent_program`
        * `handle_program`
        * `tts_program`
        * `snd_program`
    * Skip stages with `start_after`
        * `wake` - skip detection, body is detection name (text)
        * `asr` - skip recording, body is transcript (text) or WAV audio
        * `intent` - skip recognition, body is intent/not-recognized event (JSON)
        * `handle` - skip handling, body is handle/not-handled event (JSON)
        * `tts` - skip synthesis, body is WAV audio
    * Stop early with `stop_after`
        * `wake` - only detection
        * `asr` - detection and transcription
        * `intent` - detection, transcription, recognition
        * `handle` - detection, transcription, recognition, handling
        * `tts` - detection, transcription, recognition, handling, synthesis
* `/wake/detect`
    * Detect wake word in WAV input
    * Produces JSON
    * Override `wake_program` or `pipeline`
* `/asr/transcribe`
    * Transcribe audio from WAV input
    * Produces JSON
    * Override `asr_program` or `pipeline`
* `/intent/recognize`
    * Recognizes intent from text body (POST) or `text` (GET)
    * Produces JSON
    * Override `intent_program` or `pipeline`
* `/handle/handle`
    * Handles intent/text from body (POST) or `input` (GET)
    * `Content-Type` must be `application/json` for intent input
    * Override `handle_program` or `pipeline`
* `/tts/synthesize`
    * Synthesizes audio from text body (POST) or `text` (GET)
    * Produces WAV audio
    * Override `tts_program` or `pipeline`
* `/tts/speak`
    * Plays audio from text body (POST)  or `text` (GET)
    * Produces JSON
    * Override `tts_program`, `snd_program`, or `pipeline`
* `/snd/play`
    * Plays WAV audio via snd
    * Override `snd_program` or `pipeline`
* `/config`
    * Returns JSON config
* `/version`
    * Returns version info


## WebSocket API

`ws://localhost:13331/<endpoint>`

Audio streams are raw PCM in binary messages.

Use the `rate`, `width`, and `channels` parameters for sample rate (hertz), width (bytes), and channel count. By default, input audio is 16Khz 16-bit mono, and output audio is 22Khz 16-bit mono.

The client can "end" the audio stream by sending an empty binary message.

* `/pipeline/asr-tts`
    * Run pipeline from asr (stream in) to tts (stream out)
    * Produces JSON messages as events happen
    * Override `pipeline` or:
        * `asr_program`
        * `vad_program`
        * `handle_program`
        * `tts_program`
    * Use `in_rate`, `in_width`, `in_channels` for audio input format
    * Use `out_rate`, `out_width`, `out_channels` for audio output format
* `/wake/detect`
    * Detect wake word from websocket audio stream
    * Produces a JSON message when audio stream ends
    * Override `wake_program` or `pipeline`
* `/asr/transcribe`
    * Transcribe a websocket audio stream
    * Produces a JSON message when audio stream ends
    * Override `asr_program` or `pipeline`
* `/snd/play`
    * Play a websocket audio stream
    * Produces a JSON message when audio stream ends
    * Override `snd_program` or `pipeline`


================================================
FILE: bin/asr_adapter_raw2text.py
================================================
#!/usr/bin/env python3
import argparse
import logging
import shlex
import subprocess
from pathlib import Path

from rhasspy3.asr import Transcript
from rhasspy3.audio import AudioChunk, AudioChunkConverter, AudioStop
from rhasspy3.event import read_event, write_event

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "command",
        help="Command to run",
    )
    parser.add_argument("--shell", action="store_true")
    #
    parser.add_argument(
        "--rate",
        type=int,
        help="Sample rate (hz)",
    )
    parser.add_argument(
        "--width",
        type=int,
        help="Sample width bytes",
    )
    parser.add_argument(
        "--channels",
        type=int,
        help="Sample channel count",
    )
    parser.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    if args.shell:
        command = args.command
    else:
        command = shlex.split(args.command)

    proc = subprocess.Popen(
        command, stdin=subprocess.PIPE, stdout=subprocess.PIPE, shell=args.shell
    )
    text = ""
    converter = AudioChunkConverter(args.rate, args.width, args.channels)

    with proc:
        assert proc.stdin is not None
        assert proc.stdout is not None

        while True:
            event = read_event()
            if event is None:
                break

            if AudioChunk.is_type(event.type):
                chunk = AudioChunk.from_event(event)
                chunk = converter.convert(chunk)
                proc.stdin.write(chunk.audio)
                proc.stdin.flush()
            elif AudioStop.is_type(event.type):
                break

        stdout, _stderr = proc.communicate()
        text = stdout.decode()

    write_event(Transcript(text=text.strip()).event())


if __name__ == "__main__":
    main()


================================================
FILE: bin/asr_adapter_wav2text.py
================================================
#!/usr/bin/env python3
import argparse
import logging
import shlex
import subprocess
import tempfile
import wave
from pathlib import Path

from rhasspy3.asr import Transcript
from rhasspy3.audio import AudioChunk, AudioStop
from rhasspy3.event import read_event, write_event

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "command",
        help="Command to run",
    )
    parser.add_argument("--shell", action="store_true")
    parser.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    with tempfile.NamedTemporaryFile(mode="wb+", suffix=".wav") as wav_io:
        args.command = args.command.format(wav_file=wav_io.name)
        if args.shell:
            command = args.command
        else:
            command = shlex.split(args.command)

        wav_params_set = False
        wav_file: wave.Wave_write = wave.open(wav_io, "wb")
        try:
            with wav_file:
                while True:
                    event = read_event()
                    if event is None:
                        break

                    if AudioChunk.is_type(event.type):
                        chunk = AudioChunk.from_event(event)
                        if not wav_params_set:
                            wav_file.setframerate(chunk.rate)
                            wav_file.setsampwidth(chunk.width)
                            wav_file.setnchannels(chunk.channels)
                            wav_params_set = True

                        wav_file.writeframes(chunk.audio)
                    elif AudioStop.is_type(event.type):
                        break

            wav_io.seek(0)
            text = subprocess.check_output(command, shell=args.shell).decode()
            write_event(Transcript(text=text.strip()).event())
        except wave.Error:
            pass


if __name__ == "__main__":
    main()


================================================
FILE: bin/asr_transcribe.py
================================================
#!/usr/bin/env python3
"""Transcribes mic audio into text."""
import argparse
import asyncio
import json
import logging
import sys
from pathlib import Path

from rhasspy3.asr import DOMAIN, Transcript
from rhasspy3.core import Rhasspy
from rhasspy3.event import async_read_event
from rhasspy3.mic import DOMAIN as MIC_DOMAIN
from rhasspy3.program import create_process
from rhasspy3.vad import segment

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)


async def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-c",
        "--config",
        default=_DIR.parent / "config",
        help="Configuration directory",
    )
    parser.add_argument(
        "-p", "--pipeline", default="default", help="Name of pipeline to use"
    )
    parser.add_argument(
        "--mic-program", help="Name of mic program to use (overrides pipeline)"
    )
    parser.add_argument(
        "--asr-program", help="Name of asr program to use (overrides pipeline)"
    )
    parser.add_argument(
        "--vad-program", help="Name of vad program to use (overrides pipeline)"
    )
    #
    parser.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    rhasspy = Rhasspy.load(args.config)
    mic_program = args.mic_program
    asr_program = args.asr_program
    vad_program = args.vad_program
    pipeline = rhasspy.config.pipelines.get(args.pipeline)

    if not mic_program:
        assert pipeline is not None, f"No pipeline named {args.pipeline}"
        mic_program = pipeline.mic

    assert mic_program, "No mic program"

    if not asr_program:
        assert pipeline is not None, f"No pipeline named {args.pipeline}"
        asr_program = pipeline.asr

    assert asr_program, "No asr program"

    if not vad_program:
        assert pipeline is not None, f"No pipeline named {args.pipeline}"
        vad_program = pipeline.vad

    assert vad_program, "No vad program"

    # Transcribe voice command
    async with (await create_process(rhasspy, MIC_DOMAIN, mic_program)) as mic_proc, (
        await create_process(rhasspy, DOMAIN, asr_program)
    ) as asr_proc:
        assert mic_proc.stdout is not None
        assert asr_proc.stdin is not None
        assert asr_proc.stdout is not None

        _LOGGER.info("Ready")
        await segment(rhasspy, vad_program, mic_proc.stdout, asr_proc.stdin)

        # Read transcript
        _LOGGER.debug("Waiting for transcript")
        transcript = Transcript(text="")
        while True:
            event = await async_read_event(asr_proc.stdout)
            if event is None:
                break

            if Transcript.is_type(event.type):
                transcript = Transcript.from_event(event)
                break

        json.dump(transcript.event().to_dict(), sys.stdout, ensure_ascii=False)
        print("", flush=True)


if __name__ == "__main__":
    asyncio.run(main())


================================================
FILE: bin/asr_transcribe_stream.py
================================================
#!/usr/bin/env python3
"""Transcribes raw audio from stdin into text."""
import argparse
import asyncio
import json
import logging
import sys
from pathlib import Path

from rhasspy3.asr import DOMAIN, Transcript
from rhasspy3.audio import (
    DEFAULT_IN_CHANNELS,
    DEFAULT_IN_RATE,
    DEFAULT_IN_WIDTH,
    DEFAULT_SAMPLES_PER_CHUNK,
    AudioChunk,
    AudioChunkConverter,
    AudioStart,
    AudioStop,
)
from rhasspy3.core import Rhasspy
from rhasspy3.event import async_read_event, async_write_event
from rhasspy3.program import create_process

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)


async def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-c",
        "--config",
        default=_DIR.parent / "config",
        help="Configuration directory",
    )
    parser.add_argument(
        "-p", "--pipeline", default="default", help="Name of pipeline to use"
    )
    parser.add_argument(
        "--asr-program", help="Name of asr program to use (overrides pipeline)"
    )
    #
    parser.add_argument(
        "--mic-rate",
        type=int,
        default=DEFAULT_IN_RATE,
        help="Input sample rate (hertz)",
    )
    parser.add_argument(
        "--mic-width",
        type=int,
        default=DEFAULT_IN_WIDTH,
        help="Input sample width (bytes)",
    )
    parser.add_argument(
        "--mic-channels",
        type=int,
        default=DEFAULT_IN_CHANNELS,
        help="Input sample channel count",
    )
    #
    parser.add_argument(
        "--asr-rate", type=int, default=DEFAULT_IN_RATE, help="asr sample rate (hertz)"
    )
    parser.add_argument(
        "--asr-width",
        type=int,
        default=DEFAULT_IN_WIDTH,
        help="asr sample width (bytes)",
    )
    parser.add_argument(
        "--asr-channels",
        type=int,
        default=DEFAULT_IN_CHANNELS,
        help="asr sample channel count",
    )
    parser.add_argument(
        "--samples-per-chunk",
        type=int,
        default=DEFAULT_SAMPLES_PER_CHUNK,
        help="Samples to process per chunk",
    )
    #
    parser.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    rhasspy = Rhasspy.load(args.config)
    asr_program = args.asr_program
    pipeline = rhasspy.config.pipelines.get(args.pipeline)

    if not asr_program:
        assert pipeline is not None, f"No pipeline named {args.pipeline}"
        asr_program = pipeline.asr

    assert asr_program, "No asr program"
    _LOGGER.debug("asr program: %s", asr_program)

    # Transcribe raw audio from stdin
    converter = AudioChunkConverter(args.asr_rate, args.asr_width, args.asr_channels)
    bytes_per_chunk = args.samples_per_chunk * args.mic_width * args.mic_channels
    timestamp = 0

    async with (await create_process(rhasspy, DOMAIN, asr_program)) as asr_proc:
        assert asr_proc.stdin is not None
        assert asr_proc.stdout is not None
        _LOGGER.debug("Started %s", asr_program)

        await async_write_event(
            AudioStart(
                args.asr_rate, args.asr_width, args.asr_channels, timestamp=timestamp
            ).event(),
            asr_proc.stdin,
        )

        audio_bytes = sys.stdin.buffer.read(bytes_per_chunk)
        while audio_bytes:
            chunk = AudioChunk(
                args.mic_rate,
                args.mic_width,
                args.mic_channels,
                audio_bytes,
                timestamp=timestamp,
            )
            timestamp += chunk.milliseconds
            chunk = converter.convert(chunk)

            # Write audio
            await async_write_event(
                chunk.event(),
                asr_proc.stdin,
            )
            audio_bytes = sys.stdin.buffer.read(bytes_per_chunk)

        await async_write_event(AudioStop(timestamp=timestamp).event(), asr_proc.stdin)

        # Read transcript
        transcript = Transcript(text="")
        while True:
            event = await async_read_event(asr_proc.stdout)
            if event is None:
                break

            if Transcript.is_type(event.type):
                transcript = Transcript.from_event(event)
                break

        json.dump(transcript.event().to_dict(), sys.stdout, ensure_ascii=False)
        print("", flush=True)


if __name__ == "__main__":
    asyncio.run(main())


================================================
FILE: bin/asr_transcribe_wav.py
================================================
#!/usr/bin/env python3
"""Transcribes WAV audio into text."""
import argparse
import asyncio
import io
import json
import logging
import os
import sys
import time
import wave
from pathlib import Path
from typing import Iterable, Optional

from rhasspy3.asr import DOMAIN, Transcript
from rhasspy3.audio import (
    DEFAULT_IN_CHANNELS,
    DEFAULT_IN_RATE,
    DEFAULT_IN_WIDTH,
    DEFAULT_SAMPLES_PER_CHUNK,
    AudioChunkConverter,
    AudioStart,
    AudioStop,
    wav_to_chunks,
)
from rhasspy3.core import Rhasspy
from rhasspy3.event import async_read_event, async_write_event
from rhasspy3.program import create_process

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)


async def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-c",
        "--config",
        default=_DIR.parent / "config",
        help="Configuration directory",
    )
    parser.add_argument(
        "-p", "--pipeline", default="default", help="Name of pipeline to use"
    )
    parser.add_argument(
        "--asr-program", help="Name of asr program to use (overrides pipeline)"
    )
    #
    parser.add_argument(
        "--rate", type=int, default=DEFAULT_IN_RATE, help="Sample rate (hertz)"
    )
    parser.add_argument(
        "--width", type=int, default=DEFAULT_IN_WIDTH, help="Sample width (bytes)"
    )
    parser.add_argument(
        "--channels", type=int, default=DEFAULT_IN_CHANNELS, help="Sample channel count"
    )
    parser.add_argument(
        "--samples-per-chunk", type=int, default=DEFAULT_SAMPLES_PER_CHUNK
    )
    #
    parser.add_argument("wav", nargs="*", help="Path to WAV file(s)")
    #
    parser.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    rhasspy = Rhasspy.load(args.config)
    asr_program = args.asr_program
    pipeline = rhasspy.config.pipelines.get(args.pipeline)

    if not asr_program:
        assert pipeline is not None, f"No pipeline named {args.pipeline}"
        asr_program = pipeline.asr

    assert asr_program, "No asr program"
    _LOGGER.debug("asr program: %s", asr_program)

    # Transcribe WAV file(s)
    for wav_bytes in get_wav_bytes(args):
        converter = AudioChunkConverter(args.rate, args.width, args.channels)

        with io.BytesIO(wav_bytes) as wav_io:
            with wave.open(wav_io, "rb") as wav_file:
                chunks = list(wav_to_chunks(wav_file, args.samples_per_chunk))

        async with (await create_process(rhasspy, DOMAIN, asr_program)) as asr_proc:
            assert asr_proc.stdin is not None
            assert asr_proc.stdout is not None

            # Write audio
            start_time = time.monotonic_ns()
            await async_write_event(
                AudioStart(args.rate, args.width, args.channels, timestamp=0).event(),
                asr_proc.stdin,
            )

            last_timestamp: Optional[int] = None
            for chunk in chunks:
                chunk = converter.convert(chunk)
                await async_write_event(chunk.event(), asr_proc.stdin)
                last_timestamp = chunk.timestamp

            await async_write_event(
                AudioStop(timestamp=last_timestamp).event(),
                asr_proc.stdin,
            )

            # Read transcript
            _LOGGER.debug("Waiting for transcription")
            transcript = Transcript(text="")
            while True:
                event = await async_read_event(asr_proc.stdout)
                if event is None:
                    break

                if Transcript.is_type(event.type):
                    transcript = Transcript.from_event(event)
                    end_time = time.monotonic_ns()
                    _LOGGER.debug(
                        "Transcribed in %s second(s)", (end_time - start_time) / 1e9
                    )
                    break

            _LOGGER.debug(transcript)

            json.dump(transcript.event().to_dict(), sys.stdout, ensure_ascii=False)
            print("", flush=True)


def get_wav_bytes(args: argparse.Namespace) -> Iterable[bytes]:
    """Yields WAV audio from stdin or args."""
    if args.wav:
        # WAV file path(s)
        for wav_path in args.wav:
            _LOGGER.debug("Processing %s", wav_path)
            with open(wav_path, "rb") as wav_file:
                yield wav_file.read()
    else:
        # WAV on stdin
        if os.isatty(sys.stdin.fileno()):
            print("Reading WAV audio from stdin", file=sys.stderr)

        yield sys.stdin.buffer.read()


if __name__ == "__main__":
    asyncio.run(main())


================================================
FILE: bin/client_unix_socket.py
================================================
#!/usr/bin/env python3
import argparse
import logging
import socket
import threading

from rhasspy3.event import read_event, write_event

_LOGGER = logging.getLogger("wrapper_unix_socket")


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("socketfile", help="Path to Unix domain socket file")
    parser.add_argument("--debug", action="store_true", help="Log DEBUG messages")
    args = parser.parse_args()

    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    _LOGGER.debug("Connecting to %s", args.socketfile)
    sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    sock.connect(args.socketfile)
    _LOGGER.debug("Connected")

    try:
        with sock.makefile(mode="rwb") as conn_file:
            read_thread = threading.Thread(
                target=read_proc, args=(conn_file,), daemon=True
            )
            read_thread.start()

            write_thread = threading.Thread(
                target=write_proc, args=(conn_file,), daemon=True
            )
            write_thread.start()
            write_thread.join()
    except KeyboardInterrupt:
        pass


def read_proc(conn_file):
    try:
        while True:
            event = read_event(conn_file)
            if event is None:
                break

            write_event(event)
    except Exception:
        _LOGGER.exception("Unexpected error in read thread")


def write_proc(conn_file):
    try:
        while True:
            event = read_event()
            if event is None:
                break

            write_event(event, conn_file)
    except Exception:
        _LOGGER.exception("Unexpected error in write thread")


if __name__ == "__main__":
    main()


================================================
FILE: bin/config_print.py
================================================
#!/usr/bin/env python3
"""Prints configuration as JSON."""
import argparse
import json
import logging
import sys
from pathlib import Path

from rhasspy3.core import Rhasspy

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-c",
        "--config",
        default=_DIR.parent / "config",
        help="Configuration directory",
    )
    parser.add_argument("--indent", type=int, default=4)
    parser.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    rhasspy = Rhasspy.load(args.config)
    json.dump(rhasspy.config_dict, sys.stdout, indent=args.indent, ensure_ascii=False)


if __name__ == "__main__":
    main()


================================================
FILE: bin/handle_adapter_json.py
================================================
#!/usr/bin/env python3
import argparse
import json
import logging
import shlex
import subprocess
from pathlib import Path

from rhasspy3.event import read_event, write_event
from rhasspy3.handle import Handled, NotHandled
from rhasspy3.intent import Intent, NotRecognized

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "command",
        help="Command to run",
    )
    parser.add_argument("--shell", action="store_true")
    parser.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    if args.shell:
        command = args.command
    else:
        command = shlex.split(args.command)

    proc = subprocess.Popen(
        command,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        shell=args.shell,
        universal_newlines=True,
    )
    with proc:
        assert proc.stdin is not None
        assert proc.stdout is not None

        while True:
            event = read_event()
            if event is None:
                break

            if Intent.is_type(event.type):
                intent = Intent.from_event(event)
                stdout, _stderr = proc.communicate(
                    input=json.dumps(
                        {
                            "intent": {
                                "name": intent.name,
                            },
                            "entities": [
                                {"entity": entity.name, "value": entity.value}
                                for entity in intent.entities or []
                            ],
                            "slots": {
                                entity.name: entity.value
                                for entity in intent.entities or []
                            },
                        },
                        ensure_ascii=False,
                    )
                )
                handled = False
                for line in stdout.splitlines():
                    line = line.strip()
                    if line:
                        write_event(Handled(text=line).event())
                        handled = True
                        break

                if not handled:
                    write_event(NotHandled().event())

                break

            if NotRecognized.is_type(event.type):
                write_event(NotHandled().event())
                break


if __name__ == "__main__":
    main()


================================================
FILE: bin/handle_adapter_text.py
================================================
#!/usr/bin/env python3
import argparse
import logging
import shlex
import subprocess
from pathlib import Path

from rhasspy3.asr import Transcript
from rhasspy3.event import read_event, write_event
from rhasspy3.handle import Handled, NotHandled

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "command",
        help="Command to run",
    )
    parser.add_argument("--shell", action="store_true")
    parser.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    if args.shell:
        command = args.command
    else:
        command = shlex.split(args.command)

    proc = subprocess.Popen(
        command,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        shell=args.shell,
        universal_newlines=True,
    )
    with proc:
        assert proc.stdin is not None
        assert proc.stdout is not None

        while True:
            event = read_event()
            if event is None:
                break

            if Transcript.is_type(event.type):
                transcript = Transcript.from_event(event)
                stdout, _stderr = proc.communicate(input=transcript.text)
                handled = False
                for line in stdout.splitlines():
                    line = line.strip()
                    if line:
                        write_event(Handled(text=line).event())
                        handled = True
                        break

                if not handled:
                    write_event(NotHandled().event())

                break


if __name__ == "__main__":
    main()


================================================
FILE: bin/handle_intent.py
================================================
#!/usr/bin/env python3
"""Handle text or intent."""
import argparse
import asyncio
import json
import logging
import os
import sys
from pathlib import Path
from typing import Iterable

from rhasspy3.core import Rhasspy
from rhasspy3.handle import handle
from rhasspy3.intent import Intent

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)


async def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-c",
        "--config",
        default=_DIR.parent / "config",
        help="Configuration directory",
    )
    parser.add_argument(
        "-p", "--pipeline", default="default", help="Name of pipeline to use"
    )
    parser.add_argument(
        "--handle-program", help="Name of handle program to use (overrides pipeline)"
    )
    parser.add_argument("intent", nargs="*", help="Intent JSON event(s) to handle")
    #
    parser.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    rhasspy = Rhasspy.load(args.config)
    handle_program = args.handle_program
    pipeline = rhasspy.config.pipelines.get(args.pipeline)

    if not handle_program:
        assert pipeline is not None, f"No pipeline named {args.pipeline}"
        handle_program = pipeline.handle

    assert handle_program, "No handle program"

    for line in get_input(args):
        # Intent JSON
        handle_input: Intent = Intent.from_dict(json.loads(line))
        handle_result = await handle(rhasspy, handle_program, handle_input)
        if handle_result is None:
            _LOGGER.warning("No result")
            continue

        _LOGGER.debug(handle_result)
        json.dump(handle_result.event().to_dict(), sys.stdout, ensure_ascii=False)


def get_input(args: argparse.Namespace) -> Iterable[str]:
    """Get input from stdin or args."""
    if args.intent:
        for event_json in args.intent:
            yield event_json
    else:
        if os.isatty(sys.stdin.fileno()):
            print("Reading input from stdin", file=sys.stderr)

        for line in sys.stdin:
            line = line.strip()
            if line:
                yield line


if __name__ == "__main__":
    asyncio.run(main())


================================================
FILE: bin/handle_text.py
================================================
#!/usr/bin/env python3
"""Handle text or intent."""
import argparse
import asyncio
import json
import logging
import os
import sys
from pathlib import Path
from typing import Iterable

from rhasspy3.asr import Transcript
from rhasspy3.core import Rhasspy
from rhasspy3.handle import handle

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)


async def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-c",
        "--config",
        default=_DIR.parent / "config",
        help="Configuration directory",
    )
    parser.add_argument(
        "-p", "--pipeline", default="default", help="Name of pipeline to use"
    )
    parser.add_argument(
        "--handle-program", help="Name of handle program to use (overrides pipeline)"
    )
    parser.add_argument("text", nargs="*", help="Text input to handle")
    #
    parser.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    rhasspy = Rhasspy.load(args.config)
    handle_program = args.handle_program
    pipeline = rhasspy.config.pipelines.get(args.pipeline)

    if not handle_program:
        assert pipeline is not None, f"No pipeline named {args.pipeline}"
        handle_program = pipeline.handle

    assert handle_program, "No handle program"

    for line in get_input(args):
        # Text
        handle_input = Transcript(text=line)
        handle_result = await handle(rhasspy, handle_program, handle_input)
        if handle_result is None:
            _LOGGER.warning("No result")
            continue

        _LOGGER.debug(handle_result)
        json.dump(handle_result.event().to_dict(), sys.stdout, ensure_ascii=False)


def get_input(args: argparse.Namespace) -> Iterable[str]:
    """Get input from stdin or args."""
    if args.text:
        for text in args.text:
            yield text
    else:
        if os.isatty(sys.stdin.fileno()):
            print("Reading input from stdin", file=sys.stderr)

        for line in sys.stdin:
            line = line.strip()
            if line:
                yield line


if __name__ == "__main__":
    asyncio.run(main())


================================================
FILE: bin/intent_recognize.py
================================================
#!/usr/bin/env python3
import argparse
import asyncio
import json
import logging
import os
import sys
from pathlib import Path
from typing import Iterable

from rhasspy3.core import Rhasspy
from rhasspy3.intent import recognize

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)


async def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-c",
        "--config",
        default=_DIR.parent / "config",
        help="Configuration directory",
    )
    parser.add_argument(
        "-p", "--pipeline", default="default", help="Name of pipeline to use"
    )
    parser.add_argument(
        "--intent-program", help="Name of intent program to use (overrides pipeline)"
    )
    parser.add_argument("text", nargs="*", help="Text to recognize")
    parser.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    rhasspy = Rhasspy.load(args.config)
    intent_program = args.intent_program
    pipeline = rhasspy.config.pipelines.get(args.pipeline)

    if not intent_program:
        assert pipeline is not None, f"No pipeline named {args.pipeline}"
        intent_program = pipeline.intent

    assert intent_program, "No intent program"

    for text in get_texts(args):
        intent_result = await recognize(rhasspy, intent_program, text)
        if intent_result is None:
            continue

        json.dump(intent_result.event().data, sys.stdout, ensure_ascii=False)
        print("", flush=True)


def get_texts(args: argparse.Namespace) -> Iterable[str]:
    if args.text:
        for text in args.text:
            yield text
    else:
        if os.isatty(sys.stdin.fileno()):
            print("Reading text from stdin", file=sys.stderr)

        for line in sys.stdin:
            line = line.strip()
            if line:
                yield line


if __name__ == "__main__":
    asyncio.run(main())


================================================
FILE: bin/mic_adapter_raw.py
================================================
#!/usr/bin/env python3
"""Reads raw audio chunks from stdin."""
import argparse
import logging
import shlex
import subprocess
import time
from pathlib import Path

from rhasspy3.audio import DEFAULT_SAMPLES_PER_CHUNK, AudioChunk, AudioStart
from rhasspy3.event import write_event

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "command",
        help="Command to run",
    )
    parser.add_argument("--shell", action="store_true", help="Run command with shell")
    #
    parser.add_argument(
        "--samples-per-chunk",
        type=int,
        default=DEFAULT_SAMPLES_PER_CHUNK,
        help="Number of samples to read at a time from command",
    )
    parser.add_argument(
        "--rate",
        type=int,
        required=True,
        help="Sample rate (hz)",
    )
    parser.add_argument(
        "--width",
        type=int,
        required=True,
        help="Sample width bytes",
    )
    parser.add_argument(
        "--channels",
        type=int,
        required=True,
        help="Sample channel count",
    )
    #
    parser.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    bytes_per_chunk = args.samples_per_chunk * args.width * args.channels

    if args.shell:
        command = args.command
    else:
        command = shlex.split(args.command)

    proc = subprocess.Popen(command, stdout=subprocess.PIPE)
    with proc:
        assert proc.stdout is not None

        write_event(
            AudioStart(
                args.rate, args.width, args.channels, timestamp=time.monotonic_ns()
            ).event()
        )
        while True:
            audio_bytes = proc.stdout.read(bytes_per_chunk)
            if not audio_bytes:
                break

            write_event(
                AudioChunk(
                    args.rate,
                    args.width,
                    args.channels,
                    audio_bytes,
                    timestamp=time.monotonic_ns(),
                ).event()
            )


if __name__ == "__main__":
    main()


================================================
FILE: bin/mic_record_sample.py
================================================
#!/usr/bin/env python3
"""Record a spoken audio sample to a WAV file."""
import argparse
import asyncio
import logging
import wave
from collections import deque
from pathlib import Path
from typing import Deque

from rhasspy3.audio import AudioChunk
from rhasspy3.core import Rhasspy
from rhasspy3.event import async_read_event, async_write_event
from rhasspy3.mic import DOMAIN as MIC_DOMAIN
from rhasspy3.program import create_process
from rhasspy3.vad import DOMAIN as VAD_DOMAIN
from rhasspy3.vad import VoiceStarted, VoiceStopped

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)


async def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("wav_file", nargs="+", help="Path to WAV file(s) to write")
    #
    parser.add_argument(
        "-c",
        "--config",
        default=_DIR.parent / "config",
        help="Configuration directory",
    )
    parser.add_argument(
        "-p", "--pipeline", default="default", help="Name of pipeline to use"
    )
    parser.add_argument(
        "--mic-program", help="Name of mic program to use (overrides pipeline)"
    )
    parser.add_argument(
        "--vad-program", help="Name of vad program to use (overrides pipeline)"
    )
    #
    parser.add_argument(
        "--chunk-buffer-size",
        type=int,
        default=25,
        help="Audio chunks to buffer before start is known",
    )
    parser.add_argument(
        "-b",
        "--keep-chunks-before",
        type=int,
        default=5,
        help="Audio chunks to keep before voice starts",
    )
    parser.add_argument(
        "-a",
        "--keep-chunks-after",
        type=int,
        default=0,
        help="Audio chunks to keep after voice ends",
    )
    #
    parser.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    rhasspy = Rhasspy.load(args.config)
    mic_program = args.mic_program
    vad_program = args.vad_program
    pipeline = rhasspy.config.pipelines.get(args.pipeline)

    if not mic_program:
        assert pipeline is not None, f"No pipeline named {args.pipeline}"
        mic_program = pipeline.mic

    assert mic_program, "No mic program"

    if not vad_program:
        assert pipeline is not None, f"No pipeline named {args.pipeline}"
        vad_program = pipeline.vad

    assert vad_program, "No vad program"

    for wav_path in args.wav_file:
        wav_file: wave.Wave_write = wave.open(wav_path, "wb")
        with wav_file:
            is_first_chunk = True

            # Audio kept before we get the event that the voice command started
            # at a timestep in the past.
            chunk_buffer: Deque[AudioChunk] = deque(
                maxlen=max(args.chunk_buffer_size, args.keep_chunks_before)
            )

            async with (
                await create_process(rhasspy, MIC_DOMAIN, mic_program)
            ) as mic_proc, (
                await create_process(rhasspy, VAD_DOMAIN, vad_program)
            ) as vad_proc:
                assert mic_proc.stdout is not None
                assert vad_proc.stdin is not None
                assert vad_proc.stdout is not None

                _LOGGER.info("Recording %s", wav_path)
                mic_task = asyncio.create_task(async_read_event(mic_proc.stdout))
                vad_task = asyncio.create_task(async_read_event(vad_proc.stdout))
                pending = {mic_task, vad_task}

                before_command = True
                while True:
                    done, pending = await asyncio.wait(
                        pending, return_when=asyncio.FIRST_COMPLETED
                    )
                    if mic_task in done:
                        mic_event = mic_task.result()
                        if mic_event is None:
                            break

                        # Process chunk
                        if AudioChunk.is_type(mic_event.type):
                            chunk = AudioChunk.from_event(mic_event)
                            if is_first_chunk:
                                _LOGGER.debug("Receiving audio")
                                is_first_chunk = False
                                wav_file.setframerate(chunk.rate)
                                wav_file.setsampwidth(chunk.width)
                                wav_file.setnchannels(chunk.channels)

                            await async_write_event(mic_event, vad_proc.stdin)
                            if before_command:
                                chunk_buffer.append(chunk)
                            else:
                                wav_file.writeframes(chunk.audio)

                        # Next chunk
                        mic_task = asyncio.create_task(
                            async_read_event(mic_proc.stdout)
                        )
                        pending.add(mic_task)

                    if vad_task in done:
                        vad_event = vad_task.result()
                        if vad_event is None:
                            break

                        if VoiceStarted.is_type(vad_event.type):
                            if before_command:
                                # Start of voice command
                                voice_started = VoiceStarted.from_event(vad_event)
                                if voice_started.timestamp is None:
                                    # Keep chunks before
                                    chunks_left = args.keep_chunks_before
                                    while chunk_buffer and (chunks_left > 0):
                                        chunk = chunk_buffer.popleft()
                                        wav_file.writeframes(chunk.audio)
                                else:
                                    # Locate start chunk
                                    start_idx = 0
                                    for i, chunk in enumerate(chunk_buffer):
                                        if (chunk.timestamp is not None) and (
                                            chunk.timestamp >= voice_started.timestamp
                                        ):
                                            start_idx = i
                                            break

                                    # Back up by "keep chunks" and then write audio forward
                                    start_idx = max(
                                        0, start_idx - args.keep_chunks_before
                                    )
                                    for i, chunk in enumerate(chunk_buffer):
                                        if i >= start_idx:
                                            wav_file.writeframes(chunk.audio)

                                    chunk_buffer.clear()

                                before_command = False
                                _LOGGER.info("Speaking started")
                        elif VoiceStopped.is_type(vad_event.type):
                            # End of voice command
                            _LOGGER.info("Speaking ended")
                            break

                        # Next VAD event
                        vad_task = asyncio.create_task(
                            async_read_event(vad_proc.stdout)
                        )
                        pending.add(vad_task)

                # After chunks
                num_chunks_left = args.keep_chunks_after
                while num_chunks_left > 0:
                    mic_event = await mic_task
                    if mic_event is None:
                        break

                    if AudioChunk.is_type(mic_event.type):
                        chunk = AudioChunk.from_event(mic_event)
                        wav_file.writeframes(chunk.audio)
                        num_chunks_left -= 1

                    if num_chunks_left > 0:
                        mic_task = asyncio.create_task(
                            async_read_event(mic_proc.stdout)
                        )


if __name__ == "__main__":
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        pass


================================================
FILE: bin/mic_test_energy.py
================================================
#!/usr/bin/env python3
"""Prints microphone energy level to console for testing."""
import argparse
import asyncio
import audioop
import logging
from pathlib import Path

from rhasspy3.audio import AudioChunk, AudioStop
from rhasspy3.core import Rhasspy
from rhasspy3.event import async_read_event
from rhasspy3.mic import DOMAIN as MIC_DOMAIN
from rhasspy3.program import create_process

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)


async def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-c",
        "--config",
        default=_DIR.parent / "config",
        help="Configuration directory",
    )
    parser.add_argument(
        "-p", "--pipeline", default="default", help="Name of pipeline to use"
    )
    parser.add_argument(
        "--mic-program", help="Name of mic program to use (overrides pipeline)"
    )
    #
    parser.add_argument(
        "--levels", type=int, default=40, help="Number of levels to display"
    )
    parser.add_argument(
        "--numeric",
        action="store_true",
        help="Print energy numeric values instead of showing level",
    )
    #
    parser.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    rhasspy = Rhasspy.load(args.config)
    mic_program = args.mic_program
    pipeline = rhasspy.config.pipelines.get(args.pipeline)

    if not mic_program:
        assert pipeline is not None, f"No pipeline named {args.pipeline}"
        mic_program = pipeline.mic

    assert mic_program, "No mic program"
    max_energy = 0

    async with (await create_process(rhasspy, MIC_DOMAIN, mic_program)) as mic_proc:
        assert mic_proc.stdout is not None

        while True:
            event = await async_read_event(mic_proc.stdout)
            if event is None:
                break

            if AudioChunk.is_type(event.type):
                chunk = AudioChunk.from_event(event)
                energy = -audioop.rms(chunk.audio, chunk.width)
                energy_bytes = bytes([energy & 0xFF, (energy >> 8) & 0xFF])
                debiased_energy = audioop.rms(
                    audioop.add(
                        chunk.audio,
                        energy_bytes * (len(chunk.audio) // chunk.width),
                        chunk.width,
                    ),
                    chunk.width,
                )

                max_energy = max(max_energy, debiased_energy)
                max_energy = max(1, max_energy)

                if args.numeric:
                    # Print numbers
                    print(debiased_energy, "/", max_energy)
                else:
                    # Print graphic
                    energy_level = int(args.levels * (debiased_energy / max_energy))
                    energy_level = max(0, energy_level)
                    print(
                        "\r",  # We still use typewriters!
                        "[",
                        "*" * energy_level,
                        " " * (args.levels - energy_level),
                        "]",
                        sep="",
                        end="",
                    )

            elif AudioStop.is_type(event.type):
                break


if __name__ == "__main__":
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        pass


================================================
FILE: bin/pipeline_run.py
================================================
#!/usr/bin/env python3
"""Run a pipeline all or part of the way."""
import argparse
import asyncio
import json
import logging
import sys
from pathlib import Path
from typing import IO, Optional, Union

from rhasspy3.asr import Transcript
from rhasspy3.audio import DEFAULT_SAMPLES_PER_CHUNK
from rhasspy3.core import Rhasspy
from rhasspy3.event import Event
from rhasspy3.handle import Handled, NotHandled
from rhasspy3.intent import Intent, NotRecognized
from rhasspy3.pipeline import StopAfterDomain
from rhasspy3.pipeline import run as run_pipeline
from rhasspy3.wake import Detection

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)


async def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-c",
        "--config",
        default=_DIR.parent / "config",
        help="Configuration directory",
    )
    parser.add_argument(
        "-p", "--pipeline", default="default", help="Name of pipeline to use"
    )
    #
    parser.add_argument(
        "--stop-after",
        choices=[domain.value for domain in StopAfterDomain],
        help="Domain to stop pipeline after",
    )
    #
    parser.add_argument(
        "--wake-name", help="Skip wake word detection and use name instead"
    )
    parser.add_argument(
        "--asr-wav",
        help="Use WAV file for speech to text instead of mic input (skips wake)",
    )
    parser.add_argument("--asr-text", help="Use text for asr transcript (skips wake)")
    parser.add_argument(
        "--intent-json", help="Use JSON for recognized intent (skips wake, asr)"
    )
    parser.add_argument(
        "--handle-text", help="Use text for handle response (skips handle)"
    )
    parser.add_argument(
        "--tts-wav", help="Play WAV file instead of text to speech response (skips tts)"
    )

    parser.add_argument(
        "--samples-per-chunk", type=int, default=DEFAULT_SAMPLES_PER_CHUNK
    )
    parser.add_argument("--asr-chunks-to-buffer", type=int, default=0)
    parser.add_argument("--loop", action="store_true", help="Keep pipeline running")
    parser.add_argument("--debug", action="store_true", help="Log DEBUG messages")
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    wake_detection: Optional[Detection] = None
    if args.wake_name:
        # Wake word detection will be skipped
        wake_detection = Detection(name=args.wake_name)

    asr_wav_in: Optional[IO[bytes]] = None
    if args.asr_wav:
        # asr input will come from WAV file instead of mic
        asr_wav_in = open(args.asr_wav, "rb")

    asr_transcript: Optional[Transcript] = None
    if args.asr_text:
        # asr transcription will be skipped
        asr_transcript = Transcript(text=args.asr_text)

    intent_result: Optional[Union[Intent, NotRecognized]] = None
    if args.intent_json:
        # intent recognition will be skipped
        intent_event = Event.from_dict(json.loads(args.intent_json))
        if Intent.is_type(intent_event.type):
            intent_result = Intent.from_event(intent_event)
        elif NotRecognized.is_type(intent_event.type):
            intent_result = NotRecognized.from_event(intent_event)

    handle_result: Optional[Union[Handled, NotHandled]] = None
    if args.handle_text:
        # text/intent handling will be skipped
        handle_result = Handled(text=args.handle_text)

    tts_wav_in: Optional[IO[bytes]] = None
    if args.tts_wav:
        # tts synthesis will be skipped
        tts_wav_in = open(args.tts_wav, "rb")

    rhasspy = Rhasspy.load(args.config)

    while True:
        pipeline_result = await run_pipeline(
            rhasspy,
            args.pipeline,
            samples_per_chunk=args.samples_per_chunk,
            asr_chunks_to_buffer=args.asr_chunks_to_buffer,
            wake_detection=wake_detection,
            asr_wav_in=asr_wav_in,
            asr_transcript=asr_transcript,
            intent_result=intent_result,
            handle_result=handle_result,
            tts_wav_in=tts_wav_in,
            stop_after=args.stop_after,
        )

        json.dump(pipeline_result.to_dict(), sys.stdout, ensure_ascii=False)
        print("")

        if not args.loop:
            break


if __name__ == "__main__":
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        pass


================================================
FILE: bin/program_download.py
================================================
#!/usr/bin/env python3
import argparse
import logging
import shlex
import string
import subprocess
from pathlib import Path

from rhasspy3.core import Rhasspy

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("domain")
    parser.add_argument("program")
    parser.add_argument("model")
    parser.add_argument(
        "-c",
        "--config",
        default=_DIR.parent / "config",
        help="Configuration directory",
    )
    parser.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    rhasspy = Rhasspy.load(args.config)
    program_config = rhasspy.config.programs.get(args.domain, {}).get(args.program)
    assert program_config is not None, f"No config for {args.domain} {args.program}"

    install = program_config.install
    assert install is not None, f"No install config for {args.domain} {args.program}"

    downloads = install.downloads
    assert downloads is not None, f"No downloads for {args.domain} {args.program}"

    model = downloads.get(args.model)
    assert (
        model is not None
    ), f"No download named {args.model} for {args.domain} {args.program}"

    program_dir = rhasspy.programs_dir / args.domain / args.program
    data_dir = rhasspy.data_dir / args.domain / args.program

    default_mapping = {
        "program_dir": str(program_dir.absolute()),
        "data_dir": str(data_dir.absolute()),
        "model": str(args.model),
    }

    # Check if already installed
    if model.check_file is not None:
        check_file = Path(
            string.Template(model.check_file).safe_substitute(default_mapping)
        )
        if check_file.exists():
            _LOGGER.info("Installed: %s", check_file)
            return

    download = install.download
    assert download is not None, f"No download config for {args.domain} {args.program}"

    download_command = string.Template(download.command).safe_substitute(
        default_mapping
    )
    _LOGGER.info(download_command)

    cwd = program_dir if program_dir.exists() else rhasspy.config_dir

    if download.shell:
        subprocess.check_call(download_command, shell=True, cwd=cwd)
    else:
        subprocess.check_call(shlex.split(download_command), cwd=cwd)


if __name__ == "__main__":
    main()


================================================
FILE: bin/program_install.py
================================================
#!/usr/bin/env python3
import argparse
import logging
import shlex
import string
import subprocess
from pathlib import Path

from rhasspy3.core import Rhasspy

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("domain")
    parser.add_argument("program")
    parser.add_argument(
        "-c",
        "--config",
        default=_DIR.parent / "config",
        help="Configuration directory",
    )
    parser.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    rhasspy = Rhasspy.load(args.config)
    program_config = rhasspy.config.programs.get(args.domain, {}).get(args.program)
    assert program_config is not None, f"No config for {args.domain} {args.program}"

    install = program_config.install
    assert install is not None, f"No install config for {args.domain} {args.program}"

    program_dir = rhasspy.programs_dir / args.domain / args.program
    data_dir = rhasspy.data_dir / args.domain / args.program

    default_mapping = {
        "program_dir": str(program_dir.absolute()),
        "data_dir": str(data_dir.absolute()),
    }

    # Check if already installed
    if install.check_file is not None:
        check_file = Path(
            string.Template(install.check_file).safe_substitute(default_mapping)
        )
        if check_file.exists():
            _LOGGER.info("Installed: %s", check_file)
            return

    install_command = string.Template(install.command).safe_substitute(default_mapping)
    _LOGGER.debug(install_command)

    cwd = program_dir if program_dir.exists() else rhasspy.config_dir

    if install.shell:
        subprocess.check_call(install_command, shell=True, cwd=cwd)
    else:
        subprocess.check_call(shlex.split(install_command), cwd=cwd)


if __name__ == "__main__":
    main()


================================================
FILE: bin/satellite_run.py
================================================
#!/usr/bin/env python3
"""Run satellite loop."""
import argparse
import asyncio
import logging
from collections import deque
from pathlib import Path
from typing import Deque, List

from rhasspy3.audio import AudioChunk, AudioStop
from rhasspy3.core import Rhasspy
from rhasspy3.event import Event, async_read_event, async_write_event
from rhasspy3.mic import DOMAIN as MIC_DOMAIN
from rhasspy3.program import create_process
from rhasspy3.remote import DOMAIN as REMOTE_DOMAIN
from rhasspy3.snd import DOMAIN as SND_DOMAIN
from rhasspy3.snd import Played
from rhasspy3.wake import detect

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)


async def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-c",
        "--config",
        default=_DIR.parent / "config",
        help="Configuration directory",
    )
    parser.add_argument(
        "-s", "--satellite", default="default", help="Name of satellite to use"
    )
    #
    parser.add_argument(
        "--mic-program",
        help="Program to use for mic input (overrides satellite)",
    )
    parser.add_argument(
        "--wake-program",
        help="Program to use for wake word detection (overiddes satellite)",
    )
    parser.add_argument(
        "--remote-program",
        help="Program to use for remote communication with base station (overrides satellite)",
    )
    parser.add_argument(
        "--snd-program",
        help="Program to use for audio output (overrides satellite)",
    )
    #
    parser.add_argument("--asr-chunks-to-buffer", type=int, default=0)
    #
    parser.add_argument("--loop", action="store_true", help="Keep satellite running")
    parser.add_argument("--debug", action="store_true", help="Log DEBUG messages")
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    rhasspy = Rhasspy.load(args.config)
    mic_program = args.mic_program
    wake_program = args.wake_program
    remote_program = args.remote_program
    snd_program = args.snd_program
    satellite = rhasspy.config.satellites.get(args.satellite)

    if not mic_program:
        assert satellite is not None, f"No satellite named {args.satellite}"
        mic_program = satellite.mic

    assert mic_program, "No mic program"

    if not wake_program:
        assert satellite is not None, f"No satellite named {args.satellite}"
        wake_program = satellite.wake

    assert wake_program, "No wake program"

    if not remote_program:
        assert satellite is not None, f"No satellite named {args.satellite}"
        remote_program = satellite.remote

    assert remote_program, "No remote program"

    if not snd_program:
        assert satellite is not None, f"No satellite named {args.satellite}"
        snd_program = satellite.snd

    assert snd_program, "No snd program"

    while True:
        chunk_buffer: Deque[Event] = deque(maxlen=args.asr_chunks_to_buffer)
        snd_buffer: List[Event] = []

        async with (await create_process(rhasspy, MIC_DOMAIN, mic_program)) as mic_proc:
            assert mic_proc.stdout is not None

            detection = await detect(
                rhasspy, wake_program, mic_proc.stdout, chunk_buffer
            )
            if detection is None:
                continue

            async with (
                await create_process(rhasspy, REMOTE_DOMAIN, remote_program)
            ) as remote_proc:
                assert remote_proc.stdin is not None
                assert remote_proc.stdout is not None

                while chunk_buffer:
                    await async_write_event(chunk_buffer.pop(), remote_proc.stdin)

                mic_task = asyncio.create_task(async_read_event(mic_proc.stdout))
                remote_task = asyncio.create_task(async_read_event(remote_proc.stdout))
                pending = {mic_task, remote_task}

                try:
                    # Stream to remote until audio is received
                    while True:
                        done, pending = await asyncio.wait(
                            pending, return_when=asyncio.FIRST_COMPLETED
                        )

                        if mic_task in done:
                            mic_event = mic_task.result()
                            if mic_event is None:
                                break

                            if AudioChunk.is_type(mic_event.type):
                                await async_write_event(mic_event, remote_proc.stdin)

                            mic_task = asyncio.create_task(
                                async_read_event(mic_proc.stdout)
                            )
                            pending.add(mic_task)

                        if remote_task in done:
                            remote_event = remote_task.result()
                            if remote_event is not None:
                                snd_buffer.append(remote_event)

                            for task in pending:
                                task.cancel()

                            break

                    # Output audio
                    async with (
                        await create_process(rhasspy, SND_DOMAIN, snd_program)
                    ) as snd_proc:
                        assert snd_proc.stdin is not None
                        assert snd_proc.stdout is not None

                        for remote_event in snd_buffer:
                            if AudioChunk.is_type(remote_event.type):
                                await async_write_event(remote_event, snd_proc.stdin)
                            elif AudioStop.is_type(remote_event.type):
                                # Unexpected, but it could happen
                                continue

                        while True:
                            remote_event = await async_read_event(remote_proc.stdout)
                            if remote_event is None:
                                break

                            if AudioChunk.is_type(remote_event.type):
                                await async_write_event(remote_event, snd_proc.stdin)
                            elif AudioStop.is_type(remote_event.type):
                                await async_write_event(remote_event, snd_proc.stdin)
                                break

                        # Wait for audio to finish playing
                        while True:
                            snd_event = await async_read_event(snd_proc.stdout)
                            if snd_event is None:
                                break

                            if Played.is_type(snd_event.type):
                                break
                except Exception:
                    _LOGGER.exception(
                        "Unexpected error communicating with remote base station"
                    )

        if not args.loop:
            break


if __name__ == "__main__":
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        pass


================================================
FILE: bin/server_run.py
================================================
#!/usr/bin/env python3
import argparse
import logging
import os
import shlex
import string
import subprocess
import sys
from pathlib import Path
from typing import List, Union

from rhasspy3.core import Rhasspy
from rhasspy3.util import merge_dict

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-c",
        "--config",
        default=_DIR.parent / "config",
        help="Configuration directory",
    )
    parser.add_argument("domain", help="Domain of server (asr, tts, etc.)")
    parser.add_argument("server", help="Name of server to run")

    parser.add_argument("--debug", action="store_true", help="Log DEBUG messages")
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    rhasspy = Rhasspy.load(args.config)
    server = rhasspy.config.servers[args.domain][args.server]

    program_dir = rhasspy.programs_dir / args.domain / args.server
    data_dir = rhasspy.data_dir / args.domain / args.server

    # ${variables} available within command and template args
    default_mapping = {
        "program_dir": str(program_dir.absolute()),
        "data_dir": str(data_dir.absolute()),
    }

    command_str = server.command
    command_mapping = dict(default_mapping)

    if server.template_args:
        # Substitute within template args
        args_mapping = dict(server.template_args)
        for arg_name, arg_str in args_mapping.items():
            if not isinstance(arg_str, str):
                continue

            arg_template = string.Template(arg_str)
            args_mapping[arg_name] = arg_template.safe_substitute(default_mapping)

        merge_dict(command_mapping, args_mapping)

    command_template = string.Template(command_str)
    command_str = command_template.safe_substitute(command_mapping)

    env = dict(os.environ)

    # Add rhasspy3/bin to $PATH
    env["PATH"] = f'{rhasspy.base_dir}/bin:${env["PATH"]}'

    # Ensure stdout is flushed for Python programs
    env["PYTHONUNBUFFERED"] = "1"

    server_dir = rhasspy.programs_dir / args.domain / args.server
    cwd = server_dir if server_dir.is_dir() else rhasspy.base_dir

    if server.shell:
        command: Union[str, List[str]] = command_str
    else:
        command = shlex.split(command_str)

    _LOGGER.debug(command)
    proc = subprocess.Popen(command, shell=server.shell, cwd=cwd, env=env)
    with proc:
        sys.exit(proc.wait())


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        pass


================================================
FILE: bin/snd_adapter_raw.py
================================================
#!/usr/bin/env python3
"""Play audio through a command that accepts raw PCM."""
import argparse
import logging
import shlex
import subprocess
from pathlib import Path

from rhasspy3.audio import (
    DEFAULT_OUT_CHANNELS,
    DEFAULT_OUT_RATE,
    DEFAULT_OUT_WIDTH,
    AudioChunk,
    AudioChunkConverter,
    AudioStop,
)
from rhasspy3.event import read_event, write_event
from rhasspy3.snd import Played

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "command",
        help="Command to run",
    )
    parser.add_argument(
        "--rate", type=int, default=DEFAULT_OUT_RATE, help="Sample rate (hertz)"
    )
    parser.add_argument(
        "--width", type=int, default=DEFAULT_OUT_WIDTH, help="Sample width (bytes)"
    )
    parser.add_argument(
        "--channels",
        type=int,
        default=DEFAULT_OUT_CHANNELS,
        help="Sample channel count",
    )
    parser.add_argument("--shell", action="store_true", help="Run command with shell")
    parser.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    if args.shell:
        command = args.command
    else:
        command = shlex.split(args.command)

    try:
        proc = subprocess.Popen(
            command, stdin=subprocess.PIPE, stdout=subprocess.DEVNULL
        )
        assert proc.stdin is not None

        converter = AudioChunkConverter(args.rate, args.width, args.channels)
        with proc:
            while True:
                event = read_event()
                if event is None:
                    break

                if AudioChunk.is_type(event.type):
                    chunk = AudioChunk.from_event(event)
                    chunk = converter.convert(chunk)
                    proc.stdin.write(chunk.audio)
                    proc.stdin.flush()
                elif AudioStop.is_type(event.type):
                    break
    finally:
        write_event(Played().event())


if __name__ == "__main__":
    main()


================================================
FILE: bin/snd_play.py
================================================
#!/usr/bin/env python3
import argparse
import asyncio
import logging
import os
import sys
from pathlib import Path

from rhasspy3.audio import DEFAULT_SAMPLES_PER_CHUNK
from rhasspy3.core import Rhasspy
from rhasspy3.snd import play

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)


async def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("wav_file", nargs="*", help="Path to WAV file(s) to play")
    parser.add_argument(
        "-c",
        "--config",
        default=_DIR.parent / "config",
        help="Configuration directory",
    )
    parser.add_argument("-p", "--pipeline", default="default", help="Name of pipeline")
    parser.add_argument("--snd-program", help="Audio output program name")
    parser.add_argument(
        "--samples-per-chunk",
        type=int,
        default=DEFAULT_SAMPLES_PER_CHUNK,
        help="Samples to send to snd program at a time",
    )
    parser.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    rhasspy = Rhasspy.load(args.config)
    snd_program = args.snd_program
    pipeline = rhasspy.config.pipelines.get(args.pipeline)

    if not snd_program:
        assert pipeline is not None, f"No pipeline named {args.pipeline}"
        snd_program = pipeline.snd

    assert snd_program, "No snd program"

    if args.wav_file:
        for wav_path in args.wav_file:
            with open(wav_path, "rb") as wav_file:
                await play(rhasspy, snd_program, wav_file, args.samples_per_chunk)
    else:
        if os.isatty(sys.stdin.fileno()):
            print("Reading WAV data from stdin", file=sys.stderr)

        await play(
            rhasspy,
            snd_program,
            sys.stdin.buffer,
            args.samples_per_chunk,
        )


if __name__ == "__main__":
    asyncio.run(main())


================================================
FILE: bin/tts_adapter_http.py
================================================
#!/usr/bin/env python3
import argparse
import logging
import wave
from pathlib import Path
from urllib.parse import urlencode
from urllib.request import urlopen

from rhasspy3.audio import DEFAULT_SAMPLES_PER_CHUNK, AudioChunk, AudioStart, AudioStop
from rhasspy3.event import read_event, write_event
from rhasspy3.tts import Synthesize

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "url",
        help="URL of API endpoint",
    )
    parser.add_argument(
        "--param",
        nargs=2,
        action="append",
        metavar=("name", "value"),
        help="Name/value of query parameter",
    )
    #
    parser.add_argument(
        "--samples-per-chunk", type=int, default=DEFAULT_SAMPLES_PER_CHUNK
    )
    #
    parser.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    params = {}
    if args.param:
        for key, value in params.items():
            # Don't include empty parameters
            if value:
                params[key] = value

    try:
        while True:
            event = read_event()
            if event is None:
                break

            if Synthesize.is_type(event.type):
                synthesize = Synthesize.from_event(event)

                params["text"] = synthesize.text
                url = args.url + "?" + urlencode(params)

                with urlopen(url) as response:
                    with wave.open(response, "rb") as wav_file:
                        rate = wav_file.getframerate()
                        width = wav_file.getsampwidth()
                        channels = wav_file.getnchannels()

                        num_frames = wav_file.getnframes()
                        audio_bytes = wav_file.readframes(num_frames)

                bytes_per_chunk = args.samples_per_chunk * width
                timestamp = 0
                write_event(
                    AudioStart(rate, width, channels, timestamp=timestamp).event()
                )
                while audio_bytes:
                    chunk = AudioChunk(
                        rate,
                        width,
                        channels,
                        audio_bytes[:bytes_per_chunk],
                        timestamp=timestamp,
                    )
                    write_event(chunk.event())
                    timestamp += chunk.milliseconds
                    audio_bytes = audio_bytes[bytes_per_chunk:]

                write_event(AudioStop(timestamp=timestamp).event())
    except KeyboardInterrupt:
        pass


if __name__ == "__main__":
    main()


================================================
FILE: bin/tts_adapter_text2wav.py
================================================
#!/usr/bin/env python3
"""
Runs a text to speech command that returns WAV audio on stdout or in a temp file.
"""
import argparse
import io
import logging
import shlex
import subprocess
import tempfile
import wave
from pathlib import Path

from rhasspy3.audio import DEFAULT_SAMPLES_PER_CHUNK, AudioChunk, AudioStart, AudioStop
from rhasspy3.event import read_event, write_event
from rhasspy3.tts import Synthesize

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "command",
        help="Command to run",
    )
    parser.add_argument(
        "--temp_file",
        action="store_true",
        help="Command has {temp_file} and will write output to it",
    )
    parser.add_argument(
        "--text",
        action="store_true",
        help="Command has {text} argument",
    )
    #
    parser.add_argument(
        "--samples-per-chunk", type=int, default=DEFAULT_SAMPLES_PER_CHUNK
    )
    #
    parser.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    try:
        while True:
            event = read_event()
            if event is None:
                break

            if Synthesize.is_type(event.type):
                synthesize = Synthesize.from_event(event)
                wav_bytes = text_to_wav(args, synthesize.text)
                with io.BytesIO(wav_bytes) as wav_io:
                    with wave.open(wav_io, "rb") as wav_file:
                        rate = wav_file.getframerate()
                        width = wav_file.getsampwidth()
                        channels = wav_file.getnchannels()

                        num_frames = wav_file.getnframes()
                        audio_bytes = wav_file.readframes(num_frames)

                bytes_per_chunk = args.samples_per_chunk * width
                timestamp = 0
                write_event(
                    AudioStart(rate, width, channels, timestamp=timestamp).event()
                )
                while audio_bytes:
                    chunk = AudioChunk(
                        rate,
                        width,
                        channels,
                        audio_bytes[:bytes_per_chunk],
                        timestamp=timestamp,
                    )
                    write_event(chunk.event())
                    timestamp += chunk.milliseconds
                    audio_bytes = audio_bytes[bytes_per_chunk:]

                write_event(AudioStop(timestamp=timestamp).event())
    except KeyboardInterrupt:
        pass


def text_to_wav(args: argparse.Namespace, text: str) -> bytes:
    command_str = args.command
    format_args = {}
    if args.text:
        format_args["text"] = text
        text = ""  # Pass as arg instead

    if args.temp_file:
        with tempfile.NamedTemporaryFile(mode="wb", suffix=".wav") as wav_file:
            format_args["temp_file"] = wav_file.name
            command_str = command_str.format(**format_args)
            command = shlex.split(command_str)

            # Send stdout to devnull so it doesn't interfere with our events
            subprocess.run(
                command, check=True, stdout=subprocess.DEVNULL, input=text.encode()
            )
            wav_file.seek(0)
            return Path(wav_file.name).read_bytes()

    else:
        command_str = command_str.format(**format_args)
        command = shlex.split(command_str)
        return subprocess.check_output(command, input=text.encode())


if __name__ == "__main__":
    main()


================================================
FILE: bin/tts_speak.py
================================================
#!/usr/bin/env python3
"""Synthesize and speak audio."""
import argparse
import asyncio
import io
import json
import logging
import os
import sys
from pathlib import Path

from rhasspy3.core import Rhasspy
from rhasspy3.snd import play
from rhasspy3.tts import synthesize

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)


async def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("text", nargs="*", help="Text to speak (default: stdin)")
    parser.add_argument(
        "-c",
        "--config",
        default=_DIR.parent / "config",
        help="Configuration directory",
    )
    parser.add_argument("-p", "--pipeline", default="default", help="Name of pipeline")
    parser.add_argument("--tts-program", help="TTS program name")
    parser.add_argument("--snd-program", help="Audio output program name")
    parser.add_argument(
        "--samples-per-chunk",
        type=int,
        default=1024,
        help="Samples to send to snd program at a time",
    )
    parser.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    rhasspy = Rhasspy.load(args.config)
    tts_program = args.tts_program
    snd_program = args.snd_program
    pipeline = rhasspy.config.pipelines.get(args.pipeline)

    if not tts_program:
        assert pipeline is not None, f"No pipeline named {args.pipeline}"
        tts_program = pipeline.tts

    if not snd_program:
        assert pipeline is not None, f"No pipeline named {args.pipeline}"
        snd_program = pipeline.snd

    assert tts_program, "No tts program"
    assert snd_program, "No snd program"

    if args.text:
        lines = args.text
    else:
        lines = sys.stdin
        if os.isatty(sys.stdin.fileno()):
            print("Reading text from stdin", file=sys.stderr)

    for line in lines:
        line = line.strip()
        if not line:
            continue

        with io.BytesIO() as wav_io:
            await synthesize(rhasspy, tts_program, line, wav_io)
            wav_io.seek(0)
            play_result = await play(
                rhasspy, snd_program, wav_io, args.samples_per_chunk
            )
            if play_result is not None:
                json.dump(play_result.event().to_dict(), sys.stdout, ensure_ascii=False)
                print("", flush=True)


if __name__ == "__main__":
    asyncio.run(main())


================================================
FILE: bin/tts_synthesize.py
================================================
#!/usr/bin/env python3
"""Synthesize WAV audio from text."""
import argparse
import asyncio
import io
import logging
import sys
from pathlib import Path

from rhasspy3.core import Rhasspy
from rhasspy3.tts import synthesize

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)


async def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("text", help="Text to speak")
    parser.add_argument(
        "-c",
        "--config",
        default=_DIR.parent / "config",
        help="Configuration directory",
    )
    parser.add_argument("-p", "--pipeline", default="default", help="Name of pipeline")
    parser.add_argument("--tts-program", help="TTS program name")
    parser.add_argument("-f", "--file", help="Write to file instead of stdout")
    parser.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    rhasspy = Rhasspy.load(args.config)
    tts_program = args.tts_program
    pipeline = rhasspy.config.pipelines.get(args.pipeline)

    if not tts_program:
        assert pipeline is not None, f"No pipeline named {args.pipeline}"
        tts_program = pipeline.tts

    assert tts_program, "No tts program"

    with io.BytesIO() as wav_out:
        await synthesize(rhasspy, tts_program, args.text, wav_out)
        wav_bytes = wav_out.getvalue()

        if args.file:
            output_path = Path(args.file)
            output_path.parent.mkdir(parents=True, exist_ok=True)
            output_path.write_bytes(wav_bytes)
        else:
            sys.stdout.buffer.write(wav_bytes)


if __name__ == "__main__":
    asyncio.run(main())


================================================
FILE: bin/vad_adapter_raw.py
================================================
#!/usr/bin/env python3
"""Voice activity detection programs that accept raw PCM audio and print a speech probability for each chunk."""
import argparse
import logging
import shlex
import subprocess
import time
from pathlib import Path
from typing import Optional

from rhasspy3.audio import AudioChunk, AudioChunkConverter, AudioStop
from rhasspy3.event import read_event, write_event
from rhasspy3.vad import Segmenter, VoiceStarted, VoiceStopped

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "command",
        help="Command to run",
    )
    #
    parser.add_argument(
        "--rate",
        type=int,
        required=True,
        help="Sample rate (hz)",
    )
    parser.add_argument(
        "--width",
        type=int,
        required=True,
        help="Sample width bytes",
    )
    parser.add_argument(
        "--channels",
        type=int,
        required=True,
        help="Sample channel count",
    )
    parser.add_argument(
        "--samples-per-chunk",
        required=True,
        type=int,
        help="Samples to send to command at a time",
    )
    #
    parser.add_argument(
        "--threshold",
        type=float,
        default=0.5,
        help="Speech probability threshold (0-1)",
    )
    parser.add_argument(
        "--speech-seconds",
        type=float,
        default=0.3,
    )
    parser.add_argument(
        "--silence-seconds",
        type=float,
        default=0.5,
    )
    parser.add_argument(
        "--timeout-seconds",
        type=float,
        default=15.0,
    )
    parser.add_argument(
        "--reset-seconds",
        type=float,
        default=1,
    )
    #
    parser.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    bytes_per_chunk = args.samples_per_chunk * args.width * args.channels
    seconds_per_chunk = args.samples_per_chunk / args.rate

    command = shlex.split(args.command)
    with subprocess.Popen(
        command, stdin=subprocess.PIPE, stdout=subprocess.PIPE
    ) as proc:
        assert proc.stdin is not None
        assert proc.stdout is not None

        segmenter = Segmenter(
            args.speech_seconds,
            args.silence_seconds,
            args.timeout_seconds,
            args.reset_seconds,
        )
        converter = AudioChunkConverter(args.rate, args.width, args.channels)
        audio_bytes = bytes()
        is_first_audio = True
        sent_started = False
        sent_stopped = False
        last_stop_timestamp: Optional[int] = None

        while True:
            event = read_event()
            if event is None:
                break

            if AudioChunk.is_type(event.type):
                if is_first_audio:
                    _LOGGER.debug("Receiving audio")
                    is_first_audio = False

                chunk = AudioChunk.from_event(event)
                chunk = converter.convert(chunk)
                audio_bytes += chunk.audio
                timestamp = (
                    time.monotonic_ns() if chunk.timestamp is None else chunk.timestamp
                )
                last_stop_timestamp = timestamp + chunk.milliseconds

                # Handle uneven chunk sizes
                while len(audio_bytes) >= bytes_per_chunk:
                    chunk_bytes = audio_bytes[:bytes_per_chunk]
                    proc.stdin.write(chunk_bytes)
                    proc.stdin.flush()

                    line = proc.stdout.readline().decode()
                    if line:
                        speech_probability = float(line)
                        is_speech = speech_probability > args.threshold
                        segmenter.process(
                            chunk=chunk_bytes,
                            chunk_seconds=seconds_per_chunk,
                            is_speech=is_speech,
                            timestamp=timestamp,
                        )

                        if (not sent_started) and segmenter.started:
                            _LOGGER.debug("Voice started")
                            write_event(
                                VoiceStarted(
                                    timestamp=segmenter.start_timestamp
                                ).event()
                            )
                            sent_started = True

                        if (not sent_stopped) and segmenter.stopped:
                            if segmenter.timeout:
                                _LOGGER.info("Voice timeout")
                            else:
                                _LOGGER.debug("Voice stopped")

                            write_event(
                                VoiceStopped(timestamp=segmenter.stop_timestamp).event()
                            )
                            sent_stopped = True

                    audio_bytes = audio_bytes[bytes_per_chunk:]

            elif AudioStop.is_type(event.type):
                _LOGGER.debug("Audio stopped")
                if not sent_stopped:
                    write_event(VoiceStopped(timestamp=last_stop_timestamp).event())
                    sent_stopped = True

                proc.stdin.close()
                break


if __name__ == "__main__":
    main()


================================================
FILE: bin/vad_segment_wav.py
================================================
#!/usr/bin/env python3
"""Prints voice start/stop in WAV file."""
import argparse
import asyncio
import io
import json
import logging
import sys
import time
import wave
from pathlib import Path
from typing import Iterable, Optional

from rhasspy3.audio import DEFAULT_SAMPLES_PER_CHUNK, AudioChunk, AudioStart, AudioStop
from rhasspy3.core import Rhasspy
from rhasspy3.event import async_read_event, async_write_event
from rhasspy3.program import create_process
from rhasspy3.vad import DOMAIN, VoiceStarted, VoiceStopped

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)


async def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-c",
        "--config",
        default=_DIR.parent / "config",
        help="Configuration directory",
    )
    parser.add_argument(
        "-p", "--pipeline", default="default", help="Name of pipeline to use"
    )
    parser.add_argument(
        "--vad-program", help="Name of vad program to use (overrides pipeline)"
    )
    parser.add_argument(
        "--samples-per-chunk",
        type=int,
        default=DEFAULT_SAMPLES_PER_CHUNK,
        help="Samples to process at a time",
    )
    parser.add_argument("wav", nargs="*", help="Path(s) to WAV file(s)")
    #
    parser.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    rhasspy = Rhasspy.load(args.config)
    vad_program = args.vad_program
    pipeline = rhasspy.config.pipelines.get(args.pipeline)

    if not vad_program:
        assert pipeline is not None, f"No pipeline named {args.pipeline}"
        vad_program = pipeline.vad

    assert vad_program, "No vad program"

    async with (await create_process(rhasspy, DOMAIN, vad_program)) as vad_proc:
        assert vad_proc.stdin is not None
        assert vad_proc.stdout is not None

        for wav_bytes in get_wav_bytes(args):
            with io.BytesIO(wav_bytes) as wav_io:
                with wave.open(wav_io, "rb") as wav_file:
                    rate = wav_file.getframerate()
                    width = wav_file.getsampwidth()
                    channels = wav_file.getnchannels()

                    timestamp = 0
                    await async_write_event(
                        AudioStart(rate, width, channels, timestamp=timestamp).event(),
                        vad_proc.stdin,
                    )

                    audio_bytes = wav_file.readframes(args.samples_per_chunk)
                    while audio_bytes:
                        chunk = AudioChunk(
                            rate, width, channels, audio_bytes, timestamp=timestamp
                        )
                        await async_write_event(
                            chunk.event(),
                            vad_proc.stdin,
                        )
                        timestamp += chunk.milliseconds
                        audio_bytes = wav_file.readframes(args.samples_per_chunk)

                    await async_write_event(
                        AudioStop(timestamp=timestamp).event(), vad_proc.stdin
                    )

            voice_started: Optional[VoiceStarted] = None
            voice_stopped: Optional[VoiceStopped] = None
            while True:
                event = await async_read_event(vad_proc.stdout)
                if event is None:
                    break

                if VoiceStarted.is_type(event.type):
                    voice_started = VoiceStarted.from_event(event)
                    if voice_started.timestamp is None:
                        voice_started.timestamp = time.monotonic_ns()

                    _LOGGER.debug(voice_started)
                    json.dump(
                        voice_started.event().to_dict(), sys.stdout, ensure_ascii=False
                    )
                    print("", flush=True)
                elif VoiceStopped.is_type(event.type):
                    voice_stopped = VoiceStopped.from_event(event)
                    if voice_stopped.timestamp is None:
                        voice_stopped.timestamp = time.monotonic_ns()

                    _LOGGER.debug(voice_stopped)
                    json.dump(
                        voice_stopped.event().to_dict(), sys.stdout, ensure_ascii=False
                    )
                    print("", flush=True)
                    break


def get_wav_bytes(args: argparse.Namespace) -> Iterable[bytes]:
    """Yields WAV audio from stdin or args."""
    if args.wav:
        # WAV file path(s)
        for wav_path in args.wav:
            with open(wav_path, "rb") as wav_file:
                yield wav_file.read()
    else:
        # WAV on stdin
        yield sys.stdin.buffer.read()


if __name__ == "__main__":
    asyncio.run(main())


================================================
FILE: bin/wake_adapter_raw.py
================================================
#!/usr/bin/env python3
"""Wake word detection with a command that accepts raw PCM audio and prints a line for each detection."""
import argparse
import logging
import shlex
import subprocess
import threading
import time
from dataclasses import dataclass
from pathlib import Path
from typing import IO

from rhasspy3.audio import AudioChunk, AudioStop
from rhasspy3.event import read_event, write_event
from rhasspy3.wake import Detection, NotDetected

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)


@dataclass
class State:
    timestamp: int = 0
    detected: bool = False


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "command",
        help="Command to run",
    )
    parser.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    command = shlex.split(args.command)
    with subprocess.Popen(
        command, stdin=subprocess.PIPE, stdout=subprocess.PIPE
    ) as proc:
        assert proc.stdin is not None
        assert proc.stdout is not None

        state = State()
        threading.Thread(target=write_proc, args=(proc.stdout, state)).start()

        while not state.detected:
            event = read_event()
            if event is None:
                break

            if AudioChunk.is_type(event.type):
                chunk = AudioChunk.from_event(event)
                state.timestamp = (
                    chunk.timestamp
                    if chunk.timestamp is not None
                    else time.monotonic_ns()
                )
                proc.stdin.write(chunk.audio)
                proc.stdin.flush()
            elif AudioStop.is_type(event.type):
                proc.stdin.close()
                break

    if not state.detected:
        write_event(NotDetected().event())


def write_proc(reader: IO[bytes], state: State):
    try:
        for line in reader:
            line = line.strip()
            if line:
                write_event(
                    Detection(name=line.decode(), timestamp=state.timestamp).event()
                )
                state.detected = True
                break
    except Exception:
        _LOGGER.exception("Unexpected error in write thread")


if __name__ == "__main__":
    main()


================================================
FILE: bin/wake_detect.py
================================================
#!/usr/bin/env python3
"""Wait for wake word to be detected."""
import argparse
import asyncio
import json
import logging
import sys
from pathlib import Path

from rhasspy3.core import Rhasspy
from rhasspy3.mic import DOMAIN as MIC_DOMAIN
from rhasspy3.program import create_process
from rhasspy3.wake import detect

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)


async def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-c",
        "--config",
        default=_DIR.parent / "config",
        help="Configuration directory",
    )
    parser.add_argument(
        "-p", "--pipeline", default="default", help="Name of pipeline to use"
    )
    parser.add_argument(
        "--mic-program", help="Name of mic program to use (overrides pipeline)"
    )
    parser.add_argument(
        "--wake-program", help="Name of wake program to use (overrides pipeline)"
    )
    #
    parser.add_argument("--loop", action="store_true", help="Keep detecting wake words")
    parser.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    rhasspy = Rhasspy.load(args.config)
    mic_program = args.mic_program
    wake_program = args.wake_program
    pipeline = rhasspy.config.pipelines.get(args.pipeline)

    if not mic_program:
        assert pipeline is not None, f"No pipeline named {args.pipeline}"
        mic_program = pipeline.mic

    assert mic_program, "No mic program"
    _LOGGER.debug("mic program: %s", mic_program)

    if not wake_program:
        assert pipeline is not None, f"No pipeline named {args.pipeline}"
        wake_program = pipeline.wake

    assert wake_program, "No wake program"
    _LOGGER.debug("wake program: %s", wake_program)

    # Detect wake word
    while True:
        async with (await create_process(rhasspy, MIC_DOMAIN, mic_program)) as mic_proc:
            assert mic_proc.stdout is not None
            _LOGGER.debug("Detecting wake word")
            detection = await detect(rhasspy, wake_program, mic_proc.stdout)
            if detection is not None:
                json.dump(detection.event().to_dict(), sys.stdout, ensure_ascii=False)
                print("", flush=True)

        if not args.loop:
            break


if __name__ == "__main__":
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        pass


================================================
FILE: docs/README.md
================================================
# Rhasspy 3

* [Tutorial](tutorial.md)
* [Domains](domains.md)
* [Wyoming Protcol](wyoming.md)
* [Adapters](adapters.md)


================================================
FILE: docs/adapters.md
================================================
# Adapters

Scripts in `bin/`:

* `asr_adapter_raw2text.py`
    * Raw audio stream in, text or JSON out
* `asr_adapter_wav2text.py`
    * WAV file(s) in, text or JSON out (per file)
* `handle_adapter_json.py`
    * Intent JSON in, text response out
* `handle_adapter_text.py`
    * Transcription in, text response out
* `mic_adapter_raw.py`
    * Raw audio stream in
* `snd_adapter_raw.py`
    * Raw audio stream out
* `tts_adapter_http.py`
    * HTTP POST to endpoint with text, WAV out
* `tts_adapter_text2wav.py`
    * Text in, WAV out
* `vad_adapter_raw.py`
    * Raw audio stream in, speech probability out (one line per chunk)
* `wake_adapter_raw.py`
    * Raw audio stream in, name of detected model out (one line per detection)
* `client_unix_socket.py`
    * Send/receive events over Unix domain socket


![Wyoming protocol adapter](img/adapter.png)


================================================
FILE: docs/domains.md
================================================
# Domains

Programs belong to a specific domain. This defines the kinds of [events](wyoming.md) they are expected to receive and emit.

## mic

Emits `audio-chunk` events, ideally with a `timestamp`.


## wake

Receives `audio-chunk` events.
Emits `detection` event(s) or a `not-detected` event if the program exits without a detection.


## asr

Receives an `audio-start` event, followed by zero or more `audio-chunk` events.

An `audio-stop` event must trigger a `transcript` event to be emitted.


## vad

Receives `audio-chunk` events.

Emits `voice-started` with the `timestamp` of the `audio-chunk` when the user started speaking.

Emits `voice-stopped` with the `timestamp` of the `audio-chunk` when the user finished speaking.


## intent

Optional. The `handle` domain can handle `transcript` events directly.

Receives `recognize` events.

Emits either an `intent` or a `not-recognized` event.


## handle

Receives one of the following event types: `transcript`, `intent`, or `not-recognized`.

Emits either a `handle` or `not-handled` event.


## tts

Receives a `synthesize` event.

Emits an `audio-start` event followed by zero or more `audio-chunk` events, and then an `audio-stop` event.


## snd

Receives `audio-chunk` events until an `audio-stop` event.

Must emit `played` event when audio has finished playing.


================================================
FILE: docs/home_assistant.md
================================================
# Home Assistant

This will connect Rhasspy to Home Assistant via [Assist](https://www.home-assistant.io/docs/assist).

Install the Home Assistant intent handler:

```sh
mkdir -p config/programs/handle/
cp -R programs/handle/home_assistant config/programs/handle/
```

Create a long-lived access token in Home Assistant (inside your profile):

![Long-lived access token](img/ha_token.png)

Copy the **entire** access token (with CTRL+A, not just selecting what you can see) and put it in the data directory:

```sh
mkdir -p config/data/handle/home_assistant/
echo "MY-LONG-LIVED-ACCESS-TOKEN" > config/data/handle/home_assistant/token
```

Add to your `configuration.yaml`:


```yaml
programs:
  handle:
    home_assistant:
      command: |
        bin/converse.py --language "${language}" "${url}" "${token_file}"
      adapter: |
        handle_adapter_text.py
      template_args:
        url: "http://localhost:8123/api/conversation/process"
        token_file: "${data_dir}/token"
        language: "en"

pipelines:
  default:
    mic: ...
    vad: ...
    asr: ...
    wake: ...
    handle:
      name: home_assistant
    tts: ...
    snd: ...
```

Make sure your Home Assistant server is running, and test out a command:

```sh
script/run bin/handle_text.py "Turn on the bed light"
```

Replace "bed light" with the name of a device you have connected to Home Assistant.

If successful, you should see JSON printed with the response text, like:

```sh
{"type": "handled", "data": {"text": "Turned on light"}}
```

This also works over HTTP:

```sh
curl -X POST --data 'Turn on the bed light' 'localhost:13331/handle/handle'
```

Now you can run your full pipeline and control Home Assistant!


================================================
FILE: docs/satellite.md
================================================
# Satellite

Once you have a Rhasspy HTTP server running, you can use Rhasspy as a satellite on a separate device.

**NOTE:** Rhasspy satellites do not need to run Python or any Rhasspy software. They can use the websocket API directly, or talk directly to a running pipeline.

On your satellite, clone the repo:

```sh
git clone https://github.com/rhasspy/rhasspy3
cd rhasspy3
```

Install the websocket utility:

```sh
mkdir -p config/programs/remote/
cp -R programs/remote/websocket config/programs/remote/
config/programs/remote/websocket/script/setup
```

Install [Porcupine](https://github.com/Picovoice/porcupine):

```sh
mkdir -p config/programs/wake/
cp -R programs/wake/porcupine1 config/programs/wake/
config/programs/wake/porcupine1/script/setup
```

Check available wake word models by running 

```sh
config/programs/wake/porcupine1/script/list_models
```

and choose one. We'll use "porcupine_linux.ppn" as an example, but this will be **different on a Raspberry Pi**.

Next, create `config/configuration.yaml` with:

```yaml
programs:
  mic:
    arecord:
      command: |
        arecord -q -r 16000 -c 1 -f S16_LE -t raw -
      adapter: |
        mic_adapter_raw.py --samples-per-chunk 1024 --rate 16000 --width 2 --channels 1

  wake:
    porcupine1:
      command: |
        .venv/bin/python3 bin/porcupine_stream.py --model "${model}"
      template_args:
        model: "porcupine_linux.ppn"

  remote:
    websocket:
      command: |
        script/run "${uri}"
      template_args:
        uri: "ws://localhost:13331/pipeline/asr-tts"

satellites:
  default:
    mic:
      name: arecord
    wake:
      name: porcupine1
    remote:
      name: websocket
    snd:
      name: aplay
```

Replace the model in `porcupine1` with your selection, and adjust the URI in `websocket` to point to your Rhasspy server.

Now you can run your satellite:

```sh
script/run bin/satellite_run.py --debug --loop
```

(say "porcupine", *pause*, say voice command, *wait*)

If everything is working, you should hear a response being spoken. Press CTRL+C to quit.


================================================
FILE: docs/tutorial.md
================================================
# Tutorial

Welcome to Rhasspy 3! This is a developer preview, so many of the manual steps here will be replaced with something more user-friendly in the future.


## Installing Rhasspy 3

To get started, just clone the repo. Rhasspy's core does not currently have any dependencies outside the Python standard library.

```sh
git clone https://github.com/rhasspy/rhasspy3
cd rhasspy3
```


## Layout

Installed programs and downloaded models are stored in the `config` directory, which is empty by default:

* `rhasspy3/config/`
    * `configuration.yaml` - overrides `rhasspy3/configuration.yaml`
    * `programs/` - installed programs
        * `<domain>/`
            * `<name>/`
    * `data/` - downloaded models
        * `<domain>/`
            * `<name>/`
            
Programs in Rhasspy are divided into [domains](domains.md).


## Configuration

Rhasspy loads two configuration files:

1. `rhasspy3/configuration.yaml` (base)
2. `config/configuration.yaml` (user)

The file in `config` will override the base configuration. You can see what the final configuration looks like with:

```sh
script/run bin/config_print.py
```


## Microphone

Programs that were not designed for Rhasspy can be used with [adapters](adapters.md).
For example, add the following to your `configuration.yaml` (in the `config` directory):

```yaml
programs:
  mic:
    arecord:
      command: |
        arecord -q -r 16000 -c 1 -f S16_LE -t raw -
      adapter: |
        mic_adapter_raw.py --rate 16000 --width 2 --channels 1

pipelines:
  default:
    mic:
      name: arecord
```

Now you can run a microphone test:

```sh
script/run bin/mic_test_energy.py
```

When speaking, you should see the bar change with volume. If not, check the available devices with `arecord -L` and update the `arecord` command in `configuration.yaml` with `-D <device_name>` (prefer devices that start with `plughw:`).

Press CTRL+C to quit.

Pipelines will be discussed later. For now, know that the pipeline named `default` will be run if you don't specify one. The mic test script can do this:

```sh
script/run bin/mic_test_energy.py --pipeline my-pipeline
```

You can also override the mic program:

```sh
script/run bin/mic_test_energy.py --mic-program other-program-from-config
```


## Voice Activity Detection

Let's install our first program, [Silero VAD](https://github.com/snakers4/silero-vad/).
Start by copying from `programs/` to `config/programs`, then run the setup script:

```sh
mkdir -p config/programs/vad/
cp -R programs/vad/silero config/programs/vad/
config/programs/vad/silero/script/setup
```

Once the setup script completes, add the following to your `configuration.yaml`:

```yaml
programs:
  mic: ...
  vad:
    silero:
      command: |
        script/speech_prob "share/silero_vad.onnx"
      adapter: |
        vad_adapter_raw.py --rate 16000 --width 2 --channels 1 --samples-per-chunk 512

pipelines:
  default:
    mic: ...
    vad:
      name: silero
```


This calls a command inside `config/programs/vad/silero` and uses an adapter. Notice that the command's working directory will always be `config/programs/<domain>/<name>`.

You can test out the voice activity detection (VAD) by recording an audio sample:

```sh
script/run bin/mic_record_sample.py sample.wav
```

Say something for a few seconds and then wait for the program to finish. Afterwards, listen to `sample.wav` and verify that it sounds correct. You may need to adjust microphone settings with `alsamixer`


## Speech to Text

Now for the fun part! We'll be installing [faster-whisper](https://github.com/guillaumekln/faster-whisper/), an optimized version of Open AI's [Whisper](https://github.com/openai/whisper) model.


```sh
mkdir -p config/programs/asr/
cp -R programs/asr/faster-whisper config/programs/asr/
config/programs/asr/faster-whisper/script/setup
```

Before using faster-whisper, we need to download a model:

```sh
config/programs/asr/faster-whisper/script/download.py tiny-int8
```

Notice that the model was downloaded to `config/data/asr/faster-whisper`:

```sh
find config/data/asr/faster-whisper/
config/data/asr/faster-whisper/
config/data/asr/faster-whisper/tiny-int8
config/data/asr/faster-whisper/tiny-int8/vocabulary.txt
config/data/asr/faster-whisper/tiny-int8/model.bin
config/data/asr/faster-whisper/tiny-int8/config.json
```

The `tiny-int8` model is the smallest and fastest model, but may not give the best transcriptions. Run `download.py` without any arguments to see the available models, or follow [the instructions](https://github.com/guillaumekln/faster-whisper/#model-conversion) to make your own!

Add the following to `configuration.yaml`:

```yaml
programs:
  mic: ...
  vad: ...
  asr:
    faster-whisper:
      command: |
        script/wav2text "${data_dir}/tiny-int8" "{wav_file}"
      adapter: |
        asr_adapter_wav2text.py

pipelines:
  default:
    mic: ...
    vad: ...
    asr:
      name: faster-whisper
```

You can now transcribe a voice command:

```sh
script/run bin/asr_transcribe.py
```

(say something)

You should see a transcription of what you said as part of an [event](wyoming.md).

### Client/Server

Speech to text systems can take a while to load their models, so a lot of time is wasted if we start from scratch each time.

Some speech to text and text to speech programs have included servers. These usually use [Unix domain sockets](https://en.wikipedia.org/wiki/Unix_domain_socket) to communicate with a small client program.

Add the following to your `configuration.yaml`:


```yaml
programs:
  mic: ...
  vad: ...
  asr:
    faster-whisper: ...
    faster-whisper.client:
      command: |
        client_unix_socket.py var/run/faster-whisper.socket

servers:
  asr:
    faster-whisper:
      command: |
        script/server --language "en" "${data_dir}/tiny-int8"

pipelines:
  default:
    mic: ...
    vad: ...
    asr:
      name: faster-whisper.client
```

Start the server in a separate terminal:

```sh
script/run bin/server_run.py asr faster-whisper
```

When it prints "Ready", transcribe yourself speaking again:

```sh
script/run bin/asr_transcribe.py
```

(say something)

You should receive your transcription a bit faster than before.


### HTTP Server

Rhasspy includes a small HTTP server that allows you to access programs and pipelines over a web API. To get started, run the setup script:

```sh
script/setup_http_server
```

Run HTTP server in a separate terminal:

```sh
script/http_server --debug
```

Now you can transcribe a WAV file over HTTP:

```sh
curl -X POST -H 'Content-Type: audio/wav' --data-binary @etc/what_time_is_it.wav 'localhost:13331/asr/transcribe'
```

You can run one or more program servers along with the HTTP server:

```sh
script/http_server --debug --server asr faster-whisper
```

**NOTE:** You will need to restart the HTTP server when you change `configuration.yaml`


## Wake Word Detection

Next, we'll install [Porcupine](https://github.com/Picovoice/porcupine):

```sh
mkdir -p config/programs/wake/
cp -R programs/wake/porcupine1 config/programs/wake/
config/programs/wake/porcupine1/script/setup
```

Check available wake word models with:

```sh
config/programs/wake/porcupine1/script/list_models
alexa_linux.ppn
americano_linux.ppn
blueberry_linux.ppn
bumblebee_linux.ppn
computer_linux.ppn
grapefruit_linux.ppn
grasshopper_linux.ppn
hey google_linux.ppn
hey siri_linux.ppn
jarvis_linux.ppn
ok google_linux.ppn
pico clock_linux.ppn
picovoice_linux.ppn
porcupine_linux.ppn
smart mirror_linux.ppn
snowboy_linux.ppn
terminator_linux.ppn
view glass_linux.ppn
```

**NOTE:** These will be slightly different on a Raspberry Pi (`_raspberry-pi.ppn` instead of `_linux.ppn`).

Add to `configuration.yaml`:

```yaml
programs:
  mic: ...
  vad: ...
  asr: ...
  wake:
    porcupine1:
      command: |
        .venv/bin/python3 bin/porcupine_stream.py --model "${model}"
      template_args:
        model: "porcupine_linux.ppn"

servers:
  asr: ...

pipelines:
  default:
    mic: ...
    vad: ...
    asr: ...
    wake:
      name: porcupine1
```

Notice that we include `template_args` in the `programs` section. This lets us change specific settings in `pipelines`, which will be demonstrated in a moment.

Test wake word detection:

```sh
script/run bin/wake_detect.py --debug
```

(say "porcupine")

Now change the model in `configuration.yaml`:

```yaml
programs:
  mic: ...
  vad: ...
  asr: ...
  wake: ...

servers:
  asr: ...

pipelines:
  default:
    mic: ...
    vad: ...
    asr: ...
    wake:
      name: porcupine1
      template_args:
        model: "grasshopper_linux.ppn"
```

Test wake word detection again:

```sh
script/run bin/wake_detect.py --debug
```

(say "grasshopper")

For non-English models, first download the extra data files:

```sh
config/programs/wake/porcupine1/script/download.py
```

Next, adjust your `configuration.yaml`. For example, this uses the German keyword "ananas":

```yaml
programs:
  wake:
    porcupine1:
      command: |
        .venv/bin/python3 bin/porcupine_stream.py --model "${model}" --lang_model "${lang_model}"
      template_args:
        model: "${data_dir}/resources/keyword_files_de/linux/ananas_linux.ppn"
        lang_model: "${data_dir}/lib/common/porcupine_params_de.pv"

```

Inspect the files in `config/data/wake/porcupine1` for supported languages and keywords. At this time, English, German (de), French (fr), and Spanish (es) are available with keywords for `linux`, `raspberry-pi`, and many other platforms.

Going back to "grasshopper", we can test over HTTP server (restart server):

```sh
curl -X POST 'localhost:13331/pipeline/run?stop_after=wake'
```

(say "grasshopper")

Test full voice command:

```sh
curl -X POST 'localhost:13331/pipeline/run?stop_after=asr'
```

(say "grasshopper", *pause*, voice command, *wait*)


## Intent Handling

There are two types of intent handlers in Rhasspy, ones that handle transcripts directly (text) and others that handle structured intents (name + entities). For this example, we will be handling text directly from `asr`.

In `configuration.yaml`:

```yaml
programs:
  mic: ...
  vad: ...
  asr: ...
  wake: ...
  handle:
    date_time:
      command: |
        bin/date_time.py
      adapter: |
        handle_adapter_text.py

servers:
  asr: ...

pipelines:
  default:
    mic: ...
    vad: ...
    asr: ...
    wake: ...
    handle:
      name: date_time

```

Install date time demo script:

```sh
mkdir -p config/programs/handle/
cp -R programs/handle/date_time config/programs/handle/
```

This script just looks for the words "date" and "time" in the text, and responds appropriately.

You can test it on some text:

```sh
echo 'What time is it?' | script/run bin/handle_text.py --debug
```

Now let's test it with a full voice command:

```sh
script/run bin/pipeline_run.py --debug --stop-after handle
```

(say "grasshopper", *pause*, "what time is it?")

It works too over HTTP (restart server):

```sh
curl -X POST 'localhost:13331/pipeline/run?stop_after=handle'
```

(say "grasshopper", *pause*, "what's the date?")


## Text to Speech and Sound

The final stages of our pipeline will be text to speech (`tts`) and audio output (`snd`).

Install [Piper](https://github.com/rhasspy/piper):

```sh
mkdir -p config/programs/tts/
cp -R programs/tts/piper config/programs/tts/
config/programs/tts/piper/script/setup.py
```

and download an English voice:

```sh
config/programs/tts/piper/script/download.py english
```

Call `download.py` without any arguments to see available voices.

Add to `configuration.yaml`:

```yaml
programs:
  mic: ...
  vad: ...
  asr: ...
  wake: ...
  handle: ...
  tts:
    piper:
      command: |
        bin/piper --model "${model}" --output_file -
      adapter: |
        tts_adapter_text2wav.py
      template_args:
        model: "${data_dir}/en-us-blizzard_lessac-medium.onnx"
  snd:
    aplay:
      command: |
        aplay -q -r 22050 -f S16_LE -c 1 -t raw
      adapter: |
        snd_adapter_raw.py --rate 22050 --width 2 --channels 1

servers:
  asr: ...

pipelines:
  default:
    mic: ...
    vad: ...
    asr: ...
    wake: ...
    handle: ...
    tts:
      name: piper
    snd:
      name: aplay
```


We can test the text to speech and audio output programs:

```sh
script/run bin/tts_speak.py 'Welcome to the world of speech synthesis.'
```

The `bin/tts_synthesize.py` can be used if you want to just output a WAV file.

```sh
script/run bin/tts_synthesize.py 'Welcome to the world of speech synthesis.' > welcome.wav
```

This also works over HTTP (restart server):

```sh
curl -X POST \
  --data 'Welcome to the world of speech synthesis.' \
  --output welcome.wav \
  'localhost:13331/tts/synthesize'
```

Or to speak over HTTP:

```sh
curl -X POST --data 'Welcome to the world of speech synthesis.' 'localhost:13331/tts/speak'
```


### Client/Server

Like speech to text, text to speech models can take a while to load. Let's add a server for Piper to `configuration.yaml`:

```yaml
programs:
  mic: ...
  vad: ...
  asr: ...
  wake: ...
  handle: ...
  tts:
    piper.client:
      command: |
        client_unix_socket.py var/run/piper.socket
  snd: ...

servers:
  asr: ...
  tts:
    piper:
      command: |
        script/server "${model}"
      template_args:
        model: "${data_dir}/en-us-blizzard_lessac-medium.onnx"

pipelines:
  default:
    mic: ...
    vad: ...
    asr: ...
    wake: ...
    handle: ...
    tts:
      name: piper.client
    snd: ...
```

Now we can run both servers with the HTTP server:

```sh
script/http_server --debug --server asr faster-whisper --server tts piper
```

Text to speech requests should be faster now.


## Complete Pipeline

As a final example, let's run a complete pipeline from wake word detection to text to speech response:

```sh
script/run bin/pipeline_run.py --debug
```

(say "grasshopper", *pause*, "what time is it?", *wait*)

Rhasspy should speak the current time.

This also works over HTTP:

```sh
curl -X POST 'localhost:13331/pipeline/run'
```

(say "grasshopper", *pause*, "what is the date?", *wait*)

Rhasspy should speak the current date.


## Next Steps

* Connect Rhasspy to [Home Assistant](home_assistant.md)
* Run one or more [satellites](satellite.md)


================================================
FILE: docs/wyoming.md
================================================
# The Wyoming Protocol

An interprocess event protocol over stdin/stdout for Rhasspy v3.

(effectively [JSONL](https://jsonlines.org/) with an optional binary payload)

![Wyoming protocol](img/wyoming.png)


## Motivation

Rhasspy v2 was built on top of MQTT, and therefore required (1) an MQTT broker and (2) all services to talk over MQTT. Each open source voice program needed a custom service wrapper to talk to Rhasspy.

For v3, a project goal was to minimize the barrier for programs to talk to Rhasspy.


## Talking Directly to Programs

Many voice programs have similar command line interfaces. For example, most text to speech programs accept text through standard input and write a WAV file to standard output or a file:

```sh
echo “Input text” | text-to-speech > output.wav
```

A protocol based on standard input/output would be universal across languages, operating systems, etc. However, some voice programs need to consume or produce audio/event streams. For example, a speech to text system may return a result much quicker if it can process audio as it's being recorded.

## Event Streams

Standard input/output are byte streams, but they can be easily adapted to event streams that can also carry binary data. This lets us send, for example, chunks of audio to a speech to text program as well as an event to say the stream is finished. All without a broker or a socket!

Each **event** in the Wyoming protocol is:

1. A **single line** of JSON with an object:
    * **MUST** have a `type` field with an event type name
    * MAY have a `data` field with an object that contains event-specific data
    * MAY have a `payload_length` field with a number > 0
2. If `payload_length` is given, *exactly* that may bytes follows

Example:

```json
{ "type": "audio-chunk", "data": { "rate": 16000, "width", "channels": 1 }, "payload_length": 2048 }
<2048 bytes>
```


## Adapter

Using events over standard input/output unfortunately means we cannot talk to most programs directly. Fortunately, [small adapters](adapters.md) can be written and shared for programs with similar command-line interfaces. The adapter speaks events to Rhasspy, but calls the underlying program according to a common convention like “text in, WAV out”.

![Wyoming protocol adapter](img/adapter.png)

## Events Types

Voice programs vary significantly in their options, but programs within the same [domain](domains.md) have the same minimal requirements to function:

* mic
    * Audio input
    * Outputs fixed-sized chunks of PCM audio from a microphone, socket, etc.
    * Audio chunks may contain timestamps
* wake
    * Wake word detection
    * Inputs fixed-sized chunks of PCM audio
    * Outputs name of detected model, timestamp of audio chunk
* asr
    * Speech to text
    * Inputs fixed-sized chunks of PCM audio
    * Inputs an event indicating the end of the audio stream (or voice command)
    * Outputs a transcription
* vad
    * Voice activity detection
    * Inputs fixed-sized chunks of PCM audio
    * Outputs events indicating the beginning and end of a voice command
* intent
    * Intent recognition
    * Inputs text
    * Outputs an intent with a name and entities (slots)
* handle
    * Intent/text handling
    * Does something with an intent or directly with a transcription
    * Outputs a text response
* tts
    * Text to speech
    * Inputs text
    * Outputs one or more fixed-sized chunks of PCM audio
* snd
    * Audio output
    * Inputs fixed-sized chunks of PCM audio
    * Plays audio through a sound system

The following event types are currently defined:

| Domain | Type           | Data                             | Payload |
|--------|----------------|----------------------------------|---------|
| audio  | audio-start    | timestamp, rate, width, channels |         |
| audio  | audio-chunk    | timestamp, rate, width, channels | PCM     |
| audio  | audio-stop     | timestamp                        |         |
| wake   | detection      | name, timestamp                  |         |
| wake   | not-detected   |                                  |         |
| vad    | voice-started  | timestamp                        |         |
| vad    | voice-stopped  | timestamp                        |         |
| asr    | transcript     | text                             |         |
| intent | recognize      | text                             |         |
| intent | intent         | name, entities                   |         |
| intent | not-recognized | text                             |         |
| handle | handled        | text                             |         |
| handle | not-handled    | text                             |         |
| tts    | synthesize     | text                             |         |
| snd    | played         |                                  |         |


================================================
FILE: examples/satellite/configuration.yaml
================================================
satellites:
  default:
    mic:
      name: arecord
      template_args:
        device: "default"
    wake:
      name: porcupine1
      template_args:
        model: "porcupine_raspberry-pi.ppn"
    remote:
      name: websocket
      template_args:
        uri: "ws://homeassistant.local:13331/pipeline/asr-tts"
    snd:
      name: aplay
      template_args:
        device: "default"


================================================
FILE: mypy.ini
================================================
[mypy]
ignore_missing_imports = true

[mypy-setuptools.*]
ignore_missing_imports = True


================================================
FILE: programs/asr/coqui-stt/README.md
================================================
# Coqui STT

Speech to text service for Rhasspy based on [Coqui STT](https://stt.readthedocs.io/en/latest/).

Additional models can be downloaded here: https://coqui.ai/models/


## Installation

1. Copy the contents of this directory to `config/programs/asr/coqui-stt/`
2. Run `script/setup`
3. Download a model with `script/download.py`
    * Example: `script/download.py en_large`
    * Models are downloaded to `config/data/asr/coqui-stt` directory
4. Test with `script/wav2text`
    * Example `script/wav2text /path/to/english_v1.0.0-large-vocab/ /path/to/test.wav`


================================================
FILE: programs/asr/coqui-stt/bin/coqui_stt_raw2text.py
================================================
#!/usr/bin/env python3
import argparse
import logging
import sys
from pathlib import Path

import numpy as np
from stt import Model

_LOGGER = logging.getLogger("coqui_stt_raw2text")


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("model", help="Path to Coqui STT model directory")
    parser.add_argument(
        "--scorer", help="Path to scorer (default: .scorer file in model directory)"
    )
    parser.add_argument(
        "--alpha-beta",
        type=float,
        nargs=2,
        metavar=("alpha", "beta"),
        help="Scorer alpha/beta",
    )
    parser.add_argument(
        "--samples-per-chunk",
        type=int,
        default=1024,
        help="Number of samples to process at a time",
    )
    parser.add_argument("--debug", action="store_true", help="Log DEBUG messages")
    args = parser.parse_args()

    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    model_dir = Path(args.model)
    model_path = next(model_dir.glob("*.tflite"))
    if args.scorer:
        scorer_path = Path(args.scorer)
    else:
        scorer_path = next(model_dir.glob("*.scorer"))

    _LOGGER.debug("Loading model: %s, scorer: %s", model_path, scorer_path)
    model = Model(str(model_path))
    model.enableExternalScorer(str(scorer_path))

    if args.alpha_beta is not None:
        model.setScorerAlphaBeta(*args.alpha_beta)

    model_stream = model.createStream()
    chunk = sys.stdin.buffer.read(args.samples_per_chunk)
    _LOGGER.debug("Processing audio")
    while chunk:
        chunk_array = np.frombuffer(chunk, dtype=np.int16)
        model_stream.feedAudioContent(chunk_array)
        chunk = sys.stdin.buffer.read(args.samples_per_chunk)

    text = model_stream.finishStream()
    _LOGGER.debug(text)

    print(text.strip())


# -----------------------------------------------------------------------------

if __name__ == "__main__":
    main()


================================================
FILE: programs/asr/coqui-stt/bin/coqui_stt_server.py
================================================
#!/usr/bin/env python3
import argparse
import json
import logging
import os
import socket
import threading
from pathlib import Path

import numpy as np
from stt import Model

_LOGGER = logging.getLogger("coqui_stt_server")


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("model", help="Path to Coqui STT model directory")
    parser.add_argument(
        "--scorer", help="Path to scorer (default: .scorer file in model directory)"
    )
    parser.add_argument(
        "--alpha-beta",
        type=float,
        nargs=2,
        metavar=("alpha", "beta"),
        help="Scorer alpha/beta",
    )
    parser.add_argument(
        "--socketfile", required=True, help="Path to Unix domain socket file"
    )
    parser.add_argument(
        "-r",
        "--rate",
        type=int,
        default=16000,
        help="Input audio sample rate (default: 16000)",
    )
    parser.add_argument("--debug", action="store_true", help="Log DEBUG messages")
    args = parser.parse_args()

    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    # Need to unlink socket if it exists
    try:
        os.unlink(args.socketfile)
    except OSError:
        pass

    try:
        # Create socket server
        sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        sock.bind(args.socketfile)
        sock.listen()

        model_dir = Path(args.model)
        model_path = next(model_dir.glob("*.tflite"))
        if args.scorer:
            scorer_path = Path(args.scorer)
        else:
            scorer_path = next(model_dir.glob("*.scorer"))

        _LOGGER.debug("Loading model: %s, scorer: %s", model_path, scorer_path)
        model = Model(str(model_path))
        model.enableExternalScorer(str(scorer_path))

        if args.alpha_beta is not None:
            model.setScorerAlphaBeta(*args.alpha_beta)

        _LOGGER.info("Ready")

        # Listen for connections
        while True:
            try:
                connection, client_address = sock.accept()
                _LOGGER.debug("Connection from %s", client_address)

                # Start new thread for client
                threading.Thread(
                    target=handle_client,
                    args=(connection, model, args.rate),
                    daemon=True,
                ).start()
            except KeyboardInterrupt:
                break
            except Exception:
                _LOGGER.exception("Error communicating with socket client")
    finally:
        os.unlink(args.socketfile)


def handle_client(connection: socket.socket, model: Model, rate: int) -> None:
    try:
        model_stream = model.createStream()
        is_first_audio = True

        with connection, connection.makefile(mode="rwb") as conn_file:
            while True:
                event_info = json.loads(conn_file.readline())
                event_type = event_info["type"]

                if event_type == "audio-chunk":
                    if is_first_audio:
                        _LOGGER.debug("Receiving audio")
                        is_first_audio = False

                    num_bytes = event_info["payload_length"]
                    chunk = conn_file.read(num_bytes)
                    chunk_array = np.frombuffer(chunk, dtype=np.int16)
                    model_stream.feedAudioContent(chunk_array)
                elif event_type == "audio-stop":
                    _LOGGER.info("Audio stopped")

                    text = model_stream.finishStream()
                    transcript_str = (
                        json.dumps(
                            {"type": "transcript", "data": {"text": text}},
                            ensure_ascii=False,
                        )
                        + "\n"
                    )
                    conn_file.write(transcript_str.encode())
                    break
    except Exception:
        _LOGGER.exception("Unexpected error in client thread")


# -----------------------------------------------------------------------------

if __name__ == "__main__":
    main()


================================================
FILE: programs/asr/coqui-stt/bin/coqui_stt_wav2text.py
================================================
#!/usr/bin/env python3
import argparse
import logging
import wave
from pathlib import Path

import numpy as np
from stt import Model

_LOGGER = logging.getLogger("coqui_stt_wav2text")


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("model", help="Path to Coqui STT model directory")
    parser.add_argument("wav_file", nargs="+", help="Path to WAV file(s) to transcribe")
    parser.add_argument(
        "--scorer", help="Path to scorer (default: .scorer file in model directory)"
    )
    parser.add_argument(
        "--alpha-beta",
        type=float,
        nargs=2,
        metavar=("alpha", "beta"),
        help="Scorer alpha/beta",
    )
    parser.add_argument("--debug", action="store_true", help="Log DEBUG messages")
    args = parser.parse_args()

    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    model_dir = Path(args.model)
    model_path = next(model_dir.glob("*.tflite"))
    if args.scorer:
        scorer_path = Path(args.scorer)
    else:
        scorer_path = next(model_dir.glob("*.scorer"))

    _LOGGER.debug("Loading model: %s, scorer: %s", model_path, scorer_path)
    model = Model(str(model_path))
    model.enableExternalScorer(str(scorer_path))

    if args.alpha_beta is not None:
        model.setScorerAlphaBeta(*args.alpha_beta)

    for wav_path in args.wav_file:
        _LOGGER.debug("Processing %s", wav_path)
        wav_file: wave.Wave_read = wave.open(wav_path, "rb")
        with wav_file:
            assert wav_file.getframerate() == 16000, "16Khz sample rate required"
            assert wav_file.getsampwidth() == 2, "16-bit samples required"
            assert wav_file.getnchannels() == 1, "Mono audio required"
            audio_bytes = wav_file.readframes(wav_file.getnframes())

            model_stream = model.createStream()
            audio_array = np.frombuffer(audio_bytes, dtype=np.int16)
            model_stream.feedAudioContent(audio_array)

            text = model_stream.finishStream()
            print(text.strip())


# -----------------------------------------------------------------------------

if __name__ == "__main__":
    main()


================================================
FILE: programs/asr/coqui-stt/requirements.txt
================================================
stt>=1.4.0,<2.0
numpy


================================================
FILE: programs/asr/coqui-stt/script/download.py
================================================
#!/usr/bin/env python3
import argparse
import itertools
import logging
import tarfile
from pathlib import Path
from urllib.request import urlopen

_DIR = Path(__file__).parent
_LOGGER = logging.getLogger("setup")

MODELS = {"en_large": "english_v1.0.0-large-vocab"}


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "model",
        nargs="+",
        choices=list(itertools.chain(MODELS.keys(), MODELS.values())),
        help="Coqui STT model(s) to download",
    )
    parser.add_argument(
        "--destination", help="Path to destination directory (default: share)"
    )
    parser.add_argument(
        "--link-format",
        default="https://github.com/rhasspy/models/releases/download/v1.0/asr_coqui-stt-{model}.tar.gz",
        help="Format string for download URLs",
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.INFO)

    if args.destination:
        args.destination = Path(args.destination)
    else:
        # Assume we're in programs/asr/coqui-stt/script
        data_dir = _DIR.parent.parent.parent.parent / "data"
        args.destination = data_dir / "asr" / "coqui-stt"

    args.destination.parent.mkdir(parents=True, exist_ok=True)

    for model in args.model:
        model = MODELS.get(model, model)
        url = args.link_format.format(model=model)
        _LOGGER.info("Downloading %s", url)
        with urlopen(url) as response:
            with tarfile.open(mode="r|*", fileobj=response) as tar_gz:
                _LOGGER.info("Extracting to %s", args.destination)
                tar_gz.extractall(args.destination)


if __name__ == "__main__":
    main()


================================================
FILE: programs/asr/coqui-stt/script/raw2text
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

if [ -d "${venv}" ]; then
    source "${venv}/bin/activate"
fi

python3 "${base_dir}/bin/coqui_stt_raw2text.py" "$@"


================================================
FILE: programs/asr/coqui-stt/script/server
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

if [ -d "${venv}" ]; then
    source "${venv}/bin/activate"
fi

socket_dir="${base_dir}/var/run"
mkdir -p "${socket_dir}"

python3 "${base_dir}/bin/coqui_stt_server.py" --socketfile "${socket_dir}/coqui-stt.socket" "$@"


================================================
FILE: programs/asr/coqui-stt/script/setup
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

# Python binary to use
: "${PYTHON=python3}"

python_version="$(${PYTHON} --version)"

if [ ! -d "${venv}" ]; then
    # Create virtual environment
    echo "Creating virtual environment at ${venv} (${python_version})"
    rm -rf "${venv}"
    "${PYTHON}" -m venv "${venv}"
    source "${venv}/bin/activate"

    pip3 install --upgrade pip
    pip3 install --upgrade wheel setuptools
else
    source "${venv}/bin/activate"
fi

# Install Python dependencies
echo 'Installing Python dependencies'
pip3 install -r "${base_dir}/requirements.txt"

# -----------------------------------------------------------------------------

echo "OK"


================================================
FILE: programs/asr/coqui-stt/script/wav2text
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

if [ -d "${venv}" ]; then
    source "${venv}/bin/activate"
fi

python3 "${base_dir}/bin/coqui_stt_wav2text.py" "$@"


================================================
FILE: programs/asr/faster-whisper/README.md
================================================
# Faster Whisper

Speech to text service for Rhasspy based on [faster-whisper](https://github.com/guillaumekln/faster-whisper/).

Additional models can be downloaded here: https://github.com/rhasspy/models/releases/tag/v1.0

## Installation

1. Copy the contents of this directory to `config/programs/asr/faster-whisper/`
2. Run `script/setup.py`
3. Download a model with `script/download.py`
    * Example: `script/download.py tiny-int8`
    * Models are downloaded to `config/data/asr/faster-whisper` directory
4. Test with `script/wav2text`
    * Example `script/wav2text /path/to/tiny-int8/ /path/to/test.wav`


================================================
FILE: programs/asr/faster-whisper/bin/faster_whisper_server.py
================================================
#!/usr/bin/env python3
import argparse
import io
import logging
import os
import socket
import wave
from pathlib import Path

from faster_whisper import WhisperModel

from rhasspy3.asr import Transcript
from rhasspy3.audio import AudioChunk, AudioStop
from rhasspy3.event import read_event, write_event

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("model", help="Path to faster-whisper model directory")
    parser.add_argument(
        "--socketfile", required=True, help="Path to Unix domain socket file"
    )
    parser.add_argument(
        "--device",
        default="cpu",
        help="Device to use for inference (default: cpu)",
    )
    parser.add_argument(
        "--language",
        help="Language to set for transcription",
    )
    parser.add_argument(
        "--compute-type",
        default="default",
        help="Compute type (float16, int8, etc.)",
    )
    parser.add_argument(
        "--beam-size",
        type=int,
        default=1,
    )
    parser.add_argument("--debug", action="store_true", help="Log DEBUG messages")
    args = parser.parse_args()

    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    # Need to unlink socket if it exists
    try:
        os.unlink(args.socketfile)
    except OSError:
        pass

    try:
        # Create socket server
        sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        sock.bind(args.socketfile)
        sock.listen()

        # Load converted faster-whisper model
        model = WhisperModel(
            args.model, device=args.device, compute_type=args.compute_type
        )
        _LOGGER.info("Ready")

        # Listen for connections
        while True:
            try:
                connection, client_address = sock.accept()
                _LOGGER.debug("Connection from %s", client_address)

                is_first_audio = True
                with connection, connection.makefile(
                    mode="rwb"
                ) as conn_file, io.BytesIO() as wav_io:
                    wav_file: wave.Wave_write = wave.open(wav_io, "wb")
                    with wav_file:
                        while True:
                            event = read_event(conn_file)  # type: ignore
                            if event is None:
                                break

                            if AudioChunk.is_type(event.type):
                                chunk = AudioChunk.from_event(event)

                                if is_first_audio:
                                    _LOGGER.debug("Receiving audio")
                                    wav_file.setframerate(chunk.rate)
                                    wav_file.setsampwidth(chunk.width)
                                    wav_file.setnchannels(chunk.channels)
                                    is_first_audio = False

                                wav_file.writeframes(chunk.audio)
                            elif AudioStop.is_type(event.type):
                                _LOGGER.debug("Audio stopped")
                                break

                    wav_io.seek(0)
                    segments, _info = model.transcribe(
                        wav_io,
                        beam_size=args.beam_size,
                        language=args.language,
                    )
                    text = " ".join(segment.text for segment in segments)
                    _LOGGER.info(text)

                    write_event(Transcript(text=text).event(), conn_file)  # type: ignore
            except KeyboardInterrupt:
                break
            except Exception:
                _LOGGER.exception("Error communicating with socket client")
    finally:
        os.unlink(args.socketfile)


# -----------------------------------------------------------------------------

if __name__ == "__main__":
    main()


================================================
FILE: programs/asr/faster-whisper/bin/faster_whisper_wav2text.py
================================================
#!/usr/bin/env python3
import argparse
import logging
import time
from pathlib import Path

from faster_whisper import WhisperModel

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("model", help="Path to faster-whisper model directory")
    parser.add_argument("wav_file", nargs="+", help="Path to WAV file(s) to transcribe")
    parser.add_argument(
        "--device",
        default="cpu",
        help="Device to use for inference (default: cpu)",
    )
    parser.add_argument(
        "--language",
        help="Language to set for transcription",
    )
    parser.add_argument(
        "--compute-type",
        default="default",
        help="Compute type (float16, int8, etc.)",
    )
    parser.add_argument(
        "--beam-size",
        type=int,
        default=1,
    )
    parser.add_argument("--debug", action="store_true", help="Log DEBUG messages")
    args = parser.parse_args()

    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    # Load converted faster-whisper model
    _LOGGER.debug("Loading model: %s", args.model)
    model = WhisperModel(args.model, device=args.device, compute_type=args.compute_type)
    _LOGGER.info("Model loaded")

    for wav_path in args.wav_file:
        _LOGGER.debug("Processing %s", wav_path)
        start_time = time.monotonic_ns()
        segments, _info = model.transcribe(
            wav_path,
            beam_size=args.beam_size,
            language=args.language,
        )
        text = " ".join(segment.text for segment in segments)
        end_time = time.monotonic_ns()
        _LOGGER.debug(
            "Transcribed %s in %s second(s)", wav_path, (end_time - start_time) / 1e9
        )
        _LOGGER.debug(text)

        print(text, flush=True)


# -----------------------------------------------------------------------------

if __name__ == "__main__":
    main()


================================================
FILE: programs/asr/faster-whisper/script/download.py
================================================
#!/usr/bin/env python3
import argparse
import logging
import tarfile
from pathlib import Path
from urllib.request import urlopen

_DIR = Path(__file__).parent
_LOGGER = logging.getLogger("setup")

MODELS = [
    "tiny",
    "tiny-int8",
    "base",
    "base-int8",
    "small",
    "small-int8",
]


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "model",
        nargs="+",
        choices=MODELS,
        help="faster-whisper model(s) to download",
    )
    parser.add_argument(
        "--destination", help="Path to destination directory (default: share)"
    )
    parser.add_argument(
        "--link-format",
        default="https://github.com/rhasspy/models/releases/download/v1.0/asr_faster-whisper-{model}.tar.gz",
        help="Format string for download URLs",
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.INFO)

    if args.destination:
        args.destination = Path(args.destination)
    else:
        # Assume we're in programs/asr/faster-whisper/script
        data_dir = _DIR.parent.parent.parent.parent / "data"
        args.destination = data_dir / "asr" / "faster-whisper"

    args.destination.parent.mkdir(parents=True, exist_ok=True)

    for model in args.model:
        url = args.link_format.format(model=model)
        _LOGGER.info("Downloading %s", url)
        with urlopen(url) as response:
            with tarfile.open(mode="r|*", fileobj=response) as tar_gz:
                _LOGGER.info("Extracting to %s", args.destination)
                tar_gz.extractall(args.destination)


if __name__ == "__main__":
    main()


================================================
FILE: programs/asr/faster-whisper/script/server
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

if [ -d "${venv}" ]; then
    source "${venv}/bin/activate"
fi

socket_dir="${base_dir}/var/run"
mkdir -p "${socket_dir}"

python3 "${base_dir}/bin/faster_whisper_server.py" --socketfile "${socket_dir}/faster-whisper.socket" "$@"


================================================
FILE: programs/asr/faster-whisper/script/setup
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

# Python binary to use
: "${PYTHON=python3}"

python_version="$(${PYTHON} --version)"

if [ ! -d "${venv}" ]; then
    # Create virtual environment
    echo "Creating virtual environment at ${venv} (${python_version})"
    rm -rf "${venv}"
    "${PYTHON}" -m venv "${venv}"
    source "${venv}/bin/activate"

    pip3 install --upgrade pip
    pip3 install --upgrade wheel setuptools
else
    source "${venv}/bin/activate"
fi


# Install Python dependencies
echo 'Installing Python dependencies'
pip3 install -e "${base_dir}/src"

# Install rhasspy3
rhasspy3_dir="${base_dir}/../../../.."
pip3 install -e "${rhasspy3_dir}"

# -----------------------------------------------------------------------------

echo "OK"


================================================
FILE: programs/asr/faster-whisper/script/wav2text
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

if [ -d "${venv}" ]; then
    source "${venv}/bin/activate"
fi

python3 "${base_dir}/bin/faster_whisper_wav2text.py" "$@"


================================================
FILE: programs/asr/faster-whisper/src/LICENSE
================================================
MIT License

Copyright (c) 2023 Guillaume Klein

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


================================================
FILE: programs/asr/faster-whisper/src/README.md
================================================
# Faster Whisper transcription with CTranslate2

This repository demonstrates how to implement the Whisper transcription using [CTranslate2](https://github.com/OpenNMT/CTranslate2/), which is a fast inference engine for Transformer models.

This implementation is about 4 times faster than [openai/whisper](https://github.com/openai/whisper) for the same accuracy while using less memory. The efficiency can be further improved with 8-bit quantization on both CPU and GPU.

## Installation

```bash
pip install -e .[conversion]
```

The model conversion requires the modules `transformers` and `torch` which are installed by the `[conversion]` requirement. Once a model is converted, these modules are no longer needed and the installation could be simplified to:

```bash
pip install -e .
```

## Usage

### Model conversion

A Whisper model should be first converted into the CTranslate2 format. For example the command below converts the "medium" Whisper model and saves the weights in FP16:

```bash
ct2-transformers-converter --model openai/whisper-medium --output_dir whisper-medium-ct2 --quantization float16
```

If needed, models can also be converted from the code. See the [conversion API](https://opennmt.net/CTranslate2/python/ctranslate2.converters.TransformersConverter.html).

### Transcription

```python
from faster_whisper import WhisperModel

model_path = "whisper-medium-ct2/"

# Run on GPU with FP16
model = WhisperModel(model_path, device="cuda", compute_type="float16")

# or run on GPU with INT8
# model = WhisperModel(model_path, device="cuda", compute_type="int8_float16")
# or run on CPU with INT8
# model = WhisperModel(model_path, device="cpu", compute_type="int8")

segments, info = model.transcribe("audio.mp3", beam_size=5)

print("Detected language '%s' with probability %f" % (info.language, info.language_probability))

for segment in segments:
    print("[%ds -> %ds] %s" % (segment.start, segment.end, segment.text))
```

## Comparing performance against openai/whisper

If you are comparing the performance against [openai/whisper](https://github.com/openai/whisper), you should make sure to use the same settings in both frameworks. In particular:

* In openai/whisper, `model.transcribe` uses a beam size of 1 by default. A different beam size will have an important impact on performance so make sure to use the same.
* When running on CPU, make sure to set the same number of threads. Both frameworks will read the environment variable `OMP_NUM_THREADS`, which can be set when running your script:

```bash
OMP_NUM_THREADS=4 python3 my_script.py
```


================================================
FILE: programs/asr/faster-whisper/src/faster_whisper/__init__.py
================================================
from faster_whisper.transcribe import WhisperModel


================================================
FILE: programs/asr/faster-whisper/src/faster_whisper/audio.py
================================================
import av
import numpy as np


def decode_audio(input_file, sampling_rate=16000):
    """Decodes the audio.

    Args:
      input_file: Path to the input file or a file-like object.
      sampling_rate: Resample the audio to this sample rate.

    Returns:
      A float32 Numpy array.
    """
    fifo = av.audio.fifo.AudioFifo()
    resampler = av.audio.resampler.AudioResampler(
        format="s16",
        layout="mono",
        rate=sampling_rate,
    )

    with av.open(input_file) as container:
        # Decode and resample each audio frame.
        for frame in container.decode(audio=0):
            frame.pts = None
            for new_frame in resampler.resample(frame):
                fifo.write(new_frame)

        # Flush the resampler.
        for new_frame in resampler.resample(None):
            fifo.write(new_frame)

    frame = fifo.read()

    # Convert s16 back to f32.
    return frame.to_ndarray().flatten().astype(np.float32) / 32768.0


================================================
FILE: programs/asr/faster-whisper/src/faster_whisper/feature_extractor.py
================================================
import numpy as np


# Adapted from https://github.com/huggingface/transformers/blob/main/src/transformers/models/whisper/feature_extraction_whisper.py
class FeatureExtractor:
    def __init__(
        self,
        feature_size=80,
        sampling_rate=16000,
        hop_length=160,
        chunk_length=30,
        n_fft=400,
    ):
        self.n_fft = n_fft
        self.hop_length = hop_length
        self.chunk_length = chunk_length
        self.n_samples = chunk_length * sampling_rate
        self.nb_max_frames = self.n_samples // hop_length
        self.time_per_frame = hop_length / sampling_rate
        self.sampling_rate = sampling_rate
        self.mel_filters = self.get_mel_filters(
            sampling_rate, n_fft, n_mels=feature_size
        )

    def get_mel_filters(self, sr, n_fft, n_mels=128, dtype=np.float32):
        # Initialize the weights
        n_mels = int(n_mels)
        weights = np.zeros((n_mels, int(1 + n_fft // 2)), dtype=dtype)

        # Center freqs of each FFT bin
        fftfreqs = np.fft.rfftfreq(n=n_fft, d=1.0 / sr)

        # 'Center freqs' of mel bands - uniformly spaced between limits
        min_mel = 0.0
        max_mel = 45.245640471924965

        mels = np.linspace(min_mel, max_mel, n_mels + 2)

        mels = np.asanyarray(mels)

        # Fill in the linear scale
        f_min = 0.0
        f_sp = 200.0 / 3
        freqs = f_min + f_sp * mels

        # And now the nonlinear scale
        min_log_hz = 1000.0  # beginning of log region (Hz)
        min_log_mel = (min_log_hz - f_min) / f_sp  # same (Mels)
        logstep = np.log(6.4) / 27.0  # step size for log region

        # If we have vector data, vectorize
        log_t = mels >= min_log_mel
        freqs[log_t] = min_log_hz * np.exp(logstep * (mels[log_t] - min_log_mel))

        mel_f = freqs

        fdiff = np.diff(mel_f)
        ramps = np.subtract.outer(mel_f, fftfreqs)

        for i in range(n_mels):
            # lower and upper slopes for all bins
            lower = -ramps[i] / fdiff[i]
            upper = ramps[i + 2] / fdiff[i + 1]

            # .. then intersect them with each other and zero
            weights[i] = np.maximum(0, np.minimum(lower, upper))

        # Slaney-style mel is scaled to be approx constant energy per channel
        enorm = 2.0 / (mel_f[2 : n_mels + 2] - mel_f[:n_mels])
        weights *= enorm[:, np.newaxis]

        return weights

    def fram_wave(self, waveform, center=True):
        """
        Transform a raw waveform into a list of smaller waveforms.
        The window length defines how much of the signal is
        contain in each frame (smalle waveform), while the hope length defines the step
        between the beginning of each new frame.
        Centering is done by reflecting the waveform which is first centered around
        `frame_idx * hop_length`.
        """
        frames = []
        for i in range(0, waveform.shape[0] + 1, self.hop_length):
            half_window = (self.n_fft - 1) // 2 + 1
            if center:
                start = i - half_window if i > half_window else 0
                end = (
                    i + half_window
                    if i < waveform.shape[0] - half_window
                    else waveform.shape[0]
                )

                frame = waveform[start:end]

                if start == 0:
                    padd_width = (-i + half_window, 0)
                    frame = np.pad(frame, pad_width=padd_width, mode="reflect")

                elif end == waveform.shape[0]:
                    padd_width = (0, (i - waveform.shape[0] + half_window))
                    frame = np.pad(frame, pad_width=padd_width, mode="reflect")

            else:
                frame = waveform[i : i + self.n_fft]
                frame_width = frame.shape[0]
                if frame_width < waveform.shape[0]:
                    frame = np.lib.pad(
                        frame,
                        pad_width=(0, self.n_fft - frame_width),
                        mode="constant",
                        constant_values=0,
                    )

            frames.append(frame)
        return np.stack(frames, 0)

    def stft(self, frames, window):
        """
        Calculates the complex Short-Time Fourier Transform (STFT) of the given framed signal.
        Should give the same results as `torch.stft`.
        """
        frame_size = frames.shape[1]
        fft_size = self.n_fft

        if fft_size is None:
            fft_size = frame_size

        if fft_size < frame_size:
            raise ValueError("FFT size must greater or equal the frame size")
        # number of FFT bins to store
        num_fft_bins = (fft_size >> 1) + 1

        data = np.empty((len(frames), num_fft_bins), dtype=np.complex64)
        fft_signal = np.zeros(fft_size)

        for f, frame in enumerate(frames):
            if window is not None:
                np.multiply(frame, window, out=fft_signal[:frame_size])
            else:
                fft_signal[:frame_size] = frame
            data[f] = np.fft.fft(fft_signal, axis=0)[:num_fft_bins]
        return data.T

    def __call__(self, waveform):
        """
        Compute the log-Mel spectrogram of the provided audio, gives similar results
        whisper's original torch implementation with 1e-5 tolerance.
        """
        window = np.hanning(self.n_fft + 1)[:-1]

        frames = self.fram_wave(waveform)
        stft = self.stft(frames, window=window)
        magnitudes = np.abs(stft[:, :-1]) ** 2

        filters = self.mel_filters
        mel_spec = filters @ magnitudes

        log_spec = np.log10(np.clip(mel_spec, a_min=1e-10, a_max=None))
        log_spec = np.maximum(log_spec, log_spec.max() - 8.0)
        log_spec = (log_spec + 4.0) / 4.0

        return log_spec


================================================
FILE: programs/asr/faster-whisper/src/faster_whisper/transcribe.py
================================================
import collections
import os
import zlib

import ctranslate2
import numpy as np
import tokenizers
from faster_whisper.audio import decode_audio
from faster_whisper.feature_extractor import FeatureExtractor


class Segment(collections.namedtuple("Segment", ("start", "end", "text"))):
    pass


class AudioInfo(
    collections.namedtuple("AudioInfo", ("language", "language_probability"))
):
    pass


class TranscriptionOptions(
    collections.namedtuple(
        "TranscriptionOptions",
        (
            "beam_size",
            "best_of",
            "patience",
            "log_prob_threshold",
            "no_speech_threshold",
            "compression_ratio_threshold",
            "condition_on_previous_text",
            "temperatures",
        ),
    )
):
    pass


class WhisperModel:
    def __init__(
        self,
        model_path,
        device="auto",
        compute_type="default",
        cpu_threads=0,
    ):
        """Initializes the Whisper model.

        Args:
          model_path: Path to the converted model.
          device: Device to use for computation ("cpu", "cuda", "auto").
          compute_type: Type to use for computation.
            See https://opennmt.net/CTranslate2/quantization.html.
          cpu_threads: Number of threads to use when running on CPU (4 by default).
            A non zero value overrides the OMP_NUM_THREADS environment variable.
        """
        self.model = ctranslate2.models.Whisper(
            model_path,
            device=device,
            compute_type=compute_type,
            intra_threads=cpu_threads,
        )

        self.feature_extractor = FeatureExtractor()
        self.decoder = tokenizers.decoders.ByteLevel()

        with open(os.path.join(model_path, "vocabulary.txt")) as vocab_file:
            self.ids_to_tokens = [line.rstrip("\n") for line in vocab_file]
            self.tokens_to_ids = {
                token: i for i, token in enumerate(self.ids_to_tokens)
            }

        self.eot_id = self.tokens_to_ids["<|endoftext|>"]
        self.timestamp_begin_id = self.tokens_to_ids["<|notimestamps|>"] + 1
        self.input_stride = 2
        self.time_precision = 0.02
        self.max_length = 448

    def transcribe(
        self,
        input_file,
        language=None,
        beam_size=5,
        best_of=5,
        patience=1,
        temperature=[0.0, 0.2, 0.4, 0.6, 0.8, 1.0],
        compression_ratio_threshold=2.4,
        log_prob_threshold=-1.0,
        no_speech_threshold=0.6,
        condition_on_previous_text=True,
    ):
        """Transcribes an input file.

        Arguments:
          input_file: Path to the input file or a file-like object.
          language: The language spoken in the audio. If not set, the language will be
            detected in the first 30 seconds of audio.
          beam_size: Beam size to use for decoding.
          best_of: Number of candidates when sampling with non-zero temperature.
          patience: Beam search patience factor.
          temperature: Temperature for sampling. It can be a tuple of temperatures,
            which will be successively used upon failures according to either
            `compression_ratio_threshold` or `logprob_threshold`.
          compression_ratio_threshold: If the gzip compression ratio is above this value,
            treat as failed.
          log_prob_threshold: If the average log probability over sampled tokens is
            below this value, treat as failed.
          no_speech_threshold: If the no_speech probability is higher than this value AND
            the average log probability over sampled tokens is below `logprob_threshold`,
            consider the segment as silent.
          condition_on_previous_text: If True, the previous output of the model is provided
            as a prompt for the next window; disabling may make the text inconsistent across
            windows, but the model becomes less prone to getting stuck in a failure loop,
            such as repetition looping or timestamps going out of sync.

        Returns:
          A tuple with:

            - a generator over transcribed segments
            - an instance of AudioInfo
        """
        audio = decode_audio(
            input_file, sampling_rate=self.feature_extractor.sampling_rate
        )
        features = self.feature_extractor(audio)

        if language is None:
            segment = self.get_segment(features)
            input = self.get_input(segment)
            results = self.model.detect_language(input)
            language_token, language_probability = results[0][0]
            language = language_token[2:-2]
        else:
            language_probability = 1

        options = TranscriptionOptions(
            beam_size=beam_size,
            best_of=best_of,
            patience=patience,
            log_prob_threshold=log_prob_threshold,
            no_speech_threshold=no_speech_threshold,
            compression_ratio_threshold=compression_ratio_threshold,
            condition_on_previous_text=condition_on_previous_text,
            temperatures=(
                temperature if isinstance(temperature, (list, tuple)) else [temperature]
            ),
        )

        segments = self.generate_segments(features, language, options)

        audio_info = AudioInfo(
            language=language,
            language_probability=language_probability,
        )

        return segments, audio_info

    def generate_segments(self, features, language, options):
        tokenized_segments = self.generate_tokenized_segments(
            features, language, options
        )

        for start, end, tokens in tokenized_segments:
            text = self.decode_text_tokens(tokens)
            if not text.strip():
                continue

            yield Segment(
                start=start,
                end=end,
                text=text,
            )

    def generate_tokenized_segments(self, features, language, options):
        num_frames = features.shape[-1]
        offset = 0
        all_tokens = []
        prompt_reset_since = 0

        while offset < num_frames:
            time_offset = offset * self.feature_extractor.time_per_frame
            segment = self.get_segment(features, offset)
            segment_duration = segment.shape[-1] * self.feature_extractor.time_per_frame

            previous_tokens = all_tokens[prompt_reset_since:]
            prompt = self.get_prompt(language, previous_tokens)
            result, temperature = self.generate_with_fallback(segment, prompt, options)

            if (
                result.no_speech_prob > options.no_speech_threshold
                and result.scores[0] < options.log_prob_threshold
            ):
                offset += segment.shape[-1]
                continue

            tokens = result.sequences_ids[0]

            consecutive_timestamps = [
                i
                for i in range(len(tokens))
                if i > 0
                and tokens[i] >= self.timestamp_begin_id
                and tokens[i - 1] >= self.timestamp_begin_id
            ]

            if len(consecutive_timestamps) > 0:
                last_slice = 0
                for i, current_slice in enumerate(consecutive_timestamps):
                    sliced_tokens = tokens[last_slice:current_slice]
                    start_timestamp_position = (
                        sliced_tokens[0] - self.timestamp_begin_id
                    )
                    end_timestamp_position = sliced_tokens[-1] - self.timestamp_begin_id
                    start_time = (
                        time_offset + start_timestamp_position * self.time_precision
                    )
                    end_time = (
                        time_offset + end_timestamp_position * self.time_precision
                    )

                    last_in_window = i + 1 == len(consecutive_timestamps)

                    # Include the last timestamp so that all tokens are included in a segment.
                    if last_in_window:
                        sliced_tokens.append(tokens[current_slice])

                    yield start_time, end_time, sliced_tokens
                    last_slice = current_slice

                last_timestamp_position = (
                    tokens[last_slice - 1] - self.timestamp_begin_id
                )
                offset += last_timestamp_position * self.input_stride
                all_tokens.extend(tokens[: last_slice + 1])

            else:
                duration = segment_duration
                timestamps = [
                    token for token in tokens if token >= self.timestamp_begin_id
                ]
                if len(timestamps) > 0 and timestamps[-1] != self.timestamp_begin_id:
                    last_timestamp_position = timestamps[-1] - self.timestamp_begin_id
                    duration = last_timestamp_position * self.time_precision

                yield time_offset, time_offset + duration, tokens

                offset += segment.shape[-1]
                all_tokens.extend(tokens)

            if not options.condition_on_previous_text or temperature > 0.5:
                prompt_reset_since = len(all_tokens)

    def decode_text_tokens(self, tokens):
        text_tokens = [
            self.ids_to_tokens[token] for token in tokens if token < self.eot_id
        ]

        return self.decoder.decode(text_tokens)

    def generate_with_fallback(self, segment, prompt, options):
        features = self.get_input(segment)
        result = None
        final_temperature = None

        for temperature in options.temperatures:
            if temperature > 0:
                kwargs = {
                    "beam_size": 1,
                    "num_hypotheses": options.best_of,
                    "sampling_topk": 0,
                    "sampling_temperature": temperature,
                }
            else:
                kwargs = {
                    "beam_size": options.beam_size,
                    "patience": options.patience,
                }

            final_temperature = temperature
            result = self.model.generate(
                features,
                [prompt],
                max_length=self.max_length,
                return_scores=True,
                return_no_speech_prob=True,
                **kwargs,
            )[0]

            tokens = result.sequences_ids[0]
            text = self.decode_text_tokens(tokens)
            compression_ratio = get_compression_ratio(text)

            if (
                compression_ratio <= options.compression_ratio_threshold
                and result.scores[0] >= options.log_prob_threshold
            ):
                break

        return result, final_temperature

    def get_prompt(self, language, previous_tokens):
        prompt = []

        if previous_tokens:
            prompt.append(self.tokens_to_ids["<|startofprev|>"])
            prompt.extend(previous_tokens[-(self.max_length // 2 - 1) :])

        prompt += [
            self.tokens_to_ids["<|startoftranscript|>"],
            self.tokens_to_ids["<|%s|>" % language],
            self.tokens_to_ids["<|transcribe|>"],
        ]

        return prompt

    def get_segment(self, features, offset=0):
        if offset > 0:
            features = features[:, offset:]

        num_frames = features.shape[-1]
        required_num_frames = self.feature_extractor.nb_max_frames

        if num_frames > required_num_frames:
            features = features[:, :required_num_frames]
        elif num_frames < required_num_frames:
            pad_widths = [(0, 0), (0, required_num_frames - num_frames)]
            features = np.pad(features, pad_widths)

        features = np.ascontiguousarray(features)
        return features

    def get_input(self, segment):
        segment = np.expand_dims(segment, 0)
        segment = ctranslate2.StorageView.from_array(segment)
        return segment


def get_compression_ratio(text):
    text_bytes = text.encode("utf-8")
    return len(text_bytes) / len(zlib.compress(text_bytes))


================================================
FILE: programs/asr/faster-whisper/src/requirements.conversion.txt
================================================
transformers[torch]>=4.23


================================================
FILE: programs/asr/faster-whisper/src/requirements.txt
================================================
av==10.*
ctranslate2>=3.5,<4
tokenizers==0.13.*


================================================
FILE: programs/asr/faster-whisper/src/setup.py
================================================
import os

from setuptools import find_packages, setup


def get_requirements(path):
    with open(path, encoding="utf-8") as requirements:
        return [requirement.strip() for requirement in requirements]


base_dir = os.path.dirname(os.path.abspath(__file__))
install_requires = get_requirements(os.path.join(base_dir, "requirements.txt"))
conversion_requires = get_requirements(
    os.path.join(base_dir, "requirements.conversion.txt")
)

setup(
    name="faster-whisper",
    version="0.1.0",
    description="Faster Whisper transcription with CTranslate2",
    author="Guillaume Klein",
    python_requires=">=3.7",
    install_requires=install_requires,
    extras_require={
        "conversion": conversion_requires,
    },
    packages=find_packages(),
)


================================================
FILE: programs/asr/pocketsphinx/README.md
================================================
# Pocketsphinx

Speech to text service for Rhasspy based on [Pocketsphinx](https://github.com/cmusphinx/pocketsphinx).

Additional models can be downloaded here: https://github.com/synesthesiam/voice2json-profiles

Model directories should have this layout:

* model/
    * acoustic_model/
    * dictionary.txt
    * language_model.txt
    
These correspond to the `-hmm`, `-dict`, and `-lm` decoder arguments.

## Installation

1. Copy the contents of this directory to `config/programs/asr/pocketsphinx/`
2. Run `script/setup`
3. Download a model with `script/download.py`
    * Example: `script/download.py en_cmu`
    * Models are downloaded to `config/data/asr/pocketsphinx` directory
4. Test with `script/wav2text`
    * Example `script/wav2text /path/to/en-us_pocketsphinx-cmu/ /path/to/test.wav`


================================================
FILE: programs/asr/pocketsphinx/bin/pocketsphinx_raw2text.py
================================================
#!/usr/bin/env python3
import argparse
import logging
import sys
from pathlib import Path

import pocketsphinx

_LOGGER = logging.getLogger("pocketsphinx_raw2text")


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("model", help="Path to Pocketsphinx model directory")
    parser.add_argument(
        "--samples-per-chunk",
        type=int,
        default=1024,
        help="Number of samples to process at a time",
    )
    parser.add_argument("--debug", action="store_true", help="Log DEBUG messages")
    args = parser.parse_args()

    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    model_dir = Path(args.model)

    _LOGGER.debug("Loading model from %s", model_dir.absolute())
    decoder_config = pocketsphinx.Decoder.default_config()
    decoder_config.set_string("-hmm", str(model_dir / "acoustic_model"))
    decoder_config.set_string("-dict", str(model_dir / "dictionary.txt"))
    decoder_config.set_string("-lm", str(model_dir / "language_model.txt"))
    decoder = pocketsphinx.Decoder(decoder_config)

    decoder.start_utt()

    chunk = sys.stdin.buffer.read(args.samples_per_chunk)
    _LOGGER.debug("Processing audio")
    while chunk:
        decoder.process_raw(chunk, False, False)
        chunk = sys.stdin.buffer.read(args.samples_per_chunk)

    decoder.end_utt()
    hyp = decoder.hyp()
    if hyp:
        text = hyp.hypstr
    else:
        text = ""

    _LOGGER.debug(text)

    print(text.strip())


# -----------------------------------------------------------------------------

if __name__ == "__main__":
    main()


================================================
FILE: programs/asr/pocketsphinx/bin/pocketsphinx_server.py
================================================
#!/usr/bin/env python3
import argparse
import json
import logging
import os
import socket
import threading
from pathlib import Path

import pocketsphinx

_LOGGER = logging.getLogger("pocketsphinx_server")


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("model", help="Path to Pocketsphinx model directory")
    parser.add_argument(
        "--socketfile", required=True, help="Path to Unix domain socket file"
    )
    parser.add_argument(
        "-r",
        "--rate",
        type=int,
        default=16000,
        help="Input audio sample rate (default: 16000)",
    )
    parser.add_argument("--debug", action="store_true", help="Log DEBUG messages")
    args = parser.parse_args()

    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    # Need to unlink socket if it exists
    try:
        os.unlink(args.socketfile)
    except OSError:
        pass

    try:
        # Create socket server
        sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        sock.bind(args.socketfile)
        sock.listen()

        model_dir = Path(args.model)

        decoder_config = pocketsphinx.Decoder.default_config()
        decoder_config.set_string("-hmm", str(model_dir / "acoustic_model"))
        decoder_config.set_string("-dict", str(model_dir / "dictionary.txt"))
        decoder_config.set_string("-lm", str(model_dir / "language_model.txt"))
        decoder = pocketsphinx.Decoder(decoder_config)

        _LOGGER.info("Ready")

        # Listen for connections
        while True:
            try:
                connection, client_address = sock.accept()
                _LOGGER.debug("Connection from %s", client_address)

                # Start new thread for client
                threading.Thread(
                    target=handle_client,
                    args=(connection, decoder, args.rate),
                    daemon=True,
                ).start()
            except KeyboardInterrupt:
                break
            except Exception:
                _LOGGER.exception("Error communicating with socket client")
    finally:
        os.unlink(args.socketfile)


def handle_client(
    connection: socket.socket, decoder: pocketsphinx.Decoder, rate: int
) -> None:
    try:
        decoder.start_utt()
        is_first_audio = True

        with connection, connection.makefile(mode="rwb") as conn_file:
            while True:
                event_info = json.loads(conn_file.readline())
                event_type = event_info["type"]

                if event_type == "audio-chunk":
                    if is_first_audio:
                        _LOGGER.debug("Receiving audio")
                        is_first_audio = False

                    num_bytes = event_info["payload_length"]
                    chunk = conn_file.read(num_bytes)
                    decoder.process_raw(chunk, False, False)
                elif event_type == "audio-stop":
                    _LOGGER.info("Audio stopped")

                    decoder.end_utt()
                    hyp = decoder.hyp()
                    if hyp:
                        text = hyp.hypstr.strip()
                    else:
                        text = ""

                    transcript_str = (
                        json.dumps(
                            {"type": "transcript", "data": {"text": text}},
                            ensure_ascii=False,
                        )
                        + "\n"
                    )
                    conn_file.write(transcript_str.encode())
                    break
    except Exception:
        _LOGGER.exception("Unexpected error in client thread")


# -----------------------------------------------------------------------------

if __name__ == "__main__":
    main()


================================================
FILE: programs/asr/pocketsphinx/bin/pocketsphinx_wav2text.py
================================================
#!/usr/bin/env python3
import argparse
import logging
import wave
from pathlib import Path

import pocketsphinx

_LOGGER = logging.getLogger("pocketsphinx_wav2text")


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("model", help="Path to Pocketsphinx model directory")
    parser.add_argument("wav_file", nargs="+", help="Path to WAV file(s) to transcribe")
    parser.add_argument("--debug", action="store_true", help="Log DEBUG messages")
    args = parser.parse_args()

    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    model_dir = Path(args.model)

    _LOGGER.debug("Loading model from %s", model_dir.absolute())
    decoder_config = pocketsphinx.Decoder.default_config()
    decoder_config.set_string("-hmm", str(model_dir / "acoustic_model"))
    decoder_config.set_string("-dict", str(model_dir / "dictionary.txt"))
    decoder_config.set_string("-lm", str(model_dir / "language_model.txt"))
    decoder = pocketsphinx.Decoder(decoder_config)

    for wav_path in args.wav_file:
        _LOGGER.debug("Processing %s", wav_path)
        wav_file: wave.Wave_read = wave.open(wav_path, "rb")
        with wav_file:
            assert wav_file.getframerate() == 16000, "16Khz sample rate required"
            assert wav_file.getsampwidth() == 2, "16-bit samples required"
            assert wav_file.getnchannels() == 1, "Mono audio required"
            audio_bytes = wav_file.readframes(wav_file.getnframes())

            decoder.start_utt()
            decoder.process_raw(audio_bytes, False, True)
            decoder.end_utt()
            hyp = decoder.hyp()
            if hyp:
                text = hyp.hypstr
            else:
                text = ""

            print(text.strip())


# -----------------------------------------------------------------------------

if __name__ == "__main__":
    main()


================================================
FILE: programs/asr/pocketsphinx/requirements.txt
================================================
pocketsphinx @ https://github.com/synesthesiam/pocketsphinx-python/releases/download/v1.0/pocketsphinx-python.tar.gz


================================================
FILE: programs/asr/pocketsphinx/script/download.py
================================================
#!/usr/bin/env python3
import argparse
import itertools
import logging
import tarfile
from pathlib import Path
from urllib.request import urlopen

_DIR = Path(__file__).parent
_LOGGER = logging.getLogger("setup")

MODELS = {"en_cmu": "en-us_pocketsphinx-cmu"}


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "model",
        nargs="+",
        choices=list(itertools.chain(MODELS.keys(), MODELS.values())),
        help="Pocketsphinx model(s) to download",
    )
    parser.add_argument("--destination", help="Path to destination directory")
    parser.add_argument(
        "--link-format",
        default="https://github.com/rhasspy/models/releases/download/v1.0/asr_pocketsphinx-{model}.tar.gz",
        help="Format string for download URLs",
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.INFO)

    if args.destination:
        args.destination = Path(args.destination)
    else:
        # Assume we're in programs/asr/pocketsphinx/script
        data_dir = _DIR.parent.parent.parent.parent / "data"
        args.destination = data_dir / "asr" / "pocketsphinx"

    args.destination.parent.mkdir(parents=True, exist_ok=True)

    for model in args.model:
        model = MODELS.get(model, model)
        url = args.link_format.format(model=model)
        _LOGGER.info("Downloading %s", url)
        with urlopen(url) as response:
            with tarfile.open(mode="r|*", fileobj=response) as tar_gz:
                _LOGGER.info("Extracting to %s", args.destination)
                tar_gz.extractall(args.destination)


if __name__ == "__main__":
    main()


================================================
FILE: programs/asr/pocketsphinx/script/raw2text
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

if [ -d "${venv}" ]; then
    source "${venv}/bin/activate"
fi

python3 "${base_dir}/bin/pocketsphinx_raw2text.py" "$@"


================================================
FILE: programs/asr/pocketsphinx/script/server
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

if [ -d "${venv}" ]; then
    source "${venv}/bin/activate"
fi

socket_dir="${base_dir}/var/run"
mkdir -p "${socket_dir}"

python3 "${base_dir}/bin/pocketsphinx_server.py" --socketfile "${socket_dir}/pocketsphinx.socket" "$@"


================================================
FILE: programs/asr/pocketsphinx/script/setup
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

# Python binary to use
: "${PYTHON=python3}"

python_version="$(${PYTHON} --version)"

if [ ! -d "${venv}" ]; then
    # Create virtual environment
    echo "Creating virtual environment at ${venv} (${python_version})"
    rm -rf "${venv}"
    "${PYTHON}" -m venv "${venv}"
    source "${venv}/bin/activate"

    pip3 install --upgrade pip
    pip3 install --upgrade wheel setuptools
else
    source "${venv}/bin/activate"
fi

# Install Python dependencies
echo 'Installing Python dependencies'
pip3 install -r "${base_dir}/requirements.txt"

# -----------------------------------------------------------------------------

echo "OK"


================================================
FILE: programs/asr/pocketsphinx/script/wav2text
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

if [ -d "${venv}" ]; then
    source "${venv}/bin/activate"
fi

python3 "${base_dir}/bin/pocketsphinx_wav2text.py" "$@"


================================================
FILE: programs/asr/vosk/README.md
================================================
# Vosk

Speech to text service for Rhasspy based on [Vosk](https://alphacephei.com/vosk/).

You can download additional models here: https://alphacephei.com/vosk/models


## Installation

1. Copy the contents of this directory to `config/programs/asr/vosk/`
2. Run `script/setup`
3. Download a model with `script/download.py`
    * Example: `script/download.py en_small`
    * Models are downloaded to `config/data/asr/vosk` directory
4. Test with `script/wav2text`
    * Example `script/wav2text /path/to/vosk-model-small-en-us-0.15/ /path/to/test.wav`


================================================
FILE: programs/asr/vosk/bin/vosk_raw2text.py
================================================
#!/usr/bin/env python3
import argparse
import json
import logging
import sys

from vosk import KaldiRecognizer, Model, SetLogLevel

_LOGGER = logging.getLogger("vosk_raw2text")


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("model", help="Path to Vosk model directory")
    parser.add_argument(
        "-r",
        "--rate",
        type=int,
        default=16000,
        help="Model sample rate (default: 16000)",
    )
    parser.add_argument(
        "--samples-per-chunk",
        type=int,
        default=1024,
        help="Number of samples to process at a time",
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.INFO)

    SetLogLevel(0)

    model = Model(args.model)
    recognizer = KaldiRecognizer(
        model,
        args.rate,
    )

    chunk = sys.stdin.buffer.read(args.samples_per_chunk)
    _LOGGER.debug("Processing audio")
    while chunk:
        recognizer.AcceptWaveform(chunk)
        chunk = sys.stdin.buffer.read(args.samples_per_chunk)

    result = json.loads(recognizer.FinalResult())
    print(result["text"].strip())


# -----------------------------------------------------------------------------

if __name__ == "__main__":
    main()


================================================
FILE: programs/asr/vosk/bin/vosk_server.py
================================================
#!/usr/bin/env python3
import argparse
import json
import logging
import os
import socket
import threading

from vosk import KaldiRecognizer, Model, SetLogLevel

_LOGGER = logging.getLogger("vosk_server")


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("model", help="Path to Vosk model directory")
    parser.add_argument(
        "--socketfile", required=True, help="Path to Unix domain socket file"
    )
    parser.add_argument(
        "-r",
        "--rate",
        type=int,
        default=16000,
        help="Input audio sample rate (default: 16000)",
    )
    parser.add_argument("--debug", action="store_true", help="Log DEBUG messages")
    args = parser.parse_args()

    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    # Need to unlink socket if it exists
    try:
        os.unlink(args.socketfile)
    except OSError:
        pass

    try:
        # Create socket server
        sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        sock.bind(args.socketfile)
        sock.listen()

        # Load Kaldi model
        SetLogLevel(0)
        model = Model(args.model)

        # Listen for connections
        while True:
            try:
                connection, client_address = sock.accept()
                _LOGGER.debug("Connection from %s", client_address)

                # Start new thread for client
                threading.Thread(
                    target=handle_client,
                    args=(connection, model, args.rate),
                    daemon=True,
                ).start()
            except KeyboardInterrupt:
                break
            except Exception:
                _LOGGER.exception("Error communicating with socket client")
    finally:
        os.unlink(args.socketfile)


def handle_client(connection: socket.socket, model: Model, rate: int) -> None:
    try:
        recognizer = KaldiRecognizer(
            model,
            rate,
        )
        is_first_audio = True

        with connection, connection.makefile(mode="rwb") as conn_file:
            while True:
                event_info = json.loads(conn_file.readline())
                event_type = event_info["type"]

                if event_type == "audio-chunk":
                    if is_first_audio:
                        _LOGGER.debug("Receiving audio")
                        is_first_audio = False

                    num_bytes = event_info["payload_length"]
                    chunk = conn_file.read(num_bytes)
                    recognizer.AcceptWaveform(chunk)
                elif event_type == "audio-stop":
                    _LOGGER.info("Audio stopped")

                    result = json.loads(recognizer.FinalResult())
                    _LOGGER.info(result)
                    text = result["text"]

                    transcript_str = (
                        json.dumps(
                            {"type": "transcript", "data": {"text": text}},
                            ensure_ascii=False,
                        )
                        + "\n"
                    )
                    conn_file.write(transcript_str.encode())
                    break
    except Exception:
        _LOGGER.exception("Unexpected error in client thread")


# -----------------------------------------------------------------------------

if __name__ == "__main__":
    main()


================================================
FILE: programs/asr/vosk/bin/vosk_wav2text.py
================================================
#!/usr/bin/env python3
import argparse
import json
import logging
import wave
from pathlib import Path

from vosk import KaldiRecognizer, Model, SetLogLevel

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("model", help="Path to Vosk model directory")
    parser.add_argument("wav_file", nargs="+", help="Path to WAV file(s) to transcribe")
    parser.add_argument(
        "-r",
        "--rate",
        type=int,
        default=16000,
        help="Model sample rate (default: 16000)",
    )
    parser.add_argument(
        "--samples-per-chunk",
        type=int,
        default=1024,
        help="Number of samples to process at a time",
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.INFO)

    SetLogLevel(0)

    model = Model(args.model)
    recognizer = KaldiRecognizer(
        model,
        args.rate,
    )

    for wav_path in args.wav_file:
        _LOGGER.debug("Processing %s", wav_path)
        wav_file: wave.Wave_read = wave.open(wav_path, "rb")
        with wav_file:
            assert wav_file.getframerate() == 16000, "16Khz sample rate required"
            assert wav_file.getsampwidth() == 2, "16-bit samples required"
            assert wav_file.getnchannels() == 1, "Mono audio required"
            audio_bytes = wav_file.readframes(wav_file.getnframes())
            recognizer.AcceptWaveform(audio_bytes)

            result = json.loads(recognizer.FinalResult())
            print(result["text"].strip())


# -----------------------------------------------------------------------------

if __name__ == "__main__":
    main()


================================================
FILE: programs/asr/vosk/requirements.txt
================================================
vosk


================================================
FILE: programs/asr/vosk/script/download.py
================================================
#!/usr/bin/env python3
import argparse
import itertools
import logging
import tarfile
from pathlib import Path
from urllib.request import urlopen

_DIR = Path(__file__).parent
_LOGGER = logging.getLogger("setup")

MODELS = {"en_medium": "en-us-0.22-lgraph", "en_small": "small-en-us-0.15"}


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "model",
        nargs="+",
        choices=list(itertools.chain(MODELS.keys(), MODELS.values())),
        help="Vosk model(s) to download",
    )
    parser.add_argument(
        "--destination", help="Path to destination directory (default: share)"
    )
    parser.add_argument(
        "--link-format",
        default="https://github.com/rhasspy/models/releases/download/v1.0/asr_vosk-model-{model}.tar.gz",
        help="Format string for download URLs",
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.INFO)

    if args.destination:
        args.destination = Path(args.destination)
    else:
        # Assume we're in programs/asr/vosk/script
        data_dir = _DIR.parent.parent.parent.parent / "data"
        args.destination = data_dir / "asr" / "vosk"

    args.destination.parent.mkdir(parents=True, exist_ok=True)

    for model in args.model:
        model = MODELS.get(model, model)
        url = args.link_format.format(model=model)
        _LOGGER.info("Downloading %s", url)
        with urlopen(url) as response:
            with tarfile.open(mode="r|*", fileobj=response) as tar_gz:
                _LOGGER.info("Extracting to %s", args.destination)
                tar_gz.extractall(args.destination)


if __name__ == "__main__":
    main()


================================================
FILE: programs/asr/vosk/script/raw2text
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

if [ -d "${venv}" ]; then
    source "${venv}/bin/activate"
fi

python3 "${base_dir}/bin/vosk_raw2text.py" "$@"


================================================
FILE: programs/asr/vosk/script/server
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

if [ -d "${venv}" ]; then
    source "${venv}/bin/activate"
fi

socket_dir="${base_dir}/var/run"
mkdir -p "${socket_dir}"

python3 "${base_dir}/bin/vosk_server.py" --socketfile "${socket_dir}/vosk.socket" "$@"


================================================
FILE: programs/asr/vosk/script/setup
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

# Python binary to use
: "${PYTHON=python3}"

python_version="$(${PYTHON} --version)"

if [ ! -d "${venv}" ]; then
    # Create virtual environment
    echo "Creating virtual environment at ${venv} (${python_version})"
    rm -rf "${venv}"
    "${PYTHON}" -m venv "${venv}"
    source "${venv}/bin/activate"

    pip3 install --upgrade pip
    pip3 install --upgrade wheel setuptools
else
    source "${venv}/bin/activate"
fi

# Install Python dependencies
echo 'Installing Python dependencies'
pip3 install -r "${base_dir}/requirements.txt"

# -----------------------------------------------------------------------------

echo "OK"


================================================
FILE: programs/asr/vosk/script/wav2text
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

if [ -d "${venv}" ]; then
    source "${venv}/bin/activate"
fi

python3 "${base_dir}/bin/vosk_wav2text.py" "$@"


================================================
FILE: programs/asr/whisper/README.md
================================================
# Whisper

Speech to text service for Rhasspy based on [Whisper](https://github.com/openai/whisper).

Models are downloaded automatically the first time they're used to the `config/data/asr/whisper` directory.

Available models:

* tiny.en
* tiny
* base.en
* base
* small.en
* small
* medium.en
* medium
* large-v1
* large-v2
* large

## Installation

1. Copy the contents of this directory to `config/programs/asr/whisper/`
2. Run `script/setup`
3. Test with `script/wav2text`
    * Example `script/wav2text 'tiny.en' /path/to/test.wav`


================================================
FILE: programs/asr/whisper/bin/whisper_server.py
================================================
#!/usr/bin/env python3
import argparse
import json
import logging
import os
import socket
import threading

import numpy as np
from whisper import Whisper, load_model, transcribe

_LOGGER = logging.getLogger("whisper_server")


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("model", help="Name of Whisper model to use")
    parser.add_argument(
        "--language",
        help="Whisper language",
    )
    parser.add_argument("--device", default="cpu", choices=("cpu", "cuda"))
    parser.add_argument(
        "--socketfile", required=True, help="Path to Unix domain socket file"
    )
    parser.add_argument("--debug", action="store_true", help="Log DEBUG messages")
    args = parser.parse_args()

    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    # Need to unlink socket if it exists
    try:
        os.unlink(args.socketfile)
    except OSError:
        pass

    try:
        # Create socket server
        sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        sock.bind(args.socketfile)
        sock.listen()

        _LOGGER.debug("Loading model: %s", args.model)
        model = load_model(args.model, device=args.device)
        _LOGGER.info("Ready")

        # Listen for connections
        while True:
            try:
                connection, client_address = sock.accept()
                _LOGGER.debug("Connection from %s", client_address)

                # Start new thread for client
                threading.Thread(
                    target=handle_client,
                    args=(connection, model, args),
                    daemon=True,
                ).start()
            except KeyboardInterrupt:
                break
            except Exception:
                _LOGGER.exception("Error communicating with socket client")
    finally:
        os.unlink(args.socketfile)


def handle_client(
    connection: socket.socket, model: Whisper, args: argparse.Namespace
) -> None:
    try:
        is_first_audio = True

        with connection, connection.makefile(mode="rwb") as conn_file:
            audio_bytes = bytes()
            while True:
                event_info = json.loads(conn_file.readline())
                event_type = event_info["type"]

                if event_type == "audio-chunk":
                    if is_first_audio:
                        _LOGGER.debug("Receiving audio")
                        is_first_audio = False

                    num_bytes = event_info["payload_length"]
                    audio_bytes += conn_file.read(num_bytes)
                elif event_type == "audio-stop":
                    _LOGGER.debug("Audio stopped")
                    break

            audio_array = np.frombuffer(audio_bytes, dtype=np.int16)
            audio_array = audio_array.astype(np.float32) / 32768.0
            result = transcribe(model, audio_array, language=args.language)
            _LOGGER.debug(result)

            text = result["text"]
            transcript_str = (
                json.dumps(
                    {"type": "transcript", "data": {"text": text}}, ensure_ascii=False
                )
                + "\n"
            )
            conn_file.write(transcript_str.encode())
    except Exception:
        _LOGGER.exception("Unexpected error in client thread")


# -----------------------------------------------------------------------------

if __name__ == "__main__":
    main()


================================================
FILE: programs/asr/whisper/bin/whisper_wav2text.py
================================================
#!/usr/bin/env python3
import argparse
import logging

from whisper import load_model, transcribe

_LOGGER = logging.getLogger("whisper_wav2text")


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("model", help="Name of Whisper model to use")
    parser.add_argument("wav_file", nargs="+", help="Path to WAV file(s) to transcribe")
    parser.add_argument(
        "--language",
        help="Whisper language",
    )
    parser.add_argument("--device", default="cpu", choices=("cpu", "cuda"))
    #
    parser.add_argument("--debug", action="store_true", help="Log DEBUG messages")
    args = parser.parse_args()

    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    _LOGGER.debug("Loading model: %s", args.model)
    model = load_model(args.model, device=args.device)
    for wav_file in args.wav_file:
        _LOGGER.debug("Processing %s", wav_file)
        result = transcribe(model, wav_file, language=args.language)
        _LOGGER.debug(result)

        text = result["text"]
        print(text.strip())


# -----------------------------------------------------------------------------

if __name__ == "__main__":
    main()


================================================
FILE: programs/asr/whisper/requirements.txt
================================================
git+https://github.com/openai/whisper.git


================================================
FILE: programs/asr/whisper/script/server
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

if [ -d "${venv}" ]; then
    source "${venv}/bin/activate"
fi

socket_dir="${base_dir}/var/run"
mkdir -p "${socket_dir}"

python3 "${base_dir}/bin/whisper_server.py" --socketfile "${socket_dir}/whisper.socket" "$@"


================================================
FILE: programs/asr/whisper/script/setup
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

# Python binary to use
: "${PYTHON=python3}"

python_version="$(${PYTHON} --version)"

if [ ! -d "${venv}" ]; then
    # Create virtual environment
    echo "Creating virtual environment at ${venv} (${python_version})"
    rm -rf "${venv}"
    "${PYTHON}" -m venv "${venv}"
    source "${venv}/bin/activate"

    pip3 install --upgrade pip
    pip3 install --upgrade wheel setuptools
else
    source "${venv}/bin/activate"
fi

# Install Python dependencies
echo 'Installing Python dependencies'
pip3 install -r "${base_dir}/requirements.txt"

# -----------------------------------------------------------------------------

echo "OK"


================================================
FILE: programs/asr/whisper/script/wav2text
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

if [ -d "${venv}" ]; then
    source "${venv}/bin/activate"
fi

python3 "${base_dir}/bin/whisper_wav2text.py" "$@"


================================================
FILE: programs/asr/whisper-cpp/.gitignore
================================================
/build/


================================================
FILE: programs/asr/whisper-cpp/Dockerfile.libwhisper
================================================
FROM debian:bullseye as build
ARG TARGETARCH
ARG TARGETVARIANT

ENV LANG C.UTF-8
ENV DEBIAN_FRONTEND=noninteractive

RUN apt-get update && \
    apt-get install --yes build-essential wget

WORKDIR /build

ARG VERSION=1.1.0
RUN wget "https://github.com/ggerganov/whisper.cpp/archive/refs/tags/v${VERSION}.tar.gz" && \
    tar -xzf "v${VERSION}.tar.gz"

RUN mv "whisper.cpp-${VERSION}/" 'whisper.cpp'
COPY lib/Makefile ./
RUN cd "whisper.cpp" && make -j8
RUN make

# -----------------------------------------------------------------------------

FROM scratch

COPY --from=build /build/libwhisper.so .


================================================
FILE: programs/asr/whisper-cpp/Dockerfile.libwhisper.dockerignore
================================================
*
!lib/Makefile


================================================
FILE: programs/asr/whisper-cpp/README.md
================================================
# Whisper.cpp

Speech to text service for Rhasspy based on [whisper.cpp](https://github.com/ggerganov/whisper.cpp/).

Additional models can be downloaded here: https://huggingface.co/datasets/ggerganov/whisper.cpp

## Installation

1. Copy the contents of this directory to `config/programs/asr/whisper-cpp/`
2. Run `script/setup.py`
3. Download a model with `script/download.py`
    * Example: `script/download.py en_tiny`
    * Models are downloaded to `config/data/asr/whisper-cpp` directory
4. Test with `script/wav2text`
    * Example `script/wav2text /path/to/ggml-tiny.en.bin /path/to/test.wav`


================================================
FILE: programs/asr/whisper-cpp/bin/whisper_cpp_server.py
================================================
#!/usr/bin/env python3
import argparse
import json
import logging
import os
import socket
import threading

import numpy as np
from whisper_cpp import Whisper

_LOGGER = logging.getLogger("whisper_cpp_server")


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("model", help="Path to whisper.cpp model file")
    parser.add_argument(
        "--socketfile", required=True, help="Path to Unix domain socket file"
    )
    parser.add_argument("--debug", action="store_true", help="Log DEBUG messages")
    args = parser.parse_args()

    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    # Need to unlink socket if it exists
    try:
        os.unlink(args.socketfile)
    except OSError:
        pass

    try:
        # Create socket server
        sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        sock.bind(args.socketfile)
        sock.listen()

        _LOGGER.debug("Loading model: %s", args.model)
        with Whisper(args.model) as whisper:
            _LOGGER.info("Ready")

            # Listen for connections
            while True:
                try:
                    connection, client_address = sock.accept()
                    _LOGGER.debug("Connection from %s", client_address)

                    # Start new thread for client
                    threading.Thread(
                        target=handle_client,
                        args=(connection, whisper),
                        daemon=True,
                    ).start()
                except KeyboardInterrupt:
                    break
                except Exception:
                    _LOGGER.exception("Error communicating with socket client")
    finally:
        os.unlink(args.socketfile)


def handle_client(connection: socket.socket, whisper: Whisper) -> None:
    try:
        is_first_audio = True

        with connection, connection.makefile(mode="rwb") as conn_file:
            audio_bytes = bytes()
            while True:
                event_info = json.loads(conn_file.readline())
                event_type = event_info["type"]

                if event_type == "audio-chunk":
                    if is_first_audio:
                        _LOGGER.debug("Receiving audio")
                        is_first_audio = False

                    num_bytes = event_info["payload_length"]
                    audio_bytes += conn_file.read(num_bytes)
                elif event_type == "audio-stop":
                    _LOGGER.debug("Audio stopped")
                    break

            audio_array = np.frombuffer(audio_bytes, dtype=np.int16)
            audio_array = audio_array.astype(np.float32) / 32768.0
            text = " ".join(whisper.transcribe(audio_array))
            _LOGGER.debug(text)

            transcript_str = (
                json.dumps(
                    {"type": "transcript", "data": {"text": text}}, ensure_ascii=False
                )
                + "\n"
            )
            conn_file.write(transcript_str.encode())
    except Exception:
        _LOGGER.exception("Unexpected error in client thread")


# -----------------------------------------------------------------------------

if __name__ == "__main__":
    main()


================================================
FILE: programs/asr/whisper-cpp/bin/whisper_cpp_wav2text.py
================================================
#!/usr/bin/env python3
import argparse
import audioop
import logging
import wave
from pathlib import Path

import numpy as np
from whisper_cpp import Whisper

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("model", help="Path to whisper.cpp model file")
    parser.add_argument("wav_file", nargs="+", help="Path to WAV file(s) to transcribe")
    parser.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    _LOGGER.debug("Loading model: %s", args.model)

    with Whisper(args.model) as whisper:
        for wav_path in args.wav_file:
            wav_file: wave.Wave_read = wave.open(wav_path, "rb")
            with wav_file:
                rate = wav_file.getframerate()
                width = wav_file.getsampwidth()
                channels = wav_file.getnchannels()
                audio_bytes = wav_file.readframes(wav_file.getnframes())

                if width != 2:
                    audio_bytes = audioop.lin2lin(audio_bytes, width, 2)

                if channels != 1:
                    audio_bytes = audioop.tomono(audio_bytes, 2, 1.0, 1.0)

                if rate != 16000:
                    audio_bytes, _state = audioop.ratecv(
                        audio_bytes, 2, 1, rate, 16000, None
                    )

                audio_array = np.frombuffer(audio_bytes, dtype=np.int16)
                audio_array = audio_array.astype(np.float32) / 32768.0

                text = " ".join(whisper.transcribe(audio_array))
                print(text)


# -----------------------------------------------------------------------------

if __name__ == "__main__":
    main()


================================================
FILE: programs/asr/whisper-cpp/lib/Makefile
================================================
UNAME_M := $(shell uname -m)

CFLAGS   = -Iwhisper.cpp -O3 -std=c11   -fPIC
CXXFLAGS = -Iwhisper.cpp -O3 -std=c++11 -fPIC --shared -static-libstdc++
LDFLAGS  =

# Linux
CFLAGS   += -pthread
CXXFLAGS += -pthread

AVX1_M := $(shell grep "avx " /proc/cpuinfo)
ifneq (,$(findstring avx,$(AVX1_M)))
	CFLAGS += -mavx
endif
AVX2_M := $(shell grep "avx2 " /proc/cpuinfo)
ifneq (,$(findstring avx2,$(AVX2_M)))
	CFLAGS += -mavx2
endif
FMA_M := $(shell grep "fma " /proc/cpuinfo)
ifneq (,$(findstring fma,$(FMA_M)))
	CFLAGS += -mfma
endif
F16C_M := $(shell grep "f16c " /proc/cpuinfo)
ifneq (,$(findstring f16c,$(F16C_M)))
	CFLAGS += -mf16c
endif
SSE3_M := $(shell grep "sse3 " /proc/cpuinfo)
ifneq (,$(findstring sse3,$(SSE3_M)))
	CFLAGS += -msse3
endif

# amd64 and arm64 only
ifeq ($(UNAME_M),amd64)
	CFLAGS += -mavx -mavx2 -mfma -mf16c
endif

ifneq ($(filter armv8%,$(UNAME_M)),)
	# Raspberry Pi 4
	CFLAGS += -mfp16-format=ieee -mno-unaligned-access
endif

default: libwhisper.so

libwhisper.so: whisper.cpp/ggml.o whisper.cpp/whisper.cpp
	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)


================================================
FILE: programs/asr/whisper-cpp/lib/whisper_cpp.py
================================================
import ctypes
from pathlib import Path
from typing import Iterable, Union

import numpy as np


# Must match struct in whisper.h
class WhisperFullParams(ctypes.Structure):
    _fields_ = [
        ("strategy", ctypes.c_int),
        #
        ("n_max_text_ctx", ctypes.c_int),
        ("n_threads", ctypes.c_int),
        ("offset_ms", ctypes.c_int),
        ("duration_ms", ctypes.c_int),
        #
        ("translate", ctypes.c_bool),
        ("no_context", ctypes.c_bool),
        ("single_segment", ctypes.c_bool),
        ("print_special", ctypes.c_bool),
        ("print_progress", ctypes.c_bool),
        ("print_realtime", ctypes.c_bool),
        ("print_timestamps", ctypes.c_bool),
        #
        ("token_timestamps", ctypes.c_bool),
        ("thold_pt", ctypes.c_float),
        ("thold_ptsum", ctypes.c_float),
        ("max_len", ctypes.c_int),
        ("max_tokens", ctypes.c_int),
        #
        ("speed_up", ctypes.c_bool),
        ("audio_ctx", ctypes.c_int),
        #
        ("prompt_tokens", ctypes.c_void_p),
        ("prompt_n_tokens", ctypes.c_int),
        #
        ("language", ctypes.c_char_p),
        #
        ("suppress_blank", ctypes.c_bool),
        #
        ("temperature_inc", ctypes.c_float),
        ("entropy_thold", ctypes.c_float),
        ("logprob_thold", ctypes.c_float),
        ("no_speech_thold", ctypes.c_float),
        #
        ("greedy", ctypes.c_int * 1),
        ("beam_search", ctypes.c_int * 3),
        #
        ("new_segment_callback", ctypes.c_void_p),
        ("new_segment_callback_user_data", ctypes.c_void_p),
        #
        ("encoder_begin_callback", ctypes.c_void_p),
        ("encoder_begin_callback_user_data", ctypes.c_void_p),
    ]


class WhisperError(Exception):
    pass


class Whisper:
    def __init__(
        self,
        model_path: Union[str, Path],
        libwhisper_path: Union[str, Path] = "libwhisper.so",
        language: str = "en",
    ):
        self.model_path = Path(model_path)
        self.whisper = ctypes.CDLL(str(libwhisper_path))

        # Set return types
        self.whisper.whisper_init_from_file.restype = ctypes.c_void_p
        self.whisper.whisper_full_default_params.restype = WhisperFullParams
        self.whisper.whisper_full_get_segment_text.restype = ctypes.c_char_p

        # initialize whisper.cpp context
        filename_bytes = str(self.model_path.absolute()).encode("utf-8")
        self.ctx = self.whisper.whisper_init_from_file(filename_bytes)

        # get default whisper parameters and adjust as needed
        self.params = self.whisper.whisper_full_default_params()
        self.params.print_realtime = False
        self.params.print_special = False
        self.params.print_progress = False
        self.params.print_timestamps = False
        self.params.language = language.encode("utf-8")

    def transcribe(self, audio_array: np.ndarray) -> Iterable[str]:
        """Transcribe float32 audio in [0, 1] to text."""
        result = self.whisper.whisper_full(
            ctypes.c_void_p(self.ctx),
            self.params,
            audio_array.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
            len(audio_array),
        )
        if result != 0:
            raise WhisperError(str(result))

        num_segments = self.whisper.whisper_full_n_segments(ctypes.c_void_p(self.ctx))
        for i in range(num_segments):
            text_bytes = self.whisper.whisper_full_get_segment_text(
                ctypes.c_void_p(self.ctx), i
            )
            yield text_bytes.decode("utf-8")

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.whisper.whisper_free(ctypes.c_void_p(self.ctx))
        self.whisper = None
        self.ctx = None


================================================
FILE: programs/asr/whisper-cpp/requirements.txt
================================================
numpy


================================================
FILE: programs/asr/whisper-cpp/script/build_libwhisper
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

docker buildx build "${base_dir}" \
    -f "${base_dir}/Dockerfile.libwhisper" \
    --platform 'linux/amd64,linux/arm64' \
    --output "type=local,dest=${base_dir}/build/"

# -----------------------------------------------------------------------------

echo "Copy the appropriate libwhisper.so from build/ to lib/"


================================================
FILE: programs/asr/whisper-cpp/script/download.py
================================================
#!/usr/bin/env python3
import argparse
import logging
import tarfile
from pathlib import Path
from urllib.request import urlopen

_DIR = Path(__file__).parent
_LOGGER = logging.getLogger("setup")

MODELS = ["tiny.en", "base.en"]


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "model",
        nargs="+",
        choices=MODELS,
        help="Pocketsphinx model(s) to download",
    )
    parser.add_argument(
        "--destination", help="Path to destination directory (default: share)"
    )
    parser.add_argument(
        "--link-format",
        default="https://github.com/rhasspy/models/releases/download/v1.0/asr_whisper-cpp-ggml-{model}.tar.gz",
        help="Format string for download URLs",
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.INFO)

    if args.destination:
        args.destination = Path(args.destination)
    else:
        # Assume we're in programs/asr/whisper-cpp/script
        data_dir = _DIR.parent.parent.parent.parent / "data"
        args.destination = data_dir / "asr" / "whisper-cpp"

    args.destination.parent.mkdir(parents=True, exist_ok=True)

    for model in args.model:
        url = args.link_format.format(model=model)
        _LOGGER.info("Downloading %s", url)
        with urlopen(url) as response:
            with tarfile.open(mode="r|*", fileobj=response) as tar_gz:
                _LOGGER.info("Extracting to %s", args.destination)
                tar_gz.extractall(args.destination)


if __name__ == "__main__":
    main()


================================================
FILE: programs/asr/whisper-cpp/script/server
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

if [ -d "${venv}" ]; then
    source "${venv}/bin/activate"
fi

socket_dir="${base_dir}/var/run"
mkdir -p "${socket_dir}"

lib_dir="${base_dir}/lib"
export LD_LIBRARY_PATH="${lib_dir}:${LD_LIBRARY_PATH}"
export PYTHONPATH="${lib_dir}:${PYTHONPATH}"

python3 "${base_dir}/bin/whisper_cpp_server.py" --socketfile "${socket_dir}/whisper-cpp.socket" "$@"


================================================
FILE: programs/asr/whisper-cpp/script/setup
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

# Python binary to use
: "${PYTHON=python3}"

python_version="$(${PYTHON} --version)"

if [ ! -d "${venv}" ]; then
    # Create virtual environment
    echo "Creating virtual environment at ${venv} (${python_version})"
    rm -rf "${venv}"
    "${PYTHON}" -m venv "${venv}"
    source "${venv}/bin/activate"

    pip3 install --upgrade pip
    pip3 install --upgrade wheel setuptools
else
    source "${venv}/bin/activate"
fi

# Install Python dependencies
echo 'Installing Python dependencies'
pip3 install -r "${base_dir}/requirements.txt"

python3 "${this_dir}/setup.py"

# -----------------------------------------------------------------------------

echo "OK"


================================================
FILE: programs/asr/whisper-cpp/script/setup.py
================================================
#!/usr/bin/env python3
import argparse
import logging
import platform
import tarfile
from pathlib import Path
from urllib.request import urlopen

_DIR = Path(__file__).parent
_LOGGER = logging.getLogger("setup")

PLATFORMS = {"x86_64": "amd64", "aarch64": "arm64"}


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--platform",
        help="CPU architecture to download (amd64, arm64)",
    )
    parser.add_argument(
        "--destination", help="Path to destination directory (default: lib)"
    )
    parser.add_argument(
        "--link-format",
        default="https://github.com/rhasspy/models/releases/download/v1.0/libwhisper_{platform}.tar.gz",
        help="Format string for download URLs",
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.INFO)

    if not args.platform:
        args.platform = platform.machine()

    args.platform = PLATFORMS.get(args.platform, args.platform)

    if not args.destination:
        args.destination = _DIR.parent / "lib"

    args.destination.parent.mkdir(parents=True, exist_ok=True)

    url = args.link_format.format(platform=args.platform)
    _LOGGER.info("Downloading %s", url)
    with urlopen(url) as response:
        with tarfile.open(mode="r|*", fileobj=response) as tar_gz:
            _LOGGER.info("Extracting to %s", args.destination)
            tar_gz.extractall(args.destination)


if __name__ == "__main__":
    main()


================================================
FILE: programs/asr/whisper-cpp/script/wav2text
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

if [ -d "${venv}" ]; then
    source "${venv}/bin/activate"
fi

lib_dir="${base_dir}/lib"
export LD_LIBRARY_PATH="${lib_dir}:${LD_LIBRARY_PATH}"
export PYTHONPATH="${lib_dir}:${PYTHONPATH}"

python3 "${base_dir}/bin/whisper_cpp_wav2text.py" "$@"


================================================
FILE: programs/handle/date_time/bin/date_time.py
================================================
#!/usr/bin/env python3
import re
import sys
from datetime import datetime


def main() -> None:
    text = sys.stdin.read().strip().lower()
    words = [re.sub(r"\W", "", word) for word in text.split()]

    now = datetime.now()
    if "time" in words:
        print(now.strftime("%I:%M %p"))
    elif "date" in words:
        print(now.strftime("%A, %B %d, %Y"))


if __name__ == "__main__":
    main()


================================================
FILE: programs/handle/home_assistant/bin/converse.py
================================================
#!/usr/bin/env python3
import argparse
import json
import logging
import sys
from pathlib import Path
from urllib.request import Request, urlopen

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "url",
        help="URL of API endpoint",
    )
    parser.add_argument("token_file", help="Path to file with authorization token")
    parser.add_argument("--language", help="Language code to use")
    parser.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    token = Path(args.token_file).read_text(encoding="utf-8").strip()
    headers = {"Authorization": f"Bearer {token}"}

    data_dict = {"text": sys.stdin.read()}
    if args.language:
        data_dict["language"] = args.language

    data = json.dumps(data_dict, ensure_ascii=False).encode("utf-8")
    request = Request(args.url, data=data, headers=headers)

    with urlopen(request) as response:
        response = json.loads(response.read())
        response_text = (
            response.get("response", {})
            .get("speech", {})
            .get("plain", {})
            .get("speech", "")
        )
        print(response_text)


if __name__ == "__main__":
    main()


================================================
FILE: programs/intent/regex/bin/regex.py
================================================
#!/usr/bin/env python3
import argparse
import re
from collections import defaultdict
from typing import Dict, List, Optional

from rhasspy3.event import read_event, write_event
from rhasspy3.intent import Entity, Intent, NotRecognized, Recognize


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-i",
        "--intent",
        required=True,
        nargs=2,
        metavar=("name", "regex"),
        action="append",
        default=[],
        help="Intent name and regex",
    )
    args = parser.parse_args()

    # intent name -> [pattern]
    patterns: Dict[str, List[re.Pattern]] = defaultdict(list)

    for intent_name, pattern_str in args.intent:
        patterns[intent_name].append(re.compile(pattern_str, re.IGNORECASE))

    try:
        while True:
            event = read_event()
            if event is None:
                break

            if Recognize.is_type(event.type):
                recognize = Recognize.from_event(event)
                text = _clean(recognize.text)
                intent = _recognize(text, patterns)
                if intent is None:
                    write_event(NotRecognized().event())
                else:
                    write_event(intent.event())
    except KeyboardInterrupt:
        pass


def _clean(text: str) -> str:
    text = " ".join(text.split())
    return text


def _recognize(text: str, patterns: Dict[str, List[re.Pattern]]) -> Optional[Intent]:
    for intent_name, intent_patterns in patterns.items():
        for intent_pattern in intent_patterns:
            match = intent_pattern.match(text)
            if match is None:
                continue

            return Intent(
                name=intent_name,
                entities=[
                    Entity(name=name, value=value)
                    for name, value in match.groupdict().items()
                ],
            )

    return None


if __name__ == "__main__":
    main()


================================================
FILE: programs/mic/pyaudio/README.md
================================================
# PyAudio

Audio input service for Rhasspy based on [PyAudio](https://people.csail.mit.edu/hubert/pyaudio/docs/).


================================================
FILE: programs/mic/pyaudio/bin/pyaudio_events.py
================================================
#!/usr/bin/env python3
import argparse
import logging
from pathlib import Path

from pyaudio_shared import iter_chunks

from rhasspy3.audio import (
    DEFAULT_IN_CHANNELS,
    DEFAULT_IN_RATE,
    DEFAULT_IN_WIDTH,
    DEFAULT_SAMPLES_PER_CHUNK,
    AudioChunk,
)
from rhasspy3.event import write_event

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--rate", type=int, default=DEFAULT_IN_RATE, help="Sample rate (hertz)"
    )
    parser.add_argument(
        "--width", type=int, default=DEFAULT_IN_WIDTH, help="Sample width (bytes)"
    )
    parser.add_argument(
        "--channels", type=int, default=DEFAULT_IN_CHANNELS, help="Sample channel count"
    )
    parser.add_argument(
        "--samples-per-chunk",
        type=int,
        default=DEFAULT_SAMPLES_PER_CHUNK,
        help="Number of samples to process at a time",
    )
    parser.add_argument("--device", help="Name or index of device to use")
    #
    parser.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    for chunk in iter_chunks(
        args.device, args.rate, args.width, args.channels, args.samples_per_chunk
    ):
        write_event(AudioChunk(args.rate, args.width, args.channels, chunk).event())


if __name__ == "__main__":
    main()


================================================
FILE: programs/mic/pyaudio/bin/pyaudio_list_mics.py
================================================
#!/usr/bin/env python3

import pyaudio


def main() -> None:
    audio_system = pyaudio.PyAudio()
    for i in range(audio_system.get_device_count()):
        print(audio_system.get_device_info_by_index(i))


if __name__ == "__main__":
    main()


================================================
FILE: programs/mic/pyaudio/bin/pyaudio_raw.py
================================================
#!/usr/bin/env python3
import argparse
import logging
import sys
from pathlib import Path

from pyaudio_shared import iter_chunks

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("--rate", type=int, required=True, help="Sample rate (hertz)")
    parser.add_argument("--width", type=int, required=True, help="Sample width (bytes)")
    parser.add_argument(
        "--channels", type=int, required=True, help="Sample channel count"
    )
    parser.add_argument(
        "--samples-per-chunk",
        type=int,
        required=True,
        help="Number of samples to process at a time",
    )
    parser.add_argument("--device", help="Name or index of device to use")
    #
    parser.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    for chunk in iter_chunks(
        args.device, args.rate, args.width, args.channels, args.samples_per_chunk
    ):
        sys.stdout.buffer.write(chunk)


if __name__ == "__main__":
    main()


================================================
FILE: programs/mic/pyaudio/bin/pyaudio_shared.py
================================================
import logging
from pathlib import Path
from typing import Iterable, Optional, Union

import pyaudio

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)


def iter_chunks(
    device: Optional[Union[int, str]],
    rate: int,
    width: int,
    channels: int,
    samples_per_chunk: int,
) -> Iterable[bytes]:
    """Open input stream and yield audio chunks."""
    audio_system = pyaudio.PyAudio()
    try:
        if isinstance(device, str):
            try:
                device = int(device)
            except ValueError:
                for i in range(audio_system.get_device_count()):
                    info = audio_system.get_device_info_by_index(i)
                    if device == info["name"]:
                        device = i
                        break

                assert device is not None, f"No device named: {device}"

        _LOGGER.debug("Device: %s", device)
        stream = audio_system.open(
            input_device_index=device,
            format=audio_system.get_format_from_width(width),
            channels=channels,
            rate=rate,
            input=True,
            frames_per_buffer=samples_per_chunk,
        )

        chunk = stream.read(samples_per_chunk)
        while chunk:
            yield chunk
            chunk = stream.read(samples_per_chunk)
    except KeyboardInterrupt:
        pass
    finally:
        audio_system.terminate()


================================================
FILE: programs/mic/pyaudio/requirements.txt
================================================
pyaudio


================================================
FILE: programs/mic/pyaudio/script/events
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

if [ -d "${venv}" ]; then
    source "${venv}/bin/activate"
fi

python3 "${base_dir}/bin/pyaudio_events.py" "$@"


================================================
FILE: programs/mic/pyaudio/script/list_mics
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

if [ -d "${venv}" ]; then
    source "${venv}/bin/activate"
fi

python3 "${base_dir}/bin/pyaudio_list_mics.py" "$@"


================================================
FILE: programs/mic/pyaudio/script/raw
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

if [ -d "${venv}" ]; then
    source "${venv}/bin/activate"
fi

python3 "${base_dir}/bin/pyaudio_raw.py" "$@"


================================================
FILE: programs/mic/pyaudio/script/setup
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

# Python binary to use
: "${PYTHON=python3}"

python_version="$(${PYTHON} --version)"

if [ ! -d "${venv}" ]; then
    # Create virtual environment
    echo "Creating virtual environment at ${venv} (${python_version})"
    rm -rf "${venv}"
    "${PYTHON}" -m venv "${venv}"
    source "${venv}/bin/activate"

    pip3 install --upgrade pip
    pip3 install --upgrade wheel setuptools
else
    source "${venv}/bin/activate"
fi

# Install Python dependencies
echo 'Installing Python dependencies'
pip3 install -r "${base_dir}/requirements.txt"

# Install rhasspy3
rhasspy3_dir="${base_dir}/../../../.."
pip3 install -e "${rhasspy3_dir}"

# -----------------------------------------------------------------------------

echo "OK"


================================================
FILE: programs/mic/sounddevice/README.md
================================================
# sounddevice

Audio input service for Rhasspy based on [sounddevice](https://python-sounddevice.readthedocs.io).


================================================
FILE: programs/mic/sounddevice/bin/sounddevice_events.py
================================================
#!/usr/bin/env python3
import argparse
import logging
from pathlib import Path

from sounddevice_shared import iter_chunks

from rhasspy3.audio import (
    DEFAULT_IN_CHANNELS,
    DEFAULT_IN_RATE,
    DEFAULT_IN_WIDTH,
    DEFAULT_SAMPLES_PER_CHUNK,
    AudioChunk,
)
from rhasspy3.event import write_event

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--rate", type=int, default=DEFAULT_IN_RATE, help="Sample rate (hertz)"
    )
    parser.add_argument(
        "--width", type=int, default=DEFAULT_IN_WIDTH, help="Sample width (bytes)"
    )
    parser.add_argument(
        "--channels", type=int, default=DEFAULT_IN_CHANNELS, help="Sample channel count"
    )
    parser.add_argument(
        "--samples-per-chunk",
        type=int,
        default=DEFAULT_SAMPLES_PER_CHUNK,
        help="Number of samples to process at a time",
    )
    parser.add_argument("--device", help="Name or index of device to use")
    #
    parser.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    for chunk in iter_chunks(
        args.device, args.rate, args.width, args.channels, args.samples_per_chunk
    ):
        write_event(AudioChunk(args.rate, args.width, args.channels, chunk).event())


if __name__ == "__main__":
    main()


================================================
FILE: programs/mic/sounddevice/bin/sounddevice_list_mics.py
================================================
#!/usr/bin/env python3

import sounddevice


def main() -> None:
    for info in sounddevice.query_devices():
        print(info)


if __name__ == "__main__":
    main()


================================================
FILE: programs/mic/sounddevice/bin/sounddevice_raw.py
================================================
#!/usr/bin/env python3
import argparse
import logging
import sys
from pathlib import Path

from sounddevice_shared import iter_chunks

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("--rate", type=int, required=True, help="Sample rate (hertz)")
    parser.add_argument("--width", type=int, required=True, help="Sample width (bytes)")
    parser.add_argument(
        "--channels", type=int, required=True, help="Sample channel count"
    )
    parser.add_argument(
        "--samples-per-chunk",
        type=int,
        required=True,
        help="Number of samples to process at a time",
    )
    parser.add_argument("--device", help="Name or index of device to use")
    #
    parser.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    for chunk in iter_chunks(
        args.device, args.rate, args.width, args.channels, args.samples_per_chunk
    ):
        sys.stdout.buffer.write(chunk)


if __name__ == "__main__":
    main()


================================================
FILE: programs/mic/sounddevice/bin/sounddevice_shared.py
================================================
import logging
from pathlib import Path
from typing import Iterable, Optional, Union

import sounddevice

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)


def iter_chunks(
    device: Optional[Union[int, str]],
    rate: int,
    width: int,
    channels: int,
    samples_per_chunk: int,
) -> Iterable[bytes]:
    """Open input stream and yield audio chunks."""
    try:
        if isinstance(device, str):
            try:
                device = int(device)
            except ValueError:
                for i, info in enumerate(sounddevice.query_devices()):
                    if device == info["name"]:
                        device = i
                        break

                assert device is not None, f"No device named: {device}"

        _LOGGER.debug("Device: %s", device)

        with sounddevice.RawInputStream(
            samplerate=rate,
            blocksize=samples_per_chunk,
            device=device,
            channels=channels,
            dtype="int16",
        ) as stream:
            chunk, _overflowed = stream.read(samples_per_chunk)
            while chunk:
                yield chunk
                chunk, _overflowed = stream.read(samples_per_chunk)
    except KeyboardInterrupt:
        pass


================================================
FILE: programs/mic/sounddevice/requirements.txt
================================================
sounddevice


================================================
FILE: programs/mic/sounddevice/script/events
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

if [ -d "${venv}" ]; then
    source "${venv}/bin/activate"
fi

python3 "${base_dir}/bin/sounddevice_events.py" "$@"


================================================
FILE: programs/mic/sounddevice/script/list_mics
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

if [ -d "${venv}" ]; then
    source "${venv}/bin/activate"
fi

python3 -m sounddevice


================================================
FILE: programs/mic/sounddevice/script/raw
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

if [ -d "${venv}" ]; then
    source "${venv}/bin/activate"
fi

python3 "${base_dir}/bin/sounddevice_raw.py" "$@"


================================================
FILE: programs/mic/sounddevice/script/setup
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

# Python binary to use
: "${PYTHON=python3}"

python_version="$(${PYTHON} --version)"

if [ ! -d "${venv}" ]; then
    # Create virtual environment
    echo "Creating virtual environment at ${venv} (${python_version})"
    rm -rf "${venv}"
    "${PYTHON}" -m venv "${venv}"
    source "${venv}/bin/activate"

    pip3 install --upgrade pip
    pip3 install --upgrade wheel setuptools
else
    source "${venv}/bin/activate"
fi

# Install Python dependencies
echo 'Installing Python dependencies'
pip3 install -r "${base_dir}/requirements.txt"

# Install rhasspy3
rhasspy3_dir="${base_dir}/../../../.."
pip3 install -e "${rhasspy3_dir}"

# -----------------------------------------------------------------------------

echo "OK"


================================================
FILE: programs/mic/udp_raw/bin/udp_raw.py
================================================
#!/usr/bin/env python3
import argparse
import socketserver
from functools import partial

from rhasspy3.audio import (
    DEFAULT_IN_CHANNELS,
    DEFAULT_IN_RATE,
    DEFAULT_IN_WIDTH,
    AudioChunk,
)
from rhasspy3.event import write_event


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("--port", type=int, required=True)
    parser.add_argument("--host", default="0.0.0.0")
    #
    parser.add_argument(
        "--rate", type=int, default=DEFAULT_IN_RATE, help="Sample rate (hertz)"
    )
    parser.add_argument(
        "--width", type=int, default=DEFAULT_IN_WIDTH, help="Sample width (bytes)"
    )
    parser.add_argument(
        "--channels",
        type=int,
        default=DEFAULT_IN_CHANNELS,
        help="Sample channel count",
    )
    args = parser.parse_args()

    with socketserver.UDPServer(
        (args.host, args.port),
        partial(MicUDPHandler, args.rate, args.width, args.channels),
    ) as server:
        server.serve_forever()


class MicUDPHandler(socketserver.BaseRequestHandler):
    def __init__(self, rate: int, width: int, channels: int, *args, **kwargs):
        self.rate = rate
        self.width = width
        self.channels = channels
        self.state = None
        super().__init__(*args, **kwargs)

    def handle(self):
        audio_bytes = self.request[0]
        write_event(
            AudioChunk(
                rate=self.rate,
                width=self.width,
                channels=self.channels,
                audio=audio_bytes,
            ).event()
        )


if __name__ == "__main__":
    main()


================================================
FILE: programs/remote/websocket/bin/stream2stream.py
================================================
#!/usr/bin/env python3
import argparse
import asyncio
import json
import logging
import sys
from pathlib import Path
from typing import Optional

from websockets import connect
from websockets.exceptions import ConnectionClosedOK

from rhasspy3.audio import AudioChunk, AudioStart, AudioStop
from rhasspy3.event import Event, read_event, write_event

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)


async def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("uri")
    args = parser.parse_args()

    async with connect(args.uri) as websocket:
        recv_task = asyncio.create_task(websocket.recv())
        pending = {recv_task}

        while True:
            mic_event = read_event()
            if mic_event is None:
                break

            if not AudioChunk.is_type(mic_event.type):
                continue

            mic_chunk = AudioChunk.from_event(mic_event)
            send_task = asyncio.create_task(websocket.send(mic_chunk.audio))
            pending.add(send_task)

            done, pending = await asyncio.wait(
                pending, return_when=asyncio.FIRST_COMPLETED
            )
            if recv_task in done:
                for task in pending:
                    task.cancel()
                break

            if send_task not in done:
                await send_task

        try:
            while True:
                start: Optional[AudioStart] = None
                data = await websocket.recv()
                try:
                    event = Event.from_dict(json.loads(data))
                    if AudioStart.is_type(event.type):
                        start = AudioStart.from_event(event)
                        break
                except Exception:
                    continue

            assert start is not None

            while True:
                data = await websocket.recv()
                if isinstance(data, bytes):
                    write_event(
                        AudioChunk(
                            start.rate,
                            start.width,
                            start.channels,
                            data,
                        ).event()
                    )
                else:
                    try:
                        event = Event.from_dict(json.loads(data))
                        if AudioStop.is_type(event.type):
                            break
                    except Exception:
                        pass
        except ConnectionClosedOK:
            pass
        finally:
            write_event(AudioStop().event())


async def play(websocket, done_event: asyncio.Event):
    try:
        while True:
            audio_bytes = await websocket.recv()
            if isinstance(audio_bytes, bytes):
                done_event.set()
                sys.stdout.buffer.write(audio_bytes)
                sys.stdout.buffer.flush()
    except ConnectionClosedOK:
        pass


if __name__ == "__main__":
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        pass


================================================
FILE: programs/remote/websocket/requirements.txt
================================================
websockets


================================================
FILE: programs/remote/websocket/script/run
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

if [ -d "${venv}" ]; then
    source "${venv}/bin/activate"
fi

python3 "${base_dir}/bin/stream2stream.py" "$@"


================================================
FILE: programs/remote/websocket/script/setup
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

# Python binary to use
: "${PYTHON=python3}"

python_version="$(${PYTHON} --version)"

if [ ! -d "${venv}" ]; then
    # Create virtual environment
    echo "Creating virtual environment at ${venv} (${python_version})"
    rm -rf "${venv}"
    "${PYTHON}" -m venv "${venv}"
    source "${venv}/bin/activate"

    pip3 install --upgrade pip
    pip3 install --upgrade wheel setuptools
else
    source "${venv}/bin/activate"
fi

# Install Python dependencies
echo 'Installing Python dependencies'
pip3 install -r "${base_dir}/requirements.txt"

# Install rhasspy3
rhasspy3_dir="${base_dir}/../../../.."
pip3 install -e "${rhasspy3_dir}"

# -----------------------------------------------------------------------------

echo "OK"


================================================
FILE: programs/snd/udp_raw/bin/udp_raw.py
================================================
#!/usr/bin/env python3
import argparse
import socket

from rhasspy3.audio import (
    DEFAULT_OUT_CHANNELS,
    DEFAULT_OUT_RATE,
    DEFAULT_OUT_WIDTH,
    AudioChunk,
    AudioChunkConverter,
    AudioStop,
)
from rhasspy3.event import read_event, write_event
from rhasspy3.snd import Played


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("--port", type=int, required=True)
    parser.add_argument("--host", required=True)
    #
    parser.add_argument(
        "--rate", type=int, default=DEFAULT_OUT_RATE, help="Sample rate (hertz)"
    )
    parser.add_argument(
        "--width", type=int, default=DEFAULT_OUT_WIDTH, help="Sample width (bytes)"
    )
    parser.add_argument(
        "--channels",
        type=int,
        default=DEFAULT_OUT_CHANNELS,
        help="Sample channel count",
    )
    #
    args = parser.parse_args()

    converter = AudioChunkConverter(args.rate, args.width, args.channels)
    sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    while True:
        event = read_event()
        if event is None:
            break

        if AudioChunk.is_type(event.type):
            chunk = AudioChunk.from_event(event)
            chunk = converter.convert(chunk)
            sock.sendto(chunk.audio, (args.host, args.port))
        elif AudioStop.is_type(event.type):
            break

    write_event(Played().event())


if __name__ == "__main__":
    main()


================================================
FILE: programs/tts/coqui-tts/README.md
================================================
# Coqui-TTS

Text to speech service for Rhasspy based on [Coqui-TTS](https://tts.readthedocs.io).


================================================
FILE: programs/tts/coqui-tts/requirements.txt
================================================
tts


================================================
FILE: programs/tts/coqui-tts/script/list_models
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

if [ -d "${venv}" ]; then
    source "${venv}/bin/activate"
fi

tts --list_models "$@"


================================================
FILE: programs/tts/coqui-tts/script/server
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

if [ -d "${venv}" ]; then
    source "${venv}/bin/activate"
fi

tts-server --model_name 'tts_models/en/ljspeech/vits' "$@"


================================================
FILE: programs/tts/coqui-tts/script/setup
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

# Python binary to use
: "${PYTHON=python3}"

python_version="$(${PYTHON} --version)"

if [ ! -d "${venv}" ]; then
    # Create virtual environment
    echo "Creating virtual environment at ${venv} (${python_version})"
    rm -rf "${venv}"
    "${PYTHON}" -m venv "${venv}"
    source "${venv}/bin/activate"

    pip3 install --upgrade pip
    pip3 install --upgrade wheel setuptools
else
    source "${venv}/bin/activate"
fi

# Install Python dependencies
echo 'Installing Python dependencies'
pip3 install -r "${base_dir}/requirements.txt"

# -----------------------------------------------------------------------------

echo "OK"


================================================
FILE: programs/tts/flite/script/download.py
================================================
#!/usr/bin/env python3
import argparse
import logging
import tarfile
from pathlib import Path
from urllib.request import urlopen

_DIR = Path(__file__).parent
_LOGGER = logging.getLogger("download")


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "language",
        nargs="+",
        choices=("en", "indic"),
        help="Voice language(s) to download",
    )
    parser.add_argument(
        "--destination", help="Path to destination directory (default: share)"
    )
    parser.add_argument(
        "--link-format",
        default="https://github.com/rhasspy/models/releases/download/v1.0/tts_flite-{language}.tar.gz",
        help="Format string for download URLs",
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.INFO)

    if not args.destination:
        args.destination = _DIR.parent / "share"

    args.destination.parent.mkdir(parents=True, exist_ok=True)

    for language in args.language:
        url = args.link_format.format(language=language)
        _LOGGER.info("Downloading %s", url)
        with urlopen(url) as response:
            with tarfile.open(mode="r|*", fileobj=response) as tar_gz:
                _LOGGER.info("Extracting to %s", args.destination)
                tar_gz.extractall(args.destination)


if __name__ == "__main__":
    main()


================================================
FILE: programs/tts/flite/script/setup
================================================
#!/usr/bin/env bash
sudo apt-get update
sudo apt-get install flite

# -----------------------------------------------------------------------------

echo "OK"


================================================
FILE: programs/tts/larynx/README.md
================================================
# Larynx

Text to speech service for Rhasspy based on [Larynx](https://github.com/rhasspy/larynx/).


================================================
FILE: programs/tts/larynx/bin/larynx_client.py
================================================
#!/usr/bin/env python3
import argparse
import logging
import shutil
import sys
from pathlib import Path
from urllib.parse import urlencode
from urllib.request import urlopen

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "url",
        help="URL of API endpoint",
    )
    parser.add_argument("voice", help="VOICE parameter")
    parser.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    params = {"INPUT_TEXT": sys.stdin.read(), "VOICE": args.voice}
    url = args.url + "?" + urlencode(params)

    _LOGGER.debug(url)

    with urlopen(url) as response:
        shutil.copyfileobj(response, sys.stdout.buffer)


if __name__ == "__main__":
    main()


================================================
FILE: programs/tts/larynx/requirements.txt
================================================
larynx


================================================
FILE: programs/tts/larynx/script/list_models
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

if [ -d "${venv}" ]; then
    source "${venv}/bin/activate"
fi

larynx --list "$@"


================================================
FILE: programs/tts/larynx/script/server
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

if [ -d "${venv}" ]; then
    source "${venv}/bin/activate"
fi

larynx-server "$@"


================================================
FILE: programs/tts/larynx/script/setup
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

# Python binary to use
: "${PYTHON=python3}"

python_version="$(${PYTHON} --version)"

if [ ! -d "${venv}" ]; then
    # Create virtual environment
    echo "Creating virtual environment at ${venv} (${python_version})"
    rm -rf "${venv}"
    "${PYTHON}" -m venv "${venv}"
    source "${venv}/bin/activate"

    pip3 install --upgrade pip
    pip3 install --upgrade wheel setuptools
else
    source "${venv}/bin/activate"
fi

# Install Python dependencies
echo 'Installing Python dependencies'
pip3 install -r "${base_dir}/requirements.txt" \
    'jinja2<3.1.0' \
    -f 'https://synesthesiam.github.io/prebuilt-apps/' \
    -f 'https://download.pytorch.org/whl/cpu/torch_stable.html'

# -----------------------------------------------------------------------------

echo "OK"


================================================
FILE: programs/tts/marytts/bin/marytts.py
================================================
#!/usr/bin/env python3
import argparse
import logging
import shutil
import sys
from pathlib import Path
from urllib.parse import urlencode
from urllib.request import urlopen

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "url",
        help="URL of API endpoint",
    )
    parser.add_argument("voice", help="VOICE parameter")
    parser.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    params = {"INPUT_TEXT": sys.stdin.read(), "VOICE": args.voice}
    url = args.url + "?" + urlencode(params)

    _LOGGER.debug(url)

    with urlopen(url) as response:
        shutil.copyfileobj(response, sys.stdout.buffer)


if __name__ == "__main__":
    main()


================================================
FILE: programs/tts/mimic3/README.md
================================================
# Mimic 3

Text to speech service for Rhasspy based on [Mimic 3](https://github.com/mycroftAI/mimic3).


================================================
FILE: programs/tts/mimic3/bin/mimic3_server.py
================================================
#!/usr/bin/env python3
import argparse
import json
import logging
import os
import socket
import threading
from pathlib import Path

from mimic3_tts import (
    DEFAULT_VOICE,
    AudioResult,
    Mimic3Settings,
    Mimic3TextToSpeechSystem,
)

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("voices_dir", help="Path to directory with <language>/<voice>")
    parser.add_argument("--voice", default=DEFAULT_VOICE, help="Name of voice to use")
    parser.add_argument(
        "--socketfile", required=True, help="Path to Unix domain socket file"
    )
    parser.add_argument("--debug", action="store_true", help="Log DEBUG messages")
    args = parser.parse_args()

    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    # Need to unlink socket if it exists
    try:
        os.unlink(args.socketfile)
    except OSError:
        pass

    try:
        # Create socket server
        sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        sock.bind(args.socketfile)
        sock.listen()

        mimic3 = Mimic3TextToSpeechSystem(
            Mimic3Settings(
                voices_directories=[args.voices_dir],
                voices_download_dir=args.voices_dir,
            )
        )

        if "#" in args.voice:
            # Case to handle a multi-speaker voice definition
            voice, _speaker = args.voice.split("#", maxsplit=1)
            _LOGGER.debug("Preloading voice: %s", voice)
            mimic3.preload_voice(voice)
        else:
            _LOGGER.debug("Preloading voice: %s", args.voice)
            mimic3.preload_voice(args.voice)

        _LOGGER.info("Ready")

        mimic3.voice = args.voice

        # Listen for connections
        while True:
            try:
                connection, client_address = sock.accept()
                _LOGGER.debug("Connection from %s", client_address)

                # Start new thread for client
                threading.Thread(
                    target=handle_client,
                    args=(connection, mimic3),
                    daemon=True,
                ).start()
            except KeyboardInterrupt:
                break
            except Exception:
                _LOGGER.exception("Error communicating with socket client")
    finally:
        os.unlink(args.socketfile)


def handle_client(connection: socket.socket, mimic3: Mimic3TextToSpeechSystem) -> None:
    try:
        with connection, connection.makefile(mode="rwb") as conn_file:
            while True:
                event_info = json.loads(conn_file.readline())
                event_type = event_info["type"]

                if event_type == "synthesize":
                    text = event_info["data"]["text"]
                    _LOGGER.debug("synthesize: text='%s'", text)

                    mimic3.begin_utterance()
                    mimic3.speak_text(text)
                    results = mimic3.end_utterance()

                    is_first_audio = True
                    for result in results:
                        if not isinstance(result, AudioResult):
                            continue

                        data = {
                            "rate": result.sample_rate_hz,
                            "width": result.sample_width_bytes,
                            "channels": result.num_channels,
                        }

                        if is_first_audio:
                            is_first_audio = False
                            conn_file.write(
                                (
                                    json.dumps({"type": "audio-start", "data": data})
                                    + "\n"
                                ).encode()
                            )

                        conn_file.write(
                            (
                                json.dumps(
                                    {
                                        "type": "audio-chunk",
                                        "data": data,
                                        "payload_length": len(result.audio_bytes),
                                    }
                                )
                                + "\n"
                            ).encode()
                        )
                        conn_file.write(result.audio_bytes)

                    conn_file.write(
                        (
                            json.dumps({"type": "audio-stop"}, ensure_ascii=False)
                            + "\n"
                        ).encode()
                    )
                    break
    except Exception:
        _LOGGER.exception("Unexpected error in client thread")


# -----------------------------------------------------------------------------

if __name__ == "__main__":
    main()


================================================
FILE: programs/tts/mimic3/requirements.txt
================================================
mycroft-mimic3-tts[all]


================================================
FILE: programs/tts/mimic3/script/server
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

if [ -d "${venv}" ]; then
    source "${venv}/bin/activate"
fi

socket_dir="${base_dir}/var/run"
mkdir -p "${socket_dir}"

python3 "${base_dir}/bin/mimic3_server.py" --socketfile "${socket_dir}/mimic3.socket" "$@"


================================================
FILE: programs/tts/mimic3/script/setup
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

# Python binary to use
: "${PYTHON=python3}"

python_version="$(${PYTHON} --version)"

if [ ! -d "${venv}" ]; then
    # Create virtual environment
    echo "Creating virtual environment at ${venv} (${python_version})"
    rm -rf "${venv}"
    "${PYTHON}" -m venv "${venv}"
    source "${venv}/bin/activate"

    pip3 install --upgrade pip
    pip3 install --upgrade wheel setuptools
else
    source "${venv}/bin/activate"
fi

# Install Python dependencies
echo 'Installing Python dependencies'
pip3 install -r "${base_dir}/requirements.txt"

mimic3-download --output-dir "${base_dir}/share" 'apope'

# -----------------------------------------------------------------------------

echo "OK"


================================================
FILE: programs/tts/piper/README.md
================================================
# Piper

Text to speech service for Rhasspy based on [Piper](https://github.com/rhasspy/piper).


## Installation

1. Copy the contents of this directory to `config/programs/tts/piper/`
2. Run `script/setup`
3. Download a model with `script/download.py`
    * Example: `script/download.py english`
    * Models are downloaded to `config/data/tts/piper` directory
4. Test with `bin/piper`
    * Example `echo 'Welcome to the world of speech synthesis.' | bin/piper --model /path/to/en-us-blizzard_lessac-medium.onnx --output_file welcome.wav`


================================================
FILE: programs/tts/piper/bin/piper_server.py
================================================
#!/usr/bin/env python3
import argparse
import json
import logging
import os
import socket
import subprocess
import tempfile
import wave
from pathlib import Path

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("model", help="Path to model file (.onnx)")
    parser.add_argument(
        "--socketfile", required=True, help="Path to Unix domain socket file"
    )
    parser.add_argument(
        "--auto-punctuation", default=".?!", help="Automatically add punctuation"
    )
    parser.add_argument("--debug", action="store_true", help="Log DEBUG messages")
    args = parser.parse_args()

    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    # Need to unlink socket if it exists
    try:
        os.unlink(args.socketfile)
    except OSError:
        pass

    try:
        # Create socket server
        sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        sock.bind(args.socketfile)
        sock.listen()

        with tempfile.TemporaryDirectory() as temp_dir:
            command = [
                str(_DIR / "piper"),
                "--model",
                str(args.model),
                "--output_dir",
                temp_dir,
            ]
            with subprocess.Popen(
                command,
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                universal_newlines=True,
            ) as proc:
                _LOGGER.info("Ready")

                # Listen for connections
                while True:
                    try:
                        connection, client_address = sock.accept()
                        _LOGGER.debug("Connection from %s", client_address)
                        handle_connection(connection, proc, args)
                    except KeyboardInterrupt:
                        break
                    except Exception:
                        _LOGGER.exception("Error communicating with socket client")
    finally:
        os.unlink(args.socketfile)


def handle_connection(
    connection: socket.socket, proc: subprocess.Popen, args: argparse.Namespace
) -> None:
    assert proc.stdin is not None
    assert proc.stdout is not None

    with connection, connection.makefile(mode="rwb") as conn_file:
        while True:
            event_info = json.loads(conn_file.readline())
            event_type = event_info["type"]

            if event_type != "synthesize":
                continue

            raw_text = event_info["data"]["text"]
            text = raw_text.strip()
            if args.auto_punctuation and text:
                has_punctuation = False
                for punc_char in args.auto_punctuation:
                    if text[-1] == punc_char:
                        has_punctuation = True
                        break

                if not has_punctuation:
                    text = text + args.auto_punctuation[0]

            _LOGGER.debug("synthesize: raw_text=%s, text='%s'", raw_text, text)

            # Text in, file path out
            print(text.strip(), file=proc.stdin, flush=True)
            output_path = proc.stdout.readline().strip()
            _LOGGER.debug(output_path)

            wav_file: wave.Wave_read = wave.open(output_path, "rb")
            with wav_file:
                data = {
                    "rate": wav_file.getframerate(),
                    "width": wav_file.getsampwidth(),
                    "channels": wav_file.getnchannels(),
                }

                conn_file.write(
                    (
                        json.dumps(
                            {"type": "audio-start", "data": data}, ensure_ascii=False
                        )
                        + "\n"
                    ).encode()
                )

                # Audio
                audio_bytes = wav_file.readframes(wav_file.getnframes())
                conn_file.write(
                    (
                        json.dumps(
                            {
                                "type": "audio-chunk",
                                "data": data,
                                "payload_length": len(audio_bytes),
                            },
                            ensure_ascii=False,
                        )
                        + "\n"
                    ).encode()
                )
                conn_file.write(audio_bytes)

            conn_file.write(
                (json.dumps({"type": "audio-stop"}, ensure_ascii=False) + "\n").encode()
            )
            os.unlink(output_path)
            break


# -----------------------------------------------------------------------------

if __name__ == "__main__":
    main()


================================================
FILE: programs/tts/piper/script/download.py
================================================
#!/usr/bin/env python3
import argparse
import logging
import tarfile
from pathlib import Path
from urllib.request import urlopen

_DIR = Path(__file__).parent
_LOGGER = logging.getLogger("setup")

_VOICE_NAMES = [
    "ca-upc_ona-x-low",
    "ca-upc_pau-x-low",
    "da-nst_talesyntese-medium",
    "de-eva_k-x-low",
    "de-thorsten-low",
    "en-gb-alan-low",
    "en-gb-southern_english_female-low",
    "en-us-amy-low",
    "en-us-kathleen-low",
    "en-us-lessac-low",
    "en-us-lessac-medium",
    "en-us-libritts-high",
    "en-us-ryan-high",
    "en-us-ryan-low",
    "en-us-ryan-medium",
    "es-carlfm-x-low",
    "fi-harri-low",
    "fr-siwis-low",
    "fr-siwis-medium",
    "it-riccardo_fasol-x-low",
    "kk-iseke-x-low",
    "kk-issai-high",
    "kk-raya-x-low",
    "ne-google-medium",
    "ne-google-x-low",
    "nl-mls_7432-low",
    "nl-nathalie-x-low",
    "nl-rdh-medium",
    "nl-rdh-x-low",
    "no-talesyntese-medium",
    "pl-mls_6892-low",
    "pt-br-edresson-low",
    "uk-lada-x-low",
    "vi-25hours-single-low",
    "vi-vivos-x-low",
    "zh-cn-huayan-x-low",
]

_VOICES = {
    "catalan": "ca",
    "ca": "ca-upc_ona-x-low",
    #
    "danish": "da",
    "da": "da-nst_talesyntese-medium",
    #
    "german": "de",
    "de": "de-thorsten-low",
    #
    "english": "en",
    "en": "en-us",
    "en-us": "en-us-lessac-low",
    "en-gb": "en-gb-alan-low",
    #
    "spanish": "es",
    "es": "es-carlfm-x-low",
    #
    "french": "fr",
    "fr": "fr-siwis-low",
    #
    "italian": "it",
    "it": "it-riccardo_fasol-x-low",
    #
    "kazakh": "kk",
    "kk": "kk-iseke-x-low",
    #
    "nepali": "ne",
    "ne": "ne-google-x-low",
    #
    "dutch": "nl",
    "nl": "nl-rdh-x-low",
    #
    "norwegian": "no",
    "no": "no-talesyntese-medium",
    #
    "polish": "pl",
    "pl": "pl-mls_6892-low",
    #
    "portuguese": "pt",
    "pt": "pt-br",
    "pt-br": "pt-br-edresson-low",
    #
    "ukrainian": "uk",
    "uk": "uk-lada-x-low",
    #
    "vietnamese": "vi",
    "vi": "vi-25hours-single-low",
    #
    "chinese": "zh",
    "zh": "zh-cn",
    "zh-cn": "zh-cn-huayan-x-low",
}


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "name",
        nargs="+",
        choices=sorted(_VOICES.keys()),
        help="Voice language(s) to download",
    )
    parser.add_argument(
        "--destination", help="Path to destination directory (default: share)"
    )
    parser.add_argument(
        "--link-format",
        default="https://github.com/rhasspy/piper/releases/download/v0.0.2/voice-{name}.tar.gz",
        help="Format string for download URLs",
    )
    parser.add_argument(
        "--dry-run", action="store_true", help="Don't actually download"
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.INFO)

    if args.destination:
        args.destination = Path(args.destination)
    else:
        # Assume we're in programs/tts/piper/script
        data_dir = _DIR.parent.parent.parent.parent / "data"
        args.destination = data_dir / "tts" / "piper"

    args.destination.parent.mkdir(parents=True, exist_ok=True)

    for voice_name in _VOICE_NAMES:
        _VOICES[voice_name] = voice_name

    for name in args.name:
        resolved_name = _VOICES[name]
        while resolved_name != name:
            name = resolved_name
            resolved_name = _VOICES[name]

        url = args.link_format.format(name=name)
        _LOGGER.info("Downloading %s", url)

        if args.dry_run:
            return

        with urlopen(url) as response:
            with tarfile.open(mode="r|*", fileobj=response) as tar_gz:
                _LOGGER.info("Extracting to %s", args.destination)
                tar_gz.extractall(args.destination)


if __name__ == "__main__":
    main()


================================================
FILE: programs/tts/piper/script/server
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

if [ -d "${venv}" ]; then
    source "${venv}/bin/activate"
fi

socket_dir="${base_dir}/var/run"
mkdir -p "${socket_dir}"

python3 "${base_dir}/bin/piper_server.py" --socketfile "${socket_dir}/piper.socket" "$@"


================================================
FILE: programs/tts/piper/script/setup.py
================================================
#!/usr/bin/env python3
import argparse
import logging
import platform
import shutil
import tarfile
import tempfile
from pathlib import Path
from urllib.request import urlopen

_DIR = Path(__file__).parent
_LOGGER = logging.getLogger("setup")

PLATFORMS = {"x86_64": "amd64", "aarch64": "arm64"}


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--platform",
        help="CPU architecture to download (amd64, arm64)",
    )
    parser.add_argument(
        "--destination", help="Path to destination directory (default: bin)"
    )
    parser.add_argument(
        "--link-format",
        default="https://github.com/rhasspy/piper/releases/download/v0.0.2/piper_{platform}.tar.gz",
        help="Format string for download URLs",
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.INFO)

    if not args.platform:
        args.platform = platform.machine()

    args.platform = PLATFORMS.get(args.platform, args.platform)

    if not args.destination:
        args.destination = _DIR.parent / "bin"
    else:
        args.destination = Path(args.destination)

    args.destination.mkdir(parents=True, exist_ok=True)

    url = args.link_format.format(platform=args.platform)
    _LOGGER.info("Downloading %s", url)
    with urlopen(url) as response, tempfile.TemporaryDirectory() as temp_dir_str:
        temp_dir = Path(temp_dir_str)
        with tarfile.open(mode="r|*", fileobj=response) as tar_gz:
            _LOGGER.info("Extracting to %s", temp_dir)
            tar_gz.extractall(temp_dir)

        # Move piper/ contents
        piper_dir = temp_dir / "piper"
        for path in piper_dir.iterdir():
            rel_path = path.relative_to(piper_dir)
            if path.is_dir():
                shutil.copytree(path, args.destination / rel_path, symlinks=True)
            else:
                shutil.copy(path, args.destination / rel_path, follow_symlinks=False)


if __name__ == "__main__":
    main()


================================================
FILE: programs/vad/energy/bin/energy_speech_prob.py
================================================
#!/usr/bin/env python3
import argparse
import audioop
import logging
import sys
from pathlib import Path

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--threshold",
        type=float,
        required=True,
        help="Energy threshold above which is considered speech",
    )
    parser.add_argument(
        "--width",
        type=int,
        required=True,
        help="Sample width bytes",
    )
    parser.add_argument(
        "--samples-per-chunk",
        required=True,
        type=int,
        help="Samples to send to command at a time",
    )
    #
    parser.add_argument("--debug", action="store_true", help="Log DEBUG messages")
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    bytes_per_chunk = args.samples_per_chunk * args.width

    try:
        chunk = sys.stdin.buffer.read(bytes_per_chunk)
        while chunk:
            energy = get_debiased_energy(chunk, args.width)
            speech_probability = 1 if energy > args.threshold else 0
            print(speech_probability, flush=True)
            chunk = sys.stdin.buffer.read(bytes_per_chunk)
    except KeyboardInterrupt:
        pass


# -----------------------------------------------------------------------------


def get_debiased_energy(audio_data: bytes, width: int) -> float:
    """Compute RMS of debiased audio."""
    # Thanks to the speech_recognition library!
    # https://github.com/Uberi/speech_recognition/blob/master/speech_recognition/__init__.py
    energy = -audioop.rms(audio_data, width)
    energy_bytes = bytes([energy & 0xFF, (energy >> 8) & 0xFF])
    debiased_energy = audioop.rms(
        audioop.add(audio_data, energy_bytes * (len(audio_data) // width), width),
        width,
    )

    return debiased_energy


if __name__ == "__main__":
    main()


================================================
FILE: programs/vad/silero/README.md
================================================
# Silero VAD

Voice activity detection service for Rhasspy based on [silero-vad](https://github.com/snakers4/silero-vad).


================================================
FILE: programs/vad/silero/bin/silero_speech_prob.py
================================================
#!/usr/bin/env python3
import argparse
import logging
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Optional, Union

import numpy as np
import onnxruntime

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("model", help="Path to Silero model")
    parser.add_argument("--samples-per-chunk", type=int, default=512)
    #
    parser.add_argument("--debug", action="store_true", help="Log DEBUG messages")
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    bytes_per_chunk = args.samples_per_chunk * 2  # 16-bit
    detector = SileroDetector(args.model)
    detector.start()

    try:
        chunk = sys.stdin.buffer.read(bytes_per_chunk)
        while chunk:
            speech_probability = detector.get_speech_probability(chunk)
            print(speech_probability, flush=True)
            chunk = sys.stdin.buffer.read(bytes_per_chunk)
    except (KeyboardInterrupt, BrokenPipeError):
        pass


# -----------------------------------------------------------------------------


@dataclass
class SileroDetector:
    model: Union[str, Path]
    _session: Optional[onnxruntime.InferenceSession] = None
    _h_array: Optional[np.ndarray] = None
    _c_array: Optional[np.ndarray] = None

    def start(self):
        _LOGGER.debug("Loading VAD model: %s", self.model)
        self._session = onnxruntime.InferenceSession(str(self.model))
        self._session.intra_op_num_threads = 1
        self._session.inter_op_num_threads = 1

        self._h_array = np.zeros((2, 1, 64)).astype("float32")
        self._c_array = np.zeros((2, 1, 64)).astype("float32")

    def get_speech_probability(self, chunk: bytes) -> float:
        assert self._session is not None
        audio_array = np.frombuffer(chunk, dtype=np.int16)

        # Add batch dimension
        audio_array = np.expand_dims(audio_array, 0)

        ort_inputs = {
            "input": audio_array.astype(np.float32),
            "h0": self._h_array,
            "c0": self._c_array,
        }
        ort_outs = self._session.run(None, ort_inputs)
        out, self._h_array, self._c_array = ort_outs
        probability = out.squeeze(2)[:, 1].item()
        return probability

    def stop(self):
        self._session = None
        self._h_array = None
        self._c_array = None

    def reset(self):
        self._h_array.fill(0)
        self._c_array.fill(0)


# -----------------------------------------------------------------------------

if __name__ == "__main__":
    main()


================================================
FILE: programs/vad/silero/requirements.txt
================================================
onnxruntime
numpy


================================================
FILE: programs/vad/silero/script/setup
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

# Python binary to use
: "${PYTHON=python3}"

python_version="$(${PYTHON} --version)"

if [ ! -d "${venv}" ]; then
    # Create virtual environment
    echo "Creating virtual environment at ${venv} (${python_version})"
    rm -rf "${venv}"
    "${PYTHON}" -m venv "${venv}"
    source "${venv}/bin/activate"

    pip3 install --upgrade pip
    pip3 install --upgrade wheel setuptools
else
    source "${venv}/bin/activate"
fi

# Install Python dependencies
echo 'Installing Python dependencies'
pip3 install -r "${base_dir}/requirements.txt"

# Install rhasspy3
rhasspy3_dir="${base_dir}/../../../.."
pip3 install -e "${rhasspy3_dir}"

# -----------------------------------------------------------------------------

echo "OK"


================================================
FILE: programs/vad/silero/script/speech_prob
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

if [ -d "${venv}" ]; then
    source "${venv}/bin/activate"
fi

python3 "${base_dir}/bin/silero_speech_prob.py" "$@"


================================================
FILE: programs/vad/webrtcvad/README.md
================================================
# webrtcvad

Voice activity detection service for Rhasspy based on [webrtcvad](https://pypi.org/project/webrtcvad/).


================================================
FILE: programs/vad/webrtcvad/bin/webrtcvad_speech_prob.py
================================================
#!/usr/bin/env python3
import argparse
import logging
import sys
from pathlib import Path

import webrtcvad

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "mode",
        choices=(0, 1, 2, 3),
        type=int,
        help="Aggressiveness in filtering out non-speech",
    )
    parser.add_argument(
        "--rate",
        type=int,
        default=16000,
        help="Sample rate (hz)",
    )
    parser.add_argument(
        "--width",
        type=int,
        default=2,
        help="Sample width bytes",
    )
    parser.add_argument("--samples-per-chunk", type=int, default=480)
    #
    parser.add_argument("--debug", action="store_true", help="Log DEBUG messages")
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    chunk_ms = 1000 * (args.samples_per_chunk / args.rate)
    assert chunk_ms in [10, 20, 30], (
        "Sample rate and chunk size must make for 10, 20, or 30 ms buffer sizes,"
        + f" assuming mono audio (got {chunk_ms} ms)"
    )

    bytes_per_chunk = args.samples_per_chunk * args.width
    vad = webrtcvad.Vad()
    vad.set_mode(args.mode)

    try:
        chunk = sys.stdin.buffer.read(bytes_per_chunk)
        while chunk:
            speech_probability = 1 if vad.is_speech(chunk, args.rate) else 0
            print(speech_probability, flush=True)
            chunk = sys.stdin.buffer.read(bytes_per_chunk)
    except KeyboardInterrupt:
        pass


# -----------------------------------------------------------------------------

if __name__ == "__main__":
    main()


================================================
FILE: programs/vad/webrtcvad/requirements.txt
================================================
webrtcvad


================================================
FILE: programs/vad/webrtcvad/script/setup
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

# Python binary to use
: "${PYTHON=python3}"

python_version="$(${PYTHON} --version)"

if [ ! -d "${venv}" ]; then
    # Create virtual environment
    echo "Creating virtual environment at ${venv} (${python_version})"
    rm -rf "${venv}"
    "${PYTHON}" -m venv "${venv}"
    source "${venv}/bin/activate"

    pip3 install --upgrade pip
    pip3 install --upgrade wheel setuptools
else
    source "${venv}/bin/activate"
fi

# Install Python dependencies
echo 'Installing Python dependencies'
pip3 install -r "${base_dir}/requirements.txt"

# Install rhasspy3
rhasspy3_dir="${base_dir}/../../../.."
pip3 install -e "${rhasspy3_dir}"

# -----------------------------------------------------------------------------

echo "OK"


================================================
FILE: programs/vad/webrtcvad/script/speech_prob
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

if [ -d "${venv}" ]; then
    source "${venv}/bin/activate"
fi

python3 "${base_dir}/bin/webrtcvad_speech_prob.py" "$@"


================================================
FILE: programs/wake/porcupine1/bin/list_models.py
================================================
#!/usr/bin/env python3
from pathlib import Path

import pvporcupine


def main() -> None:
    """Main method."""

    for keyword_path in sorted(pvporcupine.pv_keyword_paths("").values()):
        model_name = Path(keyword_path).name
        print(model_name)


# -----------------------------------------------------------------------------

if __name__ == "__main__":
    main()


================================================
FILE: programs/wake/porcupine1/bin/porcupine_raw_text.py
================================================
#!/usr/bin/env python3
import argparse
import logging
import struct
import sys
from pathlib import Path

from porcupine_shared import get_arg_parser, load_porcupine

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)

# -----------------------------------------------------------------------------


def main() -> None:
    """Main method."""
    parser = get_arg_parser()
    parser.add_argument("--samples-per-chunk", type=int, default=512)
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    porcupine, names = load_porcupine(args)

    chunk_format = "h" * porcupine.frame_length
    bytes_per_chunk = porcupine.frame_length * 2

    # Read 16Khz, 16-bit mono PCM from stdin
    try:
        chunk = bytes()
        next_chunk = sys.stdin.buffer.read(bytes_per_chunk)
        while next_chunk:
            while len(chunk) >= bytes_per_chunk:
                unpacked_chunk = struct.unpack_from(
                    chunk_format, chunk[:bytes_per_chunk]
                )
                keyword_index = porcupine.process(unpacked_chunk)
                if keyword_index >= 0:
                    print(names[keyword_index], flush=True)

                chunk = chunk[bytes_per_chunk:]

            next_chunk = sys.stdin.buffer.read(bytes_per_chunk)
            chunk += next_chunk
    except KeyboardInterrupt:
        pass


# -----------------------------------------------------------------------------

if __name__ == "__main__":
    main()


================================================
FILE: programs/wake/porcupine1/bin/porcupine_shared.py
================================================
import argparse
from pathlib import Path
from typing import List, Tuple

import pvporcupine


def get_arg_parser() -> argparse.ArgumentParser:
    """Get shared command-line argument parser."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model",
        required=True,
        action="append",
        nargs="+",
        help="Keyword model settings (path, [sensitivity])",
    )
    parser.add_argument(
        "--lang_model",
        help="Path of the language model (.pv file), default is English",
    )
    parser.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    return parser


def load_porcupine(args: argparse.Namespace) -> Tuple[pvporcupine.Porcupine, List[str]]:
    """Loads porcupine keywords. Returns Porcupine object and list of keyword names (in order)."""
    # Path to embedded keywords
    keyword_dir = Path(next(iter(pvporcupine.pv_keyword_paths("").values()))).parent

    names: List[str] = []
    keyword_paths: List[Path] = []
    sensitivities: List[float] = []

    model_path = (
        str(Path(args.lang_model).absolute()) if args.lang_model is not None else None
    )

    for model_settings in args.model:
        keyword_path_str = model_settings[0]
        keyword_path = Path(keyword_path_str)
        if not keyword_path.exists():
            keyword_path = keyword_dir / keyword_path_str
            assert keyword_path.exists(), f"Cannot find {keyword_path_str}"

        keyword_paths.append(keyword_path)
        names.append(keyword_path.stem)

        sensitivity = 0.5
        if len(model_settings) > 1:
            sensitivity = float(model_settings[1])

        sensitivities.append(sensitivity)

    porcupine = pvporcupine.create(
        keyword_paths=[str(keyword_path.absolute()) for keyword_path in keyword_paths],
        sensitivities=sensitivities,
        model_path=model_path,
    )

    return porcupine, names


================================================
FILE: programs/wake/porcupine1/bin/porcupine_stream.py
================================================
#!/usr/bin/env python3
import logging
import struct
from pathlib import Path

from porcupine_shared import get_arg_parser, load_porcupine

from rhasspy3.audio import AudioChunk, AudioStop
from rhasspy3.event import read_event, write_event
from rhasspy3.wake import Detection, NotDetected

_FILE = Path(__file__)
_DIR = _FILE.parent
_LOGGER = logging.getLogger(_FILE.stem)

# -----------------------------------------------------------------------------


def main() -> None:
    """Main method."""
    parser = get_arg_parser()
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    porcupine, names = load_porcupine(args)

    chunk_format = "h" * porcupine.frame_length
    bytes_per_chunk = porcupine.frame_length * 2  # 16-bit width
    audio_bytes = bytes()
    is_detected = False

    try:
        while True:
            event = read_event()
            if event is None:
                break

            if AudioStop.is_type(event.type):
                break

            if not AudioChunk.is_type(event.type):
                continue

            chunk = AudioChunk.from_event(event)
            audio_bytes += chunk.audio

            while len(audio_bytes) >= bytes_per_chunk:
                unpacked_chunk = struct.unpack_from(
                    chunk_format, audio_bytes[:bytes_per_chunk]
                )
                keyword_index = porcupine.process(unpacked_chunk)
                if keyword_index >= 0:
                    write_event(
                        Detection(
                            name=names[keyword_index], timestamp=chunk.timestamp
                        ).event()
                    )
                    is_detected = True

                audio_bytes = audio_bytes[bytes_per_chunk:]

        if is_detected:
            write_event(NotDetected().event())
    except KeyboardInterrupt:
        pass


# -----------------------------------------------------------------------------

if __name__ == "__main__":
    main()


================================================
FILE: programs/wake/porcupine1/requirements.txt
================================================
pvporcupine~=1.9.0


================================================
FILE: programs/wake/porcupine1/script/download.py
================================================
#!/usr/bin/env python3
import argparse
import logging
import tarfile
from pathlib import Path
from urllib.request import urlopen

_DIR = Path(__file__).parent
_LOGGER = logging.getLogger("download")


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--destination", help="Path to destination directory (default: data)"
    )
    parser.add_argument(
        "--url",
        default="https://github.com/rhasspy/models/releases/download/v1.0/wake_porcupine1-data.tar.gz",
        help="URL of porcupine1 data",
    )
    args = parser.parse_args()
    logging.basicConfig(level=logging.INFO)

    if args.destination:
        args.destination = Path(args.destination)
    else:
        # Assume we're in programs/wake/porcupine1/script
        data_dir = _DIR.parent.parent.parent.parent / "data"
        args.destination = data_dir / "wake" / "porcupine1"

    args.destination.parent.mkdir(parents=True, exist_ok=True)

    _LOGGER.info("Downloading %s", args.url)
    with urlopen(args.url) as response:
        with tarfile.open(mode="r|*", fileobj=response) as tar_gz:
            _LOGGER.info("Extracting to %s", args.destination)
            tar_gz.extractall(args.destination)


if __name__ == "__main__":
    main()


================================================
FILE: programs/wake/porcupine1/script/list_models
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

if [ -d "${venv}" ]; then
    source "${venv}/bin/activate"
fi

python3 "${base_dir}/bin/list_models.py" "$@"


================================================
FILE: programs/wake/porcupine1/script/raw2text
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

if [ -d "${venv}" ]; then
    source "${venv}/bin/activate"
fi

python3 "${base_dir}/bin/porcupine_raw_text.py" "$@"


================================================
FILE: programs/wake/porcupine1/script/setup
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

# Python binary to use
: "${PYTHON=python3}"

python_version="$(${PYTHON} --version)"

if [ ! -d "${venv}" ]; then
    # Create virtual environment
    echo "Creating virtual environment at ${venv} (${python_version})"
    rm -rf "${venv}"
    "${PYTHON}" -m venv "${venv}"
    source "${venv}/bin/activate"

    pip3 install --upgrade pip
    pip3 install --upgrade wheel setuptools
else
    source "${venv}/bin/activate"
fi

# Install Python dependencies
echo 'Installing Python dependencies'
pip3 install -r "${base_dir}/requirements.txt"

# -----------------------------------------------------------------------------

echo "OK"


================================================
FILE: programs/wake/precise-lite/bin/precise.py
================================================
#!/usr/bin/env python3
# Copyright 2021 Mycroft AI Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import logging
import os
import sys
import typing
from dataclasses import dataclass
from enum import IntEnum
from math import floor
from pathlib import Path
from typing import Any, Optional, Union

import numpy as np
import tflite_runtime.interpreter as tflite
from sonopy import mfcc_spec

MAX_WAV_VALUE = 32768
_log = logging.getLogger("mycroft_hotword")


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("model", help="Path to TFLite model")
    parser.add_argument(
        "--sensitivity",
        type=float,
        default=0.8,
        help="Model sensitivity (0-1, default: 0.8)",
    )
    parser.add_argument(
        "--trigger-level",
        type=int,
        default=4,
        help="Number of activations before detection occurs (default: 4)",
    )
    parser.add_argument(
        "--chunk-size",
        type=int,
        default=2048,
        help="Number of bytes to read at a time from stdin",
    )
    parser.add_argument("--debug", action="store_true", help="Log DEBUG messages")
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    args.model = Path(args.model).absolute()
    engine = TFLiteHotWordEngine(
        local_model_file=args.model,
        sensitivity=args.sensitivity,
        trigger_level=args.trigger_level,
        chunk_size=args.chunk_size,
    )

    if os.isatty(sys.stdin.fileno()):
        print("Reading raw 16-bit 16khz mono audio from stdin", file=sys.stderr)

    is_first_audio = True
    try:
        while True:
            chunk = sys.stdin.buffer.read(args.chunk_size)
            if not chunk:
                break

            if is_first_audio:
                _log.info("Receiving audio")
                is_first_audio = False

            engine.update(chunk)

            if engine.found_wake_word(None):
                print(args.model.name, flush=True)
    except KeyboardInterrupt:
        pass


# -----------------------------------------------------------------------------


class TFLiteHotWordEngine:
    def __init__(
        self,
        local_model_file: Union[str, Path],
        sensitivity: float = 0.7,
        trigger_level: int = 4,
        chunk_size: int = 2048,
    ):
        self.sensitivity = sensitivity
        self.trigger_level = trigger_level
        self.chunk_size = chunk_size

        self.model_path = Path(local_model_file).absolute()

        self._interpreter: Optional[tflite.Interpreter] = None
        self._params: Optional[ListenerParams] = None
        self._input_details: Optional[Any] = None
        self._output_details: Optional[Any] = None

        # Rolling window of MFCCs (fixed sized)
        self._inputs: Optional[np.ndarray] = None

        # Current MFCC timestep
        self._inputs_idx: int = 0

        # Bytes for one window of audio
        self._window_bytes: int = 0

        # Bytes for one MFCC hop
        self._hop_bytes: int = 0

        # Raw audio
        self._chunk_buffer = bytes()

        # Activation level (> trigger_level = wake word found)
        self._activation: int = 0

        # True if wake word was found during last update
        self._is_found = False

        # There doesn't seem to be an initialize() method for wake word plugins,
        # so we'll load the model here.
        self._load_model()

        # Last probability
        self._probability: Optional[float] = None

    def _load_model(self):
        _log.debug("Loading model from %s", self.model_path)
        self._interpreter = tflite.Interpreter(model_path=str(self.model_path))
        self._interpreter.allocate_tensors()
        self._input_details = self._interpreter.get_input_details()
        self._output_details = self._interpreter.get_output_details()

        # TODO: Load these from adjacent file
        self._params = ListenerParams()

        self._window_bytes = self._params.window_samples * self._params.sample_depth
        self._hop_bytes = self._params.hop_samples * self._params.sample_depth

        # Rolling window of MFCCs (fixed sized)
        self._inputs = np.zeros(
            (1, self._params.n_features, self._params.n_mfcc), dtype=np.float32
        )

    def update(self, chunk):
        self._is_found = False
        self._chunk_buffer += chunk
        self._probability = None

        # Process all available windows
        while len(self._chunk_buffer) >= self._window_bytes:
            # Process current audio
            audio = buffer_to_audio(self._chunk_buffer)

            # TODO: Implement different MFCC algorithms
            mfccs = mfcc_spec(
                audio,
                self._params.sample_rate,
                (self._params.window_samples, self._params.hop_samples),
                num_filt=self._params.n_filt,
                fft_size=self._params.n_fft,
                num_coeffs=self._params.n_mfcc,
            )

            num_timesteps = mfccs.shape[0]

            # Remove processed audio from buffer
            self._chunk_buffer = self._chunk_buffer[num_timesteps * self._hop_bytes :]

            # Check if we have a full set of inputs yet
            inputs_end_idx = self._inputs_idx + num_timesteps
            if inputs_end_idx > self._inputs.shape[1]:
                # Full set, need to roll back existing inputs
                self._inputs = np.roll(self._inputs, -num_timesteps, axis=1)
                inputs_end_idx = self._inputs.shape[1]
                self._inputs_idx = inputs_end_idx - num_timesteps

            # Insert new MFCCs at the end
            self._inputs[0, self._inputs_idx : inputs_end_idx, :] = mfccs
            self._inputs_idx += num_timesteps
            if inputs_end_idx < self._inputs.shape[1]:
                # Don't have a full set of inputs yet
                continue

            # TODO: Add deltas

            # raw_output
            self._interpreter.set_tensor(self._input_details[0]["index"], self._inputs)
            self._interpreter.invoke()
            raw_output = self._interpreter.get_tensor(self._output_details[0]["index"])
            prob = raw_output[0][0]

            if (prob < 0.0) or (prob > 1.0):
                # TODO: Handle out of range.
                # Not seeing these currently, so ignoring.
                continue

            self._probability = prob.item()

            # Decode
            activated = prob > 1.0 - self.sensitivity
            triggered = False
            if activated or (self._activation < 0):
                # Increase activation
                self._activation += 1

                triggered = self._activation > self.trigger_level
                if triggered or (activated and (self._activation < 0)):
                    # Push activation down far to avoid an accidental re-activation
                    self._activation = -(8 * 2048) // self.chunk_size
            elif self._activation > 0:
                # Decrease activation
                self._activation -= 1

            if triggered:
                self._is_found = True
                _log.debug("Triggered")
                break

        return self._is_found

    def found_wake_word(self, frame_data):
        return self._is_found

    def reset(self):
        self._inputs = np.zeros(
            (1, self._params.n_features, self._params.n_mfcc), dtype=np.float32
        )
        self._activation = 0
        self._is_found = False
        self._inputs_idx = 0
        self._chunk_buffer = bytes()

    @property
    def probability(self) -> Optional[float]:
        return self._probability


# -----------------------------------------------------------------------------


class Vectorizer(IntEnum):
    """
    Chooses which function to call to vectorize audio

    Options:
        mels: Convert to a compressed Mel spectrogram
        mfccs: Convert to a MFCC spectrogram
        speechpy_mfccs: Legacy option to convert to MFCCs using old library
    """

    mels = 1
    mfccs = 2
    speechpy_mfccs = 3


@dataclass
class ListenerParams:
    """
    General pipeline information:
     - Audio goes through a series of transformations to convert raw audio into machine readable data
     - These transformations are as follows:
       - Raw audio -> chopped audio
         - buffer_t, sample_depth: Input audio loaded and truncated using these value
         - window_t, hop_t: Linear audio chopped into overlapping frames using a sliding window
       - Chopped audio -> FFT spectrogram
         - n_fft, sample_rate: Each audio frame is converted to n_fft frequency intensities
       - FFT spectrogram -> Mel spectrogram (compressed)
         - n_filt: Each fft frame is compressed to n_filt summarized mel frequency bins/bands
       - Mel spectrogram -> MFCC
         - n_mfcc: Each mel frame is converted to MFCCs and the first n_mfcc values are taken
       - Disabled by default: Last phase -> Delta vectors
         - use_delta: If this value is true, the difference between consecutive vectors is concatenated to each frame

    Parameters for audio pipeline:
     - buffer_t: Input size of audio. Wakeword must fit within this time
     - window_t: Time of the window used to calculate a single spectrogram frame
     - hop_t: Time the window advances forward to calculate the next spectrogram frame
     - sample_rate: Input audio sample rate
     - sample_depth: Bytes per input audio sample
     - n_fft: Size of FFT to generate from audio frame
     - n_filt: Number of filters to compress FFT to
     - n_mfcc: Number of MFCC coefficients to use
     - use_delta: If True, generates "delta vectors" before sending to network
     - vectorizer: The type of input fed into the network. Options listed in class Vectorizer
     - threshold_config: Output distribution configuration automatically generated from precise-calc-threshold
     - threshold_center: Output distribution center automatically generated from precise-calc-threshold
    """

    buffer_t: float = 1.5
    window_t: float = 0.1
    hop_t: float = 0.05
    sample_rate: int = 16000
    sample_depth: int = 2
    n_fft: int = 512
    n_filt: int = 20
    n_mfcc: int = 13
    use_delta: bool = False
    vectorizer: int = Vectorizer.mfccs
    threshold_config: typing.Tuple[typing.Tuple[int, ...], ...] = ((6, 4),)
    threshold_center: float = 0.2

    @property
    def buffer_samples(self):
        """buffer_t converted to samples, truncating partial frames"""
        samples = int(self.sample_rate * self.buffer_t + 0.5)
        return self.hop_samples * (samples // self.hop_samples)

    @property
    def n_features(self):
        """Number of timesteps in one input to the network"""
        return 1 + int(
            floor((self.buffer_samples - self.window_samples) / self.hop_samples)
        )

    @property
    def window_samples(self):
        """window_t converted to samples"""
        return int(self.sample_rate * self.window_t + 0.5)

    @property
    def hop_samples(self):
        """hop_t converted to samples"""
        return int(self.sample_rate * self.hop_t + 0.5)

    @property
    def max_samples(self):
        """The input size converted to audio samples"""
        return int(self.buffer_t * self.sample_rate)

    @property
    def feature_size(self):
        """The size of an input vector generated with these parameters"""
        num_features = {
            Vectorizer.mfccs: self.n_mfcc,
            Vectorizer.mels: self.n_filt,
            Vectorizer.speechpy_mfccs: self.n_mfcc,
        }[self.vectorizer]
        if self.use_delta:
            num_features *= 2
        return num_features


def chunk_audio(
    audio: np.ndarray, chunk_size: int
) -> typing.Generator[np.ndarray, None, None]:
    for i in range(chunk_size, len(audio), chunk_size):
        yield audio[i - chunk_size : i]


def buffer_to_audio(audio_buffer: bytes) -> np.ndarray:
    """Convert a raw mono audio byte string to numpy array of floats"""
    return np.frombuffer(audio_buffer, dtype="<i2").astype(
        np.float32, order="C"
    ) / float(MAX_WAV_VALUE)


def audio_to_buffer(audio: np.ndarray) -> bytes:
    """Convert a numpy array of floats to raw mono audio"""
    return (audio * MAX_WAV_VALUE).astype("<i2").tobytes()


# -----------------------------------------------------------------------------

if __name__ == "__main__":
    main()


================================================
FILE: programs/wake/precise-lite/requirements.txt
================================================
numpy
sonopy~=0.1.0
tflite_runtime>=2.5.0,<3.0


================================================
FILE: programs/wake/precise-lite/script/setup
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

# Python binary to use
: "${PYTHON=python3}"

python_version="$(${PYTHON} --version)"

if [ ! -d "${venv}" ]; then
    # Create virtual environment
    echo "Creating virtual environment at ${venv} (${python_version})"
    rm -rf "${venv}"
    "${PYTHON}" -m venv "${venv}"
    source "${venv}/bin/activate"

    pip3 install --upgrade pip
    pip3 install --upgrade wheel setuptools
else
    source "${venv}/bin/activate"
fi

# Install Python dependencies
echo 'Installing Python dependencies'
pip3 install -r "${base_dir}/requirements.txt"

# -----------------------------------------------------------------------------

echo "OK"


================================================
FILE: programs/wake/snowboy/bin/snowboy_raw_text.py
================================================
#!/usr/bin/env python3
import argparse
import logging
import sys
from pathlib import Path
from typing import Dict

from snowboy import snowboydecoder, snowboydetect

_LOGGER = logging.getLogger("snowboy_raw_text")

# -----------------------------------------------------------------------------


def main() -> None:
    """Main method."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model",
        required=True,
        action="append",
        nargs="+",
        help="Snowboy model settings (path, [sensitivity], [audio_gain], [apply_frontend])",
    )
    parser.add_argument("--samples-per-chunk", type=int, default=1024)
    args = parser.parse_args()

    # logging.basicConfig wouldn't work if a handler already existed.
    # snowboy must mess with logging, so this resets it.
    logging.getLogger().handlers = []
    logging.basicConfig(level=logging.INFO)

    # Load model settings
    detectors: Dict[str, snowboydetect.SnowboyDetect] = {}

    for model_settings in args.model:
        model_path = Path(model_settings[0])

        sensitivity = "0.5"
        if len(model_settings) > 1:
            sensitivity = model_settings[1]

        audio_gain = 1.0
        if len(model_settings) > 2:
            audio_gain = float(model_settings[2])

        apply_frontend = False
        if len(model_settings) > 3:
            apply_frontend = model_settings[3].strip().lower() == "true"

        detector = snowboydetect.SnowboyDetect(
            snowboydecoder.RESOURCE_FILE.encode(), str(model_path).encode()
        )

        detector.SetSensitivity(sensitivity.encode())
        detector.SetAudioGain(audio_gain)
        detector.ApplyFrontend(apply_frontend)

        detectors[model_path.stem] = detector

    # Read 16Khz, 16-bit mono PCM from stdin
    bytes_per_chunk = args.samples_per_chunk * 2
    try:
        chunk = bytes()
        next_chunk = sys.stdin.buffer.read(bytes_per_chunk)
        while next_chunk:
            while len(chunk) >= bytes_per_chunk:
                for name, detector in detectors.items():
                    # Return is:
                    # -2 silence
                    # -1 error
                    #  0 voice
                    #  n index n-1
                    result_index = detector.RunDetection(chunk[:bytes_per_chunk])

                    if result_index > 0:
                        # Detection
                        print(name, flush=True)

                chunk = chunk[bytes_per_chunk:]

            next_chunk = sys.stdin.buffer.read(args.samples_per_chunk)
            chunk += next_chunk
    except KeyboardInterrupt:
        pass


# -----------------------------------------------------------------------------

if __name__ == "__main__":
    main()


================================================
FILE: programs/wake/snowboy/requirements.txt
================================================
snowboy @ https://github.com/Kitt-AI/snowboy/archive/v1.3.0.tar.gz


================================================
FILE: programs/wake/snowboy/script/setup
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

# Python binary to use
: "${PYTHON=python3}"

python_version="$(${PYTHON} --version)"

if [ ! -d "${venv}" ]; then
    # Create virtual environment
    echo "Creating virtual environment at ${venv} (${python_version})"
    rm -rf "${venv}"
    "${PYTHON}" -m venv "${venv}"
    source "${venv}/bin/activate"

    pip3 install --upgrade pip
    pip3 install --upgrade wheel setuptools
else
    source "${venv}/bin/activate"
fi

# Install Python dependencies
echo 'Installing Python dependencies'
pip3 install -r "${base_dir}/requirements.txt"

# -----------------------------------------------------------------------------

echo "OK"


================================================
FILE: pylintrc
================================================
[MESSAGES CONTROL]
disable=
  format,
  abstract-method,
  cyclic-import,
  duplicate-code,
  global-statement,
  import-outside-toplevel,
  inconsistent-return-statements,
  locally-disabled,
  not-context-manager,
  too-few-public-methods,
  too-many-arguments,
  too-many-branches,
  too-many-instance-attributes,
  too-many-lines,
  too-many-locals,
  too-many-public-methods,
  too-many-return-statements,
  too-many-statements,
  too-many-boolean-expressions,
  unnecessary-pass,
  unused-argument,
  broad-except,
  too-many-nested-blocks,
  invalid-name,
  unused-import,
  fixme,
  useless-super-delegation,
  missing-module-docstring,
  missing-class-docstring,
  missing-function-docstring,
  import-error,
  consider-using-with

[FORMAT]
expected-line-ending-format=LF


================================================
FILE: requirements_dev.txt
================================================
black==22.12.0
flake8==6.0.0
isort==5.11.3
mypy==0.991
pylint==2.15.9
pytest==7.2.0


================================================
FILE: requirements_http_api.txt
================================================
quart
Quart-CORS
hypercorn


================================================
FILE: rhasspy3/VERSION
================================================
0.0.1


================================================
FILE: rhasspy3/__init__.py
================================================


================================================
FILE: rhasspy3/asr.py
================================================
"""Speech to text."""
import asyncio
import logging
import wave
from dataclasses import dataclass
from typing import IO, AsyncIterable, Optional, Union

from .audio import AudioChunk, AudioStart, AudioStop, wav_to_chunks
from .config import PipelineProgramConfig
from .core import Rhasspy
from .event import Event, Eventable, async_read_event, async_write_event
from .program import create_process
from .vad import DOMAIN as VAD_DOMAIN
from .vad import VoiceStarted, VoiceStopped

DOMAIN = "asr"
_TRANSCRIPT_TYPE = "transcript"

_LOGGER = logging.getLogger(__name__)


@dataclass
class Transcript(Eventable):
    text: str

    @staticmethod
    def is_type(event_type: str) -> bool:
        return event_type == _TRANSCRIPT_TYPE

    def event(self) -> Event:
        return Event(type=_TRANSCRIPT_TYPE, data={"text": self.text})

    @staticmethod
    def from_event(event: Event) -> "Transcript":
        assert event.data is not None
        return Transcript(text=event.data["text"])


async def transcribe(
    rhasspy: Rhasspy,
    program: Union[str, PipelineProgramConfig],
    wav_in: IO[bytes],
    samples_per_chunk: int,
) -> Optional[Transcript]:
    transcript: Optional[Transcript] = None
    wav_file: wave.Wave_read = wave.open(wav_in, "rb")
    with wav_file:
        rate = wav_file.getframerate()
        width = wav_file.getsampwidth()
        channels = wav_file.getnchannels()

        async with (await create_process(rhasspy, DOMAIN, program)) as asr_proc:
            assert asr_proc.stdin is not None
            assert asr_proc.stdout is not None

            timestamp = 0
            await async_write_event(
                AudioStart(rate, width, channels, timestamp=timestamp).event(),
                asr_proc.stdin,
            )

            is_first_chunk = True

            for chunk in wav_to_chunks(wav_file, samples_per_chunk=samples_per_chunk):
                if is_first_chunk:
                    is_first_chunk = False
                    _LOGGER.debug("transcribe: processing audio")

                await async_write_event(chunk.event(), asr_proc.stdin)
                if chunk.timestamp is not None:
                    timestamp = chunk.timestamp
                else:
                    timestamp += chunk.milliseconds

            await async_write_event(
                AudioStop(timestamp=timestamp).event(), asr_proc.stdin
            )

            _LOGGER.debug("transcribe: audio finished")

            while True:
                event = await async_read_event(asr_proc.stdout)
                if event is None:
                    break

                if Transcript.is_type(event.type):
                    transcript = Transcript.from_event(event)
                    _LOGGER.debug("transcribe: %s", transcript)
                    break

    return transcript


async def transcribe_stream(
    rhasspy: Rhasspy,
    asr_program: Union[str, PipelineProgramConfig],
    vad_program: Union[str, PipelineProgramConfig],
    audio_stream: AsyncIterable[bytes],
    rate: int,
    width: int,
    channels: int,
) -> Optional[Transcript]:
    transcript: Optional[Transcript] = None
    async with (await create_process(rhasspy, DOMAIN, asr_program)) as asr_proc, (
        await create_process(rhasspy, VAD_DOMAIN, vad_program)
    ) as vad_proc:
        assert asr_proc.stdin is not None
        assert asr_proc.stdout is not None
        assert vad_proc.stdin is not None
        assert vad_proc.stdout is not None

        timestamp = 0
        audio_start_event = AudioStart(
            rate, width, channels, timestamp=timestamp
        ).event()
        await asyncio.gather(
            async_write_event(
                audio_start_event,
                asr_proc.stdin,
            ),
            async_write_event(
                audio_start_event,
                vad_proc.stdin,
            ),
        )

        async def next_chunk():
            """Get the next chunk from audio stream."""
            async for chunk_bytes in audio_stream:
                return chunk_bytes

        is_first_chunk = True
        audio_task = asyncio.create_task(next_chunk())
        vad_task = asyncio.create_task(async_read_event(vad_proc.stdout))
        pending = {audio_task, vad_task}

        while True:
            done, pending = await asyncio.wait(
                pending, return_when=asyncio.FIRST_COMPLETED
            )

            if vad_task in done:
                vad_event = vad_task.result()
                if vad_event is None:
                    break

                if VoiceStarted.is_type(vad_event.type):
                    _LOGGER.debug("transcribe: voice started")
                elif VoiceStopped.is_type(vad_event.type):
                    _LOGGER.debug("transcribe: voice stopped")
                    break

                vad_task = asyncio.create_task(async_read_event(vad_proc.stdout))
                pending.add(vad_task)

            if audio_task in done:
                chunk_bytes = audio_task.result()
                if not chunk_bytes:
                    # End of audio stream
                    break

                if is_first_chunk:
                    _LOGGER.debug("transcribe: processing audio")
                    is_first_chunk = False

                chunk = AudioChunk(rate, width, channels, chunk_bytes)
                chunk_event = chunk.event()
                await asyncio.gather(
                    async_write_event(chunk_event, asr_proc.stdin),
                    async_write_event(chunk_event, vad_proc.stdin),
                )
                timestamp += chunk.milliseconds

                audio_task = asyncio.create_task(next_chunk())
                pending.add(audio_task)

        await async_write_event(AudioStop(timestamp=timestamp).event(), asr_proc.stdin)
        _LOGGER.debug("transcribe: audio finished")

        while True:
            event = await async_read_event(asr_proc.stdout)
            if event is None:
                break

            if Transcript.is_type(event.type):
                transcript = Transcript.from_event(event)
                _LOGGER.debug("transcribe: %s", transcript)
                break

    return transcript


================================================
FILE: rhasspy3/audio.py
================================================
"""Audio input/output."""
import audioop
import wave
from dataclasses import dataclass
from typing import Iterable, Optional

from .event import Event, Eventable

_TYPE = "audio-chunk"
_START_TYPE = "audio-start"
_STOP_TYPE = "audio-stop"

DEFAULT_IN_RATE = 16000  # Hz
DEFAULT_OUT_RATE = 22050  # Hz

DEFAULT_IN_WIDTH = 2  # bytes
DEFAULT_OUT_WIDTH = 2  # bytes

DEFAULT_IN_CHANNELS = 1  # mono
DEFAULT_OUT_CHANNELS = 1  # mono

DEFAULT_SAMPLES_PER_CHUNK = 1024


@dataclass
class AudioChunk(Eventable):
    """Chunk of raw PCM audio."""

    rate: int
    """Hertz"""

    width: int
    """Bytes"""

    channels: int
    """Mono = 1"""

    audio: bytes
    """Raw audio"""

    timestamp: Optional[int] = None
    """Milliseconds"""

    @staticmethod
    def is_type(event_type: str) -> bool:
        return event_type == _TYPE

    def event(self) -> Event:
        return Event(
            type=_TYPE,
            data={
                "rate": self.rate,
                "width": self.width,
                "channels": self.channels,
                "timestamp": self.timestamp,
            },
            payload=self.audio,
        )

    @staticmethod
    def from_event(event: Event) -> "AudioChunk":
        assert event.data is not None
        assert event.payload is not None

        return AudioChunk(
            rate=event.data["rate"],
            width=event.data["width"],
            channels=event.data["channels"],
            audio=event.payload,
            timestamp=event.data.get("timestamp"),
        )

    @property
    def samples(self) -> int:
        return len(self.audio) // (self.width * self.channels)

    @property
    def seconds(self) -> float:
        return self.samples / self.rate

    @property
    def milliseconds(self) -> int:
        return int(self.seconds * 1_000)


@dataclass
class AudioStart(Eventable):
    """Audio stream has started."""

    rate: int
    """Hertz"""

    width: int
    """Bytes"""

    channels: int
    """Mono = 1"""

    timestamp: Optional[int] = None
    """Milliseconds"""

    @staticmethod
    def is_type(event_type: str) -> bool:
        return event_type == _START_TYPE

    def event(self) -> Event:

        return Event(
            type=_START_TYPE,
            data={
                "rate": self.rate,
                "width": self.width,
                "channels": self.channels,
                "timestamp": self.timestamp,
            },
        )

    @staticmethod
    def from_event(event: Event) -> "AudioStart":
        assert event.data is not None
        return AudioStart(
            rate=event.data["rate"],
            width=event.data["width"],
            channels=event.data["channels"],
            timestamp=event.data.get("timestamp"),
        )


@dataclass
class AudioStop(Eventable):
    """Audio stream has stopped."""

    timestamp: Optional[int] = None
    """Milliseconds"""

    @staticmethod
    def is_type(event_type: str) -> bool:
        return event_type == _STOP_TYPE

    def event(self) -> Event:
        return Event(
            type=_STOP_TYPE,
            data={"timestamp": self.timestamp},
        )

    @staticmethod
    def from_event(event: Event) -> "AudioStop":
        return AudioStop(timestamp=event.data.get("timestamp"))


@dataclass
class AudioChunkConverter:
    """Converts audio chunks using audioop."""

    rate: Optional[int] = None
    width: Optional[int] = None
    channels: Optional[int] = None
    _ratecv_state = None

    def convert(self, chunk: AudioChunk) -> AudioChunk:
        """Converts sample rate, width, and channels as necessary."""
        if (
            ((self.rate is None) or (chunk.rate == self.rate))
            and ((self.width is None) or (chunk.width == self.width))
            and ((self.channels is None) or (chunk.channels == self.channels))
        ):
            return chunk

        audio_bytes = chunk.audio
        width = chunk.width

        if (self.width is not None) and (chunk.width != self.width):
            # Convert sample width
            audio_bytes = audioop.lin2lin(audio_bytes, chunk.width, self.width)
            width = self.width

        channels = chunk.channels
        if (self.channels is not None) and (chunk.channels != self.channels):
            # Convert to mono or stereo
            if self.channels == 1:
                audio_bytes = audioop.tomono(audio_bytes, width, 1.0, 1.0)
            elif self.channels == 2:
                audio_bytes = audioop.tostereo(audio_bytes, width, 1.0, 1.0)
            else:
                raise ValueError(f"Cannot convert to channels: {self.channels}")

            channels = self.channels

        rate = chunk.rate
        if (self.rate is not None) and (chunk.rate != self.rate):
            # Resample
            audio_bytes, self._ratecv_state = audioop.ratecv(
                audio_bytes,
                width,
                channels,
                chunk.rate,
                self.rate,
                self._ratecv_state,
            )
            rate = self.rate

        return AudioChunk(rate, width, channels, audio_bytes, timestamp=chunk.timestamp)


def wav_to_chunks(
    wav_file: wave.Wave_read, samples_per_chunk: int, timestamp: int = 0
) -> Iterable[AudioChunk]:
    """Splits WAV file into AudioChunks."""
    rate = wav_file.getframerate()
    width = wav_file.getsampwidth()
    channels = wav_file.getnchannels()
    audio_bytes = wav_file.readframes(samples_per_chunk)
    while audio_bytes:
        chunk = AudioChunk(
            rate=rate,
            width=width,
            channels=channels,
            audio=audio_bytes,
            timestamp=timestamp,
        )
        yield chunk
        timestamp += chunk.milliseconds
        audio_bytes = wav_file.readframes(samples_per_chunk)


================================================
FILE: rhasspy3/config.py
================================================
import argparse
from dataclasses import dataclass, field
from typing import Any, Dict, Optional

from .util import merge_dict
from .util.dataclasses_json import DataClassJsonMixin
from .util.jaml import safe_load


@dataclass
class CommandConfig(DataClassJsonMixin):
    command: str
    shell: bool = False


@dataclass
class ProgramDownloadConfig(DataClassJsonMixin):
    description: Optional[str] = None
    check_file: Optional[str] = None


@dataclass
class ProgramInstallConfig(CommandConfig):
    check_file: Optional[str] = None
    download: Optional[CommandConfig] = None
    downloads: Optional[Dict[str, ProgramDownloadConfig]] = None


@dataclass
class ProgramConfig(CommandConfig):
    adapter: Optional[str] = None
    template_args: Optional[Dict[str, Any]] = None
    installed: bool = True
    install: Optional[ProgramInstallConfig] = None


@dataclass
class PipelineProgramConfig(DataClassJsonMixin):
    name: str
    template_args: Optional[Dict[str, Any]] = None
    after: Optional[CommandConfig] = None


@dataclass
class PipelineConfig(DataClassJsonMixin):
    inherit: Optional[str] = None
    mic: Optional[PipelineProgramConfig] = None
    wake: Optional[PipelineProgramConfig] = None
    vad: Optional[PipelineProgramConfig] = None
    asr: Optional[PipelineProgramConfig] = None
    intent: Optional[PipelineProgramConfig] = None
    handle: Optional[PipelineProgramConfig] = None
    tts: Optional[PipelineProgramConfig] = None
    snd: Optional[PipelineProgramConfig] = None


@dataclass
class SatelliteConfig(DataClassJsonMixin):
    mic: Optional[PipelineProgramConfig] = None
    wake: Optional[PipelineProgramConfig] = None
    remote: Optional[PipelineProgramConfig] = None
    snd: Optional[PipelineProgramConfig] = None


@dataclass
class ServerConfig(DataClassJsonMixin):
    command: str
    shell: bool = False
    template_args: Optional[Dict[str, Any]] = None


@dataclass
class Config(DataClassJsonMixin):
    programs: Dict[str, Dict[str, ProgramConfig]]
    """domain -> name -> program"""

    pipelines: Dict[str, PipelineConfig] = field(default_factory=dict)
    """name -> pipeline"""

    satellites: Dict[str, SatelliteConfig] = field(default_factory=dict)
    """name -> satellite"""

    servers: Dict[str, Dict[str, ServerConfig]] = field(default_factory=dict)
    """domain -> name -> server"""

    def __post_init__(self):
        # Handle inheritance
        # TODO: Catch loops
        pipeline_queue = list(self.pipelines.values())
        while pipeline_queue:
            child_pipeline = pipeline_queue.pop()
            if child_pipeline.inherit:
                parent_pipeline = self.pipelines[child_pipeline.inherit]
                if parent_pipeline.inherit:
                    # Need to process parent first
                    pipeline_queue.append(child_pipeline)
                    continue

                child_pipeline.mic = child_pipeline.mic or parent_pipeline.mic
                child_pipeline.wake = child_pipeline.wake or parent_pipeline.wake
                child_pipeline.vad = child_pipeline.vad or parent_pipeline.vad
                child_pipeline.asr = child_pipeline.asr or parent_pipeline.asr
                child_pipeline.intent = child_pipeline.intent or parent_pipeline.intent
                child_pipeline.handle = child_pipeline.handle or parent_pipeline.handle
                child_pipeline.tts = child_pipeline.tts or parent_pipeline.tts
                child_pipeline.snd = child_pipeline.snd or parent_pipeline.snd

                # Mark as done
                child_pipeline.inherit = None


# -----------------------------------------------------------------------------

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("config", nargs="+", help="Path to YAML configuration file")
    args = parser.parse_args()

    config_dict: Dict[str, Any] = {}
    for config_path in args.config:
        with open(config_path, "r", encoding="utf-8") as config_file:
            merge_dict(config_dict, safe_load(config_file))

    config = Config.from_dict(config_dict)
    print(config)


================================================
FILE: rhasspy3/configuration.yaml
================================================
programs:

  # -----------
  # Audio input
  # -----------
  mic:

    # apt-get install alsa-utils
    arecord:
      command: |
        arecord -q -D "${device}" -r 16000 -c 1 -f S16_LE -t raw -
      adapter: |
        mic_adapter_raw.py --samples-per-chunk 1024 --rate 16000 --width 2 --channels 1
      template_args:
        device: "default"

    # https://people.csail.mit.edu/hubert/pyaudio/docs/
    pyaudio:
      command: |
        script/events

    # https://python-sounddevice.readthedocs.io
    sounddevice:
      command: |
        script/events

    # apt-get install gstreamer1.0-tools gstreamer1.0-plugins-base
    gstreamer_udp:
      command: |
        gst-launch-1.0 -v udpsrc address=${address} port=${port} ! rawaudioparse use-sink-caps=false format=pcm pcm-format=${format} sample-rate=${rate} num-channels=${channels} ! audioconvert ! audioresample ! volume volume=3.0 ! level ! fdsink fd=1 sync=false
      template_args:
        format: s16le
        rate: 16000
        channels: 1
        address: "0.0.0.0"
        port: 5000
      adapter: |
        mic_adapter_raw.py --samples-per-chunk 1024 --rate 16000 --width 2 --channels 1

    udp_raw:
      command: |
        bin/udp_raw.py --host ${host} --port ${port}
      template_args:
        host: 0.0.0.0
        port: 5000

  # -------------------
  # Wake word detection
  # -------------------
  wake:

    # https://github.com/Picovoice/porcupine
    # Models: see script/list_models
    porcupine1:
      command: |
        .venv/bin/python3 bin/porcupine_stream.py --model "${model}"
      template_args:
        model: "porcupine_linux.ppn"

    # https://github.com/Kitt-AI/snowboy
    # Models included in share/
    # Custom wake word: https://github.com/rhasspy/snowboy-seasalt
    snowboy:
      command: |
        .venv/bin/python3 bin/snowboy_raw_text.py --model "${model}"
      adapter: |
        wake_adapter_raw.py
      template_args:
        model: "share/snowboy.umdl"

    # https://github.com/mycroftAI/mycroft-precise
    # Model included in share/
    precise-lite:
      command: |
        .venv/bin/python3 bin/precise.py "${model}"
      adapter: |
        wake_adapter_raw.py
      template_args:
        model: "share/hey_mycroft.tflite"

    # TODO: snowman
    # https://github.com/Thalhammer/snowman/

  # ------------------------
  # Voice activity detection
  # ------------------------
  vad:

    # https://github.com/snakers4/silero-vad
    # Model included in share/
    silero:
      command: |
        script/speech_prob "${model}"
      adapter: |
        vad_adapter_raw.py --rate 16000 --width 2 --channels 1 --samples-per-chunk 512
      template_args:
        model: "share/silero_vad.onnx"

    # https://pypi.org/project/webrtcvad/
    webrtcvad:
      command: |
        script/speech_prob ${sensitivity}
      adapter: |
        vad_adapter_raw.py --rate 16000 --width 2 --channels 1 --samples-per-chunk 480
      template_args:
        sensitivity: 3

    # Uses rms energy threshold.
    # For testing only.
    energy:
      command: |
        bin/energy_speech_prob.py --threshold ${threshold} --width 2 --samples-per-chunk 1024
      adapter: |
        vad_adapter_raw.py --rate 16000 --width 2 --channels 1 --samples-per-chunk 1024
      template_args:
        threshold: 300

  # --------------
  # Speech to text
  # --------------
  asr:

    # https://alphacephei.com/vosk/
    # Models: https://alphacephei.com/vosk/models
    vosk:
      command: |
        script/raw2text "${model}"
      adapter: |
        asr_adapter_raw2text.py --rate 16000 --width 2 --channels 1
      template_args:
        model: "${data_dir}/vosk-model-small-en-us-0.15"

    # Run server: asr vosk
    vosk.client:
      command: |
        client_unix_socket.py var/run/vosk.socket

    # https://stt.readthedocs.io
    # Models: https://coqui.ai/models/
    coqui-stt:
      command: |
        script/raw2text "${model}"
      adapter: |
        asr_adapter_raw2text.py --rate 16000 --width 2 --channels 1
      template_args:
        model: "${data_dir}/english_v1.0.0-large-vocab"

    # Run server: asr coqui-stt
    coqui-stt.client:
      command: |
        client_unix_socket.py var/run/coqui-stt.socket

    # https://github.com/cmusphinx/pocketsphinx
    # Models: https://github.com/synesthesiam/voice2json-profiles
    pocketsphinx:
      command: |
        script/raw2text "${model}"
      adapter: |
        asr_adapter_raw2text.py --rate 16000 --width 2 --channels 1
      template_args:
        model: "${data_dir}/en-us_pocketsphinx-cmu"

    # Run server: asr pocketsphinx
    pocketsphinx.client:
      command: |
        client_unix_socket.py var/run/pocketsphinx.socket

    # https://github.com/openai/whisper
    # Models: tiny.en,tiny,base.en,base,small.en,small,medium.en,medium,large-v1,large-v2,large
    # Languages: af,am,ar,as,az,ba,be,bg,bn,bo,br,bs,ca,cs,cy,da,de,el,en,es,et,
    # eu,fa,fi,fo,fr,gl,gu,ha,haw,he,hi,hr,ht,hu,hy,id,is,it,ja,jw,ka,kk,km,kn,
    # ko,la,lb,ln,lo,lt,lv,mg,mi,mk,ml,mn,mr,ms,mt,my,ne,nl,nn,no,oc,pa,pl,ps,
    # pt,ro,ru,sa,sd,si,sk,sl,sn,so,sq,sr,su,sv,sw,ta,te,tg,th,tk,tl,tr,tt,uk,
    # ur,uz,vi,yi,yo,zh
    whisper:
      command: |
        script/wav2text --language ${language} --model-directory "${data_dir}" "${model}" "{wav_file}"
      adapter: |
        asr_adapter_wav2text.py
      template_args:
        language: "en"
        model: "tiny.en"

    # Run server: asr whisper
    whisper.client:
      command: |
        client_unix_socket.py var/run/whisper.socket

    # https://github.com/ggerganov/whisper.cpp/
    # Models: https://huggingface.co/datasets/ggerganov/whisper.cpp
    whisper-cpp:
      command: |
        script/wav2text "${model}" "{wav_file}"
      adapter: |
        asr_adapter_wav2text.py
      template_args:
        model: "${data_dir}/ggml-tiny.en.bin"

    # Run server: asr whisper-cpp
    whisper-cpp.client:
      command: |
        client_unix_socket.py var/run/whisper-cpp.socket

    # https://github.com/guillaumekln/faster-whisper/
    # Models: https://github.com/rhasspy/models/releases/tag/v1.0
    # (asr_faster-whisper-*)
    faster-whisper:
      command: |
        script/wav2text --language ${language} "${model}" "{wav_file}"
      adapter: |
        asr_adapter_wav2text.py
      template_args:
        model: "${data_dir}/tiny-int8"
        language: "en"

    # Run server: asr faster-whisper
    faster-whisper.client:
      command: |
        client_unix_socket.py var/run/faster-whisper.socket


  # --------------
  # Text to speech
  # --------------
  tts:

    # https://github.com/rhasspy/piper/
    # Models: https://github.com/piper/piper/releases/tag/v0.0.2
    piper:
      command: |
        bin/piper --model "${model}" --output_file -
      adapter: |
        tts_adapter_text2wav.py
      template_args:
        model: "${data_dir}/en-us-blizzard_lessac-medium.onnx"
      install:
        command: |
          script/setup.py --destination '${program_dir}/bin'
        check_file: "${program_dir}/bin/piper"
        download:
          command: |
            script/download.py --destination '${data_dir}' '${model}'
        downloads:
          en-us_lessac:
            description: "U.S. English voice"
            check_file: "${data_dir}/en-us-blizzard_lessac-medium.onnx"


    # Run server: tts piper
    piper.client:
      command: |
        client_unix_socket.py var/run/piper.socket

    # https://github.com/rhasspy/larynx/
    # Models: https://rhasspy.github.io/larynx/
    larynx:
      command: |
        .venv/bin/larynx --voices-dir "${data_dir}" --voice "${voice}"
      adapter: |
        tts_adapter_text2wav.py
      template_args:
        voice: "en-us"

    # Run server: tts larynx
    larynx.client:
      command: |
        bin/larynx_client.py ${url} ${voice}
      template_args:
        url: "http://localhost:5002/process"
        voice: "en-us"
      adapter: |
        tts_adapter_text2wav.py

    # https://github.com/espeak-ng/espeak-ng/
    # apt-get install espeak-ng
    espeak-ng:
      command: |
        espeak-ng -v "${voice}" --stdin -w "{temp_file}"
      adapter: |
        tts_adapter_text2wav.py --temp_file
      template_args:
        voice: "en-us"

    # http://www.festvox.org/flite/
    # Models: https://github.com/rhasspy/models/releases/tag/v1.0
    # (tts_flite-*)
    flite:
      command: |
        flite -voice "${voice}" -o "{temp_file}"
      template_args:
        voice: "cmu_us_slt"
      adapter: |
        tts_adapter_text2wav.py --temp_file

    # http://www.cstr.ed.ac.uk/projects/festival/
    # apt-get install festival festival-<lang>
    festival:
      command: |
        text2wave -o "{temp_file}" -eval "(voice_${voice})"
      template_args:
        voice: "cmu_us_slt_arctic_hts"
      adapter: |
        tts_adapter_text2wav.py --temp_file

    # https://tts.readthedocs.io
    # Models: see script/list_models
    coqui-tts:
      command: |
        .venv/bin/tts --model_name "${model}" --out_path "{temp_file}" --text "{text}"
      adapter: |
        tts_adapter_text2wav.py --temp_file --text
      template_args:
        model: "tts_models/en/ljspeech/vits"
        speaker_id: ""

    coqui-tts.client:
      command: |
        tts_adapter_http.py "${url}" --param speaker_id "${speaker_id}"
      template_args:
        url: "http://localhost:5002/api/tts"
        speaker_id: ""

    # http://mary.dfki.de/
    # Models: https://github.com/synesthesiam/opentts/releases/tag/v2.1
    # (marytts-voices.tar.gz)
    marytts:
      command: |
        bin/marytts.py "${url}" "${voice}"
      template_args:
        url: "http://localhost:59125/process"
        voice: "cmu-slt-hsmm"
      adapter: |
        tts_adapter_text2wav.py

    # https://github.com/mycroftAI/mimic3
    # Models: https://mycroftai.github.io/mimic3-voices/
    mimic3:
      command: |
        .venv/bin/mimic3 --voices-dir "${data_dir}" --voice "${voice}" --stdout
      adapter: |
        tts_adapter_text2wav.py
      template_args:
        voice: "apope"

    # Run server: tts mimic3
    mimic3.client:
      command: |
        client_unix_socket.py var/run/mimic3.socket

  # ------------------
  # Intent recognition
  # ------------------
  intent:

    # Simple regex matching
    regex:
      command: |
        bin/regex.py -i TurnOn "turn on (the )?(?P<name>.+)"

    # TODO: fsticuffs
    # https://github.com/rhasspy/rhasspy-nlu

    # TODO: hassil
    # https://github.com/home-assistant/hassil

    # TODO: rapidfuzz
    # https://github.com/rhasspy/rhasspy-fuzzywuzzy

    # TODO: snips-nlu
    # https://snips-nlu.readthedocs.io

  # ---------------
  # Intent Handling
  # ---------------
  handle:

    # Text only: repeats transcript back
    repeat:
      command: |
        cat
      shell: true
      adapter: |
        handle_adapter_text.py

    # Text only: send to HA Assist
    # https://www.home-assistant.io/docs/assist
    #
    # 1. Change server url
    # 2. Put long-lived access token in etc/token
    home_assistant:
      command: |
        bin/converse.py --language "${language}" "${url}" "${token_file}"
      adapter: |
        handle_adapter_text.py
      template_args:
        url: "http://localhost:8123/api/conversation/process"
        token_file: "${data_dir}/token"
        language: ""

    # Intent only: answer English date/time requests
    date_time:
      command: |
        bin/date_time.py
      adapter: |
        handle_adapter_text.py

    # Intent only: produces canned response to regex intent system
    test:
      command: |
        name="$(jq -r .slots.name)"
        echo "Turned on ${name}."
      shell: true
      adapter: |
        handle_adapter_json.py

  # ------------
  # Audio output
  # ------------
  snd:

    # apt-get install alsa-utils
    aplay:
      command: |
        aplay -q -D "${device}" -r 22050 -f S16_LE -c 1 -t raw
      adapter: |
        snd_adapter_raw.py --rate 22050 --width 2 --channels 1
      template_args:
        device: "default"

    udp_raw:
      command: |
        bin/udp_raw.py --host "${host}" --port ${port}
      template_args:
        host: "127.0.0.1"
        port: 5001

  # -------------------
  # Remote base station
  # -------------------
  remote:

    # Sample tool to communicate with websocket API.
    # Use rhasspy3/bin/satellite_run.py
    websocket:
      command: |
        script/run "${uri}"
      template_args:
        uri: "ws://localhost:13331/pipeline/asr-tts"

# -----------------------------------------------------------------------------

servers:
  asr:
    vosk:
      command: |
        script/server "${model}"
      template_args:
        model: "${data_dir}/vosk-model-small-en-us-0.15"

    coqui-stt:
      command: |
        script/server "${model}"
      template_args:
        model: "${data_dir}/english_v1.0.0-large-vocab"

    pocketsphinx:
      command: |
        script/server "${model}"
      template_args:
        model: "${data_dir}/en-us_pocketsphinx-cmu"

    whisper:
      command: |
        script/server --model-directory "${data_dir}" --language ${language} --device ${device} ${model}
      template_args:
        language: "en"
        model: "tiny.en"
        device: "cpu"  # or cuda

    whisper-cpp:
      command: |
        script/server "${model}"
      template_args:
        model: "${data_dir}/ggml-tiny.en.bin"

    faster-whisper:
      command: |
        script/server --language ${language} --device ${device} "${model}"
      template_args:
        language: "en"
        model: "${data_dir}/tiny-int8"
        device: "cpu"  # or cuda

  tts:
    mimic3:
      command: |
        script/server --voice "${voice}" "${data_dir}"
      template_args:
        voice: "en_US/ljspeech_low"

    piper:
      command: |
        script/server "${model}"
      template_args:
        model: "${data_dir}/en-us-blizzard_lessac-medium.onnx"

    larynx:
      command: |
        script/server --voices-dir "${data_dir}" --host "${host}"
      template_args:
        host: "127.0.0.1"

    coqui-tts:
      command: |
        script/server


# -----------------------------------------------------------------------------

# Example satellites
# satellites:
#   default:
#     mic:
#       name: arecord
#     wake:
#       name: porcupine1
#     remote:
#       name: websocket
#     snd:
#       name: aplay

# -----------------------------------------------------------------------------

# Example pipelines
pipelines:

  # English (default)
  default:
    mic:
      name: arecord
    wake:
      name: porcupine1
    vad:
      name: silero
    asr:
      name: faster-whisper
    # intent:
    #   name: regex
    handle:
      name: repeat
    tts:
      name: piper
    snd:
      name: aplay

  # # German
  # de:
  #   inherit: default
  #   asr:
  #     name: faster-whisper
  #     template_args:
  #       language: de
  #   tts:
  #     name: piper
  #     template_args:
  #       model: "${data_dir}/de-thorsten-low.onnx"

  # # French
  # fr:
  #   inherit: default
  #   asr:
  #     name: faster-whisper
  #     template_args:
  #       language: fr
  #   tts:
  #     name: piper
  #     template_args:
  #       model: "${data_dir}/fr-siwis-low.onnx"

  # # Spanish
  # es:
  #   inherit: default
  #   asr:
  #     name: faster-whisper
  #     template_args:
  #       language: es
  #   tts:
  #     name: piper
  #     template_args:
  #       model: "${data_dir}/es-carlfm-low.onnx"

  # # Italian
  # it:
  #   inherit: default
  #   asr:
  #     name: faster-whisper
  #     template_args:
  #       language: it
  #   tts:
  #     name: piper
  #     template_args:
  #       model: "${data_dir}/it-riccardo_fasol-low.onnx"

  # # Catalan
  # ca:
  #   inherit: default
  #   asr:
  #     name: faster-whisper
  #     template_args:
  #       language: ca
  #   tts:
  #     name: piper
  #     template_args:
  #       model: "${data_dir}/ca-upc_ona-low.onnx"

  # # Danish
  # da:
  #   inherit: default
  #   asr:
  #     name: faster-whisper
  #     template_args:
  #       language: da
  #   tts:
  #     name: piper
  #     template_args:
  #       model: "${data_dir}/da-nst_talesyntese-medium.onnx"

  # # Dutch
  # nl:
  #   inherit: default
  #   asr:
  #     name: faster-whisper
  #     template_args:
  #       language: nl
  #   tts:
  #     name: piper
  #     template_args:
  #       model: "${data_dir}/nl-nathalie-low.onnx"

  # # Norwegian
  # no:
  #   inherit: default
  #   asr:
  #     name: faster-whisper
  #     template_args:
  #       language: no
  #   tts:
  #     name: piper
  #     template_args:
  #       model: "${data_dir}/no-talesyntese-medium.onnx"

  # # Ukrainian
  # uk:
  #   inherit: default
  #   asr:
  #     name: faster-whisper
  #     template_args:
  #       language: uk
  #   tts:
  #     name: piper
  #     template_args:
  #       model: "${data_dir}/uk-lada-low.onnx"

  # # Vietnamese
  # vi:
  #   inherit: default
  #   asr:
  #     name: faster-whisper
  #     template_args:
  #       language: vi
  #   tts:
  #     name: piper
  #     template_args:
  #       model: "${data_dir}/vi-vivos-low.onnx"

  # # Chinese
  # zh:
  #   inherit: default
  #   asr:
  #     name: faster-whisper
  #     template_args:
  #       language: zh
  #   tts:
  #     name: piper
  #     template_args:
  #       model: "${data_dir}/zh-cn-huayan-low.onnx"


================================================
FILE: rhasspy3/core.py
================================================
import logging
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, Union

from .config import Config
from .util import merge_dict
from .util.jaml import safe_load

_DIR = Path(__file__).parent
_DEFAULT_CONFIG = _DIR / "configuration.yaml"
_LOGGER = logging.getLogger(__name__)


@dataclass
class Rhasspy:
    config: Config
    config_dir: Path
    base_dir: Path
    config_dict: Dict[str, Any]

    @property
    def programs_dir(self) -> Path:
        """Directory where programs are installed."""
        return self.config_dir / "programs"

    @property
    def data_dir(self) -> Path:
        """Directory where models are downloaded."""
        return self.config_dir / "data"

    @staticmethod
    def load(config_dir: Union[str, Path]) -> "Rhasspy":
        """Load and merge configuration.yaml files from rhasspy3 and config dir."""
        config_dir = Path(config_dir)
        config_paths = [
            _DEFAULT_CONFIG,
            config_dir / "configuration.yaml",
        ]
        config_dict: Dict[str, Any] = {}

        for config_path in config_paths:
            if config_path.exists():
                _LOGGER.debug("Loading config from %s", config_path)
                with config_path.open(encoding="utf-8") as config_file:
                    merge_dict(config_dict, safe_load(config_file))
            else:
                _LOGGER.debug("Skipping %s", config_path)

        return Rhasspy(
            config=Config.from_dict(config_dict),
            config_dir=config_dir,
            config_dict=config_dict,
            base_dir=_DIR.parent,
        )


================================================
FILE: rhasspy3/event.py
================================================
import asyncio
import json
import sys
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import IO, Any, Dict, Iterable, Optional

_TYPE = "type"
_DATA = "data"
_PAYLOAD_LENGTH = "payload_length"
_NEWLINE = "\n".encode()


@dataclass
class Event:
    type: str
    data: Dict[str, Any] = field(default_factory=dict)
    payload: Optional[bytes] = None

    def to_dict(self) -> Dict[str, Any]:
        return {_TYPE: self.type, _DATA: self.data}

    @staticmethod
    def from_dict(event_dict: Dict[str, Any]) -> "Event":
        return Event(type=event_dict["type"], data=event_dict.get("data", {}))


class Eventable(ABC):
    @abstractmethod
    def event(self) -> Event:
        pass

    @staticmethod
    @abstractmethod
    def is_type(event_type: str) -> bool:
        pass

    def to_dict(self) -> Dict[str, Any]:
        return self.event().data


async def async_read_event(reader: asyncio.StreamReader) -> Optional[Event]:
    try:
        json_line = await reader.readline()
        if not json_line:
            return None

        event_dict = json.loads(json_line)
        payload_length = event_dict.get(_PAYLOAD_LENGTH)

        payload: Optional[bytes] = None
        if payload_length is not None:
            payload = await reader.readexactly(payload_length)

        return Event(
            type=event_dict[_TYPE], data=event_dict.get(_DATA), payload=payload
        )
    except KeyboardInterrupt:
        pass

    return None


async def async_write_event(event: Event, writer: asyncio.StreamWriter):
    event_dict: Dict[str, Any] = event.to_dict()
    if event.payload:
        event_dict[_PAYLOAD_LENGTH] = len(event.payload)

    json_line = json.dumps(event_dict, ensure_ascii=False)

    try:
        writer.writelines((json_line.encode(), _NEWLINE))

        if event.payload:
            writer.write(event.payload)

        await writer.drain()
    except KeyboardInterrupt:
        pass


async def async_write_events(events: Iterable[Event], writer: asyncio.StreamWriter):
    coros = []
    for event in events:
        event_dict: Dict[str, Any] = event.to_dict()
        if event.payload:
            event_dict[_PAYLOAD_LENGTH] = len(event.payload)

        json_line = json.dumps(event_dict, ensure_ascii=False)
        writer.writelines((json_line.encode(), _NEWLINE))

        if event.payload:
            writer.write(event.payload)

        coros.append(writer.drain())

    try:
        await asyncio.gather(*coros)
    except KeyboardInterrupt:
        pass


def read_event(reader: Optional[IO[bytes]] = None) -> Optional[Event]:
    if reader is None:
        reader = sys.stdin.buffer

    try:
        json_line = reader.readline()

        if not json_line:
            return None

        event_dict = json.loads(json_line)
        payload_length = event_dict.get(_PAYLOAD_LENGTH)

        payload: Optional[bytes] = None
        if payload_length is not None:
            payload = reader.read(payload_length)

        return Event(
            type=event_dict[_TYPE], data=event_dict.get(_DATA), payload=payload
        )
    except KeyboardInterrupt:
        pass

    return None


def write_event(event: Event, writer: Optional[IO[bytes]] = None):
    if writer is None:
        writer = sys.stdout.buffer

    event_dict: Dict[str, Any] = event.to_dict()
    if event.payload:
        event_dict[_PAYLOAD_LENGTH] = len(event.payload)

    json_line = json.dumps(event_dict, ensure_ascii=False)

    try:
        writer.writelines((json_line.encode(), _NEWLINE))

        if event.payload:
            writer.write(event.payload)

        writer.flush()
    except KeyboardInterrupt:
        pass


================================================
FILE: rhasspy3/handle.py
================================================
"""Intent recognition and handling."""
import logging
from dataclasses import dataclass
from typing import Any, Dict, Optional, Union

from .asr import Transcript
from .config import PipelineProgramConfig
from .core import Rhasspy
from .event import Event, Eventable, async_read_event, async_write_event
from .intent import Intent, NotRecognized
from .program import create_process

DOMAIN = "handle"
_HANDLED_TYPE = "handled"
_NOT_HANDLED_TYPE = "not-handled"

_LOGGER = logging.getLogger(__name__)


@dataclass
class Handled(Eventable):
    text: Optional[str] = None

    @staticmethod
    def is_type(event_type: str) -> bool:
        return event_type == _HANDLED_TYPE

    def event(self) -> Event:
        data: Dict[str, Any] = {}
        if self.text is not None:
            data["text"] = self.text

        return Event(type=_HANDLED_TYPE, data=data)

    @staticmethod
    def from_event(event: Event) -> "Handled":
        assert event.data is not None
        return Handled(text=event.data.get("text"))


@dataclass
class NotHandled(Eventable):
    text: Optional[str] = None

    @staticmethod
    def is_type(event_type: str) -> bool:
        return event_type == _NOT_HANDLED_TYPE

    def event(self) -> Event:
        data: Dict[str, Any] = {}
        if self.text is not None:
            data["text"] = self.text

        return Event(type=_NOT_HANDLED_TYPE, data=data)

    @staticmethod
    def from_event(event: Event) -> "NotHandled":
        assert event.data is not None
        return NotHandled(text=event.data.get("text"))


async def handle(
    rhasspy: Rhasspy,
    program: Union[str, PipelineProgramConfig],
    handle_input: Union[Intent, NotRecognized, Transcript],
) -> Optional[Union[Handled, NotHandled]]:
    handle_result: Optional[Union[Handled, NotHandled]] = None
    async with (await create_process(rhasspy, DOMAIN, program)) as handle_proc:
        assert handle_proc.stdin is not None
        assert handle_proc.stdout is not None

        _LOGGER.debug("handle: input=%s", handle_input)
        await async_write_event(handle_input.event(), handle_proc.stdin)
        while True:
            event = await async_read_event(handle_proc.stdout)
            if event is None:
                break

            if Handled.is_type(event.type):
                handle_result = Handled.from_event(event)
            elif NotHandled.is_type(event.type):
                handle_result = NotHandled.from_event(event)

    _LOGGER.debug("handle: %s", handle_result)

    return handle_result


================================================
FILE: rhasspy3/intent.py
================================================
"""Intent recognition and handling."""
import logging
from dataclasses import asdict, dataclass, field
from typing import Any, Dict, List, Optional, Union

from .config import PipelineProgramConfig
from .core import Rhasspy
from .event import Event, Eventable, async_read_event, async_write_event
from .program import create_process

DOMAIN = "intent"
_RECOGNIZE_TYPE = "recognize"
_INTENT_TYPE = "intent"
_NOT_RECOGNIZED_TYPE = "not-recognized"

_LOGGER = logging.getLogger(__name__)


@dataclass
class Entity:
    name: str
    value: Optional[Any] = None


@dataclass
class Recognize(Eventable):
    text: str

    @staticmethod
    def is_type(event_type: str) -> bool:
        return event_type == _RECOGNIZE_TYPE

    def event(self) -> Event:
        data: Dict[str, Any] = {"text": self.text}
        return Event(type=_RECOGNIZE_TYPE, data=data)

    @staticmethod
    def from_event(event: Event) -> "Recognize":
        assert event.data is not None
        return Recognize(text=event.data["text"])


@dataclass
class Intent(Eventable):
    name: str
    entities: List[Entity] = field(default_factory=list)

    @staticmethod
    def is_type(event_type: str) -> bool:
        return event_type == _INTENT_TYPE

    def event(self) -> Event:
        data: Dict[str, Any] = {"name": self.name}
        if self.entities:
            data["entities"] = [asdict(entity) for entity in self.entities]

        return Event(type=_INTENT_TYPE, data=data)

    @staticmethod
    def from_dict(data: Dict[str, Any]) -> "Intent":
        entity_dicts = data.get("entities")
        if entity_dicts:
            entities: List[Entity] = [
                Entity(**entity_dict) for entity_dict in entity_dicts
            ]
        else:
            entities = []

        return Intent(name=data["name"], entities=entities)

    @staticmethod
    def from_event(event: Event) -> "Intent":
        assert event.data is not None
        return Intent.from_dict(event.data)

    def to_rhasspy(self) -> Dict[str, Any]:
        return {
            "intent": {
                "name": self.name,
            },
            "entities": [
                {"entity": entity.name, "value": entity.value}
                for entity in self.entities
            ],
            "slots": {entity.name: entity.value for entity in self.entities},
        }


@dataclass
class NotRecognized(Eventable):
    text: Optional[str] = None

    @staticmethod
    def is_type(event_type: str) -> bool:
        return event_type == _NOT_RECOGNIZED_TYPE

    def event(self) -> Event:
        data: Dict[str, Any] = {}
        if self.text is not None:
            data["text"] = self.text

        return Event(type=_NOT_RECOGNIZED_TYPE, data=data)

    @staticmethod
    def from_event(event: Event) -> "NotRecognized":
        assert event.data is not None
        return NotRecognized(text=event.data.get("text"))


async def recognize(
    rhasspy: Rhasspy, program: Union[str, PipelineProgramConfig], text: str
) -> Optional[Union[Intent, NotRecognized]]:
    result: Optional[Union[Intent, NotRecognized]] = None
    async with (await create_process(rhasspy, DOMAIN, program)) as intent_proc:
        assert intent_proc.stdin is not None
        assert intent_proc.stdout is not None

        _LOGGER.debug("recognize: text='%s'", text)
        await async_write_event(Recognize(text=text).event(), intent_proc.stdin)
        while True:
            intent_event = await async_read_event(intent_proc.stdout)
            if intent_event is None:
                break

            if Intent.is_type(intent_event.type):
                result = Intent.from_event(intent_event)
                break

            if NotRecognized.is_type(intent_event.type):
                result = NotRecognized.from_event(intent_event)
                break

    _LOGGER.debug("recognize: %s", result)

    return result


================================================
FILE: rhasspy3/mic.py
================================================
"""Audio input from a microphone."""
DOMAIN = "mic"


================================================
FILE: rhasspy3/pipeline.py
================================================
"""Full voice loop (pipeline)."""
import io
import logging
from collections import deque
from dataclasses import dataclass, fields
from enum import Enum
from typing import IO, Any, Deque, Dict, Optional, Union

from .asr import DOMAIN as ASR_DOMAIN
from .asr import Transcript, transcribe
from .config import CommandConfig, PipelineConfig, PipelineProgramConfig
from .core import Rhasspy
from .event import Event, Eventable, async_read_event
from .handle import Handled, NotHandled, handle
from .intent import Intent, NotRecognized, recognize
from .mic import DOMAIN as MIC_DOMAIN
from .program import create_process, run_command
from .snd import play
from .tts import synthesize
from .util.dataclasses_json import DataClassJsonMixin
from .vad import segment
from .wake import Detection, detect

_LOGGER = logging.getLogger(__name__)


@dataclass
class PipelineResult(DataClassJsonMixin):
    """Result of running all or part of a pipeline."""

    wake_detection: Optional[Detection] = None
    asr_transcript: Optional[Transcript] = None
    intent_result: Optional[Union[Intent, NotRecognized]] = None
    handle_result: Optional[Union[Handled, NotHandled]] = None

    def to_event_dict(self) -> Dict[str, Any]:
        event_dict: Dict[str, Any] = {}
        for field in fields(self):
            value = getattr(self, field.name)
            if value is None:
                event_dict[field.name] = {}
            else:
                assert isinstance(value, Eventable)
                event_dict[field.name] = value.event().to_dict()

        return event_dict


class StopAfterDomain(str, Enum):
    WAKE = "wake"
    ASR = "asr"
    INTENT = "intent"
    HANDLE = "handle"
    TTS = "tts"


async def run(
    rhasspy: Rhasspy,
    pipeline: Union[str, PipelineConfig],
    samples_per_chunk: int,
    asr_chunks_to_buffer: int = 0,
    mic_program: Optional[Union[str, PipelineProgramConfig]] = None,
    wake_program: Optional[Union[str, PipelineProgramConfig]] = None,
    wake_detection: Optional[Detection] = None,
    asr_program: Optional[Union[str, PipelineProgramConfig]] = None,
    asr_wav_in: Optional[IO[bytes]] = None,
    asr_transcript: Optional[Transcript] = None,
    vad_program: Optional[Union[str, PipelineProgramConfig]] = None,
    intent_result: Optional[Union[Intent, NotRecognized]] = None,
    intent_program: Optional[Union[str, PipelineProgramConfig]] = None,
    handle_result: Optional[Union[Handled, NotHandled]] = None,
    handle_program: Optional[Union[str, PipelineProgramConfig]] = None,
    tts_wav_in: Optional[IO[bytes]] = None,
    tts_program: Optional[Union[str, PipelineProgramConfig]] = None,
    snd_program: Optional[Union[str, PipelineProgramConfig]] = None,
    stop_after: Optional[StopAfterDomain] = None,
) -> PipelineResult:
    """Run a full or partial pipeline."""
    pipeline_result = PipelineResult()

    if isinstance(pipeline, str):
        pipeline = rhasspy.config.pipelines[pipeline]

    mic_program = mic_program or pipeline.mic

    wake_program = wake_program or pipeline.wake
    wake_after = pipeline.wake.after if pipeline.wake else None

    asr_program = asr_program or pipeline.asr
    asr_after = pipeline.asr.after if pipeline.asr else None

    vad_program = vad_program or pipeline.vad
    intent_program = intent_program or pipeline.intent
    handle_program = handle_program or pipeline.handle
    tts_program = tts_program or pipeline.tts
    snd_program = snd_program or pipeline.snd

    skip_asr = (
        (intent_result is not None)
        or (handle_result is not None)
        or (tts_wav_in is not None)
    )

    if not skip_asr:
        # Speech to text
        if asr_wav_in is not None:
            # WAV input
            if stop_after == StopAfterDomain.WAKE:
                return pipeline_result

            asr_wav_in.seek(0)
            assert asr_program is not None, "No asr program"
            asr_transcript = await transcribe(
                rhasspy, asr_program, asr_wav_in, samples_per_chunk
            )

            if asr_after is not None:
                await run_command(rhasspy, asr_after)
        elif asr_transcript is None:
            # Mic input
            assert mic_program is not None, "No asr program"

            if wake_program is None:
                # No wake
                assert asr_program is not None, "No asr program"
                assert vad_program is not None, "No vad program"
                await _mic_asr(
                    rhasspy, mic_program, asr_program, vad_program, pipeline_result
                )
            elif stop_after == StopAfterDomain.WAKE:
                # Audio input, wake word detection, segmentation, speech to text
                assert wake_program is not None, "No vad program"
                await _mic_wake(
                    rhasspy,
                    mic_program,
                    wake_program,
                    pipeline_result,
                    wake_detection=wake_detection,
                )
                return pipeline_result
            else:
                assert wake_program is not None, "No vad program"
                assert asr_program is not None, "No asr program"
                assert vad_program is not None, "No vad program"
                await _mic_wake_asr(
                    rhasspy,
                    mic_program,
                    wake_program,
                    asr_program,
                    vad_program,
                    pipeline_result,
                    asr_chunks_to_buffer=asr_chunks_to_buffer,
                    wake_detection=wake_detection,
                    wake_after=wake_after,
                )

            if asr_after is not None:
                await run_command(rhasspy, asr_after)

            asr_transcript = pipeline_result.asr_transcript
            pipeline_result.asr_transcript = asr_transcript

    if (stop_after == StopAfterDomain.ASR) or (
        (intent_program is None) and (handle_program is None)
    ):
        return pipeline_result

    # Text to intent
    if (asr_transcript is not None) and (intent_program is not None):
        pipeline_result.asr_transcript = asr_transcript
        intent_result = await recognize(
            rhasspy, intent_program, asr_transcript.text or ""
        )
        pipeline_result.intent_result = intent_result

    # Handle intent
    handle_input: Optional[Union[Intent, NotRecognized, Transcript]] = None
    if intent_result is not None:
        pipeline_result.intent_result = intent_result
        handle_input = intent_result
    elif asr_transcript is not None:
        handle_input = asr_transcript

    if (stop_after == StopAfterDomain.INTENT) or (handle_program is None):
        return pipeline_result

    if (handle_input is not None) and (handle_result is None):
        assert handle_program is not None, "Pipeline is missing handle"
        handle_result = await handle(rhasspy, handle_program, handle_input)
        pipeline_result.handle_result = handle_result

    if (stop_after == StopAfterDomain.HANDLE) or (tts_program is None):
        return pipeline_result

    # Text to speech
    if handle_result is not None:
        pipeline_result.handle_result = handle_result
        if handle_result.text:
            assert tts_program is not None, "Pipeline is missing tts"
            tts_wav_in = io.BytesIO()
            await synthesize(rhasspy, tts_program, handle_result.text, tts_wav_in)
        else:
            _LOGGER.debug("No text returned from handle")

    if (stop_after == StopAfterDomain.TTS) or (snd_program is None):
        return pipeline_result

    # Audio output
    if tts_wav_in is not None:
        tts_wav_in.seek(0)
        assert snd_program is not None, "Pipeline is missing snd"
        await play(rhasspy, snd_program, tts_wav_in, samples_per_chunk)

    return pipeline_result


async def _mic_wake(
    rhasspy: Rhasspy,
    mic_program: Union[str, PipelineProgramConfig],
    wake_program: Union[str, PipelineProgramConfig],
    pipeline_result: PipelineResult,
    wake_detection: Optional[Detection] = None,
):
    """Just wake word detection."""
    async with (await create_process(rhasspy, MIC_DOMAIN, mic_program)) as mic_proc:
        assert mic_proc.stdout is not None
        if wake_detection is None:
            wake_detection = await detect(
                rhasspy,
                wake_program,
                mic_proc.stdout,
            )

        if wake_detection is not None:
            pipeline_result.wake_detection = wake_detection
        else:
            _LOGGER.debug("run: no wake word detected")


async def _mic_asr(
    rhasspy: Rhasspy,
    mic_program: Union[str, PipelineProgramConfig],
    asr_program: Union[str, PipelineProgramConfig],
    vad_program: Union[str, PipelineProgramConfig],
    pipeline_result: PipelineResult,
    asr_chunks_to_buffer: int = 0,
):
    """Just asr transcription (+ silence detection)."""
    async with (await create_process(rhasspy, MIC_DOMAIN, mic_program)) as mic_proc, (
        await create_process(rhasspy, ASR_DOMAIN, asr_program)
    ) as asr_proc:
        assert mic_proc.stdout is not None
        assert asr_proc.stdin is not None
        assert asr_proc.stdout is not None

        await segment(
            rhasspy,
            vad_program,
            mic_proc.stdout,
            asr_proc.stdin,
        )
        while True:
            asr_event = await async_read_event(asr_proc.stdout)
            if asr_event is None:
                break

            if Transcript.is_type(asr_event.type):
                pipeline_result.asr_transcript = Transcript.from_event(asr_event)
                break


async def _mic_wake_asr(
    rhasspy: Rhasspy,
    mic_program: Union[str, PipelineProgramConfig],
    wake_program: Union[str, PipelineProgramConfig],
    asr_program: Union[str, PipelineProgramConfig],
    vad_program: Union[str, PipelineProgramConfig],
    pipeline_result: PipelineResult,
    asr_chunks_to_buffer: int = 0,
    wake_detection: Optional[Detection] = None,
    wake_after: Optional[CommandConfig] = None,
):
    """Wake word detect + asr transcription (+ silence detection)."""
    chunk_buffer: Optional[Deque[Event]] = (
        deque(maxlen=asr_chunks_to_buffer) if asr_chunks_to_buffer > 0 else None
    )

    async with (await create_process(rhasspy, MIC_DOMAIN, mic_program)) as mic_proc, (
        await create_process(rhasspy, ASR_DOMAIN, asr_program)
    ) as asr_proc:
        assert mic_proc.stdout is not None
        assert asr_proc.stdin is not None
        assert asr_proc.stdout is not None

        if wake_detection is None:
            wake_detection = await detect(
                rhasspy, wake_program, mic_proc.stdout, chunk_buffer
            )

        if wake_detection is not None:
            if wake_after is not None:
                await run_command(rhasspy, wake_after)

            pipeline_result.wake_detection = wake_detection
            await segment(
                rhasspy,
                vad_program,
                mic_proc.stdout,
                asr_proc.stdin,
                chunk_buffer,
            )
            while True:
                asr_event = await async_read_event(asr_proc.stdout)
                if asr_event is None:
                    break

                if Transcript.is_type(asr_event.type):
                    pipeline_result.asr_transcript = Transcript.from_event(asr_event)
                    break
        else:
            _LOGGER.debug("run: no wake word detected")


================================================
FILE: rhasspy3/program.py
================================================
"""Utilities for creating processes."""
import asyncio
import logging
import os
import shlex
import string
from asyncio.subprocess import PIPE, Process
from typing import Optional, Union

from .config import CommandConfig, PipelineProgramConfig, ProgramConfig
from .core import Rhasspy
from .util import merge_dict

_LOGGER = logging.getLogger(__name__)


class MissingProgramConfigError(Exception):
    pass


class ProcessContextManager:
    """Wrapper for an async process that terminates on exit."""

    def __init__(self, proc: Process, name: str):
        self.proc = proc
        self.name = name

    async def __aenter__(self):
        return self.proc

    async def __aexit__(self, exc_type, exc, tb):
        try:
            if self.proc.returncode is None:
                self.proc.terminate()
                await self.proc.wait()
        except ProcessLookupError:
            # Expected when process has already exited
            pass
        except Exception:
            _LOGGER.exception("Unexpected error stopping process: %s", self.name)


async def create_process(
    rhasspy: Rhasspy, domain: str, name: Union[str, PipelineProgramConfig]
) -> ProcessContextManager:
    pipeline_config: Optional[PipelineProgramConfig] = None
    if isinstance(name, PipelineProgramConfig):
        pipeline_config = name
        name = pipeline_config.name

    assert name, f"No program name for domain {domain}"

    # The "." is special in program names:
    # it means to use the directory of "base" in <base>.<name>.
    #
    # This is used for <base>.client programs, which are just scripts in the
    # "base" directory that communicate with their respective servers.
    if "." in name:
        base_name = name.split(".", maxsplit=1)[0]
    else:
        base_name = name

    program_config: Optional[ProgramConfig] = rhasspy.config.programs.get(
        domain, {}
    ).get(name)
    assert program_config is not None, f"No config for program {domain}/{name}"
    assert isinstance(program_config, ProgramConfig)

    # Directory where this program is installed
    program_dir = rhasspy.programs_dir / domain / base_name

    # Directory where this program should store data
    data_dir = rhasspy.data_dir / domain / base_name

    # ${variables} available within program/pipeline template_args
    default_mapping = {
        "program_dir": str(program_dir.absolute()),
        "data_dir": str(data_dir.absolute()),
    }

    command_str = program_config.command.strip()
    command_mapping = dict(default_mapping)
    if program_config.template_args:
        # Substitute within program template args
        args_mapping = dict(program_config.template_args)
        for arg_name, arg_str in args_mapping.items():
            if not isinstance(arg_str, str):
                continue

            arg_template = string.Template(arg_str)
            args_mapping[arg_name] = arg_template.safe_substitute(default_mapping)

        command_mapping.update(args_mapping)

    if pipeline_config is not None:
        if pipeline_config.template_args:
            # Substitute within pipeline template args
            args_mapping = dict(pipeline_config.template_args)
            for arg_name, arg_str in args_mapping.items():
                if not isinstance(arg_str, str):
                    continue

                arg_template = string.Template(arg_str)
                args_mapping[arg_name] = arg_template.safe_substitute(default_mapping)

            merge_dict(command_mapping, args_mapping)

    # Substitute template args
    command_template = string.Template(command_str)
    command_str = command_template.safe_substitute(command_mapping)

    working_dir = rhasspy.programs_dir / domain / base_name
    env = dict(os.environ)

    # Add rhasspy3/bin to $PATH
    env["PATH"] = f'{rhasspy.base_dir}/bin:${env["PATH"]}'

    # Ensure stdout is flushed for Python programs
    env["PYTHONUNBUFFERED"] = "1"

    cwd = working_dir if working_dir.is_dir() else None

    if program_config.shell:
        if program_config.adapter:
            program, *args = shlex.split(program_config.adapter)
            args.append("--shell")
            args.append(command_str)

            _LOGGER.debug("(shell): %s %s", program, args)
            proc = await asyncio.create_subprocess_exec(
                program,
                *args,
                stdin=PIPE,
                stdout=PIPE,
                cwd=cwd,
                env=env,
            )
        else:
            _LOGGER.debug("(shell): %s %s", program, args)
            proc = await asyncio.create_subprocess_shell(
                command_str,
                stdin=PIPE,
                stdout=PIPE,
                cwd=cwd,
                env=env,
            )
    else:
        if program_config.adapter:
            program, *args = shlex.split(program_config.adapter)
            args.append(command_str)
        else:
            program, *args = shlex.split(command_str)

        _LOGGER.debug("%s %s", program, args)
        proc = await asyncio.create_subprocess_exec(
            program,
            *args,
            stdin=PIPE,
            stdout=PIPE,
            cwd=cwd,
            env=env,
        )

    return ProcessContextManager(proc, name=name)


async def run_command(rhasspy: Rhasspy, command_config: CommandConfig) -> int:
    env = dict(os.environ)

    # Add rhasspy3/bin to $PATH
    env["PATH"] = f'{rhasspy.base_dir}/bin:${env["PATH"]}'

    # Ensure stdout is flushed for Python programs
    env["PYTHONUNBUFFERED"] = "1"

    if command_config.shell:
        proc = await asyncio.create_subprocess_shell(
            command_config.command,
            env=env,
        )
    else:
        program, *args = shlex.split(command_config.command)
        proc = await asyncio.create_subprocess_exec(
            program,
            *args,
            env=env,
        )

    await proc.wait()
    assert proc.returncode is not None

    return proc.returncode


================================================
FILE: rhasspy3/py.typed
================================================


================================================
FILE: rhasspy3/remote.py
================================================
"""Remote communication with a base station."""
DOMAIN = "remote"


================================================
FILE: rhasspy3/snd.py
================================================
"""Audio output to speakers."""
import wave
from dataclasses import dataclass
from typing import IO, AsyncIterable, Optional, Union

from .audio import AudioChunk, AudioStop, wav_to_chunks
from .config import PipelineProgramConfig
from .core import Rhasspy
from .event import Event, Eventable, async_read_event, async_write_event
from .program import create_process

DOMAIN = "snd"
_PLAYED_TYPE = "played"


@dataclass
class Played(Eventable):
    @staticmethod
    def is_type(event_type: str) -> bool:
        return event_type == _PLAYED_TYPE

    def event(self) -> Event:
        return Event(type=_PLAYED_TYPE)

    @staticmethod
    def from_event(event: Event) -> "Played":
        return Played()


async def play(
    rhasspy: Rhasspy,
    program: Union[str, PipelineProgramConfig],
    wav_in: IO[bytes],
    samples_per_chunk: int,
) -> Optional[Played]:
    wav_file: wave.Wave_read = wave.open(wav_in, "rb")
    with wav_file:
        async with (await create_process(rhasspy, DOMAIN, program)) as snd_proc:
            assert snd_proc.stdin is not None
            assert snd_proc.stdout is not None

            timestamp: Optional[int] = None
            for chunk in wav_to_chunks(wav_file, samples_per_chunk=samples_per_chunk):
                await async_write_event(chunk.event(), snd_proc.stdin)
                timestamp = chunk.timestamp

            await async_write_event(
                AudioStop(timestamp=timestamp).event(), snd_proc.stdin
            )

            # Wait for confimation
            while True:
                event = await async_read_event(snd_proc.stdout)
                if event is None:
                    break

                if Played.is_type(event.type):
                    return Played.from_event(event)

    return None


async def play_stream(
    rhasspy: Rhasspy,
    program: Union[str, PipelineProgramConfig],
    audio_stream: AsyncIterable[bytes],
    rate: int,
    width: int,
    channels: int,
) -> Optional[Played]:
    async with (await create_process(rhasspy, DOMAIN, program)) as snd_proc:
        assert snd_proc.stdin is not None
        assert snd_proc.stdout is not None

        async for audio_bytes in audio_stream:
            chunk = AudioChunk(rate, width, channels, audio_bytes)
            await async_write_event(chunk.event(), snd_proc.stdin)

        await async_write_event(AudioStop().event(), snd_proc.stdin)

        # Wait for confimation
        while True:
            event = await async_read_event(snd_proc.stdout)
            if event is None:
                break

            if Played.is_type(event.type):
                return Played.from_event(event)

    return None


================================================
FILE: rhasspy3/tts.py
================================================
"""Text to speech."""
import wave
from dataclasses import dataclass
from typing import IO, AsyncIterable, Union

from .audio import AudioChunk, AudioStart, AudioStop
from .config import PipelineProgramConfig
from .core import Rhasspy
from .event import Event, Eventable, async_read_event, async_write_event
from .program import create_process

DOMAIN = "tts"
_SYNTHESIZE_TYPE = "synthesize"


@dataclass
class Synthesize(Eventable):
    """Request to synthesize audio from text."""

    text: str
    """Text to synthesize."""

    @staticmethod
    def is_type(event_type: str) -> bool:
        return event_type == _SYNTHESIZE_TYPE

    def event(self) -> Event:
        return Event(type=_SYNTHESIZE_TYPE, data={"text": self.text})

    @staticmethod
    def from_event(event: Event) -> "Synthesize":
        assert event.data is not None
        return Synthesize(text=event.data["text"])


async def synthesize(
    rhasspy: Rhasspy,
    program: Union[str, PipelineProgramConfig],
    text: str,
    wav_out: IO[bytes],
):
    """Synthesize audio from text to WAV output."""
    async with (await create_process(rhasspy, DOMAIN, program)) as tts_proc:
        assert tts_proc.stdin is not None
        assert tts_proc.stdout is not None

        await async_write_event(Synthesize(text=text).event(), tts_proc.stdin)

        wav_file: wave.Wave_write = wave.open(wav_out, "wb")
        wav_params_set = False
        with wav_file:
            while True:
                event = await async_read_event(tts_proc.stdout)
                if event is None:
                    break

                if AudioStart.is_type(event.type):
                    if not wav_params_set:
                        start = AudioStart.from_event(event)
                        wav_file.setframerate(start.rate)
                        wav_file.setsampwidth(start.width)
                        wav_file.setnchannels(start.channels)
                        wav_params_set = True
                elif AudioChunk.is_type(event.type):
                    chunk = AudioChunk.from_event(event)

                    if not wav_params_set:
                        wav_file.setframerate(chunk.rate)
                        wav_file.setsampwidth(chunk.width)
                        wav_file.setnchannels(chunk.channels)
                        wav_params_set = True

                    wav_file.writeframes(chunk.audio)
                elif AudioStop.is_type(event.type):
                    break


async def synthesize_stream(
    rhasspy: Rhasspy,
    program: Union[str, PipelineProgramConfig],
    text: str,
) -> AsyncIterable[AudioChunk]:
    """Synthesize audio from text to a raw stream."""
    async with (await create_process(rhasspy, DOMAIN, program)) as tts_proc:
        assert tts_proc.stdin is not None
        assert tts_proc.stdout is not None

        await async_write_event(Synthesize(text=text).event(), tts_proc.stdin)

        while True:
            event = await async_read_event(tts_proc.stdout)
            if event is None:
                break

            if AudioChunk.is_type(event.type):
                yield AudioChunk.from_event(event)
            elif AudioStop.is_type(event.type):
                break


================================================
FILE: rhasspy3/util/__init__.py
================================================
import collections


def merge_dict(base_dict, new_dict):
    """Merges new_dict into base_dict."""
    for key, value in new_dict.items():
        if key in base_dict:
            old_value = base_dict[key]
            if isinstance(old_value, collections.abc.MutableMapping):
                # Combine dictionary
                assert isinstance(
                    value, collections.abc.Mapping
                ), f"Not a dict: {value}"
                merge_dict(old_value, value)
            elif isinstance(old_value, collections.abc.MutableSequence):
                # Combine list
                assert isinstance(
                    value, collections.abc.Sequence
                ), f"Not a list: {value}"
                old_value.extend(value)
            else:
                # Overwrite
                base_dict[key] = value
        else:
            base_dict[key] = value


================================================
FILE: rhasspy3/util/dataclasses_json.py
================================================
"""Implement a tiny subset of dataclasses_json for config."""
from collections.abc import Mapping, Sequence
from dataclasses import asdict, fields, is_dataclass
from typing import Any, Dict, Type


class DataClassJsonMixin:
    """Adds from_dict to dataclass."""

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> Any:
        """Parse dataclasses recursively."""
        kwargs: Dict[str, Any] = {}

        cls_fields = {field.name: field for field in fields(cls)}
        for key, value in data.items():
            field = cls_fields[key]
            if is_dataclass(field.type):
                assert issubclass(field.type, DataClassJsonMixin), field.type
                kwargs[key] = field.type.from_dict(value)
            else:
                kwargs[key] = _decode(value, field.type)

        return cls(**kwargs)

    def to_dict(self) -> Dict[str, Any]:
        """Alias for asdict."""
        return asdict(self)


def _decode(value: Any, target_type: Type) -> Any:
    """Decode value using (possibly generic) type."""
    if is_dataclass(target_type):
        assert issubclass(target_type, DataClassJsonMixin), target_type
        return target_type.from_dict(value) if value is not None else None

    if hasattr(target_type, "__args__"):
        # Optional[T]
        if type(None) in target_type.__args__:
            optional_type = target_type.__args__[0]
            return _decode(value, optional_type)

        # List[T]
        if isinstance(value, Sequence):
            list_type = target_type.__args__[0]
            return [_decode(item, list_type) for item in value]

        # Dict[str, T]
        if isinstance(value, Mapping):
            value_type = target_type.__args__[1]
            return {
                map_key: _decode(map_value, value_type)
                for map_key, map_value in value.items()
            }

    return value


================================================
FILE: rhasspy3/util/jaml.py
================================================
"""JAML is JSON objects as a *severely* restricted subset of YAML."""
from collections.abc import Mapping
from enum import Enum, auto
from typing import IO, Any, Dict, List, Union

_INDENT = 2


def safe_load(fp: IO[str]) -> Dict[str, Any]:
    loader = JamlLoader()
    for line in fp:
        loader.process_line(line)

    return loader.output


class LoaderState(Enum):
    IN_DICT = auto()
    LITERAL = auto()


class JamlLoader:
    def __init__(self) -> None:
        self.output: Dict[str, Any] = {}
        self.indent = 0
        self.state = LoaderState.IN_DICT
        self.literal = ""
        self.target_stack: List[Union[Dict[str, Any], str]] = [self.output]

    def process_line(self, line: str):
        line_stripped = line.strip()
        if line_stripped.startswith("#") or (not line_stripped):
            # Comment or empty line
            return

        line_indent = len(line) - len(line.lstrip())
        if self.state == LoaderState.LITERAL:
            # Multi-line literal
            if line_indent < self.indent:
                # Done with literal
                assert len(self.target_stack) > 1
                key = self.target_stack.pop()
                assert isinstance(key, str)

                target = self.target_stack[-1]
                assert isinstance(target, Mapping)
                target[key] = self.literal.strip()

                # Reset indent and state
                self.indent -= _INDENT
                self.state = LoaderState.IN_DICT
            else:
                # Add to literal
                self.literal += "\n" + line.strip()

        if self.state == LoaderState.IN_DICT:
            self._add_key(line, line_indent)

    def _add_key(self, line, line_indent: int):
        while line_indent < self.indent:
            self.target_stack.pop()
            self.indent -= _INDENT

        assert self.target_stack
        target = self.target_stack[-1]
        assert isinstance(target, Mapping)

        parts = line.split(":", maxsplit=1)
        assert len(parts) == 2
        key = parts[0].strip()
        value = parts[1].strip()

        assert not key.startswith("-"), "Lists are not supported"

        # Remove inline comments
        if value and (value[0] in ("'", '"')):
            # Just keep what's in quotes.
            # This doesn't take escapes, etc. into account.
            end_quote = value.find(value[0], 1)
            value = value[: end_quote + 1]
        else:
            # Remove comment
            value = value.split("#", maxsplit=1)[0]

        value_is_dict = True

        if value:
            value_is_dict = False

            if value[0] in ("'", '"'):
                # Remove quotes
                value = value[1:-1]
            elif value == "|":
                self.literal = ""
                self.target_stack.append(key)
                self.indent += _INDENT
                self.state = LoaderState.LITERAL
                return
            elif value.lower() in ("true", "false"):
                value = value.lower() == "true"
            else:
                try:
                    value = int(value)
                except ValueError:
                    try:
                        value = float(value)
                    except ValueError:
                        pass

        if value_is_dict:
            new_target: Dict[str, Any] = {}
            target[key] = new_target
            self.target_stack.append(new_target)
            self.indent += _INDENT
        else:
            target[key] = value


================================================
FILE: rhasspy3/vad.py
================================================
"""Voice activity detection."""
import asyncio
import logging
import time
from dataclasses import dataclass
from typing import Iterable, Optional, Union

from .audio import AudioChunk, AudioStop
from .config import PipelineProgramConfig
from .core import Rhasspy
from .event import Event, Eventable, async_read_event, async_write_event
from .program import create_process

DOMAIN = "vad"
_STARTED_TYPE = "voice-started"
_STOPPED_TYPE = "voice-stopped"

_LOGGER = logging.getLogger(__name__)


@dataclass
class VoiceStarted(Eventable):
    """User has started speaking."""

    timestamp: Optional[int] = None
    """Milliseconds"""

    @staticmethod
    def is_type(event_type: str) -> bool:
        return event_type == _STARTED_TYPE

    def event(self) -> Event:
        return Event(
            type=_STARTED_TYPE,
            data={"timestamp": self.timestamp},
        )

    @staticmethod
    def from_event(event: Event) -> "VoiceStarted":
        return VoiceStarted(timestamp=event.data.get("timestamp"))


@dataclass
class VoiceStopped(Eventable):
    """User has stopped speaking."""

    timestamp: Optional[int] = None
    """Milliseconds"""

    @staticmethod
    def is_type(event_type: str) -> bool:
        return event_type == _STOPPED_TYPE

    def event(self) -> Event:
        return Event(
            type=_STOPPED_TYPE,
            data={"timestamp": self.timestamp},
        )

    @staticmethod
    def from_event(event: Event) -> "VoiceStopped":
        return VoiceStopped(timestamp=event.data.get("timestamp"))


@dataclass
class Segmenter:
    """Segments an audio stream by speech."""

    speech_seconds: float
    """Seconds of speech before voice command has started."""

    silence_seconds: float
    """Seconds of silence after voice command has ended."""

    timeout_seconds: float
    """Maximum number of seconds before stopping with timeout=True."""

    reset_seconds: float
    """Seconds before reset start/stop time counters."""

    started: bool = False
    """True if user has started speaking"""

    start_timestamp: Optional[int] = None
    """Timestamp when user started speaking."""

    stopped: bool = False
    """True if user has stopped speaking"""

    stop_timestamp: Optional[int] = None
    """Timestamp when user stopped speaking."""

    timeout: bool = False
    """True if stopping was due to timeout."""

    _in_command: bool = False
    """True if inside voice command."""

    _speech_seconds_left: float = 0.0
    """Seconds left before considering voice command as started."""

    _silence_seconds_left: float = 0.0
    """Seconds left before considering voice command as stopped."""

    _timeout_seconds_left: float = 0.0
    """Seconds left before considering voice command timed out."""

    _reset_seconds_left: float = 0.0
    """Seconds left before resetting start/stop time counters."""

    def __post_init__(self):
        self.reset()

    def reset(self):
        """Resets all counters and state."""
        self._speech_seconds_left = self.speech_seconds
        self._silence_seconds_left = self.silence_seconds
        self._timeout_seconds_left = self.timeout_seconds
        self._reset_seconds_left = self.reset_seconds
        self._in_command = False
        self.start_timestamp = None
        self.stop_timestamp = None

    def process(
        self, chunk: bytes, chunk_seconds: float, is_speech: bool, timestamp: int
    ):
        """Process a single chunk of audio."""
        self._timeout_seconds_left -= chunk_seconds
        if self._timeout_seconds_left <= 0:
            self.stop_timestamp = timestamp
            self.timeout = True
            self.stopped = True
            return

        if not self._in_command:
            if is_speech:
                self._reset_seconds_left = self.reset_seconds

                if self.start_timestamp is None:
                    self.start_timestamp = timestamp

                self._speech_seconds_left -= chunk_seconds
                if self._speech_seconds_left <= 0:
                    # Inside voice command
                    self._in_command = True
                    self.started = True
            else:
                # Reset if enough silence
                self._reset_seconds_left -= chunk_seconds
                if self._reset_seconds_left <= 0:
                    self._speech_seconds_left = self.speech_seconds
                    self.start_timestamp = None
        else:
            if not is_speech:
                self._reset_seconds_left = self.reset_seconds
                self._silence_seconds_left -= chunk_seconds
                if self._silence_seconds_left <= 0:
                    self.stop_timestamp = timestamp
                    self.stopped = True
            else:
                # Reset if enough speech
                self._reset_seconds_left -= chunk_seconds
                if self._reset_seconds_left <= 0:
                    self._silence_seconds_left = self.silence_seconds


async def segment(
    rhasspy: Rhasspy,
    program: Union[str, PipelineProgramConfig],
    mic_in: asyncio.StreamReader,
    asr_out: asyncio.StreamWriter,
    chunk_buffer: Optional[Iterable[Event]] = None,
):
    """Segments an audio input stream, passing audio chunks to asr."""
    async with (await create_process(rhasspy, DOMAIN, program)) as vad_proc:
        assert vad_proc.stdin is not None
        assert vad_proc.stdout is not None

        if chunk_buffer:
            # Buffered chunks from wake word detection
            for buffered_event in chunk_buffer:
                await asyncio.gather(
                    async_write_event(buffered_event, vad_proc.stdin),
                    async_write_event(buffered_event, asr_out),
                )

        mic_task = asyncio.create_task(async_read_event(mic_in))
        vad_task = asyncio.create_task(async_read_event(vad_proc.stdout))
        pending = {mic_task, vad_task}

        timestamp = 0
        in_command = False
        is_first_chunk = True

        while True:
            done, pending = await asyncio.wait(
                pending, return_when=asyncio.FIRST_COMPLETED
            )
            if mic_task in done:
                mic_event = mic_task.result()
                if mic_event is None:
                    break

                # Process chunk
                if AudioChunk.is_type(mic_event.type):
                    if is_first_chunk:
                        is_first_chunk = False
                        _LOGGER.debug("segment: processing audio")

                    chunk = AudioChunk.from_event(mic_event)
                    timestamp = (
                        chunk.timestamp
                        if chunk.timestamp is not None
                        else time.monotonic_ns()
                    )

                    if in_command:
                        # Speech recognition and silence detection
                        await asyncio.gather(
                            async_write_event(mic_event, asr_out),
                            async_write_event(mic_event, vad_proc.stdin),
                        )
                    else:
                        # Voice detection
                        await asyncio.gather(
                            async_write_event(mic_event, asr_out),
                            async_write_event(mic_event, vad_proc.stdin),
                        )

                # Next chunk
                mic_task = asyncio.create_task(async_read_event(mic_in))
                pending.add(mic_task)

            if vad_task in done:
                vad_event = vad_task.result()
                if vad_event is None:
                    break

                if VoiceStarted.is_type(vad_event.type):
                    if not in_command:
                        # Start of voice command
                        in_command = True
                        _LOGGER.debug("segment: speaking started")
                elif VoiceStopped.is_type(vad_event.type):
                    # End of voice command
                    _LOGGER.debug("segment: speaking ended")
                    await async_write_event(
                        AudioStop(timestamp=timestamp).event(), asr_out
                    )
                    break

                # Next VAD event
                vad_task = asyncio.create_task(async_read_event(vad_proc.stdout))
                pending.add(vad_task)


================================================
FILE: rhasspy3/wake.py
================================================
"""Wake word detection"""
import asyncio
import logging
from dataclasses import dataclass
from typing import AsyncIterable, MutableSequence, Optional, Union

from .audio import AudioChunk, AudioStart, AudioStop
from .config import PipelineProgramConfig
from .core import Rhasspy
from .event import Event, Eventable, async_read_event, async_write_event
from .program import create_process

DOMAIN = "wake"
_DETECTION_TYPE = "detection"
_NOT_DETECTED_TYPE = "not-detected"

_LOGGER = logging.getLogger(__name__)


@dataclass
class Detection(Eventable):
    """Wake word was detected."""

    name: Optional[str] = None
    """Name of model."""

    timestamp: Optional[int] = None
    """Timestamp of audio chunk with detection"""

    @staticmethod
    def is_type(event_type: str) -> bool:
        return event_type == _DETECTION_TYPE

    def event(self) -> Event:
        return Event(
            type=_DETECTION_TYPE, data={"name": self.name, "timestamp": self.timestamp}
        )

    @staticmethod
    def from_event(event: Event) -> "Detection":
        assert event.data is not None
        return Detection(
            name=event.data.get("name"), timestamp=event.data.get("timestamp")
        )


@dataclass
class NotDetected(Eventable):
    """Audio stream ended before wake word was detected."""

    @staticmethod
    def is_type(event_type: str) -> bool:
        return event_type == _NOT_DETECTED_TYPE

    def event(self) -> Event:
        return Event(type=_NOT_DETECTED_TYPE)

    @staticmethod
    def from_event(event: Event) -> "NotDetected":
        return NotDetected()


async def detect(
    rhasspy: Rhasspy,
    program: Union[str, PipelineProgramConfig],
    mic_in: asyncio.StreamReader,
    chunk_buffer: Optional[MutableSequence[Event]] = None,
) -> Optional[Detection]:
    """Try to detect wake word in an audio stream."""
    detection: Optional[Detection] = None
    async with (await create_process(rhasspy, DOMAIN, program)) as wake_proc:
        assert wake_proc.stdin is not None
        assert wake_proc.stdout is not None

        mic_task = asyncio.create_task(async_read_event(mic_in))
        wake_task = asyncio.create_task(async_read_event(wake_proc.stdout))
        pending = {mic_task, wake_task}
        is_first_chunk = True

        while True:
            done, pending = await asyncio.wait(
                pending, return_when=asyncio.FIRST_COMPLETED
            )
            if mic_task in done:
                mic_event = mic_task.result()
                if mic_event is None:
                    break

                if AudioChunk.is_type(mic_event.type):
                    if is_first_chunk:
                        is_first_chunk = False
                        _LOGGER.debug("detect: processing audio")

                    await async_write_event(mic_event, wake_proc.stdin)
                    if chunk_buffer is not None:
                        # Buffer chunks for asr
                        chunk_buffer.append(mic_event)

                if detection is None:
                    # Next chunk
                    mic_task = asyncio.create_task(async_read_event(mic_in))
                    pending.add(mic_task)

            if detection is not None:
                # Ensure last mic task is finished
                break

            if wake_task in done:
                wake_event = wake_task.result()
                if wake_event is None:
                    break

                if Detection.is_type(wake_event.type):
                    detection = Detection.from_event(wake_event)
                else:
                    # Next wake event
                    wake_task = asyncio.create_task(async_read_event(wake_proc.stdout))
                    pending.add(wake_task)

    _LOGGER.debug("detect: %s", detection)

    return detection


async def detect_stream(
    rhasspy: Rhasspy,
    program: Union[str, PipelineProgramConfig],
    audio_stream: AsyncIterable[bytes],
    rate: int,
    width: int,
    channels: int,
) -> Optional[Detection]:
    """Try to detect the wake word in a raw audio stream."""
    async with (await create_process(rhasspy, DOMAIN, program)) as wake_proc:
        assert wake_proc.stdin is not None
        assert wake_proc.stdout is not None

        timestamp = 0
        await async_write_event(
            AudioStart(rate, width, channels, timestamp=timestamp).event(),
            wake_proc.stdin,
        )

        async def next_chunk():
            """Get the next chunk from audio stream."""
            async for chunk_bytes in audio_stream:
                return chunk_bytes

        audio_task = asyncio.create_task(next_chunk())
        wake_task = asyncio.create_task(async_read_event(wake_proc.stdout))
        pending = {audio_task, wake_task}

        while True:
            done, pending = await asyncio.wait(
                pending, return_when=asyncio.FIRST_COMPLETED
            )
            if audio_task in done:
                chunk_bytes = audio_task.result()
                if chunk_bytes:
                    chunk = AudioChunk(rate, width, channels, chunk_bytes)
                    await async_write_event(chunk.event(), wake_proc.stdin)
                    timestamp += chunk.milliseconds

                    audio_task = asyncio.create_task(next_chunk())
                    pending.add(audio_task)
                else:
                    wake_task.cancel()
                    await async_write_event(AudioStop().event(), wake_proc.stdin)
                    wake_task = asyncio.create_task(async_read_event(wake_proc.stdout))
                    pending = {wake_task}

            if wake_task in done:
                wake_event = wake_task.result()
                if wake_event is None:
                    break

                if Detection.is_type(wake_event.type):
                    detection = Detection.from_event(wake_event)
                    _LOGGER.debug("detect: %s", detection)
                    return detection

                if NotDetected.is_type(wake_event.type):
                    break

                wake_task = asyncio.create_task(async_read_event(wake_proc.stdout))
                pending.add(wake_task)

        _LOGGER.debug("Not detected")

    return None


================================================
FILE: rhasspy3_http_api/__init__.py
================================================


================================================
FILE: rhasspy3_http_api/__main__.py
================================================
import argparse
import asyncio
import logging
import os
import subprocess
import threading
from pathlib import Path
from typing import Tuple
from uuid import uuid4

import hypercorn
import quart_cors
from quart import Quart, Response, jsonify, render_template, send_from_directory

from rhasspy3.audio import DEFAULT_SAMPLES_PER_CHUNK
from rhasspy3.core import Rhasspy

from .asr import add_asr
from .handle import add_handle
from .intent import add_intent
from .pipeline import add_pipeline
from .snd import add_snd
from .tts import add_tts
from .wake import add_wake

_DIR = Path(__file__).parent
_LOGGER = logging.getLogger("rhasspy")


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-c",
        "--config",
        default=_DIR.parent / "config",
        help="Configuration directory",
    )
    parser.add_argument(
        "--pipeline", default="default", help="Name of default pipeline to run"
    )
    parser.add_argument(
        "--server",
        nargs=2,
        action="append",
        metavar=("domain", "name"),
        help="Domain/name of server(s) to run",
    )
    parser.add_argument(
        "--host", default="0.0.0.0", help="Host of HTTP server (default: 0.0.0.0)"
    )
    parser.add_argument(
        "--port", type=int, default=13331, help="Port of HTTP server (default: 13331)"
    )
    parser.add_argument(
        "--samples-per-chunk", type=int, default=DEFAULT_SAMPLES_PER_CHUNK
    )
    parser.add_argument("--asr-chunks-to-buffer", type=int, default=0)
    parser.add_argument("--debug", action="store_true", help="Log DEBUG messages")
    args = parser.parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    rhasspy = Rhasspy.load(args.config)
    pipeline = rhasspy.config.pipelines[args.pipeline]

    template_dir = _DIR / "templates"
    img_dir = _DIR / "img"
    css_dir = _DIR / "css"
    js_dir = _DIR / "js"

    app = Quart("rhasspy3", template_folder=str(template_dir))
    app.secret_key = str(uuid4())

    # Monkey patch quart_cors to get rid of non-standard requirement that
    # websockets have origin header set.
    def _apply_websocket_cors(*args, **kwargs):
        """Allow null origin."""
        pass

    # pylint: disable=protected-access
    quart_cors._apply_websocket_cors = _apply_websocket_cors
    app = quart_cors.cors(app, allow_origin="*")

    add_wake(app, rhasspy, pipeline, args)
    add_asr(app, rhasspy, pipeline, args)
    add_intent(app, rhasspy, pipeline, args)
    add_handle(app, rhasspy, pipeline, args)
    add_snd(app, rhasspy, pipeline, args)
    add_tts(app, rhasspy, pipeline, args)
    add_pipeline(app, rhasspy, pipeline, args)

    @app.errorhandler(Exception)
    async def handle_error(err) -> Tuple[str, int]:
        """Return error as text."""
        _LOGGER.exception(err)
        return (f"{err.__class__.__name__}: {err}", 500)

    @app.route("/", methods=["GET"])
    async def page_index() -> str:
        """Render main web page."""
        return await render_template("index.html", config=rhasspy.config)

    @app.route("/satellite.html", methods=["GET"])
    async def page_satellite() -> str:
        """Render satellite web page."""
        return await render_template("satellite.html", config=rhasspy.config)

    @app.route("/img/<path:filename>", methods=["GET"])
    async def img(filename) -> Response:
        """Image static endpoint."""
        return await send_from_directory(img_dir, filename)

    @app.route("/css/<path:filename>", methods=["GET"])
    async def css(filename) -> Response:
        """css static endpoint."""
        return await send_from_directory(css_dir, filename)

    @app.route("/js/<path:filename>", methods=["GET"])
    async def js(filename) -> Response:
        """Javascript static endpoint."""
        return await send_from_directory(js_dir, filename)

    @app.route("/config", methods=["GET"])
    async def http_config() -> Response:
        return jsonify(rhasspy.config)

    @app.route("/version", methods=["POST"])
    async def http_version() -> str:
        return "3.0.0"

    hyp_config = hypercorn.config.Config()
    hyp_config.bind = [f"{args.host}:{args.port}"]

    if args.server:
        run_servers(rhasspy, args.server)

    try:
        asyncio.run(hypercorn.asyncio.serve(app, hyp_config))
    except KeyboardInterrupt:
        pass


# -----------------------------------------------------------------------------


def run_servers(rhasspy, servers):
    def run_server(domain: str, name: str):
        try:
            command = [
                "server_run.py",
                "--config",
                str(rhasspy.config_dir),
                domain,
                name,
            ]
            env = dict(os.environ)
            env["PATH"] = f'{rhasspy.base_dir}/bin:{env["PATH"]}'
            _LOGGER.debug(command)
            _LOGGER.info("Starting %s %s", domain, name)
            subprocess.run(command, check=True, cwd=rhasspy.base_dir, env=env)
        except Exception:
            _LOGGER.exception(
                "Unexpected error running server: domain=%s, name=%s", domain, name
            )

    for domain, server_name in servers:
        threading.Thread(
            target=run_server, args=(domain, server_name), daemon=True
        ).start()


# -----------------------------------------------------------------------------

if __name__ == "__main__":
    main()


================================================
FILE: rhasspy3_http_api/asr.py
================================================
import argparse
import io
import json
import logging

from quart import Quart, Response, jsonify, render_template, request, websocket

from rhasspy3.asr import transcribe, transcribe_stream
from rhasspy3.audio import (
    DEFAULT_IN_CHANNELS,
    DEFAULT_IN_RATE,
    DEFAULT_IN_WIDTH,
    AudioStop,
)
from rhasspy3.config import PipelineConfig
from rhasspy3.core import Rhasspy
from rhasspy3.event import Event

_LOGGER = logging.getLogger(__name__)


def add_asr(
    app: Quart, rhasspy: Rhasspy, pipeline: PipelineConfig, args: argparse.Namespace
) -> None:
    @app.route("/asr.html", methods=["GET"])
    async def http_asr() -> str:
        return await render_template("asr.html", config=rhasspy.config)

    @app.route("/asr/transcribe", methods=["POST"])
    async def http_asr_transcribe() -> Response:
        """Transcribe a WAV file."""
        wav_bytes = await request.data
        asr_pipeline = (
            rhasspy.config.pipelines[request.args["pipeline"]]
            if "pipeline" in request.args
            else pipeline
        )

        asr_program = request.args.get("asr_program") or asr_pipeline.asr
        assert asr_program, "Missing program for asr"

        samples_per_chunk = int(
            request.args.get("samples_per_chunk", args.samples_per_chunk)
        )

        _LOGGER.debug("transcribe: asr=%s, wav=%s byte(s)", asr_program, len(wav_bytes))

        with io.BytesIO(wav_bytes) as wav_in:
            transcript = await transcribe(
                rhasspy, asr_program, wav_in, samples_per_chunk
            )

        _LOGGER.debug("transcribe: transcript='%s'", transcript)
        return jsonify(transcript.event().to_dict() if transcript is not None else {})

    @app.websocket("/asr/transcribe")
    async def ws_asr_transcribe():
        """Transcribe a websocket audio stream."""
        asr_pipeline = (
            rhasspy.config.pipelines[websocket.args["pipeline"]]
            if "pipeline" in websocket.args
            else pipeline
        )
        asr_program = websocket.args.get("asr_program") or asr_pipeline.asr
        assert asr_program, "Missing program for asr"

        vad_program = websocket.args.get("vad_program") or asr_pipeline.vad
        assert vad_program, "Missing program for vad"

        rate = int(websocket.args.get("rate", DEFAULT_IN_RATE))
        width = int(websocket.args.get("width", DEFAULT_IN_WIDTH))
        channels = int(websocket.args.get("channels", DEFAULT_IN_CHANNELS))

        _LOGGER.debug(
            "transcribe: asr=%s, vad=%s, rate=%s, width=%s, channels=%s",
            asr_program,
            vad_program,
            rate,
            width,
            channels,
        )

        async def audio_stream():
            while True:
                data = await websocket.receive()
                if not data:
                    # Empty message signals stop
                    break

                if isinstance(data, bytes):
                    # Raw audio
                    yield data
                else:
                    event = Event.from_dict(json.loads(data))
                    if AudioStop.is_type(event.type):
                        # Stop event
                        break

        transcript = await transcribe_stream(
            rhasspy, asr_program, vad_program, audio_stream(), rate, width, channels
        )

        _LOGGER.debug("transcribe: transcript='%s'", transcript)

        await websocket.send_json(
            transcript.event().to_dict() if transcript is not None else {}
        )


================================================
FILE: rhasspy3_http_api/css/main.css
================================================
#header {
    border-bottom: 1px solid black;
}

li {
    padding-bottom: 1rem;
}


================================================
FILE: rhasspy3_http_api/handle.py
================================================
import argparse
import json
import logging
from typing import Optional, Union

from quart import Quart, Response, jsonify, request

from rhasspy3.asr import Transcript
from rhasspy3.config import PipelineConfig
from rhasspy3.core import Rhasspy
from rhasspy3.event import Event
from rhasspy3.handle import handle
from rhasspy3.intent import Intent, NotRecognized

_LOGGER = logging.getLogger(__name__)
_HANDLE_INPUT_TYPES = (Transcript, Intent, NotRecognized)


def add_handle(
    app: Quart, rhasspy: Rhasspy, pipeline: PipelineConfig, args: argparse.Namespace
) -> None:
    @app.route("/handle/handle", methods=["GET", "POST"])
    async def http_handle_handle() -> Response:
        """Handle text or intent JSON."""
        if request.method == "GET":
            data = request.args["input"]
        else:
            data = (await request.data).decode()

        handle_pipeline = (
            rhasspy.config.pipelines[request.args["pipeline"]]
            if "pipeline" in request.args
            else pipeline
        )

        # Input can be plain text or a JSON intent
        handle_input: Optional[Union[Intent, NotRecognized, Transcript]] = None
        if request.content_type == "application/json":
            # Try to parse either an "intent" or "not-recognized" event
            event = Event(json.loads(data))
            for event_class in _HANDLE_INPUT_TYPES:
                assert issubclass(event_class, _HANDLE_INPUT_TYPES)
                if event_class.is_type(event.type):
                    handle_input = event_class.from_event(event)
        else:
            # Assume plain text
            handle_input = Transcript(data)

        assert handle_input is not None, "Invalid input"

        handle_program = request.args.get("handle_program") or handle_pipeline.handle
        assert handle_program is not None, "Missing program for handle"
        _LOGGER.debug("handle: handle=%s, input='%s'", handle_program, handle_input)

        result = await handle(rhasspy, handle_program, handle_input)
        _LOGGER.debug("handle: result=%s", result)

        return jsonify(result.event().to_dict() if result is not None else {})


================================================
FILE: rhasspy3_http_api/intent.py
================================================
import argparse
import logging

from quart import Quart, Response, jsonify, request

from rhasspy3.config import PipelineConfig
from rhasspy3.core import Rhasspy
from rhasspy3.intent import recognize

_LOGGER = logging.getLogger(__name__)


def add_intent(
    app: Quart, rhasspy: Rhasspy, pipeline: PipelineConfig, args: argparse.Namespace
) -> None:
    @app.route("/intent/recognize", methods=["GET", "POST"])
    async def http_intent_recognize() -> Response:
        """Recognize intent from text."""
        if request.method == "GET":
            text = request.args["text"]
        else:
            text = (await request.data).decode()

        intent_pipeline = (
            rhasspy.config.pipelines[request.args["pipeline"]]
            if "pipeline" in request.args
            else pipeline
        )
        intent_program = request.args.get("intent_program") or intent_pipeline.intent
        assert intent_program, "Missing program for intent"
        _LOGGER.debug("recognize: intent=%s, text='%s'", intent_program, text)

        result = await recognize(rhasspy, intent_program, text)
        _LOGGER.debug("recognize: result=%s", result)

        return jsonify(result.event().to_dict() if result is not None else {})


================================================
FILE: rhasspy3_http_api/js/main.js
================================================
function q(selector) {
    return document.querySelector(selector);
}

// https://stackoverflow.com/questions/62093473/how-to-play-raw-audio-files
function buildWaveHeader(opts) {
  const numFrames =      opts.numFrames;
  const numChannels =    opts.numChannels || 2;
  const sampleRate =     opts.sampleRate || 44100;
  const bytesPerSample = opts.bytesPerSample || 2;
  const format =         opts.format

  const blockAlign = numChannels * bytesPerSample;
  const byteRate = sampleRate * blockAlign;
  const dataSize = numFrames * blockAlign;

  const buffer = new ArrayBuffer(44);
  const dv = new DataView(buffer);

  let p = 0;

  function writeString(s) {
    for (let i = 0; i < s.length; i++) {
      dv.setUint8(p + i, s.charCodeAt(i));
    }
    p += s.length;
  }

  function writeUint32(d) {
    dv.setUint32(p, d, true);
    p += 4;
  }

  function writeUint16(d) {
    dv.setUint16(p, d, true);
    p += 2;
  }

  writeString('RIFF');              // ChunkID
  writeUint32(dataSize + 36);       // ChunkSize
  writeString('WAVE');              // Format
  writeString('fmt ');              // Subchunk1ID
  writeUint32(16);                  // Subchunk1Size
  writeUint16(format);              // AudioFormat
  writeUint16(numChannels);         // NumChannels
  writeUint32(sampleRate);          // SampleRate
  writeUint32(byteRate);            // ByteRate
  writeUint16(blockAlign);          // BlockAlign
  writeUint16(bytesPerSample * 8);  // BitsPerSample
  writeString('data');              // Subchunk2ID
  writeUint32(dataSize);            // Subchunk2Size

  return buffer;
}


================================================
FILE: rhasspy3_http_api/js/recorder.worklet.js
================================================
class RecorderProcessor extends AudioWorkletProcessor {
  constructor() {
    super();
  }

  process(inputList, outputList, parameters) {
    if (inputList[0].length < 1) {
      return true;
    }

    const float32Data = inputList[0][0];
    const int16Data = new Int16Array(float32Data.length);

    for (let i = 0; i < float32Data.length; i++) {
      const s = Math.max(-1, Math.min(1, float32Data[i]));
      int16Data[i] = s < 0 ? s * 0x8000 : s * 0x7fff;
    }

    this.port.postMessage(int16Data);

    return true;
  }
};

registerProcessor("recorder.worklet", RecorderProcessor);


================================================
FILE: rhasspy3_http_api/pipeline.py
================================================
import argparse
import asyncio
import io
import logging
from enum import Enum
from typing import IO, Optional, Union

from quart import Quart, Response, jsonify, render_template, request, websocket

from rhasspy3.asr import DOMAIN as ASR_DOMAIN
from rhasspy3.asr import Transcript
from rhasspy3.audio import (
    DEFAULT_IN_CHANNELS,
    DEFAULT_IN_RATE,
    DEFAULT_IN_WIDTH,
    DEFAULT_OUT_CHANNELS,
    DEFAULT_OUT_RATE,
    DEFAULT_OUT_WIDTH,
    AudioChunk,
    AudioChunkConverter,
    AudioStart,
    AudioStop,
)
from rhasspy3.config import PipelineConfig
from rhasspy3.core import Rhasspy
from rhasspy3.event import Event, async_read_event, async_write_event
from rhasspy3.handle import Handled, NotHandled, handle
from rhasspy3.intent import Intent, NotRecognized
from rhasspy3.pipeline import StopAfterDomain
from rhasspy3.pipeline import run as run_pipeline
from rhasspy3.program import create_process
from rhasspy3.tts import synthesize_stream
from rhasspy3.vad import DOMAIN as VAD_DOMAIN
from rhasspy3.vad import VoiceStarted, VoiceStopped
from rhasspy3.wake import Detection

_LOGGER = logging.getLogger(__name__)


class StartAfterDomain(str, Enum):
    WAKE = "wake"
    ASR = "asr"
    INTENT = "intent"
    HANDLE = "handle"
    TTS = "tts"


def add_pipeline(
    app: Quart, rhasspy: Rhasspy, pipeline: PipelineConfig, args: argparse.Namespace
) -> None:
    @app.route("/pipeline.html", methods=["GET"])
    async def http_pipeline() -> str:
        return await render_template("pipeline.html", config=rhasspy.config)

    @app.route("/pipeline/run", methods=["POST"])
    async def http_pipeline_run() -> Response:
        running_pipeline = (
            rhasspy.config.pipelines[request.args["pipeline"]]
            if "pipeline" in request.args
            else pipeline
        )
        mic_program = request.args.get("mic_program") or running_pipeline.mic
        wake_program = request.args.get("wake_program") or running_pipeline.wake
        vad_program = request.args.get("vad_program") or running_pipeline.vad
        asr_program = request.args.get("asr_program") or running_pipeline.asr
        intent_program = request.args.get("intent_program") or running_pipeline.intent
        handle_program = request.args.get("handle_program") or running_pipeline.handle
        tts_program = request.args.get("tts_program") or running_pipeline.tts
        snd_program = request.args.get("snd_program") or running_pipeline.snd
        #
        start_after = request.args.get("start_after")
        stop_after = request.args.get("stop_after")
        #
        samples_per_chunk = int(
            request.args.get("samples_per_chunk", args.samples_per_chunk)
        )
        asr_chunks_to_buffer = int(
            request.args.get("asr_chunks_to_buffer", args.asr_chunks_to_buffer)
        )

        _LOGGER.debug(
            "run: "
            "mic=%s,"
            "wake=%s,"
            "vad=%s,"
            "asr=%s,"
            "intent=%s,"
            "handle=%s,"
            "tts=%s,"
            "snd=%s,"
            "start_after=%s "
            "stop_after=%s",
            mic_program,
            wake_program,
            vad_program,
            asr_program,
            intent_program,
            handle_program,
            tts_program,
            snd_program,
            start_after,
            stop_after,
        )

        wake_detection: Optional[Detection] = None
        asr_wav_in: Optional[IO[bytes]] = None
        asr_transcript: Optional[Transcript] = None
        intent_result: Optional[Union[Intent, NotRecognized]] = None
        handle_result: Optional[Union[Handled, NotHandled]] = None
        tts_wav_in: Optional[IO[bytes]] = None

        if start_after:
            # Determine where to start in the pipeline
            start_after = StartAfterDomain(start_after)
            if start_after == StartAfterDomain.WAKE:
                # Body is detected wake name
                name = (await request.data).decode()
                wake_detection = Detection(name=name)
            elif start_after == StartAfterDomain.ASR:
                # Body is transcript or WAV
                if request.content_type == "audio/wav":
                    wav_bytes = await request.data
                    asr_wav_in = io.BytesIO(wav_bytes)
                else:
                    text = (await request.data).decode()
                    asr_transcript = Transcript(text=text)
            elif start_after == StartAfterDomain.INTENT:
                # Body is JSON
                event = Event.from_dict(await request.json)
                if Intent.is_type(event.type):
                    intent_result = Intent.from_event(event)
                elif NotRecognized.is_type(event.type):
                    intent_result = NotRecognized.from_event(event)
                else:
                    raise ValueError(f"Unexpected event type: {event.type}")
            elif start_after == StartAfterDomain.HANDLE:
                # Body is text or JSON
                if request.content_type == "application/json":
                    event = Event.from_dict(await request.json)
                    if Handled.is_type(event.type):
                        handle_result = Handled.from_event(event)
                    elif NotRecognized.is_type(event.type):
                        handle_result = NotHandled.from_event(event)
                    else:
                        raise ValueError(f"Unexpected event type: {event.type}")
                else:
                    # Plain text
                    text = (await request.data).decode()
                    handle_result = Handled(text=text)
            elif start_after == StartAfterDomain.TTS:
                # Body is or WAV
                wav_bytes = await request.data
                tts_wav_in = io.BytesIO(wav_bytes)

        pipeline_result = await run_pipeline(
            rhasspy,
            pipeline,
            samples_per_chunk,
            asr_chunks_to_buffer=asr_chunks_to_buffer,
            mic_program=mic_program,
            wake_program=wake_program,
            wake_detection=wake_detection,
            asr_program=asr_program,
            asr_transcript=asr_transcript,
            asr_wav_in=asr_wav_in,
            vad_program=vad_program,
            intent_program=intent_program,
            intent_result=intent_result,
            handle_program=handle_program,
            handle_result=handle_result,
            tts_program=tts_program,
            tts_wav_in=tts_wav_in,
            snd_program=snd_program,
            stop_after=StopAfterDomain(stop_after) if stop_after else None,
        )

        return jsonify(pipeline_result.to_event_dict())

    @app.websocket("/pipeline/asr-tts")
    async def ws_api_asr_tts() -> None:
        running_pipeline = (
            rhasspy.config.pipelines[websocket.args["pipeline"]]
            if "pipeline" in websocket.args
            else pipeline
        )

        asr_program = websocket.args.get("asr_program") or running_pipeline.asr
        assert asr_program, "Missing asr program"

        vad_program = websocket.args.get("vad_program") or running_pipeline.vad
        assert vad_program, "Missing vad program"

        handle_program = websocket.args.get("handle_program") or running_pipeline.handle
        assert handle_program, "Missing handle program"

        tts_program = websocket.args.get("tts_program") or running_pipeline.tts
        assert tts_program, "Missing tts program"

        in_rate = int(websocket.args.get("in_rate", DEFAULT_IN_RATE))
        in_width = int(websocket.args.get("in_width", DEFAULT_IN_WIDTH))
        in_channels = int(websocket.args.get("in_channels", DEFAULT_IN_CHANNELS))

        out_rate = int(websocket.args.get("out_rate", DEFAULT_OUT_RATE))
        out_width = int(websocket.args.get("out_width", DEFAULT_OUT_WIDTH))
        out_channels = int(websocket.args.get("out_channels", DEFAULT_OUT_CHANNELS))

        # asr + vad
        async with (
            await create_process(rhasspy, ASR_DOMAIN, asr_program)
        ) as asr_proc, (
            await create_process(rhasspy, VAD_DOMAIN, vad_program)
        ) as vad_proc:
            assert asr_proc.stdin is not None
            assert asr_proc.stdout is not None
            assert vad_proc.stdin is not None
            assert vad_proc.stdout is not None

            mic_task = asyncio.create_task(websocket.receive())
            vad_task = asyncio.create_task(async_read_event(vad_proc.stdout))
            pending = {mic_task, vad_task}

            while True:
                done, pending = await asyncio.wait(
                    pending, return_when=asyncio.FIRST_COMPLETED
                )

                if mic_task in done:
                    audio_bytes = mic_task.result()
                    if isinstance(audio_bytes, bytes) and audio_bytes:
                        mic_chunk = AudioChunk(
                            in_rate, in_width, in_channels, audio_bytes
                        )
                        mic_chunk_event = mic_chunk.event()
                        await asyncio.gather(
                            async_write_event(mic_chunk_event, asr_proc.stdin),
                            async_write_event(mic_chunk_event, vad_proc.stdin),
                        )

                    mic_task = asyncio.create_task(websocket.receive())
                    pending.add(mic_task)

                if vad_task in done:
                    vad_event = vad_task.result()
                    if vad_event is None:
                        break

                    # Forward to websocket
                    await websocket.send_json(vad_event.to_dict())

                    if VoiceStarted.is_type(vad_event.type):
                        _LOGGER.debug("stream-to-stream: voice started")
                    elif VoiceStopped.is_type(vad_event.type):
                        _LOGGER.debug("stream-to-stream: voice stopped")
                        break

                    vad_task = asyncio.create_task(async_read_event(vad_proc.stdout))
                    pending.add(vad_task)

            # Get transcript from asr
            await async_write_event(AudioStop().event(), asr_proc.stdin)
            transcript: Optional[Transcript] = None
            while True:
                asr_event = await async_read_event(asr_proc.stdout)
                if asr_event is None:
                    break

                # Forward to websocket
                await websocket.send_json(asr_event.to_dict())

                if Transcript.is_type(asr_event.type):
                    transcript = Transcript.from_event(asr_event)
                    _LOGGER.debug("stream-to-stream: asr=%s", transcript)
                    break

            handle_result: Optional[Union[Handled, NotHandled]] = None
            if transcript is not None:
                handle_result = await handle(rhasspy, handle_program, transcript)
                _LOGGER.debug("stream-to-stream: handle=%s", handle_result)

            if (handle_result is not None) and handle_result.text:
                # Forward to websocket
                await websocket.send_json(handle_result.event().to_dict())

                _LOGGER.debug("stream-to-stream: sending tts")
                await websocket.send_json(
                    AudioStart(out_rate, out_width, out_channels).event().to_dict()
                )
                converter = AudioChunkConverter(out_rate, out_width, out_channels)
                async for tts_chunk in synthesize_stream(
                    rhasspy, tts_program, handle_result.text
                ):
                    tts_chunk = converter.convert(tts_chunk)
                    await websocket.send(tts_chunk.audio)

                _LOGGER.debug("stream-to-stream: tts done")

            await websocket.send_json(AudioStop().event().to_dict())


================================================
FILE: rhasspy3_http_api/snd.py
================================================
import argparse
import io
import json
import logging

from quart import Quart, Response, jsonify, request, websocket

from rhasspy3.audio import (
    DEFAULT_OUT_CHANNELS,
    DEFAULT_OUT_RATE,
    DEFAULT_OUT_WIDTH,
    AudioStop,
)
from rhasspy3.config import PipelineConfig
from rhasspy3.core import Rhasspy
from rhasspy3.event import Event
from rhasspy3.snd import play, play_stream

_LOGGER = logging.getLogger(__name__)


def add_snd(
    app: Quart, rhasspy: Rhasspy, pipeline: PipelineConfig, args: argparse.Namespace
) -> None:
    @app.route("/snd/play", methods=["POST"])
    async def http_snd_play() -> Response:
        """Play WAV file."""
        wav_bytes = await request.data
        snd_pipeline = (
            rhasspy.config.pipelines[request.args["pipeline"]]
            if "pipeline" in request.args
            else pipeline
        )
        snd_program = request.args.get("snd_program") or snd_pipeline.snd
        assert snd_program, "Missing program for snd"

        samples_per_chunk = int(
            request.args.get("samples_per_chunk", args.samples_per_chunk)
        )

        _LOGGER.debug("play: snd=%s, wav=%s byte(s)", snd_program, len(wav_bytes))

        with io.BytesIO(wav_bytes) as wav_in:
            played = await play(
                rhasspy,
                snd_program,
                wav_in,
                samples_per_chunk,
            )

        return jsonify(played.event().to_dict() if played is not None else {})

    @app.websocket("/snd/play")
    async def ws_snd_play():
        """Play websocket audio stream."""
        snd_pipeline = (
            rhasspy.config.pipelines[request.args["pipeline"]]
            if "pipeline" in request.args
            else pipeline
        )
        snd_program = websocket.args.get("snd_program") or snd_pipeline.snd
        assert snd_program, "Missing program for snd"

        rate = int(websocket.args.get("rate", DEFAULT_OUT_RATE))
        width = int(websocket.args.get("width", DEFAULT_OUT_WIDTH))
        channels = int(websocket.args.get("channels", DEFAULT_OUT_CHANNELS))

        _LOGGER.debug("play: snd=%s", snd_program)

        async def audio_stream():
            while True:
                data = await websocket.receive()
                if not data:
                    # Empty message signals stop
                    break

                if isinstance(data, bytes):
                    # Raw audio
                    yield data
                else:
                    event = Event.from_dict(json.loads(data))
                    if AudioStop.is_type(event.type):
                        # Stop event
                        break

        played = await play_stream(
            rhasspy, snd_program, audio_stream(), rate, width, channels
        )

        await websocket.send_json(
            played.event().to_dict() if played is not None else {}
        )


================================================
FILE: rhasspy3_http_api/templates/asr.html
================================================
{% extends "layout.html" %}

{% block body %}

<h1>Speech to Text (asr)</h1>

<h2>Transcribe</h2>

<ol>
  <li>
    Pipeline:
    <select id="pipeline_name">
      <option value="default">default</option>
      {% set pipelines = config.pipelines | sort %}
      {% for pipeline in pipelines: %}
      {% if pipeline != "default": %}
      <option>{{ pipeline }}</option>
      {% endif %}
      {% endfor %}
    </select>
  </li>
  <li>
    Mic Program:
    <select id="asr_mic_program" onchange="asr_mic_change()">
      <option value="">Browser</option>
      <option value="">WAV file</option>
      <option value="">default</option>
      {% set programs = config.programs["mic"].items() | sort %}
      {% for name, program in programs: %}
      {% if program.installed: %}
      <option>{{ name }}</option>
      {% endif %}
      {% endfor %}
    </select>
    </select>
  </li>
  <li id="asr_wav_li" hidden>
    WAV file: <input id="asr_wav" type="file">
  </li>
  <li id="asr_vad_li">
    VAD Program:
    <select id="asr_vad_program">
      <option value="">default</option>
      {% set programs = config.programs["vad"].items() | sort %}
      {% for name, program in programs: %}
      {% if program.installed: %}
      <option>{{ name }}</option>
      {% endif %}
      {% endfor %}
    </select>
    </select>
  </li>
  <li>
    ASR Program:
    <select id="asr_program">
      <option value="">default</option>
      {% set programs = config.programs["asr"].items() | sort %}
      {% for name, program in programs: %}
      {% if program.installed: %}
      <option>{{ name }}</option>
      {% endif %}
      {% endfor %}
    </select>
    </select>
  </li>
  <li>
    <button onclick="asr_transcribe()">Transcribe</button>
  </li>
  <li>
    Status: <span id="asr_status"></span>
  </li>
  <li>
    Transcript: <span id="asr_transcript"></span>
  </li>
</ol>

<script type="text/javascript">

function asr_mic_change() {
    const micIndex = q("#asr_mic_program").selectedIndex;
    q("#asr_wav_li").hidden = (micIndex != 1);
    q("#asr_vad_li").hidden = (micIndex == 1);
}

function asr_transcribe() {
    const micIndex = q("#asr_mic_program").selectedIndex;
    if (micIndex == 0) {
        asr_transcribe_stream();
        return;
    }

    if (micIndex == 1) {
        asr_transcribe_wav();
        return;
    }

    const pipelineName = q("#pipeline_name").value;
    const micProgram = q("#asr_mic_program").value;
    const asrProgram = q("#asr_program").value;
    const vadProgram = q("#asr_vad_program").value;

    const status = q("#asr_status");
    status.innerText = "Listening";

    const transcript = q("#asr_transcript");
    transcript.innerText = "";

    const startTime = performance.now();
    fetch("{{ url_prefix }}" + "pipeline/run?start_after=wake&stop_after=asr"
          + "&pipeline=" + encodeURIComponent(pipelineName)
          + "&asr_program=" + encodeURIComponent(asrProgram)
          + "&mic_program=" + encodeURIComponent(micProgram)
          + "&vad_program=" + encodeURIComponent(vadProgram),
          {method: "POST"})
        .then(response => {
            const endTime = performance.now();
            status.innerText = "Done in " + (endTime - startTime) / 1000 + " second(s)";
            return response;
        })
        .then(response => response.json())
        .then(response => response.asr_transcript.data.text)
        .then(text => {
            transcript.innerText = text;
        })
        .catch(error => alert(error));
}

function asr_transcribe_wav() {
    const pipelineName = q("#pipeline_name").value;
    const asrProgram = q("#asr_program").value;

    const status = q("#asr_status");
    status.innerText = "Loading";

    const transcript = q("#asr_transcript");
    transcript.innerText = "";

    const files = q("#asr_wav").files;
    if (files.length < 1) {
        alert("No file");
        return;
    }

    const reader = new FileReader();
    reader.onload = function() {
        const wavData = this.result;
        status.innerText = "Transcribing";

        const startTime = performance.now();
        fetch("{{ url_prefix }}" + "asr/transcribe"
              + "&pipeline=" + encodeURIComponent(pipelineName)
              + "&asr_program=" + encodeURIComponent(asrProgram),
              { method: "POST", body: wavData })
            .then(response => {
                const endTime = performance.now();
                status.innerText = "Done in " + (endTime - startTime) / 1000 + " second(s)";
                return response;
            })
            .then(response => response.json())
            .then(response => response.data.text)
            .then(text => {
                transcript.innerText = text;
            })
            .catch(error => alert(error));
    }

    reader.readAsArrayBuffer(files[0]);
}

async function asr_transcribe_stream() {
    const pipelineName = q("#pipeline_name").value;
    const asrProgram = q("#asr_program").value;
    const vadProgram = q("#asr_vad_program").value;

    const status = q("#asr_status");
    status.innerText = "Connecting";

    const transcript = q("#asr_transcript");
    transcript.innerText = "";

    const context = new (window.AudioContext || window.webkitAudioContext)();
    const stream = await navigator.mediaDevices.getUserMedia({audio: true});
    await context.audioWorklet.addModule("/js/recorder.worklet.js");

    const source = context.createMediaStreamSource(stream);
    const recorder = new AudioWorkletNode(context, "recorder.worklet");

    const websocket = new WebSocket("ws://" + location.host + "/asr/transcribe"
                                    + "?rate=" + encodeURIComponent(context.sampleRate)
                                    + "&pipeline=" + encodeURIComponent(pipelineName)
                                    + "&asr_program=" + encodeURIComponent(asrProgram)
                                    + "&vad_program=" + encodeURIComponent(vadProgram));

    websocket.binaryType = "arraybuffer";
    websocket.onopen = function() {
        status.innerText = "Connected";

        source.connect(recorder).connect(context.destination);
        recorder.port.onmessage = function(e) {
            websocket.send(e.data);
        };
    };

    let audioArrayBuffers = [];
    let numAudioBytes = 0;
    websocket.onmessage = function(e) {
        source.disconnect();
        status.innerText = "Done";
        transcript.innerText = JSON.parse(e.data).data.text;
    };

}

</script>


{% endblock %}


================================================
FILE: rhasspy3_http_api/templates/index.html
================================================
{% extends "layout.html" %}

{% block body %}

<ul>
  <li>
    <a href="{{ url_prefix }}/config">Config</a>
  </li>
  <li>
    <a href="{{ url_prefix }}/pipeline.html">Pipeline</a>
  </li>
  <li>
    <a href="{{ url_prefix }}/asr.html">Speech to Text (asr)</a>
  </li>
  <li>
    <a href="{{ url_prefix }}/tts.html">Text to Speech (tts)</a>
  </li>
  <li>
    <a href="{{ url_prefix }}/satellite.html">Satellite</a>
  </li>
</ul>

{% endblock %}


================================================
FILE: rhasspy3_http_api/templates/layout.html
================================================
<!doctype html>
<html lang="en">
  <head>
    <meta charset="utf-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta name="viewport" content="width=device-width,initial-scale=1.0">
    <link rel="icon" href="{{ url_prefix }}/img/favicon.png">
    <link rel="stylesheet" href="{{ url_prefix }}/css/main.css">
    <title>Rhasspy</title>
  </head>
  <body>
    <div id="header">
      <a href="{{ url_prefix }}/">
        <img id="banner" title="Banner" src="{{ url_prefix }}/img/banner.png">
      </a>
    </div>

    {% block body %}{% endblock %}
  </body>

  <script src="{{ url_prefix }}/js/main.js"></script>
</html>


================================================
FILE: rhasspy3_http_api/templates/pipeline.html
================================================
{% extends "layout.html" %}

{% block body %}

<h1>Pipeline</h1>

<ol>
  <li>
    Pipeline:
    <select id="pipeline_name">
      <option value="default">default</option>
      {% set pipelines = config.pipelines | sort %}
      {% for pipeline in pipelines: %}
      {% if pipeline != "default": %}
      <option>{{ pipeline }}</option>
      {% endif %}
      {% endfor %}
    </select>
  </li>
  <li>
    Mic Program:
    <select id="pipeline_mic_program">
      <option value="">default</option>
      {% set programs = config.programs["mic"].items() | sort %}
      {% for name, program in programs: %}
      {% if program.installed: %}
      <option>{{ name }}</option>
      {% endif %}
      {% endfor %}
    </select>
  </li>
  <li>
    Wake Program:
    <select id="pipeline_wake_program">
      <option value="">default</option>
      {% set programs = config.programs["wake"].items() | sort %}
      {% for name, program in programs: %}
      {% if program.installed: %}
      <option>{{ name }}</option>
      {% endif %}
      {% endfor %}
    </select>
  </li>
  <li>
    ASR Program:
    <select id="pipeline_asr_program">
      <option value="">default</option>
      {% set programs = config.programs["asr"].items() | sort %}
      {% for name, program in programs: %}
      {% if program.installed: %}
      <option>{{ name }}</option>
      {% endif %}
      {% endfor %}
    </select>
    </select>
  </li>
  <li>
    VAD Program:
    <select id="pipeline_vad_program">
      <option value="">default</option>
      {% set programs = config.programs["vad"].items() | sort %}
      {% for name, program in programs: %}
      {% if program.installed: %}
      <option>{{ name }}</option>
      {% endif %}
      {% endfor %}
    </select>
    </select>
  </li>
  <li>
    Intent Program:
    <select id="pipeline_intent_program">
      <option value="">default</option>
      {% set programs = config.programs["intent"].items() | sort %}
      {% for name, program in programs: %}
      {% if program.installed: %}
      <option>{{ name }}</option>
      {% endif %}
      {% endfor %}
    </select>
    </select>
  </li>
  <li>
    Handle Program:
    <select id="pipeline_handle_program">
      <option value="">default</option>
      {% set programs = config.programs["handle"].items() | sort %}
      {% for name, program in programs: %}
      {% if program.installed: %}
      <option>{{ name }}</option>
      {% endif %}
      {% endfor %}
    </select>
    </select>
  </li>
  <li>
    TTS Program:
    <select id="pipeline_tts_program">
      <option value="">default</option>
      {% set programs = config.programs["tts"].items() | sort %}
      {% for name, program in programs: %}
      {% if program.installed: %}
      <option>{{ name }}</option>
      {% endif %}
      {% endfor %}
    </select>
    </select>
  </li>
  <li>
    Snd Program:
    <select id="pipeline_snd_program">
      <option value="">default</option>
      {% set programs = config.programs["snd"].items() | sort %}
      {% for name, program in programs: %}
      {% if program.installed: %}
      <option>{{ name }}</option>
      {% endif %}
      {% endfor %}
    </select>
    </select>
  </li>
  <li>
    <button onclick="pipeline_run()">Run</button>
    starting after
    <select id="pipeline_start_after">
      <option value="">mic</option>
      <option>wake</option>
      <option>asr</option>
      <option>intent</option>
      <option>handle</option>
      <option>tts</option>
    </select>
    stopping after
    <select id="pipeline_stop_after">
      <option value="">snd</option>
      <option>wake</option>
      <option>asr</option>
      <option>intent</option>
      <option>handle</option>
      <option>tts</option>
    </select>
  </li>
  <li>
    Status: <span id="pipeline_status"></span>
  </li>
  <li>
    Result: <span id="pipeline_result"></span>
  </li>
</ol>

<script type="text/javascript">

function pipeline_run() {
    const pipelineName = q("#pipeline_name").value;

    const micProgram = q("#pipeline_mic_program").value;
    const wakeProgram = q("#pipeline_wake_program").value;
    const asrProgram = q("#pipeline_asr_program").value;
    const vadProgram = q("#pipeline_vad_program").value;
    const intentProgram = q("#pipeline_intent_program").value;
    const handleProgram = q("#pipeline_handle_program").value;
    const ttsProgram = q("#pipeline_tts_program").value;
    const sndProgram = q("#pipeline_snd_program").value;

    const startAfter = q("#pipeline_start_after").value;
    const stopAfter = q("#pipeline_stop_after").value;

    const status = q("#pipeline_status");
    status.innerText = "Listening";

    const result = q("#pipeline_result");
    result.innerText = "";

    const startTime = performance.now();
    fetch("{{ url_prefix }}" + "pipeline/run"
          + "?pipeline=" + encodeURIComponent(pipelineName)
          + "&mic_program=" + encodeURIComponent(micProgram)
          + "&wake_program=" + encodeURIComponent(wakeProgram)
          + "&asr_program=" + encodeURIComponent(asrProgram)
          + "&vad_program=" + encodeURIComponent(vadProgram)
          + "&intent_program=" + encodeURIComponent(intentProgram)
          + "&handle_program=" + encodeURIComponent(handleProgram)
          + "&tts_program=" + encodeURIComponent(ttsProgram)
          + "&snd_program=" + encodeURIComponent(sndProgram)
          + "&start_after=" + encodeURIComponent(startAfter)
          + "&stop_after=" + encodeURIComponent(stopAfter),
          {method: "POST"})
        .then(response => {
            const endTime = performance.now();
            status.innerText = "Done in " + (endTime - startTime) / 1000 + " second(s)";
            return response;
        })
        .then(response => response.json())
        .then(result => {
            pipeline_result.innerText = JSON.stringify(result);
        })
        .catch(error => alert(error));
}

</script>


{% endblock %}


================================================
FILE: rhasspy3_http_api/templates/satellite.html
================================================
{% extends "layout.html" %}

{% block body %}

<h1>Satellite</h1>

<ol>
  <li>
    Pipeline:
    <select id="pipeline_name">
      <option value="default">default</option>
      {% set pipelines = config.pipelines | sort %}
      {% for pipeline in pipelines: %}
      {% if pipeline != "default": %}
      <option>{{ pipeline }}</option>
      {% endif %}
      {% endfor %}
    </select>
  </li>
  <li>
    ASR Program:
    <select id="pipeline_asr_program">
      <option value="">default</option>
      {% set programs = config.programs["asr"].items() | sort %}
      {% for name, program in programs: %}
      {% if program.installed: %}
      <option>{{ name }}</option>
      {% endif %}
      {% endfor %}
    </select>
    </select>
  </li>
  <li>
    VAD Program:
    <select id="pipeline_vad_program">
      <option value="">default</option>
      {% set programs = config.programs["vad"].items() | sort %}
      {% for name, program in programs: %}
      {% if program.installed: %}
      <option>{{ name }}</option>
      {% endif %}
      {% endfor %}
    </select>
    </select>
  </li>
  <li>
    Handle Program:
    <select id="pipeline_handle_program">
      <option value="">default</option>
      {% set programs = config.programs["handle"].items() | sort %}
      {% for name, program in programs: %}
      {% if program.installed: %}
      <option>{{ name }}</option>
      {% endif %}
      {% endfor %}
    </select>
    </select>
  </li>
  <li>
    TTS Program:
    <select id="pipeline_tts_program">
      <option value="">default</option>
      {% set programs = config.programs["tts"].items() | sort %}
      {% for name, program in programs: %}
      {% if program.installed: %}
      <option>{{ name }}</option>
      {% endif %}
      {% endfor %}
    </select>
    </select>
  </li>
  <li>
    <button onclick="asr_tts()">Run</button>
  </li>
  <li>
    Status: <span id="pipeline_status"></span>
  </li>
  <li>
    Events:
    <ul id="events">
    </ul>
  </li>
  <li>
    <audio id="tts_audio" preload="none" controls autoplay></audio>
  </li>
</ol>

<script type="text/javascript">

async function asr_tts() {
    const pipelineName = q("#pipeline_name").value;
    const asrProgram = q("#pipeline_asr_program").value;
    const vadProgram = q("#pipeline_vad_program").value;
    const handleProgram = q("#pipeline_handle_program").value;
    const ttsProgram = q("#pipeline_tts_program").value;

    const status = q("#pipeline_status");
    status.innerText = "Connecting";

    const eventsList = q("#events");
    eventsList.innerHTML = "";

    const context = new (window.AudioContext || window.webkitAudioContext)();
    const stream = await navigator.mediaDevices.getUserMedia({audio: true});
    await context.audioWorklet.addModule("/js/recorder.worklet.js");

    const source = context.createMediaStreamSource(stream);
    const recorder = new AudioWorkletNode(context, "recorder.worklet");

    const websocket = new WebSocket("ws://" + location.host + "/pipeline/asr-tts"
                                    + "?in_rate=" + encodeURIComponent(context.sampleRate)
                                    + "&out_rate=" + encodeURIComponent(22050)
                                    + "&pipeline=" + encodeURIComponent(pipelineName)
                                    + "&asr_program=" + encodeURIComponent(asrProgram)
                                    + "&vad_program=" + encodeURIComponent(vadProgram)
                                    + "&handle_program=" + encodeURIComponent(handleProgram)
                                    + "&tts_program=" + encodeURIComponent(ttsProgram));

    websocket.binaryType = "arraybuffer";
    websocket.onopen = function() {
        status.innerText = "Connected";

        source.connect(recorder).connect(context.destination);
        recorder.port.onmessage = function(e) {
            websocket.send(e.data);
        };
    };

    let audioArrayBuffers = [];
    let numAudioBytes = 0;
    websocket.onmessage = function(e) {
        if (typeof e.data === "string") {
            eventsList.innerHTML += "<li>" + e.data + "</li>";
        } else {
            audioArrayBuffers.push(e.data);
            numAudioBytes += e.data.byteLength;
        }
    };

    websocket.onclose = function() {
        // Stop streaming audio
        source.disconnect();
        
        const audioBytes = new Uint8Array(numAudioBytes);

        // Copy to single buffer
        let audioBytesIndex = 0;
        for (let i = 0; i < audioArrayBuffers.length; i++) {
            const buffer = audioArrayBuffers[i];
            audioBytes.set(new Uint8Array(buffer), audioBytesIndex);
            audioBytesIndex += buffer.byteLength;
        }
        
        const wavHeader = new Uint8Array(buildWaveHeader({
            numFrames: audioBytes.byteLength / 2,
            bytesPerSample: 2,
            sampleRate: 22050,
            numChannels: 1,
            format: 1
        }))

        // create WAV file with header and downloaded PCM audio
        const wavBytes = new Uint8Array(wavHeader.length + audioBytes.byteLength)
        wavBytes.set(wavHeader, 0)
        wavBytes.set(audioBytes, wavHeader.length)
        const blob = new Blob([wavBytes], { "type": "audio/wav" });

        q("#tts_audio").src = window.URL.createObjectURL(blob);
        status.innerText = "Done";
    };

}

</script>


{% endblock %}


================================================
FILE: rhasspy3_http_api/templates/tts.html
================================================
{% extends "layout.html" %}

{% block body %}

<h1>Text to Speech (tts)</h1>

<h2>Speak</h2>

<ol>
  <li>
    Text: <input id="tts_text" type="text" onkeypress="tts_keypress(event)">
  </li>
  <li>
    Pipeline:
    <select id="pipeline_name">
      <option value="default">default</option>
      {% set pipelines = config.pipelines | sort %}
      {% for pipeline in pipelines: %}
      {% if pipeline != "default": %}
      <option>{{ pipeline }}</option>
      {% endif %}
      {% endfor %}
    </select>
  </li>
  <li>
    TTS Program:
    <select id="tts_program">
      <option value="">default</option>
      {% set programs = config.programs["tts"].items() | sort %}
      {% for name, program in programs: %}
      {% if program.installed: %}
      <option>{{ name }}</option>
      {% endif %}
      {% endfor %}
    </select>
    </select>
  </li>
  <li>
    Snd Program:
    <select id="tts_snd_program">
      <option value="">Browser</option>
      <option value="">default</option>
      {% set programs = config.programs["snd"].items() | sort %}
      {% for name, program in programs: %}
      {% if program.installed: %}
      <option>{{ name }}</option>
      {% endif %}
      {% endfor %}
    </select>
    </select>
  </li>
  <li>
    <button onclick="tts_speak()">Speak</button>
  </li>
  <li>
    Status: <span id="tts_status"></span>
  </li>
  <li>
    <audio id="tts_audio" preload="none" controls autoplay></audio>
  </li>
</ol>

<script type="text/javascript">

function tts_keypress(e) {
    if (e.keyCode == 13) {
        e.preventDefault();
        tts_speak();
    }
}

function tts_speak() {
    if (q("#tts_snd_program").selectedIndex == 0) {
        tts_synthesize();
        return;
    }

    const pipelineName = q("#pipeline_name").value;
    const ttsProgram = q("#tts_program").value;
    const sndProgram = q("#tts_snd_program").value;

    const status = q("#tts_status");
    status.innerText = "Speaking";
    
    const text = q("#tts_text").value;
    
    const startTime = performance.now();
    fetch("{{ url_prefix }}" + "tts/speak"
          + "?pipeline=" + encodeURIComponent(pipelineName)
          + "&tts_program=" + encodeURIComponent(ttsProgram)
          + "&snd_program=" + encodeURIComponent(sndProgram),
          { method: "POST", body: text })
        .then(response => {
            const endTime = performance.now();
            status.innerText = "Done in " + (endTime - startTime) / 1000 + " second(s)";
            return response;
        })
        .catch(error => alert(error));
}

function tts_synthesize() {
    const pipelineName = q("#pipeline_name").value;
    const ttsProgram = q("#tts_program").value;

    const status = q("#tts_status");
    status.innerText = "Synthesizing";
    
    const text = q("#tts_text").value;
    
    const startTime = performance.now();
    fetch("{{ url_prefix }}" + "tts/synthesize"
          + "?pipeline=" + encodeURIComponent(pipelineName)
          + "&tts_program=" + encodeURIComponent(ttsProgram),
          { method: "POST", body: text })
        .then(response => {
            const endTime = performance.now();
            status.innerText = "Done in " + (endTime - startTime) / 1000 + " second(s)";
            return response;
        })
        .then(response => response.blob())
        .then(blob => {
            q('#tts_audio').src = URL.createObjectURL(blob);
        })
        .catch(error => alert(error));
}

</script>


{% endblock %}


================================================
FILE: rhasspy3_http_api/tts.py
================================================
import argparse
import io
import logging

from quart import Quart, Response, jsonify, render_template, request

from rhasspy3.config import PipelineConfig
from rhasspy3.core import Rhasspy
from rhasspy3.snd import play
from rhasspy3.tts import synthesize

_LOGGER = logging.getLogger(__name__)


def add_tts(
    app: Quart, rhasspy: Rhasspy, pipeline: PipelineConfig, args: argparse.Namespace
) -> None:
    @app.route("/tts.html", methods=["GET"])
    async def http_tts() -> str:
        return await render_template("tts.html", config=rhasspy.config)

    @app.route("/tts/synthesize", methods=["GET", "POST"])
    async def http_tts_synthesize() -> Response:
        """Synthesize a WAV file from text."""
        if request.method == "GET":
            text = request.args["text"]
        else:
            text = (await request.data).decode()

        tts_pipeline = (
            rhasspy.config.pipelines[request.args["pipeline"]]
            if "pipeline" in request.args
            else pipeline
        )
        tts_program = request.args.get("tts_program") or tts_pipeline.tts
        assert tts_program, "No tts program"
        _LOGGER.debug("synthesize: tts=%s, text='%s'", tts_program, text)

        with io.BytesIO() as wav_out:
            await synthesize(rhasspy, tts_program, text, wav_out)
            wav_bytes = wav_out.getvalue()
            _LOGGER.debug("synthesize: wav=%s byte(s)", len(wav_bytes))

            return Response(wav_bytes, mimetype="audio/wav")

    @app.route("/tts/speak", methods=["GET", "POST"])
    async def http_tts_speak() -> Response:
        """Synthesize audio from text and play."""
        if request.method == "GET":
            text = request.args["text"]
        else:
            text = (await request.data).decode()

        tts_pipeline = (
            rhasspy.config.pipelines[request.args["pipeline"]]
            if "pipeline" in request.args
            else pipeline
        )
        tts_program = request.args.get("tts_program") or tts_pipeline.tts
        snd_program = request.args.get("snd_program") or tts_pipeline.snd
        samples_per_chunk = int(
            request.args.get("samples_per_chunk", args.samples_per_chunk)
        )

        assert tts_program, "No tts program"
        assert snd_program, "No snd program"
        _LOGGER.debug(
            "synthesize: tts=%s, snd=%s, text='%s'", tts_program, snd_program, text
        )

        with io.BytesIO() as wav_out:
            await synthesize(rhasspy, tts_program, text, wav_out)
            wav_bytes = wav_out.getvalue()
            _LOGGER.debug("synthesize: wav=%s byte(s)", len(wav_bytes))

            wav_out.seek(0)
            played = await play(rhasspy, snd_program, wav_out, samples_per_chunk)

        return jsonify(played.event().to_dict() if played is not None else {})


================================================
FILE: rhasspy3_http_api/wake.py
================================================
import argparse
import io
import json
import logging
import wave

from quart import Quart, Response, jsonify, request, websocket

from rhasspy3.audio import (
    DEFAULT_IN_CHANNELS,
    DEFAULT_IN_RATE,
    DEFAULT_IN_WIDTH,
    AudioStop,
)
from rhasspy3.config import PipelineConfig
from rhasspy3.core import Rhasspy
from rhasspy3.event import Event
from rhasspy3.mic import DOMAIN as MIC_DOMAIN
from rhasspy3.program import create_process
from rhasspy3.wake import detect, detect_stream

_LOGGER = logging.getLogger(__name__)


def add_wake(
    app: Quart, rhasspy: Rhasspy, pipeline: PipelineConfig, args: argparse.Namespace
) -> None:
    @app.route("/wake/detect", methods=["GET", "POST"])
    async def http_wake_detect() -> Response:
        """Detect wake word in WAV file."""
        wav_bytes = await request.data
        wake_pipeline = (
            rhasspy.config.pipelines[request.args["pipeline"]]
            if "pipeline" in request.args
            else pipeline
        )
        wake_program = request.args.get("wake_program") or wake_pipeline.wake
        assert wake_program, "Missing program for wake"

        if wav_bytes:
            # Detect from WAV
            samples_per_chunk = int(
                request.args.get("samples_per_chunk", args.samples_per_chunk)
            )

            _LOGGER.debug(
                "detect: wake=%s, wav=%s byte(s)", wake_program, len(wav_bytes)
            )

            with io.BytesIO(wav_bytes) as wav_io:
                wav_file: wave.Wave_read = wave.open(wav_io, "rb")
                with wav_file:

                    async def audio_stream():
                        chunk = wav_file.readframes(samples_per_chunk)
                        while chunk:
                            yield chunk
                            chunk = wav_file.readframes(samples_per_chunk)

                    detection = await detect_stream(
                        rhasspy,
                        wake_program,
                        audio_stream(),
                        wav_file.getframerate(),
                        wav_file.getsampwidth(),
                        wav_file.getnchannels(),
                    )
        else:
            # Detect from mic
            mic_program = request.args.get("mic_program") or wake_pipeline.mic
            assert mic_program, "Missing program for mic"

            _LOGGER.debug("detect: mic=%s, wake=%s", mic_program, wake_program)

            async with (
                await create_process(rhasspy, MIC_DOMAIN, mic_program)
            ) as mic_proc:
                assert mic_proc.stdout is not None
                detection = await detect(rhasspy, wake_program, mic_proc.stdout)

        _LOGGER.debug("wake: detection=%s", detection)
        return jsonify(detection.event().to_dict() if detection is not None else {})

    @app.websocket("/wake/detect")
    async def ws_wake_detect():
        """Detect wake word in websocket audio stream."""
        wake_pipeline = (
            rhasspy.config.pipelines[request.args["pipeline"]]
            if "pipeline" in request.args
            else pipeline
        )
        wake_program = websocket.args.get("wake_program") or wake_pipeline.wake
        assert wake_program, "Missing program for wake"

        rate = int(websocket.args.get("rate", DEFAULT_IN_RATE))
        width = int(websocket.args.get("width", DEFAULT_IN_WIDTH))
        channels = int(websocket.args.get("channels", DEFAULT_IN_CHANNELS))

        _LOGGER.debug("detect: wake=%s", wake_program)

        async def audio_stream():
            while True:
                data = await websocket.receive()
                if not data:
                    # Empty message signals stop
                    break

                if isinstance(data, bytes):
                    # Raw audio
                    yield data
                else:
                    event = Event.from_dict(json.loads(data))
                    if AudioStop.is_type(event.type):
                        # Stop event
                        break

        detection = await detect_stream(
            rhasspy, wake_program, audio_stream(), rate, width, channels
        )

        _LOGGER.debug("detect: detection='%s'", detection)

        await websocket.send_json(
            detection.event().to_dict() if detection is not None else {}
        )


================================================
FILE: script/format
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

if [ -d "${venv}" ]; then
    # Activate virtual environment if available
    source "${venv}/bin/activate"
fi

python_files=()
python_files+=("${base_dir}/bin")
python_files+=("${base_dir}/rhasspy3")
python_files+=("${base_dir}/rhasspy3_http_api")
python_files+=("${base_dir}/programs")

# Format code
black "${python_files[@]}"
isort "${python_files[@]}"


================================================
FILE: script/http_server
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

if [ -d "${venv}" ]; then
    # Activate virtual environment if available
    source "${venv}/bin/activate"
fi

export PYTHONPATH="${base_dir}"
export PATH="${base_dir}/bin:${PATH}"

python3 -m rhasspy3_http_api "$@"


================================================
FILE: script/lint
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

if [ -d "${venv}" ]; then
    # Activate virtual environment if available
    source "${venv}/bin/activate"
fi

python_files=()
python_files+=("${base_dir}/bin")
python_files+=("${base_dir}/rhasspy3")
python_files+=("${base_dir}/rhasspy3_http_api")
python_files+=("${base_dir}/programs")

# Check
black "${python_files[@]}" --check
isort "${python_files[@]}" --check
flake8 "${python_files[@]}"
pylint "${python_files[@]}"
mypy "${base_dir}/bin" "${base_dir}/rhasspy3"


================================================
FILE: script/run
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

if [ -d "${venv}" ]; then
    # Activate virtual environment if available
    source "${venv}/bin/activate"
fi

export PYTHONPATH="${base_dir}"
export PYTHONUNBUFFERED='1'
export PATH="${base_dir}/bin:${PATH}"

"$@"


================================================
FILE: script/setup
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Copy default config
config_dir="${base_dir}/config"

mkdir -p "${config_dir}"

# -----------------------------------------------------------------------------

echo "OK"


================================================
FILE: script/setup_http_server
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

# Python binary to use
: "${PYTHON=python3}"

python_version="$(${PYTHON} --version)"

# Create virtual environment
if [ ! -d "${venv}" ]; then
    echo "Creating virtual environment at ${venv} (${python_version})"
    rm -rf "${venv}"
    "${PYTHON}" -m venv "${venv}"
    source "${venv}/bin/activate"

    # Install Python dependencies
    echo 'Installing Python dependencies'
    pip3 install --upgrade pip
    pip3 install --upgrade wheel setuptools
else
    source "${venv}/bin/activate"
fi

if [ -f "${base_dir}/requirements.txt" ]; then
    pip3 install -r "${base_dir}/requirements.txt"
fi

pip3 install -r "${base_dir}/requirements_http_api.txt"

# -----------------------------------------------------------------------------

echo "OK"


================================================
FILE: script/test
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

if [ -d "${venv}" ]; then
    # Activate virtual environment if available
    source "${venv}/bin/activate"
fi

export PYTHONPATH="${base_dir}:${PYTHONPATH}"
pytest -vv "${base_dir}/tests"


================================================
FILE: setup.cfg
================================================
[flake8]
# To work with Black
max-line-length = 88
# E501: line too long
# W503: Line break occurred before a binary operator
# E203: Whitespace before ':'
# D202 No blank lines allowed after function docstring
# W504 line break after binary operator
ignore =
    E501,
    W503,
    E203,
    D202,
    W504

# F401 import unused
per-file-ignores =
    programs/asr/faster-whisper/src/faster_whisper/__init__.py:F401

[isort]
multi_line_output = 3
include_trailing_comma=True
force_grid_wrap=0
use_parentheses=True
line_length=88
indent = "    "


================================================
FILE: setup.py
================================================
#!/usr/bin/env python3
from pathlib import Path

import setuptools
from setuptools import setup

this_dir = Path(__file__).parent
module_dir = this_dir / "rhasspy3"

# -----------------------------------------------------------------------------

# Load README in as long description
long_description: str = ""
readme_path = this_dir / "README.md"
if readme_path.is_file():
    long_description = readme_path.read_text(encoding="utf-8")

requirements = []
requirements_path = this_dir / "requirements.txt"
if requirements_path.is_file():
    with open(requirements_path, "r", encoding="utf-8") as requirements_file:
        requirements = requirements_file.read().splitlines()

version_path = module_dir / "VERSION"
with open(version_path, "r", encoding="utf-8") as version_file:
    version = version_file.read().strip()

# -----------------------------------------------------------------------------

setup(
    name="rhasspy3",
    version=version,
    description="Rhasspy Voice Assistant Toolkit",
    long_description=long_description,
    url="http://github.com/rhasspy/rhasspy3",
    author="Michael Hansen",
    author_email="mike@rhasspy.org",
    license="MIT",
    packages=setuptools.find_packages(),
    package_data={
        "rhasspy3": ["VERSION", "py.typed"],
    },
    install_requires=requirements,
    classifiers=[
        "Development Status :: 3 - Alpha",
        "Intended Audience :: Developers",
        "Topic :: Text Processing :: Linguistic",
        "License :: OSI Approved :: License :: OSI Approved :: MIT License",
        "Programming Language :: Python :: 3.7",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
    ],
    keywords="voice assistant rhasspy",
)


================================================
FILE: tests/test_dataclasses_json.py
================================================
from dataclasses import dataclass
from typing import Dict, List, Optional

from rhasspy3.util.dataclasses_json import DataClassJsonMixin


@dataclass
class Class1(DataClassJsonMixin):
    name: str


@dataclass
class Class2(DataClassJsonMixin):
    name: str
    obj1: Class1
    list1: List[Class1]
    dict1: Dict[str, Class1]
    opt1: Optional[Class1]


_DICT = {
    "name": "2",
    "obj1": {"name": "1"},
    "list1": [{"name": "1-2"}],
    "dict1": {"key": {"name": "1-3"}},
    "opt1": {"name": "1-4"},
}
_OBJ = Class2(
    name="2",
    obj1=Class1(name="1"),
    list1=[Class1(name="1-2")],
    dict1={"key": Class1(name="1-3")},
    opt1=Class1(name="1-4"),
)


def test_to_dict():
    assert _OBJ.to_dict() == _DICT


def test_from_dict():
    assert Class2.from_dict(_DICT) == _OBJ


================================================
FILE: tests/test_jaml.py
================================================
import io

from rhasspy3.util.jaml import safe_load

YAML = """
# Line comment
outer_a:  # Inline comment
  name: outer_a
  prop_int: 1
  prop_float: 1.23
  prop_bool: true
  prop_bool2: false
  prop_str_noquotes: hello: world
  prop_str_1quotes: 'hello: world'
  prop_str_2quotes: "hello: world"
  prop_str_literal: |
    hello:
    world
  inner_a:
    name: inner_a
  empty_string: ""
  string_with_hash: "#test"

outer_b:
  name: inner_b
"""


def test_safe_load():
    with io.StringIO(YAML) as yaml:
        assert safe_load(yaml) == {
            "outer_a": {
                "name": "outer_a",
                "prop_int": 1,
                "prop_float": 1.23,
                "prop_bool": True,
                "prop_bool2": False,
                "prop_str_noquotes": "hello: world",
                "prop_str_1quotes": "hello: world",
                "prop_str_2quotes": "hello: world",
                "prop_str_literal": "hello:\nworld",
                "inner_a": {"name": "inner_a"},
                "empty_string": "",
                "string_with_hash": "#test",
            },
            "outer_b": {"name": "inner_b"},
        }


================================================
FILE: tools/websocket-client/bin/websocket_client.py
================================================
#!/usr/bin/env python3
import argparse
import asyncio
import wave
from urllib.parse import urlencode, urlparse, parse_qsl, urlunparse

from websockets import connect


async def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("url")
    parser.add_argument("wav_file", nargs="+", help="Path(s) to WAV file(s)")
    parser.add_argument("--samples-per-chunk", type=int, default=1024)
    args = parser.parse_args()

    for wav_path in args.wav_file:
        wav_file: wave.Wave_read = wave.open(wav_path, "rb")
        with wav_file:
            # Add audio parameters if missing
            parse_result = urlparse(args.url)
            query = dict(parse_qsl(parse_result.query))
            query.setdefault("rate", str(wav_file.getframerate()))
            query.setdefault("width", str(wav_file.getsampwidth()))
            query.setdefault("channels", str(wav_file.getnchannels()))

            url = urlunparse(parse_result._replace(query=urlencode(query)))
            async with connect(url) as websocket:
                chunk = wav_file.readframes(args.samples_per_chunk)
                while chunk:
                    await websocket.send(chunk)
                    chunk = wav_file.readframes(args.samples_per_chunk)

                # Signal stop with empty message
                await websocket.send(bytes())
                result = await websocket.recv()
                print(result)


if __name__ == "__main__":
    asyncio.run(main())


================================================
FILE: tools/websocket-client/requirements.txt
================================================
websockets


================================================
FILE: tools/websocket-client/script/run
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

if [ -d "${venv}" ]; then
    source "${venv}/bin/activate"
fi

export PATH="${base_dir}/bin:${PATH}"

python3 "${base_dir}/bin/websocket_client.py" "$@"


================================================
FILE: tools/websocket-client/script/setup
================================================
#!/usr/bin/env bash
set -eo pipefail

# Directory of *this* script
this_dir="$( cd "$( dirname "$0" )" && pwd )"

# Base directory of repo
base_dir="$(realpath "${this_dir}/..")"

# Path to virtual environment
: "${venv:=${base_dir}/.venv}"

# Python binary to use
: "${PYTHON=python3}"

python_version="$(${PYTHON} --version)"

# Create virtual environment
echo "Creating virtual environment at ${venv} (${python_version})"
rm -rf "${venv}"
"${PYTHON}" -m venv "${venv}"
source "${venv}/bin/activate"

# Install Python dependencies
echo 'Installing Python dependencies'
pip3 install --upgrade pip
pip3 install --upgrade wheel setuptools

pip3 install -r "${base_dir}/requirements.txt"

# -----------------------------------------------------------------------------

echo "OK"