Repository: rhasspy/wyoming-faster-whisper
Branch: main
Commit: df3e5bac007b
Files: 32
Total size: 51.6 KB
Directory structure:
gitextract_nf4x59hb/
├── .dockerignore
├── .github/
│ └── workflows/
│ ├── publish.yml
│ └── test.yml
├── .gitignore
├── .isort.cfg
├── .projectile
├── CHANGELOG.md
├── Dockerfile
├── LICENSE.md
├── README.md
├── docker_run.sh
├── mypy.ini
├── pylintrc
├── pyproject.toml
├── script/
│ ├── format
│ ├── lint
│ ├── package
│ ├── run
│ ├── setup
│ └── test
├── setup.cfg
├── tests/
│ ├── __init__.py
│ └── test_faster_whisper.py
└── wyoming_faster_whisper/
├── __init__.py
├── __main__.py
├── const.py
├── dispatch_handler.py
├── faster_whisper_handler.py
├── models.py
├── onnx_asr_handler.py
├── sherpa_handler.py
└── transformers_whisper.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .dockerignore
================================================
*
!wyoming_faster_whisper/*.py
!pyproject.toml
!docker_run.sh
================================================
FILE: .github/workflows/publish.yml
================================================
---
name: Release (DockerHub + GitHub)
on:
workflow_dispatch:
push:
tags:
- "v*.*.*" # e.g. v1.2.3
jobs:
build:
runs-on: ubuntu-latest
outputs:
tag: ${{ steps.read_ver.outputs.tag }}
file_ver: ${{ steps.read_ver.outputs.file_ver }}
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: "3.13"
- name: Install build tools
run: pip install tomli setuptools wheel build
- name: Read version from pyproject.toml & verify tag
id: read_ver
shell: python
run: |
import os, sys
try:
import tomllib as toml # Python 3.11+
except Exception:
import tomli as toml # fallback
# Read pyproject version
with open("pyproject.toml","rb") as f:
ver = toml.load(f)["project"]["version"]
# Extract tag from GITHUB_REF (e.g. "refs/tags/v1.2.3" -> "1.2.3")
ref = os.environ["GITHUB_REF"]
# Safety checks & strip prefix
prefix = "refs/tags/v"
if not ref.startswith(prefix):
print(f"Unexpected GITHUB_REF: {ref}", file=sys.stderr)
sys.exit(1)
tag = ref[len(prefix):]
if tag != ver:
print(f"Tag ({tag}) != pyproject.toml ({ver})", file=sys.stderr)
sys.exit(1)
# Expose outputs
with open(os.environ["GITHUB_OUTPUT"], "a") as fh:
fh.write(f"tag={tag}\n")
fh.write(f"file_ver={ver}\n")
print(f"Version OK: tag={tag} matches pyproject.toml={ver}")
- name: Build wheel
run: |
python -m build --wheel --sdist
- name: Upload wheels and sdist
uses: actions/upload-artifact@v4
with:
name: dist-wyoming-faster-whisper
path: |
dist/*.whl
dist/*.tar.gz
if-no-files-found: error
# Enables building foreign architectures (e.g., arm64 on amd64 runner)
- name: Set up QEMU
uses: docker/setup-qemu-action@v3
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Log in to DockerHub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Build
uses: docker/build-push-action@v5
with:
context: .
file: ./Dockerfile
platforms: linux/amd64,linux/arm64
push: true
tags: |
rhasspy/wyoming-whisper:latest
rhasspy/wyoming-whisper:${{ steps.read_ver.outputs.tag }}
cache-from: type=gha
cache-to: type=gha,mode=max
changelog:
needs: build
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Extract release notes (fail if missing)
id: notes
shell: python
env:
TAG: ${{ needs.build.outputs.tag }}
run: |
import os, sys, re, pathlib
ver = os.environ["TAG"] # e.g., 1.0.2
chlog_path = pathlib.Path("CHANGELOG.md")
if not chlog_path.exists():
print("CHANGELOG.md not found", file=sys.stderr)
sys.exit(2)
lines = chlog_path.read_text(encoding="utf-8").splitlines()
def is_heading_for_version(s: str) -> bool:
s = s.strip()
if not s.startswith("##"):
return False
s = s[2:].strip() # drop "##"
s = s.strip("[]") # allow [1.0.2]
s = re.sub(r"\s*-\s*.*$", "", s) # drop " - date"
s = s.lstrip("v") # allow v1.0.2
return s == ver
start_idx = None
for i, line in enumerate(lines):
if is_heading_for_version(line):
start_idx = i + 1 # start AFTER heading
break
if start_idx is None:
print(f"No changelog section found for {ver}", file=sys.stderr)
sys.exit(3)
end_idx = len(lines)
for j in range(start_idx, len(lines)):
if lines[j].lstrip().startswith("## "):
end_idx = j
break
section_lines = lines[start_idx:end_idx]
while section_lines and not section_lines[0].strip():
section_lines.pop(0)
while section_lines and not section_lines[-1].strip():
section_lines.pop()
section = "\n".join(section_lines)
only_links = re.fullmatch(r"(?:\[[^\]]+\]:\s*\S+\s*(?:\n|$))*", section or "", flags=re.MULTILINE)
if not section or only_links:
print(f"Changelog section for {ver} is empty", file=sys.stderr)
sys.exit(4)
pathlib.Path("RELEASE_NOTES.md").write_text(section, encoding="utf-8")
- name: Upload release notes
uses: actions/upload-artifact@v4
with:
name: release-notes
path: RELEASE_NOTES.md
publish:
needs: [build, changelog]
runs-on: ubuntu-latest
environment: pypi
permissions:
id-token: write # required for PyPI Trusted Publishing (OIDC)
contents: read
steps:
- name: Download all dists
uses: actions/download-artifact@v4
with:
pattern: dist-*
merge-multiple: true
path: dist
- name: Publish to PyPI via OIDC
uses: pypa/gh-action-pypi-publish@release/v1
with:
verbose: true
github_release:
needs: [build, changelog]
runs-on: ubuntu-latest
permissions:
contents: write
steps:
- name: Download all dists
uses: actions/download-artifact@v4
with:
pattern: dist-*
merge-multiple: true
path: dist
- uses: actions/download-artifact@v4
with:
name: release-notes
path: .
- name: Create GitHub Release
uses: softprops/action-gh-release@v2
with:
tag_name: ${{ github.ref_name }}
name: ${{ github.ref_name }}
body_path: RELEASE_NOTES.md
files: |
dist/*
================================================
FILE: .github/workflows/test.yml
================================================
---
name: Test
on:
workflow_dispatch:
pull_request:
branches:
- main
jobs:
build_wheels:
name: Test on ${{ matrix.os }}
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest]
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: "3.13"
- name: Install
run: |
script/setup --dev --transformers --sherpa
- name: Run tests
run: script/test
================================================
FILE: .gitignore
================================================
.DS_Store
.idea
*.log
tmp/
*.py[cod]
*.egg
*.egg-info/
build
htmlcov
/.venv/
.mypy_cache/
__pycache__/
/dist/
/local/
================================================
FILE: .isort.cfg
================================================
[settings]
multi_line_output=3
include_trailing_comma=True
force_grid_wrap=0
use_parentheses=True
line_length=88
================================================
FILE: .projectile
================================================
- /.venv/
================================================
FILE: CHANGELOG.md
================================================
# Changelog
## 3.1.1 (unreleased)
- Fix transformers language
- Add initial prompt to transformers
## 3.1.0
- Refactor to dynamically load models
- Only prefer Parakeet for English (other languages don't detect reliably)
- Add `--vad-filter`, `--vad-threshold`, `--vad-min-speech-ms`, `--vad-min-silence-ms` (thanks @lmoe)
- Add `zeroconf` to Docker image
## 3.0.2
- Set `--data-dir /data` in Docker run script
## 3.0.1
- Fix model auto selection logic
## 3.0.0
- Add support for `sherpa-onnx` and Nvidia's parakeet model
- Add support for [GigaAM](https://github.com/salute-developers/GigaAM) for Russian via [`onnx-asr`](https://github.com/istupakov/onnx-asr)
- Add `--stt-library` to select speech-to-text library (deprecate `--use-transformers`)
- Default `--model` to "auto" (prefer parakeet)
- Add Docker build here
- Default `--language` to "auto"
- Add `--cpu-threads` for faster-whisper (@Zerwin)
## 2.5.0
- Add support for HuggingFace transformers Whisper models (--use-transformers)
## 2.4.0
- Add "auto" for model and beam size (0) to select values based on CPU
## 2.3.0
- Bump faster-whisper package to 1.1.0
- Supports model `turbo` for faster processing
## 2.2.0
- Bump faster-whisper package to 1.0.3
## 2.1.0
- Added `--initial-prompt` (see https://github.com/openai/whisper/discussions/963)
## 2.0.0
- Use faster-whisper PyPI package
- `--model` can now be a HuggingFace model like `Systran/faster-distil-whisper-small.en`
## 1.1.0
- Fix enum use for Python 3.11+
- Add tests and Github actions
- Bump tokenizers to 0.15
- Bump wyoming to 1.5.2
## 1.0.0
- Initial release
================================================
FILE: Dockerfile
================================================
FROM debian:bookworm-slim
ARG TARGETARCH
ARG TARGETVARIANT
# Install faster-whisper
WORKDIR /usr/src
COPY ./pyproject.toml ./
RUN \
apt-get update \
&& apt-get install -y --no-install-recommends \
python3 \
python3-pip \
python3-venv \
\
&& python3 -m venv .venv \
&& .venv/bin/pip3 install --no-cache-dir -U \
setuptools \
wheel \
&& .venv/bin/pip3 install --no-cache-dir \
--extra-index-url 'https://download.pytorch.org/whl/cpu' \
'torch==2.6.0' \
\
&& .venv/bin/pip3 install --no-cache-dir \
--extra-index-url https://www.piwheels.org/simple \
-e '.[zeroconf,transformers,sherpa,onnx-asr]' \
\
&& rm -rf /var/lib/apt/lists/*
COPY ./ ./
EXPOSE 10400
ENTRYPOINT ["bash", "docker_run.sh"]
================================================
FILE: LICENSE.md
================================================
MIT License
Copyright (c) 2025 Michael Hansen
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: README.md
================================================
# Wyoming Faster Whisper
[Wyoming protocol](https://github.com/rhasspy/wyoming) server for the [faster-whisper](https://github.com/guillaumekln/faster-whisper/) speech to text system.
## Home Assistant Add-on
[](https://my.home-assistant.io/redirect/supervisor_addon/?addon=core_whisper)
[Source](https://github.com/home-assistant/addons/tree/master/whisper)
## Local Install
Clone the repository and set up Python virtual environment:
``` sh
git clone https://github.com/rhasspy/wyoming-faster-whisper.git
cd wyoming-faster-whisper
script/setup
```
Run a server anyone can connect to:
```sh
script/run --model tiny-int8 --language en --uri 'tcp://0.0.0.0:10300' --data-dir /data --download-dir /data
```
The `--model` can also be a HuggingFace model like `Systran/faster-distil-whisper-small.en`
**NOTE**: Models are downloaded to the first `--data-dir` directory.
## Docker Image
``` sh
docker run -it -p 10300:10300 -v /path/to/local/data:/data rhasspy/wyoming-whisper \
--model tiny-int8 --language en
```
**NOTE**: Models are downloaded to `/data`, so make sure this points to a Docker volume.
[Source](https://github.com/rhasspy/wyoming-addons/tree/master/whisper)
================================================
FILE: docker_run.sh
================================================
#!/usr/bin/env bash
# Docker entrypoint: start the Wyoming server, forwarding extra CLI arguments.
#
# Fixes:
# - set -euo pipefail: abort if `cd` fails instead of running from the wrong
#   directory.
# - exec: replace the shell with the Python process so that, as PID 1 in the
#   container, it receives SIGTERM directly and can shut down gracefully
#   (bash does not forward signals to children).
set -euo pipefail
cd /usr/src
exec .venv/bin/python3 -m wyoming_faster_whisper \
    --uri 'tcp://0.0.0.0:10300' --data-dir '/data' "$@"
================================================
FILE: mypy.ini
================================================
[mypy]
ignore_missing_imports = true
[mypy-setuptools.*]
ignore_missing_imports = True
================================================
FILE: pylintrc
================================================
[MASTER]
ignore=faster_whisper
[MESSAGES CONTROL]
disable=
format,
abstract-method,
cyclic-import,
duplicate-code,
global-statement,
import-outside-toplevel,
inconsistent-return-statements,
locally-disabled,
not-context-manager,
too-few-public-methods,
too-many-arguments,
too-many-branches,
too-many-instance-attributes,
too-many-lines,
too-many-locals,
too-many-public-methods,
too-many-return-statements,
too-many-statements,
too-many-boolean-expressions,
unnecessary-pass,
unused-argument,
broad-except,
too-many-nested-blocks,
invalid-name,
unused-import,
fixme,
useless-super-delegation,
missing-module-docstring,
missing-class-docstring,
missing-function-docstring,
import-error,
consider-using-with,
too-many-positional-arguments
[FORMAT]
expected-line-ending-format=LF
================================================
FILE: pyproject.toml
================================================
[project]
name = "wyoming-faster-whisper"
version = "3.1.1"
description = "Wyoming Server for Faster Whisper"
readme = "README.md"
requires-python = ">=3.8"
license = {text = "MIT"}
authors = [
{name = "Michael Hansen", email = "mike@rhasspy.org"}
]
keywords = ["rhasspy", "wyoming", "whisper", "stt"]
classifiers = [
"Development Status :: 3 - Alpha",
"Intended Audience :: Developers",
"Topic :: Multimedia :: Sound/Audio :: Speech",
"License :: OSI Approved :: MIT License",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
]
dependencies = [
"wyoming>=1.8,<2",
"faster-whisper>=1.2.1,<2",
]
[project.urls]
Homepage = "http://github.com/rhasspy/wyoming-faster-whisper"
[tool.setuptools]
platforms = ["any"]
zip-safe = true
include-package-data = true
[tool.setuptools.packages.find]
include = ["wyoming_faster_whisper"]
exclude = ["tests"]
[project.scripts]
wyoming-faster-whisper = "wyoming_faster_whisper.__main__:run"
[build-system]
requires = ["setuptools>=42", "wheel"]
build-backend = "setuptools.build_meta"
[tool.black]
line-length = 88
[tool.isort]
profile = "black"
[tool.pytest.ini_options]
asyncio_mode = "auto"
[tool.mypy]
check_untyped_defs = true
disallow_untyped_defs = true
[project.optional-dependencies]
dev = [
"black",
"flake8",
"isort",
"mypy",
"pylint",
"pytest",
"pytest-asyncio",
]
transformers = [
"transformers[torch]==4.52.4",
]
sherpa = [
"sherpa-onnx>=1.12.19,<2",
]
onnx_asr = [
"onnx-asr[cpu,hub]==0.7.0",
]
zeroconf = [
"wyoming[zeroconf]",
]
================================================
FILE: script/format
================================================
#!/usr/bin/env python3
"""Format the package's Python sources with black and isort."""
import subprocess
import venv
from pathlib import Path

_DIR = Path(__file__).parent
_PROGRAM_DIR = _DIR.parent
_VENV_DIR = _PROGRAM_DIR / ".venv"
_MODULE = _PROGRAM_DIR.name.replace("-", "_")
_MODULE_DIR = _PROGRAM_DIR / _MODULE

# Use the project virtualenv's interpreter when one exists,
# otherwise fall back to whatever python3 is on PATH.
if _VENV_DIR.exists():
    python_exe = venv.EnvBuilder().ensure_directories(_VENV_DIR).env_exe
else:
    python_exe = "python3"

# Run each formatter over the package directory, failing on non-zero exit.
for formatter in ("black", "isort"):
    subprocess.check_call([python_exe, "-m", formatter, str(_MODULE_DIR)])
================================================
FILE: script/lint
================================================
#!/usr/bin/env python3
"""Run the project's lint suite (black/isort checks, flake8, pylint, mypy)."""
import subprocess
import venv
from pathlib import Path

_DIR = Path(__file__).parent
_PROGRAM_DIR = _DIR.parent
_VENV_DIR = _PROGRAM_DIR / ".venv"
_MODULE = _PROGRAM_DIR.name.replace("-", "_")
_MODULE_DIR = _PROGRAM_DIR / _MODULE

# Prefer the project virtualenv's interpreter when available.
if _VENV_DIR.exists():
    python_exe = venv.EnvBuilder().ensure_directories(_VENV_DIR).env_exe
else:
    python_exe = "python3"

# Each entry is the argv tail passed after "python -m".
# check_call raises on the first failing tool, stopping the run.
checks = (
    ["black", str(_MODULE_DIR), "--check"],
    ["isort", str(_MODULE_DIR), "--check"],
    ["flake8", str(_MODULE_DIR)],
    ["pylint", str(_MODULE_DIR)],
    ["mypy", str(_MODULE_DIR)],
)
for check_args in checks:
    subprocess.check_call([python_exe, "-m"] + check_args)
================================================
FILE: script/package
================================================
#!/usr/bin/env python3
"""Stage platform-specific libraries into the package, then build wheel + sdist."""
import platform
import shutil
import subprocess
import sys
import venv
from pathlib import Path

_DIR = Path(__file__).parent
_PROGRAM_DIR = _DIR.parent
_VENV_DIR = _PROGRAM_DIR / ".venv"
_MODULE = _PROGRAM_DIR.name.replace("-", "_")
_MODULE_DIR = _PROGRAM_DIR / _MODULE
_SRC_LIB_DIR = _PROGRAM_DIR / "lib"
_DEST_LIB_DIR = _MODULE_DIR / "lib"

# Prefer the project virtualenv's interpreter when available.
if _VENV_DIR.exists():
    python_exe = venv.EnvBuilder().ensure_directories(_VENV_DIR).env_exe
else:
    python_exe = "python3"

# Determine CPU architecture from the machine string.
machine = platform.machine().lower()
is_arm = ("arm" in machine) or ("aarch" in machine)
is_amd64 = machine in ("x86_64", "amd64")

# Map (OS, architecture) to the name of the bundled library directory.
# Unsupported combinations leave platform_name empty.
platform_name = ""
if sys.platform.startswith("linux"):
    if is_amd64:
        platform_name = "linux_amd64"
    elif is_arm:
        platform_name = "linux_arm64"
elif sys.platform == "win32":
    if is_amd64:
        platform_name = "windows_amd64"
elif sys.platform == "darwin":
    if is_arm:
        platform_name = "darwin_arm64"

if not platform_name:
    raise SystemExit("Unsupported platform")

# Replace any previously staged libraries with the ones for this platform.
if _DEST_LIB_DIR.exists():
    shutil.rmtree(_DEST_LIB_DIR)

shutil.copytree(_SRC_LIB_DIR / platform_name, _DEST_LIB_DIR)

subprocess.check_call([python_exe, "-m", "build", "--wheel", "--sdist"])
================================================
FILE: script/run
================================================
#!/usr/bin/env python3
"""Run the package's __main__ module, forwarding all CLI arguments."""
import subprocess
import sys
import venv
from pathlib import Path

_DIR = Path(__file__).parent
_PROGRAM_DIR = _DIR.parent
_MODULE = _PROGRAM_DIR.name.replace("-", "_")
_VENV_DIR = _PROGRAM_DIR / ".venv"

# Prefer the project virtualenv's interpreter when available.
if _VENV_DIR.exists():
    env_context = venv.EnvBuilder().ensure_directories(_VENV_DIR)
    python_exe = env_context.env_exe
else:
    python_exe = "python3"

# Forward everything after the script name to the module.
subprocess.check_call([python_exe, "-m", _MODULE, *sys.argv[1:]])
================================================
FILE: script/setup
================================================
#!/usr/bin/env python3
"""Create the project virtualenv and install the package with optional extras.

Fix/generalization: pyproject.toml declares "onnx_asr" and "zeroconf" extras
(installed by the Dockerfile), but this script previously had no flags to
request them. New --onnx-asr and --zeroconf flags are added; existing flags
and behavior are unchanged.
"""
import argparse
import subprocess
import venv
from pathlib import Path

_DIR = Path(__file__).parent
_PROGRAM_DIR = _DIR.parent
_VENV_DIR = _PROGRAM_DIR / ".venv"

parser = argparse.ArgumentParser()
parser.add_argument("--dev", action="store_true", help="Install dev requirements")
parser.add_argument(
    "--transformers", action="store_true", help="Install transformers requirements"
)
parser.add_argument("--sherpa", action="store_true", help="Install sherpa requirements")
parser.add_argument(
    "--onnx-asr", action="store_true", help="Install onnx-asr requirements"
)
parser.add_argument(
    "--zeroconf", action="store_true", help="Install zeroconf requirements"
)
args = parser.parse_args()

# Create virtual environment
builder = venv.EnvBuilder(with_pip=True)
context = builder.ensure_directories(_VENV_DIR)
builder.create(_VENV_DIR)

# Upgrade base packaging tools first so the editable install works reliably.
pip = [context.env_exe, "-m", "pip"]
subprocess.check_call(pip + ["install", "--upgrade", "pip"])
subprocess.check_call(pip + ["install", "--upgrade", "setuptools", "wheel"])

# Collect requested extras (names match [project.optional-dependencies];
# pip normalizes "-"/"_" in extras names).
extras = []
if args.dev:
    extras.append("dev")
if args.transformers:
    extras.append("transformers")
if args.sherpa:
    extras.append("sherpa")
if args.onnx_asr:
    extras.append("onnx-asr")
if args.zeroconf:
    extras.append("zeroconf")

extras_str = ""
if extras:
    extras_str = "[" + ",".join(extras) + "]"

# Install the project in editable mode with any requested extras.
subprocess.check_call(pip + ["install", "-e", f"{_PROGRAM_DIR}{extras_str}"])
================================================
FILE: script/test
================================================
#!/usr/bin/env python3
"""Run the test suite with pytest."""
import subprocess
import venv
from pathlib import Path

_DIR = Path(__file__).parent
_PROGRAM_DIR = _DIR.parent
_VENV_DIR = _PROGRAM_DIR / ".venv"
_TESTS_DIR = _PROGRAM_DIR / "tests"

# Prefer the project virtualenv's interpreter when available.
if _VENV_DIR.exists():
    python_exe = venv.EnvBuilder().ensure_directories(_VENV_DIR).env_exe
else:
    python_exe = "python3"

subprocess.check_call([python_exe, "-m", "pytest", str(_TESTS_DIR)])
================================================
FILE: setup.cfg
================================================
[flake8]
# To work with Black
max-line-length = 88
# E501: line too long
# W503: Line break occurred before a binary operator
# E203: Whitespace before ':'
# D202 No blank lines allowed after function docstring
# W504 line break after binary operator
ignore =
E501,
W503,
E203,
D202,
W504
exclude = wyoming_faster_whisper/faster_whisper
[isort]
multi_line_output = 3
include_trailing_comma=True
force_grid_wrap=0
use_parentheses=True
line_length=88
indent = " "
================================================
FILE: tests/__init__.py
================================================
"""Tests for wyoming-faster-whisper."""
from pathlib import Path
_DIR = Path(__file__).parent
_PROGRAM_DIR = _DIR.parent
_LOCAL_DIR = _PROGRAM_DIR / "local"
_SAMPLES_PER_CHUNK = 1024
# Need to give time for the model to download
_START_TIMEOUT = 60
_TRANSCRIBE_TIMEOUT = 60
================================================
FILE: tests/test_faster_whisper.py
================================================
"""Tests for faster-whisper."""
import asyncio
import re
import sys
import wave
from asyncio.subprocess import PIPE
import pytest
from wyoming.asr import Transcribe, Transcript
from wyoming.audio import AudioStart, AudioStop, wav_to_chunks
from wyoming.event import async_read_event, async_write_event
from wyoming.info import Describe, Info
from . import _LOCAL_DIR, _SAMPLES_PER_CHUNK, _START_TIMEOUT, _TRANSCRIBE_TIMEOUT, _DIR
@pytest.mark.parametrize(
    ("stt_library", "model"),
    [
        ("faster-whisper", "base-int8"),
        ("transformers", "openai/whisper-base.en"),
        ("sherpa", "auto"),
    ],
)
@pytest.mark.asyncio
async def test_faster_whisper(stt_library: str, model: str) -> None:
    """End-to-end test: run the server over stdio and transcribe a known WAV.

    For each (stt_library, model) pair, the Wyoming server is started as a
    subprocess speaking the protocol over stdin/stdout, queried for its Info,
    fed a fixture WAV as audio chunks, and expected to return the known
    transcript.
    """
    proc = await asyncio.create_subprocess_exec(
        sys.executable,
        "-m",
        "wyoming_faster_whisper",
        "--uri",
        "stdio://",
        "--stt-library",
        stt_library,
        "--model",
        model,
        "--data-dir",
        str(_LOCAL_DIR),
        "--language",
        "en",
        "--vad-filter",
        stdin=PIPE,
        stdout=PIPE,
    )
    assert proc.stdin is not None
    assert proc.stdout is not None

    # Check info
    await async_write_event(Describe().event(), proc.stdin)
    while True:
        event = await asyncio.wait_for(
            async_read_event(proc.stdout), timeout=_START_TIMEOUT
        )
        assert event is not None

        # Skip any unrelated events until the Info response arrives
        if not Info.is_type(event.type):
            continue

        info = Info.from_event(event)
        assert len(info.asr) == 1, "Expected one asr service"
        break

    await async_write_event(Transcribe().event(), proc.stdin)

    # Test known WAV
    with wave.open(str(_DIR / "turn_on_the_living_room_lamp.wav"), "rb") as example_wav:
        await async_write_event(
            AudioStart(
                rate=example_wav.getframerate(),
                width=example_wav.getsampwidth(),
                channels=example_wav.getnchannels(),
            ).event(),
            proc.stdin,
        )
        for chunk in wav_to_chunks(example_wav, _SAMPLES_PER_CHUNK):
            await async_write_event(chunk.event(), proc.stdin)

        await async_write_event(AudioStop().event(), proc.stdin)

    while True:
        event = await asyncio.wait_for(
            async_read_event(proc.stdout), timeout=_TRANSCRIBE_TIMEOUT
        )
        assert event is not None

        # Skip any unrelated events until the Transcript arrives
        if not Transcript.is_type(event.type):
            continue

        transcript = Transcript.from_event(event)
        text = transcript.text.lower().strip()

        # Normalize: keep only lowercase letters and spaces before comparing
        text = re.sub(r"[^a-z ]", "", text)
        assert text == "turn on the living room lamp"
        break

    # Need to close stdin for graceful termination
    proc.stdin.close()
    _, stderr = await proc.communicate()
    assert proc.returncode == 0, stderr.decode()
================================================
FILE: wyoming_faster_whisper/__init__.py
================================================
"""Wyoming server for faster-whisper."""
from importlib.metadata import version
__version__ = version("wyoming_faster_whisper")
__all__ = ["__version__"]
================================================
FILE: wyoming_faster_whisper/__main__.py
================================================
#!/usr/bin/env python3
import argparse
import asyncio
import logging
import platform
import re
from functools import partial
from typing import Any, Dict, Optional
import faster_whisper
from wyoming.info import AsrModel, AsrProgram, Attribution, Info
from wyoming.server import AsyncServer, AsyncTcpServer
from . import __version__
from .const import AUTO_LANGUAGE, AUTO_MODEL, PARAKEET_LANGUAGES, SttLibrary
from .dispatch_handler import DispatchEventHandler
from .models import ModelLoader
_LOGGER = logging.getLogger(__name__)
async def main() -> None:
    """Main entry point.

    Parses CLI arguments, resolves model/language defaults, pre-loads the
    transcriber, and runs the Wyoming server (optionally advertised over
    zeroconf).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--uri", required=True, help="unix:// or tcp://")

    # Discovery
    parser.add_argument(
        "--zeroconf",
        nargs="?",
        const="faster-whisper",
        help="Enable discovery over zeroconf with optional name (default: faster-whisper)",
    )

    # Model/transcription options
    parser.add_argument(
        "--model", default=AUTO_MODEL, help=f"Name of model to use (or {AUTO_MODEL})"
    )
    parser.add_argument(
        "--data-dir",
        required=True,
        action="append",
        help="Data directory to check for downloaded models",
    )
    parser.add_argument(
        "--download-dir",
        help="Directory to download models into (default: first data dir)",
    )
    parser.add_argument(
        "--device",
        default="cpu",
        help="Device to use for inference (default: cpu)",
    )
    parser.add_argument(
        "--language",
        default=AUTO_LANGUAGE,
        help=f"Default language to set for transcription (default: {AUTO_LANGUAGE})",
    )
    parser.add_argument(
        "--compute-type",
        default="default",
        help="Compute type (float16, int8, etc.)",
    )
    parser.add_argument(
        "--beam-size",
        type=int,
        default=0,
        help="Size of beam during decoding (0 for auto)",
    )
    parser.add_argument(
        "--cpu-threads",
        default=4,
        type=int,
        help="Number of CPU threads to use for inference (default: 4, faster-whisper and sherpa-onnx)",
    )
    parser.add_argument(
        "--initial-prompt",
        help="Optional text to provide as a prompt for the first window (faster-whisper only)",
    )
    parser.add_argument(
        "--vad-filter",
        action="store_true",
        help="Enable Silero VAD to filter out non-speech which can reduce hallucinations (default: false, faster-whisper only)",
    )
    parser.add_argument(
        "--vad-threshold",
        type=float,
        default=0.5,
        help="VAD speech probability threshold (default: 0.5, faster-whisper only)",
    )
    parser.add_argument(
        "--vad-min-speech-ms",
        type=int,
        default=250,
        help="VAD minimum speech duration in ms (default: 250, faster-whisper only)",
    )
    parser.add_argument(
        "--vad-min-silence-ms",
        type=int,
        default=2000,
        help="VAD minimum silence duration in ms to split (default: 2000, faster-whisper only)",
    )
    parser.add_argument(
        "--stt-library",
        choices=[lib.value for lib in SttLibrary],
        default=SttLibrary.AUTO,
        help="Set library to use for speech-to-text (may require extra dependencies)",
    )
    parser.add_argument(
        "--local-files-only",
        action="store_true",
        help="Don't check HuggingFace hub for updates every time",
    )

    # Logging/meta options
    parser.add_argument("--debug", action="store_true", help="Log DEBUG messages")
    parser.add_argument(
        "--log-format", default=logging.BASIC_FORMAT, help="Format for log messages"
    )
    parser.add_argument(
        "--version",
        action="version",
        version=__version__,
        help="Print version and exit",
    )
    args = parser.parse_args()

    if not args.download_dir:
        # Download to first data dir by default
        args.download_dir = args.data_dir[0]

    logging.basicConfig(
        level=logging.DEBUG if args.debug else logging.INFO, format=args.log_format
    )
    _LOGGER.debug(args)

    # Normalize the CLI string to the enum.
    args.stt_library = SttLibrary(args.stt_library)

    # Auto-select a smaller beam on ARM machines (beam_size <= 0 means "auto").
    machine = platform.machine().lower()
    is_arm = ("arm" in machine) or ("aarch" in machine)
    if args.beam_size <= 0:
        args.beam_size = 1 if is_arm else 5
        _LOGGER.debug("Beam size automatically selected: %s", args.beam_size)

    # Resolve model name
    model_name = args.model
    model_match = re.match(r"^(tiny|base|small|medium)[.-]int8$", args.model)
    if model_match:
        # Original models re-uploaded to huggingface
        model_size = model_match.group(1)
        model_name = f"{model_size}-int8"
        args.model = f"rhasspy/faster-whisper-{model_name}"

    if args.language == AUTO_LANGUAGE:
        # Whisper does not understand auto
        args.language = None

    if args.model == AUTO_MODEL:
        # None lets the loader pick a model.
        args.model = None

    # Service description returned in response to Describe events.
    wyoming_info = Info(
        asr=[
            AsrProgram(
                name="faster-whisper",
                description="Faster Whisper transcription with CTranslate2",
                attribution=Attribution(
                    name="Guillaume Klein",
                    url="https://github.com/guillaumekln/faster-whisper/",
                ),
                installed=True,
                version=__version__,
                models=[
                    AsrModel(
                        name=model_name,
                        description=model_name,
                        attribution=Attribution(
                            name="Systran",
                            url="https://huggingface.co/Systran",
                        ),
                        installed=True,
                        # Advertise Whisper's languages plus parakeet's.
                        languages=sorted(
                            list(
                                # pylint: disable=protected-access
                                set(faster_whisper.tokenizer._LANGUAGE_CODES).union(
                                    PARAKEET_LANGUAGES
                                )
                            )
                        ),
                        version=faster_whisper.__version__,
                    )
                ],
            )
        ],
    )

    # Only build VAD parameters when --vad-filter was given.
    vad_parameters: Optional[Dict[str, Any]] = None
    if args.vad_filter:
        vad_parameters = {
            "threshold": args.vad_threshold,
            "min_speech_duration_ms": args.vad_min_speech_ms,
            "min_silence_duration_ms": args.vad_min_silence_ms,
        }

    loader = ModelLoader(
        preferred_stt_library=args.stt_library,
        preferred_language=args.language,
        download_dir=args.download_dir,
        local_files_only=args.local_files_only,
        model=args.model,
        compute_type=args.compute_type,
        device=args.device,
        beam_size=args.beam_size,
        cpu_threads=args.cpu_threads,
        initial_prompt=args.initial_prompt,
        vad_parameters=vad_parameters,
    )

    # Load model
    _LOGGER.debug("Pre-loading transcriber")
    await loader.load_transcriber()

    server = AsyncServer.from_uri(args.uri)

    if args.zeroconf:
        # Zeroconf needs a TCP host/port to advertise.
        if not isinstance(server, AsyncTcpServer):
            raise ValueError("Zeroconf requires tcp:// uri")

        # Imported lazily: requires the optional "zeroconf" extra.
        from wyoming.zeroconf import HomeAssistantZeroconf

        tcp_server: AsyncTcpServer = server
        hass_zeroconf = HomeAssistantZeroconf(
            name=args.zeroconf, port=tcp_server.port, host=tcp_server.host
        )
        await hass_zeroconf.register_server()
        _LOGGER.debug("Zeroconf discovery enabled")

    _LOGGER.info("Ready")

    # Serve until interrupted; a new DispatchEventHandler per client connection.
    await server.run(
        partial(
            DispatchEventHandler,
            wyoming_info,
            loader,
        )
    )
# -----------------------------------------------------------------------------


def run() -> None:
    """Synchronous entry point (used by the console script in pyproject.toml)."""
    asyncio.run(main())


if __name__ == "__main__":
    try:
        run()
    except KeyboardInterrupt:
        # Exit quietly on Ctrl+C
        pass
================================================
FILE: wyoming_faster_whisper/const.py
================================================
"""Constants."""
from abc import ABC, abstractmethod
from enum import Enum
from pathlib import Path
from typing import Optional, Union
class SttLibrary(str, Enum):
    """Speech-to-text library.

    Values match the strings accepted by the --stt-library CLI option.
    """

    AUTO = "auto"  # let the server choose a library
    FASTER_WHISPER = "faster-whisper"
    TRANSFORMERS = "transformers"
    SHERPA = "sherpa"
    ONNX_ASR = "onnx-asr"
AUTO_LANGUAGE = "auto"
AUTO_MODEL = "auto"
PARAKEET_LANGUAGES = {
"bg",
"hr",
"cs",
"da",
"nl",
"en",
"et",
"fi",
"fr",
"de",
"el",
"hu",
"it",
"lv",
"lt",
"mt",
"pl",
"pt",
"ro",
"sk",
"sl",
"es",
"sv",
"ru",
"uk",
}
class Transcriber(ABC):
    """Base class for transcribers."""

    @abstractmethod
    def transcribe(
        self,
        wav_path: Union[str, Path],
        language: Optional[str],
        beam_size: int = 5,
        initial_prompt: Optional[str] = None,
    ) -> str:
        """Transcribe the audio in wav_path and return the recognized text.

        Args:
            wav_path: Path to a WAV file with the audio to transcribe.
            language: Language code, or None (presumably auto-detect —
                semantics depend on the implementation; confirm per backend).
            beam_size: Beam width used during decoding.
            initial_prompt: Optional prompt text (implementation-dependent;
                the CLI documents it as faster-whisper only).

        Returns:
            The transcribed text.
        """
        pass
================================================
FILE: wyoming_faster_whisper/dispatch_handler.py
================================================
"""Event handler for clients of the server."""
import asyncio
import logging
import os
import tempfile
import wave
from typing import Optional
from wyoming.asr import Transcribe, Transcript
from wyoming.audio import AudioChunk, AudioChunkConverter, AudioStop
from wyoming.event import Event
from wyoming.info import Describe, Info
from wyoming.server import AsyncEventHandler
from .const import Transcriber
from .models import ModelLoader
_LOGGER = logging.getLogger(__name__)
class DispatchEventHandler(AsyncEventHandler):
    """Dispatches to appropriate transcriber.

    Buffers incoming audio into a temporary WAV file (none of the underlying
    models support streaming), loads the transcriber in the background while
    audio arrives, then transcribes when the audio stops.

    Fixes:
    - Typo in the AudioStop debug message ("stoppped" -> "stopped").
    - Reset _transcriber_future after a request completes; otherwise a later
      request on the same handler would await the stale future and reuse the
      transcriber loaded for the previous request's language.
    """

    def __init__(
        self,
        wyoming_info: Info,
        loader: ModelLoader,
        *args,
        **kwargs,
    ) -> None:
        """Initialize handler with server info and a lazy model loader."""
        super().__init__(*args, **kwargs)
        self.wyoming_info_event = wyoming_info.event()
        self._loader = loader

        # Transcriber for the current request; loaded lazily once audio arrives.
        self._transcriber: Optional[Transcriber] = None
        self._transcriber_future: Optional[asyncio.Future] = None

        # Language requested by the client (set by a Transcribe event).
        self._language: Optional[str] = None

        # Audio is buffered into a WAV file inside a temporary directory.
        self._wav_dir = tempfile.TemporaryDirectory()
        self._wav_path = os.path.join(self._wav_dir.name, "speech.wav")
        self._wav_file: Optional[wave.Wave_write] = None

        # All incoming audio is converted to 16 kHz, 16-bit, mono.
        self._audio_converter = AudioChunkConverter(rate=16000, width=2, channels=1)

    async def handle_event(self, event: Event) -> bool:
        """Handle one Wyoming event; returning False closes the connection."""
        if AudioChunk.is_type(event.type):
            # Audio is saved to a WAV file for transcription later.
            # None of the underlying models support streaming.
            chunk = self._audio_converter.convert(AudioChunk.from_event(event))

            if self._wav_file is None:
                self._wav_file = wave.open(self._wav_path, "wb")
                self._wav_file.setframerate(chunk.rate)
                self._wav_file.setsampwidth(chunk.width)
                self._wav_file.setnchannels(chunk.channels)

            self._wav_file.writeframes(chunk.audio)

            if (self._transcriber is None) and (self._transcriber_future is None):
                # Load the transcriber in the background.
                # Hopefully it's ready by the time the audio stops.
                self._transcriber_future = asyncio.create_task(
                    self._loader.load_transcriber(self._language)
                )

            return True

        if AudioStop.is_type(event.type):
            _LOGGER.debug("Audio stopped")

            if self._transcriber is None:
                # Get transcriber that was loading in the background
                assert self._transcriber_future is not None
                self._transcriber = await self._transcriber_future

            assert self._transcriber is not None
            assert self._wav_file is not None
            self._wav_file.close()
            self._wav_file = None

            # Do transcription in a separate thread
            text = await asyncio.to_thread(
                self._transcriber.transcribe,
                self._wav_path,
                self._language,
                beam_size=self._loader.beam_size,
                initial_prompt=self._loader.initial_prompt,
            )
            _LOGGER.info(text)

            await self.write_event(Transcript(text=text).event())
            _LOGGER.debug("Completed request")

            # Reset for any subsequent request. Clearing the future is required
            # so a new (possibly different-language) transcriber gets loaded.
            self._language = None
            self._transcriber = None
            self._transcriber_future = None

            return False

        if Transcribe.is_type(event.type):
            # Client may request a specific language for the next transcription.
            transcribe = Transcribe.from_event(event)
            self._language = transcribe.language or self._loader.preferred_language
            _LOGGER.debug("Language set to %s", self._language)
            return True

        if Describe.is_type(event.type):
            await self.write_event(self.wyoming_info_event)
            _LOGGER.debug("Sent info")
            return True

        # Ignore unknown events but keep the connection open.
        return True
================================================
FILE: wyoming_faster_whisper/faster_whisper_handler.py
================================================
"""Event handler for clients of the server."""
from pathlib import Path
from typing import Any, Dict, Optional, Union
import faster_whisper
from .const import Transcriber
class FasterWhisperTranscriber(Transcriber):
    """Transcriber backed by a faster-whisper WhisperModel."""

    def __init__(
        self,
        model_id: str,
        cache_dir: Union[str, Path],
        device: str = "cpu",
        compute_type: str = "default",
        cpu_threads: int = 4,
        vad_parameters: Optional[Dict[str, Any]] = None,
    ) -> None:
        """Load the model, downloading into cache_dir if necessary."""
        # Voice activity detection is enabled only when parameters are given.
        self.vad_parameters = vad_parameters
        self.vad_filter = vad_parameters is not None
        self.model = faster_whisper.WhisperModel(
            model_id,
            download_root=str(cache_dir),
            device=device,
            compute_type=compute_type,
            cpu_threads=cpu_threads,
        )

    def transcribe(
        self,
        wav_path: Union[str, Path],
        language: Optional[str],
        beam_size: int = 5,
        initial_prompt: Optional[str] = None,
    ) -> str:
        """Return the transcription of a WAV file using beam search."""
        decode_args = dict(
            beam_size=beam_size,
            language=language,
            initial_prompt=initial_prompt,
            vad_filter=self.vad_filter,
            vad_parameters=self.vad_parameters,
        )
        segments, _info = self.model.transcribe(str(wav_path), **decode_args)

        # Join the text of all decoded segments into a single string.
        pieces = [segment.text for segment in segments]
        return " ".join(pieces)
================================================
FILE: wyoming_faster_whisper/models.py
================================================
"""Logic for model selection, loading, and transcription."""
import asyncio
import logging
import platform
from collections import defaultdict
from pathlib import Path
from typing import Any, Dict, Optional, Tuple, Union
from .const import SttLibrary, Transcriber
from .faster_whisper_handler import FasterWhisperTranscriber
_LOGGER = logging.getLogger(__name__)

# Cache key type: (library, model id) uniquely identifies a loaded transcriber.
TRANSCRIBER_KEY = Tuple[SttLibrary, str]  # model id
class ModelLoader:
    """Load transcribers for models.

    Transcribers are cached by (library, model id); a per-key asyncio.Lock
    ensures each model is only loaded once even under concurrent requests.
    """

    def __init__(
        self,
        preferred_stt_library: SttLibrary,
        preferred_language: Optional[str],
        download_dir: Union[str, Path],
        local_files_only: bool,
        model: Optional[str],
        compute_type: str,
        device: str,
        beam_size: int,
        cpu_threads: int,
        initial_prompt: Optional[str],
        vad_parameters: Optional[Dict[str, Any]],
    ) -> None:
        """Initialize loader.

        Args:
            preferred_stt_library: Library to use (AUTO selects per language).
            preferred_language: Language used when a request doesn't set one.
            download_dir: Directory where models are downloaded/cached.
            local_files_only: If True, only use already-downloaded files.
            model: Explicit model id, or None to guess automatically.
            compute_type: Compute type (faster-whisper only).
            device: Inference device, e.g. "cpu" (faster-whisper only).
            beam_size: Beam search width for decoding.
            cpu_threads: Number of CPU threads for inference.
            initial_prompt: Optional prompt for Whisper-style models.
            vad_parameters: Voice activity detection settings
                (faster-whisper only).
        """
        self.preferred_stt_library = preferred_stt_library
        self.preferred_language = preferred_language
        self.download_dir = Path(download_dir)
        self.local_files_only = local_files_only

        # faster-whisper only
        self.model = model
        self.compute_type = compute_type
        self.device = device
        self.beam_size = beam_size
        self.cpu_threads = cpu_threads
        self.initial_prompt = initial_prompt
        self.vad_parameters = vad_parameters

        # Cache of loaded transcribers and per-key locks guarding loading.
        self._transcriber: Dict[TRANSCRIBER_KEY, Transcriber] = {}
        self._transcriber_lock: Dict[TRANSCRIBER_KEY, asyncio.Lock] = defaultdict(
            asyncio.Lock
        )

    async def load_transcriber(self, language: Optional[str] = None) -> Transcriber:
        """Load or get transcriber from cache for a language."""
        language = language or self.preferred_language
        stt_library = self.preferred_stt_library

        # Check dependencies: each backend is an optional extra, so probe
        # availability by attempting the import.
        try:
            from .sherpa_handler import SherpaTranscriber

            has_sherpa = True
            _LOGGER.debug("Sherpa is available")
        except ImportError:
            has_sherpa = False
            _LOGGER.debug("Sherpa is NOT available")

        try:
            from .transformers_whisper import TransformersTranscriber

            has_transformers = True
            _LOGGER.debug("Transformers library is available")
        except ImportError:
            has_transformers = False
            _LOGGER.debug("Transformers library is NOT available")

        try:
            from .onnx_asr_handler import OnnxAsrTranscriber

            has_onnx_asr = True
            _LOGGER.debug("Onnx-ASR is available")
        except ImportError:
            has_onnx_asr = False
            _LOGGER.debug("Onnx-ASR is NOT available")

        # Select speech-to-text library
        if stt_library == SttLibrary.AUTO:
            # Default to faster-whisper
            stt_library = SttLibrary.FASTER_WHISPER

            if self.model is None:  # auto
                if (language == "ru") and has_onnx_asr:
                    # Prefer GigaAM via onnx-asr
                    stt_library = SttLibrary.ONNX_ASR
                elif (language == "en") and has_sherpa:
                    # Prefer Parakeet via sherpa for English.
                    # The v3 Parakeet model claims to auto detect other
                    # languages, but it doesn't work.
                    stt_library = SttLibrary.SHERPA
        elif (
            ((stt_library == SttLibrary.TRANSFORMERS) and (not has_transformers))
            or ((stt_library == SttLibrary.SHERPA) and (not has_sherpa))
            or ((stt_library == SttLibrary.ONNX_ASR) and (not has_onnx_asr))
        ):
            # Fall back to faster-whisper
            stt_library = SttLibrary.FASTER_WHISPER
            _LOGGER.debug("Falling back to faster-whisper (missing dependencies)")

        # Select model (smaller models are chosen on ARM machines)
        model = self.model
        if model is None:  # auto
            machine = platform.machine().lower()
            is_arm = ("arm" in machine) or ("aarch" in machine)
            model = guess_model(stt_library, language, is_arm)

        _LOGGER.debug(
            "Selected stt-library '%s' with model '%s'", stt_library.value, model
        )

        # Load transcriber
        assert stt_library != SttLibrary.AUTO
        assert model
        key = (stt_library, model)
        async with self._transcriber_lock[key]:
            transcriber = self._transcriber.get(key)
            if transcriber is not None:
                # Already loaded by a previous (or concurrent) request
                return transcriber

            if stt_library == SttLibrary.SHERPA:
                from .sherpa_handler import SherpaTranscriber  # noqa: F811

                transcriber = SherpaTranscriber(
                    model, self.download_dir, cpu_threads=self.cpu_threads
                )
            elif stt_library == SttLibrary.ONNX_ASR:
                from .onnx_asr_handler import OnnxAsrTranscriber  # noqa: F811

                transcriber = OnnxAsrTranscriber(
                    model,
                    cache_dir=self.download_dir,
                    local_files_only=self.local_files_only,
                )
            elif stt_library == SttLibrary.TRANSFORMERS:
                from .transformers_whisper import TransformersTranscriber  # noqa: F811

                transcriber = TransformersTranscriber(
                    model,
                    cache_dir=self.download_dir,
                    local_files_only=self.local_files_only,
                )
            else:
                transcriber = FasterWhisperTranscriber(
                    model,
                    cache_dir=self.download_dir,
                    device=self.device,
                    compute_type=self.compute_type,
                    cpu_threads=self.cpu_threads,
                    vad_parameters=self.vad_parameters,
                )

            self._transcriber[key] = transcriber

            return transcriber

    async def transcribe(
        self, wav_path: Union[str, Path], language: Optional[str]
    ) -> str:
        """Transcribe WAV file using appropriate transcriber.

        Assume WAV file is 16Khz 16-bit mono PCM.
        """
        transcriber = await self.load_transcriber(language)
        # Transcription is blocking, so run it in a worker thread.
        text = await asyncio.to_thread(
            transcriber.transcribe,
            wav_path,
            language=language,
            beam_size=self.beam_size,
            initial_prompt=self.initial_prompt,
        )
        _LOGGER.debug("Transcribed audio: %s", text)
        return text
def guess_model(stt_library: SttLibrary, language: Optional[str], is_arm: bool) -> str:
    """Automatically guess STT model id.

    Args:
        stt_library: Library the model will be loaded with.
        language: Language code, or None.
        is_arm: True when running on an ARM machine (prefer smaller models).

    Returns:
        A model id appropriate for the library, language, and hardware.
    """
    if stt_library == SttLibrary.SHERPA:
        # Parakeet: v2 for English, v3 for everything else.
        version = "v2" if language == "en" else "v3"
        return f"sherpa-onnx-nemo-parakeet-tdt-0.6b-{version}-int8"

    if stt_library == SttLibrary.TRANSFORMERS:
        # Smaller model on ARM; ".en" variants for English.
        size = "tiny" if is_arm else "base"
        suffix = ".en" if language == "en" else ""
        return f"openai/whisper-{size}{suffix}"

    if stt_library == SttLibrary.ONNX_ASR:
        return "gigaam-v2-rnnt"

    # faster-whisper (default)
    size = "tiny" if is_arm else "base"
    return f"rhasspy/faster-whisper-{size}-int8"
================================================
FILE: wyoming_faster_whisper/onnx_asr_handler.py
================================================
"""Code for transcription using the onnx-asr library."""
import wave
from pathlib import Path
from typing import Optional, Union
from unittest.mock import patch
import numpy as np
import onnx_asr
from huggingface_hub import snapshot_download
from .const import Transcriber
# Expected sample rate of input audio (Hz).
_RATE = 16000
class OnnxAsrTranscriber(Transcriber):
    """Wrapper for onnx-asr model."""

    def __init__(
        self, model_id: str, cache_dir: Union[str, Path], local_files_only: bool
    ) -> None:
        """Initialize model."""
        resolved_cache = str(Path(cache_dir).resolve())

        # Force downloads into our cache dir by wrapping snapshot_download.
        def snapshot_download_with_cache(*args, **kwargs) -> str:
            kwargs["cache_dir"] = resolved_cache
            kwargs["local_files_only"] = local_files_only
            return snapshot_download(*args, **kwargs)

        with patch("huggingface_hub.snapshot_download", snapshot_download_with_cache):
            self.onnx_model = onnx_asr.load_model(model_id)

    def transcribe(
        self,
        wav_path: Union[str, Path],
        language: Optional[str],
        beam_size: int = 5,
        initial_prompt: Optional[str] = None,
    ) -> str:
        """Returns transcription for WAV file.

        WAV file must be 16Khz 16-bit mono audio. beam_size and
        initial_prompt are accepted for interface compatibility but unused.
        """
        with wave.open(str(wav_path), "rb") as wav_file:
            assert wav_file.getframerate() == _RATE, "Sample rate must be 16Khz"
            assert wav_file.getsampwidth() == 2, "Width must be 16-bit (2 bytes)"
            assert wav_file.getnchannels() == 1, "Audio must be mono"
            audio_bytes = wav_file.readframes(wav_file.getnframes())

        # Convert 16-bit signed PCM samples to float32 in [-1, 1]
        samples = np.frombuffer(audio_bytes, dtype=np.int16)
        audio_array = samples.astype(np.float32) / 32767.0

        recognize_kwargs = {}
        if language:
            recognize_kwargs["language"] = language

        return self.onnx_model.recognize(  # type: ignore[call-overload]
            audio_array, sample_rate=_RATE, **recognize_kwargs
        )
================================================
FILE: wyoming_faster_whisper/sherpa_handler.py
================================================
"""Code for transcription using the sherpa-onnx library."""
import logging
import shutil
import tarfile
import urllib.request
import wave
from pathlib import Path
from typing import Optional, Union
import numpy as np
import sherpa_onnx as so
from .const import Transcriber
_LOGGER = logging.getLogger(__name__)

# Expected sample rate of input audio (Hz).
_RATE = 16000

# Download URL template for pre-packaged sherpa-onnx model archives.
_URL_FORMAT = "https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/{model_id}.tar.bz2"
class SherpaTranscriber(Transcriber):
    """Wrapper for sherpa-onnx model."""

    def __init__(
        self,
        model_id: str,
        cache_dir: Union[str, Path],
        cpu_threads: int = 4,
    ) -> None:
        """Initialize model, downloading and extracting it if necessary.

        Args:
            model_id: Sherpa model id (also the archive/directory name).
            cache_dir: Directory where models are cached.
            cpu_threads: Number of CPU threads for inference.
        """
        cache_dir = Path(cache_dir)
        model_dir = cache_dir / model_id
        _LOGGER.debug("Looking for sherpa model: %s", model_dir)
        if not model_dir.exists():
            url = _URL_FORMAT.format(model_id=model_id)
            _LOGGER.info("Downloading %s", url)
            cache_dir.mkdir(parents=True, exist_ok=True)
            try:
                # Download/extract to cache dir.
                # We assume that the .tar.bz2 contains a directory named after
                # the model id.
                with urllib.request.urlopen(url) as response:
                    with tarfile.open(fileobj=response, mode="r|bz2") as tar:
                        for member in tar:
                            # Use the "data" extraction filter to reject
                            # absolute paths, path traversal ("../"), and
                            # special files in the downloaded archive.
                            tar.extract(member, path=cache_dir, filter="data")
            except Exception:
                # Delete directory so we'll download again next time
                shutil.rmtree(model_dir, ignore_errors=True)
                raise

        # Load model
        self.recognizer = so.OfflineRecognizer.from_transducer(
            num_threads=cpu_threads,
            encoder=f"{model_dir}/encoder.int8.onnx",
            decoder=f"{model_dir}/decoder.int8.onnx",
            joiner=f"{model_dir}/joiner.int8.onnx",
            tokens=f"{model_dir}/tokens.txt",
            provider="cpu",
            model_type="nemo_transducer",
        )

        # Prime model so that the first transcription will be fast
        stream = self.recognizer.create_stream()
        stream.accept_waveform(_RATE, np.zeros(shape=(128), dtype=np.float32))
        self.recognizer.decode_stream(stream)

    def transcribe(
        self,
        wav_path: Union[str, Path],
        language: Optional[str],
        beam_size: int = 5,
        initial_prompt: Optional[str] = None,
    ) -> str:
        """Returns transcription for WAV file.

        WAV file must be 16Khz 16-bit mono audio. language, beam_size, and
        initial_prompt are accepted for interface compatibility but unused.
        """
        wav_file: wave.Wave_read = wave.open(str(wav_path), "rb")
        with wav_file:
            assert wav_file.getframerate() == _RATE, "Sample rate must be 16Khz"
            assert wav_file.getsampwidth() == 2, "Width must be 16-bit (2 bytes)"
            assert wav_file.getnchannels() == 1, "Audio must be mono"
            audio_bytes = wav_file.readframes(wav_file.getnframes())

        # Convert 16-bit signed PCM samples to float32 in [-1, 1]
        audio_array = (
            np.frombuffer(audio_bytes, dtype=np.int16).astype(np.float32) / 32767.0
        )

        stream = self.recognizer.create_stream()
        stream.accept_waveform(_RATE, audio_array)
        self.recognizer.decode_stream(stream)
        return stream.result.text
================================================
FILE: wyoming_faster_whisper/transformers_whisper.py
================================================
"""Code for Whisper transcription using HuggingFace's transformers library."""
import wave
from pathlib import Path
from typing import Optional, Union
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor
from .const import Transcriber
# Expected sample rate of input audio (Hz).
_RATE = 16000
class TransformersTranscriber(Transcriber):
    """Wrapper for HuggingFace transformers Whisper model."""

    def __init__(
        self,
        model_id: str,
        cache_dir: Optional[Union[str, Path]] = None,
        local_files_only: bool = False,
    ) -> None:
        """Initialize Whisper model.

        Args:
            model_id: HuggingFace model id (e.g. "openai/whisper-base").
            cache_dir: Directory for downloaded model files (None = default).
            local_files_only: If True, only use already-downloaded files.
        """
        self.processor = AutoProcessor.from_pretrained(
            model_id, cache_dir=cache_dir, local_files_only=local_files_only
        )
        self.model = AutoModelForSpeechSeq2Seq.from_pretrained(
            model_id, cache_dir=cache_dir, local_files_only=local_files_only
        )
        # Inference only: put the model in evaluation mode.
        self.model.eval()

    def transcribe(
        self,
        wav_path: Union[str, Path],
        language: Optional[str],
        beam_size: int = 5,
        initial_prompt: Optional[str] = None,
    ) -> str:
        """Returns transcription for WAV file.

        WAV file must be 16Khz 16-bit mono audio.

        Args:
            wav_path: Path to WAV file.
            language: Language code, or None to skip forcing prefix tokens.
            beam_size: Number of beams for beam search decoding.
            initial_prompt: Optional text passed to generate as prompt tokens.
        """
        wav_file: wave.Wave_read = wave.open(str(wav_path), "rb")
        with wav_file:
            assert wav_file.getframerate() == _RATE, "Sample rate must be 16Khz"
            assert wav_file.getsampwidth() == 2, "Width must be 16-bit (2 bytes)"
            assert wav_file.getnchannels() == 1, "Audio must be mono"
            audio_bytes = wav_file.readframes(wav_file.getnframes())

        # Convert 16-bit signed PCM samples to float32 in [-1, 1]
        audio_tensor = (
            torch.frombuffer(audio_bytes, dtype=torch.int16).float() / 32768.0
        )

        inputs = self.processor(audio_tensor, sampling_rate=_RATE, return_tensors="pt")
        generate_args = {**inputs, "num_beams": beam_size}

        if initial_prompt:
            # Tokenize the prompt without special tokens and move it to the
            # model's device before passing it to generate.
            prompt_ids = (
                self.processor.tokenizer(
                    initial_prompt, return_tensors="pt", add_special_tokens=False
                )
                .input_ids[0]
                .to(self.model.device)
            )
            generate_args["prompt_ids"] = prompt_ids

        if language:
            # Force the decoder's language/task prefix tokens for this call.
            self.processor.tokenizer.set_prefix_tokens(
                language=language, task="transcribe"
            )

        with torch.no_grad():
            # Ignore warning about attention_mask because we're only doing a single utterance.
            generated_ids = self.model.generate(**generate_args)

        transcription = self.processor.batch_decode(
            generated_ids, skip_special_tokens=True
        )[0]

        return transcription
gitextract_nf4x59hb/
├── .dockerignore
├── .github/
│ └── workflows/
│ ├── publish.yml
│ └── test.yml
├── .gitignore
├── .isort.cfg
├── .projectile
├── CHANGELOG.md
├── Dockerfile
├── LICENSE.md
├── README.md
├── docker_run.sh
├── mypy.ini
├── pylintrc
├── pyproject.toml
├── script/
│ ├── format
│ ├── lint
│ ├── package
│ ├── run
│ ├── setup
│ └── test
├── setup.cfg
├── tests/
│ ├── __init__.py
│ └── test_faster_whisper.py
└── wyoming_faster_whisper/
├── __init__.py
├── __main__.py
├── const.py
├── dispatch_handler.py
├── faster_whisper_handler.py
├── models.py
├── onnx_asr_handler.py
├── sherpa_handler.py
└── transformers_whisper.py
SYMBOL INDEX (26 symbols across 9 files)
FILE: tests/test_faster_whisper.py
function test_faster_whisper (line 28) | async def test_faster_whisper(stt_library: str, model: str) -> None:
FILE: wyoming_faster_whisper/__main__.py
function main (line 22) | async def main() -> None:
function run (line 248) | def run() -> None:
FILE: wyoming_faster_whisper/const.py
class SttLibrary (line 9) | class SttLibrary(str, Enum):
class Transcriber (line 51) | class Transcriber(ABC):
method transcribe (line 55) | def transcribe(
FILE: wyoming_faster_whisper/dispatch_handler.py
class DispatchEventHandler (line 22) | class DispatchEventHandler(AsyncEventHandler):
method __init__ (line 25) | def __init__(
method handle_event (line 47) | async def handle_event(self, event: Event) -> bool:
FILE: wyoming_faster_whisper/faster_whisper_handler.py
class FasterWhisperTranscriber (line 11) | class FasterWhisperTranscriber(Transcriber):
method __init__ (line 14) | def __init__(
method transcribe (line 34) | def transcribe(
FILE: wyoming_faster_whisper/models.py
class ModelLoader (line 18) | class ModelLoader:
method __init__ (line 21) | def __init__(
method load_transcriber (line 55) | async def load_transcriber(self, language: Optional[str] = None) -> Tr...
method transcribe (line 169) | async def transcribe(
function guess_model (line 189) | def guess_model(stt_library: SttLibrary, language: Optional[str], is_arm...
FILE: wyoming_faster_whisper/onnx_asr_handler.py
class OnnxAsrTranscriber (line 17) | class OnnxAsrTranscriber(Transcriber):
method __init__ (line 20) | def __init__(
method transcribe (line 35) | def transcribe(
FILE: wyoming_faster_whisper/sherpa_handler.py
class SherpaTranscriber (line 22) | class SherpaTranscriber(Transcriber):
method __init__ (line 25) | def __init__(
method transcribe (line 70) | def transcribe(
FILE: wyoming_faster_whisper/transformers_whisper.py
class TransformersTranscriber (line 15) | class TransformersTranscriber(Transcriber):
method __init__ (line 18) | def __init__(
method transcribe (line 33) | def transcribe(
Condensed preview — 32 files, each showing path, character count, and a content snippet. Download the .json file or copy it to access the full structured content (57K chars).
[
{
"path": ".dockerignore",
"chars": 62,
"preview": "*\n!wyoming_faster_whisper/*.py\n!pyproject.toml\n!docker_run.sh\n"
},
{
"path": ".github/workflows/publish.yml",
"chars": 6351,
"preview": "---\nname: Release (DockerHub + GitHub)\n\non:\n workflow_dispatch:\n push:\n tags:\n - \"v*.*.*\" # e.g. v1.2.3\n\njobs:"
},
{
"path": ".github/workflows/test.yml",
"chars": 501,
"preview": "---\nname: Test\n\non:\n workflow_dispatch:\n pull_request:\n branches:\n - main\n\njobs:\n build_wheels:\n name: Tes"
},
{
"path": ".gitignore",
"chars": 121,
"preview": ".DS_Store\n.idea\n*.log\ntmp/\n\n*.py[cod]\n*.egg\n*.egg-info/\nbuild\nhtmlcov\n\n/.venv/\n.mypy_cache/\n__pycache__/\n\n/dist/\n/local/"
},
{
"path": ".isort.cfg",
"chars": 113,
"preview": "[settings]\nmulti_line_output=3\ninclude_trailing_comma=True\nforce_grid_wrap=0\nuse_parentheses=True\nline_length=88\n"
},
{
"path": ".projectile",
"chars": 10,
"preview": "- /.venv/\n"
},
{
"path": "CHANGELOG.md",
"chars": 1616,
"preview": "# Changelog\n\n## 3.1.1 (unrelased)\n\n- Fix transformers language\n- Add initial prompt to transformers\n\n## 3.1.0\n\n- Refacto"
},
{
"path": "Dockerfile",
"chars": 807,
"preview": "FROM debian:bookworm-slim\nARG TARGETARCH\nARG TARGETVARIANT\n\n# Install faster-whisper\nWORKDIR /usr/src\n\nCOPY ./pyproject."
},
{
"path": "LICENSE.md",
"chars": 1071,
"preview": "MIT License\n\nCopyright (c) 2025 Michael Hansen\n\nPermission is hereby granted, free of charge, to any person obtaining a "
},
{
"path": "README.md",
"chars": 1262,
"preview": "# Wyoming Faster Whisper\n\n[Wyoming protocol](https://github.com/rhasspy/wyoming) server for the [faster-whisper](https:/"
},
{
"path": "docker_run.sh",
"chars": 134,
"preview": "#!/usr/bin/env bash\ncd /usr/src\n.venv/bin/python3 -m wyoming_faster_whisper \\\n --uri 'tcp://0.0.0.0:10300' --data-dir"
},
{
"path": "mypy.ini",
"chars": 88,
"preview": "[mypy]\nignore_missing_imports = true\n\n[mypy-setuptools.*]\nignore_missing_imports = True\n"
},
{
"path": "pylintrc",
"chars": 846,
"preview": "[MASTER]\nignore=faster_whisper\n\n[MESSAGES CONTROL]\ndisable=\n format,\n abstract-method,\n cyclic-import,\n duplicate-co"
},
{
"path": "pyproject.toml",
"chars": 1787,
"preview": "[project]\nname = \"wyoming-faster-whisper\"\nversion = \"3.1.1\"\ndescription = \"Wyoming Server for Faster Whisper\"\nreadme = \""
},
{
"path": "script/format",
"chars": 544,
"preview": "#!/usr/bin/env python3\nimport subprocess\nimport venv\nfrom pathlib import Path\n\n_DIR = Path(__file__).parent\n_PROGRAM_DIR"
},
{
"path": "script/lint",
"chars": 774,
"preview": "#!/usr/bin/env python3\nimport subprocess\nimport venv\nfrom pathlib import Path\n\n_DIR = Path(__file__).parent\n_PROGRAM_DIR"
},
{
"path": "script/package",
"chars": 1330,
"preview": "#!/usr/bin/env python3\nimport shutil\nimport subprocess\nimport sys\nimport platform\nimport venv\nfrom pathlib import Path\n\n"
},
{
"path": "script/run",
"chars": 446,
"preview": "#!/usr/bin/env python3\nimport sys\nimport subprocess\nimport venv\nfrom pathlib import Path\n\n_DIR = Path(__file__).parent\n_"
},
{
"path": "script/setup",
"chars": 1201,
"preview": "#!/usr/bin/env python3\nimport argparse\nimport subprocess\nimport venv\nfrom pathlib import Path\n\n_DIR = Path(__file__).par"
},
{
"path": "script/test",
"chars": 428,
"preview": "#!/usr/bin/env python3\nimport subprocess\nimport venv\nfrom pathlib import Path\n\n_DIR = Path(__file__).parent\n_PROGRAM_DIR"
},
{
"path": "setup.cfg",
"chars": 487,
"preview": "[flake8]\n# To work with Black\nmax-line-length = 88\n# E501: line too long\n# W503: Line break occurred before a binary ope"
},
{
"path": "tests/__init__.py",
"chars": 277,
"preview": "\"\"\"Tests for wyoming-faster-whisper.\"\"\"\n\nfrom pathlib import Path\n\n_DIR = Path(__file__).parent\n_PROGRAM_DIR = _DIR.pare"
},
{
"path": "tests/test_faster_whisper.py",
"chars": 2854,
"preview": "\"\"\"Tests for faster-whisper.\"\"\"\n\nimport asyncio\nimport re\nimport sys\nimport wave\nfrom asyncio.subprocess import PIPE\n\nim"
},
{
"path": "wyoming_faster_whisper/__init__.py",
"chars": 157,
"preview": "\"\"\"Wyoming server for faster-whisper.\"\"\"\n\nfrom importlib.metadata import version\n\n__version__ = version(\"wyoming_faster_"
},
{
"path": "wyoming_faster_whisper/__main__.py",
"chars": 7946,
"preview": "#!/usr/bin/env python3\nimport argparse\nimport asyncio\nimport logging\nimport platform\nimport re\nfrom functools import par"
},
{
"path": "wyoming_faster_whisper/const.py",
"chars": 951,
"preview": "\"\"\"Constants.\"\"\"\n\nfrom abc import ABC, abstractmethod\nfrom enum import Enum\nfrom pathlib import Path\nfrom typing import "
},
{
"path": "wyoming_faster_whisper/dispatch_handler.py",
"chars": 3852,
"preview": "\"\"\"Event handler for clients of the server.\"\"\"\n\nimport asyncio\nimport logging\nimport os\nimport tempfile\nimport wave\nfrom"
},
{
"path": "wyoming_faster_whisper/faster_whisper_handler.py",
"chars": 1403,
"preview": "\"\"\"Event handler for clients of the server.\"\"\"\n\nfrom pathlib import Path\nfrom typing import Any, Dict, Optional, Union\n\n"
},
{
"path": "wyoming_faster_whisper/models.py",
"chars": 7396,
"preview": "\"\"\"Logic for model selection, loading, and transcription.\"\"\"\n\nimport asyncio\nimport logging\nimport platform\nfrom collect"
},
{
"path": "wyoming_faster_whisper/onnx_asr_handler.py",
"chars": 2035,
"preview": "\"\"\"Code for transcription using the onnx-asr library.\"\"\"\n\nimport wave\nfrom pathlib import Path\nfrom typing import Option"
},
{
"path": "wyoming_faster_whisper/sherpa_handler.py",
"chars": 3272,
"preview": "\"\"\"Code for transcription using the sherpa-onnx library.\"\"\"\n\nimport logging\nimport shutil\nimport tarfile\nimport urllib.r"
},
{
"path": "wyoming_faster_whisper/transformers_whisper.py",
"chars": 2697,
"preview": "\"\"\"Code for Whisper transcription using HuggingFace's transformers library.\"\"\"\n\nimport wave\nfrom pathlib import Path\nfro"
}
]
About this extraction
This page contains the full source code of the rhasspy/wyoming-faster-whisper GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 32 files (51.6 KB), approximately 13.4k tokens, and a symbol index with 26 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.