SYMBOL INDEX (1208 symbols across 89 files)

FILE: benchmark_mlx_simul.py
  function load_librispeech_utterances (line 63) | def load_librispeech_utterances(data_dir: str, max_utterances: int = 0):
  function load_librispeech_chapters (line 98) | def load_librispeech_chapters(data_dir: str):
  function transcribe_simul (line 153) | def transcribe_simul(asr, audio, chunk_seconds=2.0):
  function transcribe_single_shot (line 191) | def transcribe_single_shot(asr, audio):
  function normalize_text (line 216) | def normalize_text(text: str) -> str:
  function main (line 224) | def main():

FILE: benchmarks/h100/bench_voxtral_hf_batch.py
  function norm (line 14) | def norm(t):
  function load_audio (line 17) | def load_audio(path):
  function transcribe_batch (line 30) | def transcribe_batch(audio_np):

FILE: benchmarks/h100/bench_voxtral_vllm_realtime.py
  function norm (line 12) | def norm(t):
  function transcribe (line 15) | async def transcribe(audio_path, max_tokens=4096):
  function main (line 57) | async def main():

FILE: benchmarks/h100/generate_figures.py
  function _save (line 38) | def _save(fig, name):
  function fig_scatter_clean (line 48) | def fig_scatter_clean():
  function fig_scatter_acl6060 (line 101) | def fig_scatter_acl6060():
  function fig_bars (line 143) | def fig_bars():
  function fig_robustness (line 195) | def fig_robustness():
  function fig_per_talk (line 233) | def fig_per_talk():

FILE: benchmarks/m5/generate_figures.py
  function _save (line 43) | def _save(fig, name):
  function fig_m5_vs_h100 (line 50) | def fig_m5_vs_h100():

FILE: chrome-extension/requestPermissions.js
  function getUserPermission (line 5) | async function getUserPermission() {

FILE: chrome-extension/sidepanel.js
  function run (line 3) | async function run() {

FILE: scripts/convert_hf_whisper.py
  function _load_state_dict (line 23) | def _load_state_dict(repo_path: Path) -> Dict[str, torch.Tensor]:
  function _load_config (line 45) | def _load_config(repo_path: Path) -> Dict:
  function _derive_audio_ctx (line 55) | def _derive_audio_ctx(chunk_length: float) -> Tuple[int, int]:
  function _build_dims (line 68) | def _build_dims(config: Dict, chunk_length: float) -> Dict:
  function _trim_positional_embedding (line 88) | def _trim_positional_embedding(
  function convert_checkpoint (line 105) | def convert_checkpoint(hf_path: Path, output_path: Path, chunk_length: f...
  function parse_args (line 119) | def parse_args() -> argparse.Namespace:
  function main (line 143) | def main():

FILE: scripts/create_long_samples.py
  function save_wav (line 20) | def save_wav(path, audio, sr=SR):
  function decode_audio (line 30) | def decode_audio(audio_bytes):
  function download_long_librispeech (line 36) | def download_long_librispeech(config, lang_code, target_dur=300):
  function download_long_mls (line 76) | def download_long_mls(config, lang_code, target_dur=300):
  function main (line 115) | def main():

FILE: scripts/detect_alignment_heads_qwen3.py
  function _apply_transformers_compat_patches (line 50) | def _apply_transformers_compat_patches():
  function text_similarity (line 137) | def text_similarity(generated: str, reference: str) -> float:
  function load_dataset_clips (line 155) | def load_dataset_clips(name, config, split, limit):
  function get_device (line 177) | def get_device():
  function load_qwen3_asr (line 190) | def load_qwen3_asr(model_id: str, device: torch.device, dtype: torch.dty...
  function find_audio_token_range (line 235) | def find_audio_token_range(input_ids: torch.Tensor, audio_token_id: int)...
  function timestamp_to_audio_token_position (line 244) | def timestamp_to_audio_token_position(
  function run_detection (line 264) | def run_detection(
  function main (line 536) | def main():

FILE: scripts/determine_alignment_heads.py
  function load_dataset_clips (line 33) | def load_dataset_clips(name, config, split, limit):
  function load_clips (line 53) | def load_clips(args):
  function _waveform_from_source (line 62) | def _waveform_from_source(source: AudioInput) -> torch.Tensor:
  function _parse_args (line 67) | def _parse_args():
  function collect_heads (line 125) | def collect_heads(
  function _select_heads_for_visualization (line 181) | def _select_heads_for_visualization(selection, strengths, top_k):
  function _extract_heatmaps (line 193) | def _extract_heatmaps(
  function _plot_heatmaps (line 245) | def _plot_heatmaps(
  function _dump_mask (line 270) | def _dump_mask(mask: torch.Tensor, output_path: str):
  function main (line 277) | def main():

FILE: scripts/generate_architecture.py
  function box (line 36) | def box(x, y, w, h, label, color=C_BORDER, bg=C_BOX_BG, fontsize=8, bold...
  function arrow (line 50) | def arrow(x1, y1, x2, y2, color=C_TEXTDIM, style="->", lw=1.2):
  function section_box (line 55) | def section_box(x, y, w, h, title, bg=C_PANEL, border=C_BORDER, title_co...

FILE: scripts/python_support_matrix.py
  class MatrixRow (line 35) | class MatrixRow:
  class CaseResult (line 87) | class CaseResult:
  function parse_args (line 97) | def parse_args() -> argparse.Namespace:
  function safe_slug (line 115) | def safe_slug(text: str) -> str:
  function status_style (line 119) | def status_style(status: str) -> str:
  function print_line (line 129) | def print_line(message: str, style: str | None = None) -> None:
  function tail_text (line 139) | def tail_text(text: str | None, max_chars: int = 220) -> str:
  function run_command (line 148) | def run_command(
  function detect_gpu_available (line 218) | def detect_gpu_available() -> bool:
  function download_sample (line 232) | def download_sample(repo_root: Path) -> Path:
  function sync_case_environment (line 252) | def sync_case_environment(
  function apply_expected_failure_policy (line 276) | def apply_expected_failure_policy(result: CaseResult) -> CaseResult:
  function build_offline_command (line 298) | def build_offline_command(
  function run_case (line 332) | def run_case(
  function print_summary (line 446) | def print_summary(results: list[CaseResult]) -> None:
  function main (line 519) | def main() -> int:

FILE: scripts/run_scatter_benchmark.py
  function is_backend_available (line 66) | def is_backend_available(backend):
  function get_system_info (line 88) | def get_system_info():
  function run_combo_on_samples (line 103) | async def run_combo_on_samples(combo, samples, lang="en", speed=0):
  function run_all (line 174) | async def run_all(combos, samples, lang="en", speed=0):
  function get_long_samples_for_lang (line 191) | def get_long_samples_for_lang(lang="en"):
  function generate_scatter (line 213) | def generate_scatter(results, system_info, output_path, n_samples, lang=...
  function main (line 348) | def main():

FILE: scripts/sync_extension.py
  function sync_extension_files (line 7) | def sync_extension_files():

FILE: tests/test_pipeline.py
  function backend_kwargs (line 83) | def backend_kwargs(backend: str) -> dict:
  function samples (line 92) | def samples():
  function short_sample (line 99) | def short_sample(samples):
  function medium_sample (line 104) | def medium_sample(samples):
  function meeting_sample (line 109) | def meeting_sample(samples):
  function test_transcription_quality (line 119) | async def test_transcription_quality(backend, short_sample):
  function test_medium_clip_timing_spans_audio (line 141) | async def test_medium_clip_timing_spans_audio(backend, medium_sample):
  function test_text_appears_progressively (line 173) | async def test_text_appears_progressively(backend, medium_sample):
  function test_buffer_lifecycle (line 207) | async def test_buffer_lifecycle(backend, medium_sample):
  function test_silence_flushes_all_words (line 232) | async def test_silence_flushes_all_words(backend, medium_sample):
  function test_play_pause_resume (line 290) | async def test_play_pause_resume(backend, medium_sample):
  function test_multiple_pauses (line 336) | async def test_multiple_pauses(backend, medium_sample):
  function test_short_pause_no_silence (line 378) | async def test_short_pause_no_silence(backend, medium_sample):
  function test_abrupt_cutoff (line 413) | async def test_abrupt_cutoff(backend, medium_sample):
  function test_timing_precision_and_monotonicity (line 443) | async def test_timing_precision_and_monotonicity(backend, medium_sample):
  function test_silence_timing_reflects_pause (line 469) | async def test_silence_timing_reflects_pause(backend, short_sample):
  function test_snapshot_history (line 503) | async def test_snapshot_history(backend, medium_sample):
  function test_metrics_collected (line 532) | async def test_metrics_collected(backend, short_sample):

FILE: whisperlivekit/audio_processor.py
  function get_all_from_queue (line 28) | async def get_all_from_queue(queue: asyncio.Queue) -> Union[object, Sile...
  class AudioProcessor (line 54) | class AudioProcessor:
    method __init__ (line 60) | def __init__(self, **kwargs: Any) -> None:
    method _push_silence_event (line 140) | async def _push_silence_event(self) -> None:
    method _begin_silence (line 148) | async def _begin_silence(self, at_sample: Optional[int] = None) -> None:
    method _end_silence (line 168) | async def _end_silence(self, at_sample: Optional[int] = None) -> None:
    method _enqueue_active_audio (line 188) | async def _enqueue_active_audio(self, pcm_chunk: np.ndarray) -> None:
    method _slice_before_silence (line 196) | def _slice_before_silence(self, pcm_array: np.ndarray, chunk_sample_st...
    method convert_pcm_to_float (line 207) | def convert_pcm_to_float(self, pcm_buffer: Union[bytes, bytearray]) ->...
    method get_current_state (line 211) | async def get_current_state(self) -> State:
    method ffmpeg_stdout_reader (line 230) | async def ffmpeg_stdout_reader(self) -> None:
    method _finish_transcription (line 280) | async def _finish_transcription(self) -> None:
    method transcription_processor (line 309) | async def transcription_processor(self) -> None:
    method diarization_processor (line 421) | async def diarization_processor(self) -> None:
    method translation_processor (line 444) | async def translation_processor(self) -> None:
    method results_formatter (line 479) | async def results_formatter(self) -> AsyncGenerator[FrontData, None]:
    method create_tasks (line 530) | async def create_tasks(self) -> AsyncGenerator[FrontData, None]:
    method watchdog (line 571) | async def watchdog(self, tasks_to_monitor: List[asyncio.Task]) -> None:
    method cleanup (line 598) | async def cleanup(self) -> None:
    method _processing_tasks_done (line 625) | def _processing_tasks_done(self) -> bool:
    method process_audio (line 636) | async def process_audio(self, message: Optional[bytes]) -> None:
    method handle_pcm_data (line 682) | async def handle_pcm_data(self) -> None:
    method _flush_remaining_pcm (line 734) | async def _flush_remaining_pcm(self) -> None:

FILE: whisperlivekit/backend_support.py
  function module_available (line 8) | def module_available(module_name):
  function mlx_backend_available (line 13) | def mlx_backend_available(warn_on_missing = False):
  function voxtral_hf_backend_available (line 32) | def voxtral_hf_backend_available():
  function faster_backend_available (line 38) | def faster_backend_available(warn_on_missing = False):

FILE: whisperlivekit/basic_server.py
  function lifespan (line 22) | async def lifespan(app: FastAPI):
  function get (line 37) | async def get():
  function health (line 42) | async def health():
  function handle_websocket_results (line 53) | async def handle_websocket_results(websocket, results_generator, diff_tr...
  function websocket_endpoint (line 71) | async def websocket_endpoint(websocket: WebSocket):
  function deepgram_websocket_endpoint (line 134) | async def deepgram_websocket_endpoint(websocket: WebSocket):
  function _convert_to_pcm (line 145) | async def _convert_to_pcm(audio_bytes: bytes) -> bytes:
  function _parse_time_str (line 164) | def _parse_time_str(time_str: str) -> float:
  function _format_openai_response (line 174) | def _format_openai_response(front_data, response_format: str, language: ...
  function _srt_timestamp (line 239) | def _srt_timestamp(seconds: float, fmt: str) -> str:
  function create_transcription (line 250) | async def create_transcription(
  function list_models (line 321) | async def list_models():
  function main (line 336) | def main():

FILE: whisperlivekit/benchmark/compat.py
  function backend_supports_language (line 30) | def backend_supports_language(backend: str, language: str) -> bool:
  function detect_available_backends (line 38) | def detect_available_backends() -> List[str]:
  function resolve_backend (line 85) | def resolve_backend(backend: str) -> str:

FILE: whisperlivekit/benchmark/datasets.py
  class BenchmarkSample (line 33) | class BenchmarkSample:
    method to_dict (line 47) | def to_dict(self) -> Dict:
  function _save_wav (line 218) | def _save_wav(path: Path, audio: np.ndarray, sample_rate: int = 16000) -...
  function _decode_audio (line 234) | def _decode_audio(audio_bytes: bytes) -> tuple:
  function _ensure_datasets (line 241) | def _ensure_datasets():
  function _download_librispeech (line 255) | def _download_librispeech(config: str, n_samples: int, skip: int,
  function _download_mls (line 299) | def _download_mls(config: str, n_samples: int, skip: int,
  function _download_fleurs (line 342) | def _download_fleurs(config: str, n_samples: int, skip: int,
  function _download_ami (line 385) | def _download_ami(max_duration: float = 60.0) -> List[Dict]:
  function _download_catalog_entry (line 444) | def _download_catalog_entry(name: str, spec: Dict) -> List[Dict]:
  function get_benchmark_samples (line 479) | def get_benchmark_samples(

FILE: whisperlivekit/benchmark/metrics.py
  class SampleResult (line 11) | class SampleResult:
    method to_dict (line 51) | def to_dict(self) -> Dict[str, Any]:
  class BenchmarkReport (line 77) | class BenchmarkReport:
    method n_samples (line 89) | def n_samples(self) -> int:
    method total_audio_s (line 93) | def total_audio_s(self) -> float:
    method total_processing_s (line 97) | def total_processing_s(self) -> float:
    method avg_wer (line 101) | def avg_wer(self) -> float:
    method weighted_wer (line 107) | def weighted_wer(self) -> float:
    method avg_rtf (line 119) | def avg_rtf(self) -> float:
    method overall_rtf (line 125) | def overall_rtf(self) -> float:
    method avg_latency_ms (line 131) | def avg_latency_ms(self) -> float:
    method p95_latency_ms (line 136) | def p95_latency_ms(self) -> float:
    method _group_by (line 142) | def _group_by(self, key: str) -> Dict[str, List[SampleResult]]:
    method wer_by_language (line 149) | def wer_by_language(self) -> Dict[str, float]:
    method rtf_by_language (line 155) | def rtf_by_language(self) -> Dict[str, float]:
    method wer_by_category (line 161) | def wer_by_category(self) -> Dict[str, float]:
    method languages (line 168) | def languages(self) -> List[str]:
    method categories (line 172) | def categories(self) -> List[str]:
    method to_dict (line 175) | def to_dict(self) -> Dict[str, Any]:
  function get_system_info (line 208) | def get_system_info() -> Dict[str, Any]:

FILE: whisperlivekit/benchmark/report.py
  function _wer_color (line 20) | def _wer_color(wer: float) -> str:
  function _rtf_color (line 28) | def _rtf_color(rtf: float) -> str:
  function _lat_color (line 36) | def _lat_color(ms: float) -> str:
  function print_report (line 44) | def print_report(report: BenchmarkReport, out: TextIO = sys.stderr) -> N...
  function print_transcriptions (line 143) | def print_transcriptions(report: BenchmarkReport, out: TextIO = sys.stde...
  function write_json (line 159) | def write_json(report: BenchmarkReport, path: str) -> None:

FILE: whisperlivekit/benchmark/runner.py
  class BenchmarkRunner (line 15) | class BenchmarkRunner:
    method __init__ (line 28) | def __init__(
    method run (line 46) | async def run(self) -> BenchmarkReport:
    method _run_sample (line 105) | async def _run_sample(

FILE: whisperlivekit/cascade_bridge.py
  class CascadeBridge (line 24) | class CascadeBridge:
    method __init__ (line 27) | def __init__(self, output_file: TextIO = None):
    method emit_tokens (line 32) | def emit_tokens(self, tokens: List[ASRToken], is_final: bool = False):
    method get_entries (line 48) | def get_entries(self) -> List[dict]:
    method get_text (line 51) | def get_text(self) -> str:
    method save (line 55) | def save(self, path: str):
  function run_stt_to_jsonl (line 62) | def run_stt_to_jsonl(

FILE: whisperlivekit/cli.py
  function _module_available (line 28) | def _module_available(name: str) -> bool:
  function _gpu_info (line 32) | def _gpu_info() -> str:
  function _check_platform (line 212) | def _check_platform(backend: dict) -> bool:
  function _is_installed (line 222) | def _is_installed(backend: dict) -> bool:
  function _check_ffmpeg (line 226) | def _check_ffmpeg() -> bool:
  function _scan_downloaded_models (line 232) | def _scan_downloaded_models() -> dict:
  function print_banner (line 266) | def print_banner(config, host: str, port: int, ssl: bool = False):
  function _model_is_downloaded (line 306) | def _model_is_downloaded(model_entry: dict, downloaded: dict) -> bool:
  function _best_backend_for_model (line 332) | def _best_backend_for_model(model_entry: dict) -> str:
  function cmd_models (line 357) | def cmd_models():
  function _hf_download (line 438) | def _hf_download(repo_id: str, label: str):
  function _resolve_pull_target (line 447) | def _resolve_pull_target(spec: str):
  function cmd_pull (line 544) | def cmd_pull(spec: str):
  function cmd_transcribe (line 568) | def cmd_transcribe(args: list):
  function _transcribe_files_quiet (line 602) | async def _transcribe_files_quiet(parsed):
  function _transcribe_files (line 618) | async def _transcribe_files(parsed):
  function _format_subtitle (line 679) | def _format_subtitle(result, fmt: str) -> str:
  function _subtitle_timestamp (line 710) | def _subtitle_timestamp(seconds: float, fmt: str) -> str:
  function cmd_bench (line 724) | def cmd_bench(args: list):
  function _suppress_logging (line 777) | def _suppress_logging():
  function _run_bench_new (line 788) | async def _run_bench_new(parsed, languages, categories):
  function cmd_listen (line 828) | def cmd_listen(args: list):
  function _listen_quiet (line 863) | async def _listen_quiet(parsed):
  function _listen_main (line 875) | async def _listen_main(parsed):
  function _resolve_run_spec (line 1005) | def _resolve_run_spec(spec: str):
  function cmd_run (line 1030) | def cmd_run(args: list):
  function cmd_rm (line 1098) | def cmd_rm(spec: str):
  function cmd_check (line 1158) | def cmd_check():
  function cmd_diagnose (line 1192) | def cmd_diagnose(args: list):
  function _probe_backend_state (line 1225) | def _probe_backend_state(processor) -> dict:
  function _probe_pipeline_state (line 1295) | def _probe_pipeline_state(processor) -> dict:
  function _diagnose_main (line 1313) | async def _diagnose_main(parsed):
  function _print_version (line 1582) | def _print_version():
  function _print_help (line 1592) | def _print_help():
  function main (line 1630) | def main():

FILE: whisperlivekit/config.py
  class WhisperLiveKitConfig (line 10) | class WhisperLiveKitConfig:
    method __post_init__ (line 79) | def __post_init__(self):
    method from_namespace (line 94) | def from_namespace(cls, ns) -> "WhisperLiveKitConfig":
    method from_kwargs (line 100) | def from_kwargs(cls, **kwargs) -> "WhisperLiveKitConfig":

FILE: whisperlivekit/core.py
  class TranscriptionEngine (line 13) | class TranscriptionEngine:
    method __new__ (line 18) | def __new__(cls, *args, **kwargs):
    method reset (line 28) | def reset(cls):
    method __init__ (line 38) | def __init__(self, config=None, **kwargs):
    method _do_init (line 56) | def _do_init(self, config=None, **kwargs):
  function online_factory (line 237) | def online_factory(args, asr, language=None):
  function online_diarization_factory (line 282) | def online_diarization_factory(args, diarization_backend):
  function online_translation_factory (line 294) | def online_translation_factory(args, translation_model):

FILE: whisperlivekit/deepgram_compat.py
  function _parse_time_str (line 28) | def _parse_time_str(time_str: str) -> float:
  function _line_to_words (line 38) | def _line_to_words(line: dict) -> list:
  function _lines_to_result (line 74) | def _lines_to_result(lines: list, is_final: bool, speech_final: bool,
  class DeepgramAdapter (line 120) | class DeepgramAdapter:
    method __init__ (line 123) | def __init__(self, websocket: WebSocket):
    method send_metadata (line 132) | async def send_metadata(self, config):
    method process_update (line 152) | async def process_update(self, front_data_dict: dict):
  function handle_deepgram_websocket (line 219) | async def handle_deepgram_websocket(websocket: WebSocket, transcription_...

FILE: whisperlivekit/diarization/diart_backend.py
  class DiarizationObserver (line 21) | class DiarizationObserver(Observer):
    method __init__ (line 24) | def __init__(self):
    method on_next (line 30) | def on_next(self, value: Tuple[Annotation, Any]):
    method get_segments (line 55) | def get_segments(self) -> List[SpeakerSegment]:
    method clear_old_segments (line 60) | def clear_old_segments(self, older_than: float = 30.0):
    method on_error (line 69) | def on_error(self, error):
    method on_completed (line 73) | def on_completed(self):
  class WebSocketAudioSource (line 78) | class WebSocketAudioSource(AudioSource):
    method __init__ (line 82) | def __init__(self, uri: str = "websocket", sample_rate: int = 16000, b...
    method read (line 94) | def read(self):
    method _process_chunks (line 104) | def _process_chunks(self):
    method close (line 150) | def close(self):
    method push_audio (line 155) | def push_audio(self, chunk: np.ndarray):
  class DiartDiarization (line 164) | class DiartDiarization:
    method __init__ (line 165) | def __init__(self, sample_rate: int = 16000, config : SpeakerDiarizati...
    method insert_silence (line 198) | def insert_silence(self, silence_duration):
    method insert_audio_chunk (line 201) | def insert_audio_chunk(self, pcm_array: np.ndarray):
    method diarize (line 206) | async def diarize(self):
    method close (line 210) | def close(self):
  function concatenate_speakers (line 216) | def concatenate_speakers(segments):
  function add_speaker_to_tokens (line 230) | def add_speaker_to_tokens(segments, tokens):
  function visualize_tokens (line 274) | def visualize_tokens(tokens):

FILE: whisperlivekit/diarization/sortformer_backend.py
  class StreamingSortformerState (line 20) | class StreamingSortformerState:
    method __init__ (line 37) | def __init__(self):
  class SortformerDiarization (line 49) | class SortformerDiarization:
    method __init__ (line 50) | def __init__(self, model_name: str = "nvidia/diar_streaming_sortformer...
    method _load_model (line 56) | def _load_model(self, model_name: str):
  class SortformerDiarizationOnline (line 86) | class SortformerDiarizationOnline:
    method __init__ (line 87) | def __init__(self, shared_model, sample_rate: int = 16000):
    method _init_streaming_state (line 136) | def _init_streaming_state(self):
    method insert_silence (line 160) | def insert_silence(self, silence_duration: Optional[float]):
    method insert_audio_chunk (line 171) | def insert_audio_chunk(self, pcm_array: np.ndarray):
    method diarize (line 177) | async def diarize(self):
    method _process_predictions (line 230) | def _process_predictions(self):
    method get_segments (line 266) | def get_segments(self) -> List[SpeakerSegment]:
    method close (line 271) | def close(self):
  function main (line 295) | async def main():

FILE: whisperlivekit/diarization/utils.py
  function extract_number (line 4) | def extract_number(s: str) -> int:

FILE: whisperlivekit/diff_protocol.py
  class DiffTracker (line 32) | class DiffTracker:
    method to_message (line 39) | def to_message(self, front_data: FrontData) -> Dict[str, Any]:
    method reset (line 101) | def reset(self) -> None:

FILE: whisperlivekit/ffmpeg_manager.py
  class FFmpegState (line 32) | class FFmpegState(Enum):
  class FFmpegManager (line 39) | class FFmpegManager:
    method __init__ (line 40) | def __init__(self, sample_rate: int = 16000, channels: int = 1):
    method start (line 52) | async def start(self) -> bool:
    method stop (line 103) | async def stop(self):
    method write_data (line 123) | async def write_data(self, data: bytes) -> bool:
    method read_data (line 139) | async def read_data(self, size: int) -> Optional[bytes]:
    method get_state (line 160) | async def get_state(self) -> FFmpegState:
    method restart (line 164) | async def restart(self) -> bool:
    method _drain_stderr (line 185) | async def _drain_stderr(self):

FILE: whisperlivekit/local_agreement/backends.py
  class ASRBase (line 15) | class ASRBase:
    method __init__ (line 19) | def __init__(self, lan, model_size=None, cache_dir=None, model_dir=Non...
    method load_model (line 29) | def load_model(self, model_size, cache_dir, model_dir):
    method transcribe (line 32) | def transcribe(self, audio, init_prompt=""):
    method use_vad (line 35) | def use_vad(self):
  class WhisperASR (line 39) | class WhisperASR(ASRBase):
    method load_model (line 43) | def load_model(self, model_size=None, cache_dir=None, model_dir=None):
    method transcribe (line 62) | def transcribe(self, audio, init_prompt=""):
    method ts_words (line 79) | def ts_words(self, r) -> List[ASRToken]:
    method segments_end_ts (line 95) | def segments_end_ts(self, res) -> List[float]:
    method use_vad (line 98) | def use_vad(self):
  class FasterWhisperASR (line 101) | class FasterWhisperASR(ASRBase):
    method load_model (line 105) | def load_model(self, model_size=None, cache_dir=None, model_dir=None):
    method transcribe (line 129) | def transcribe(self, audio: np.ndarray, init_prompt: str = "") -> list:
    method ts_words (line 141) | def ts_words(self, segments) -> List[ASRToken]:
    method segments_end_ts (line 151) | def segments_end_ts(self, segments) -> List[float]:
    method use_vad (line 154) | def use_vad(self):
  class MLXWhisper (line 157) | class MLXWhisper(ASRBase):
    method load_model (line 163) | def load_model(self, model_size=None, cache_dir=None, model_dir=None):
    method translate_model_name (line 182) | def translate_model_name(self, model_name):
    method transcribe (line 190) | def transcribe(self, audio, init_prompt=""):
    method ts_words (line 203) | def ts_words(self, segments) -> List[ASRToken]:
    method segments_end_ts (line 213) | def segments_end_ts(self, res) -> List[float]:
    method use_vad (line 216) | def use_vad(self):
  class OpenaiApiASR (line 220) | class OpenaiApiASR(ASRBase):
    method __init__ (line 222) | def __init__(self, lan=None, temperature=0, logfile=sys.stderr):
    method load_model (line 233) | def load_model(self, *args, **kwargs):
    method ts_words (line 238) | def ts_words(self, segments) -> List[ASRToken]:
    method segments_end_ts (line 257) | def segments_end_ts(self, res) -> List[float]:
    method transcribe (line 260) | def transcribe(self, audio_data, prompt=None, *args, **kwargs):
    method use_vad (line 283) | def use_vad(self):

FILE: whisperlivekit/local_agreement/online_asr.py
  class HypothesisBuffer (line 11) | class HypothesisBuffer:
    method __init__ (line 20) | def __init__(self, logfile=sys.stderr, confidence_validation=False):
    method insert (line 29) | def insert(self, new_tokens: List[ASRToken], offset: float):
    method flush (line 59) | def flush(self) -> List[ASRToken]:
    method pop_committed (line 88) | def pop_committed(self, time: float):
  class OnlineASRProcessor (line 97) | class OnlineASRProcessor:
    method __init__ (line 108) | def __init__(
    method new_speaker (line 139) | def new_speaker(self, change_speaker):
    method init (line 144) | def init(self, offset: Optional[float] = None):
    method get_audio_buffer_end_time (line 153) | def get_audio_buffer_end_time(self) -> float:
    method insert_audio_chunk (line 157) | def insert_audio_chunk(self, audio: np.ndarray, audio_stream_end_time:...
    method start_silence (line 161) | def start_silence(self):
    method end_silence (line 166) | def end_silence(self, silence_duration: Optional[float], offset: float):
    method insert_silence (line 181) | def insert_silence(self, silence_duration, offset):
    method prompt (line 187) | def prompt(self) -> Tuple[str, str]:
    method get_buffer (line 211) | def get_buffer(self):
    method process_iter (line 218) | def process_iter(self) -> Tuple[List[ASRToken], float]:
    method chunk_completed_sentence (line 267) | def chunk_completed_sentence(self):
    method chunk_completed_segment (line 300) | def chunk_completed_segment(self, res):
    method chunk_at (line 338) | def chunk_at(self, time: float):
    method words_to_sentences (line 354) | def words_to_sentences(self, tokens: List[ASRToken]) -> List[Sentence]:
    method finish (line 399) | def finish(self) -> Tuple[List[ASRToken], float]:
    method concatenate_tokens (line 410) | def concatenate_tokens(

FILE: whisperlivekit/local_agreement/whisper_online.py
  function create_tokenizer (line 20) | def create_tokenizer(lan):
  function backend_factory (line 67) | def backend_factory(
  function _normalize_backend_choice (line 163) | def _normalize_backend_choice(

FILE: whisperlivekit/metrics.py
  function normalize_text (line 12) | def normalize_text(text: str) -> str:
  function compute_wer (line 24) | def compute_wer(reference: str, hypothesis: str) -> Dict:
  function compute_timestamp_accuracy (line 85) | def compute_timestamp_accuracy(

FILE: whisperlivekit/metrics_collector.py
  class SessionMetrics (line 16) | class SessionMetrics:
    method rtf (line 39) | def rtf(self) -> float:
    method avg_latency_ms (line 46) | def avg_latency_ms(self) -> float:
    method p95_latency_ms (line 53) | def p95_latency_ms(self) -> float:
    method to_dict (line 62) | def to_dict(self) -> Dict:
    method log_summary (line 79) | def log_summary(self) -> None:

FILE: whisperlivekit/model_paths.py
  class ModelInfo (line 9) | class ModelInfo:
    method has_pytorch (line 17) | def has_pytorch(self) -> bool:
    method is_sharded (line 21) | def is_sharded(self) -> bool:
    method primary_pytorch_file (line 25) | def primary_pytorch_file(self) -> Optional[Path]:
  function _is_ct2_model_bin (line 40) | def _is_ct2_model_bin(directory: Path, filename: str) -> bool:
  function _collect_pytorch_files (line 68) | def _collect_pytorch_files(directory: Path) -> List[Path]:
  function detect_model_format (line 135) | def detect_model_format(model_path: Union[str, Path]) -> ModelInfo:
  function model_path_and_type (line 180) | def model_path_and_type(model_path: Union[str, Path]) -> Tuple[Optional[...
  function resolve_model_path (line 195) | def resolve_model_path(model_path: Union[str, Path]) -> Path:

FILE: whisperlivekit/parse_args.py
  function parse_args (line 5) | def parse_args():

FILE: whisperlivekit/qwen3_asr.py
  function _patch_transformers_compat (line 14) | def _patch_transformers_compat():
  class Qwen3ASR (line 126) | class Qwen3ASR(ASRBase):
    method __init__ (line 132) | def __init__(self, lan="auto", model_size=None, cache_dir=None,
    method load_model (line 139) | def load_model(self, model_size=None, cache_dir=None, model_dir=None):
    method _qwen3_language (line 168) | def _qwen3_language(self) -> Optional[str]:
    method transcribe (line 173) | def transcribe(self, audio: np.ndarray, init_prompt: str = ""):
    method _detected_language (line 200) | def _detected_language(result) -> Optional[str]:
    method ts_words (line 211) | def ts_words(self, result) -> List[ASRToken]:
    method segments_end_ts (line 245) | def segments_end_ts(self, result) -> List[float]:
    method use_vad (line 259) | def use_vad(self):

FILE: whisperlivekit/qwen3_mlx_asr.py
  class Qwen3MLXASR (line 60) | class Qwen3MLXASR:
    method __init__ (line 67) | def __init__(self, logfile=sys.stderr, **kwargs):
    method transcribe (line 96) | def transcribe(self, audio):
  class Qwen3MLXOnlineProcessor (line 105) | class Qwen3MLXOnlineProcessor:
    method __init__ (line 123) | def __init__(self, asr: Qwen3MLXASR, logfile=sys.stderr):
    method insert_audio_chunk (line 155) | def insert_audio_chunk(self, audio: np.ndarray, audio_stream_end_time:...
    method _transcribe_buffer (line 162) | def _transcribe_buffer(self) -> List[ASRToken]:
    method _local_agreement (line 209) | def _local_agreement(self, new_tokens: List[ASRToken]) -> List[ASRToken]:
    method _trim_buffer_if_needed (line 260) | def _trim_buffer_if_needed(self):
    method process_iter (line 292) | def process_iter(self, is_last=False) -> Tuple[List[ASRToken], float]:
    method get_buffer (line 324) | def get_buffer(self) -> Transcript:
    method _flush_all (line 335) | def _flush_all(self) -> List[ASRToken]:
    method _reset_for_new_utterance (line 355) | def _reset_for_new_utterance(self):
    method start_silence (line 368) | def start_silence(self) -> Tuple[List[ASRToken], float]:
    method end_silence (line 379) | def end_silence(self, silence_duration: float, offset: float):
    method new_speaker (line 383) | def new_speaker(self, change_speaker):
    method warmup (line 386) | def warmup(self, audio, init_prompt=""):
    method finish (line 389) | def finish(self) -> Tuple[List[ASRToken], float]:

FILE: whisperlivekit/qwen3_mlx_simul.py
  class Qwen3MLXSimulConfig (line 67) | class Qwen3MLXSimulConfig:
  class _SessionState (line 84) | class _SessionState:
  class Qwen3MLXSimulStreamingASR (line 104) | class Qwen3MLXSimulStreamingASR:
    method __init__ (line 111) | def __init__(
    method _load_alignment_heads (line 187) | def _load_alignment_heads(
    method _warmup (line 216) | def _warmup(self, audio: np.ndarray):
    method transcribe (line 236) | def transcribe(self, audio):
  class _AttnCaptureWrapper (line 245) | class _AttnCaptureWrapper:
    method __init__ (line 259) | def __init__(self, original, layer_idx, head_indices, gqa_ratio,
    method __call__ (line 270) | def __call__(self, x, cos, sin, mask=None, cache=None, layer_idx=0):
    method __getattr__ (line 305) | def __getattr__(self, name):
  function _install_alignment_hooks (line 309) | def _install_alignment_hooks(model, heads_by_layer, gqa_ratio, audio_sta...
  function _remove_alignment_hooks (line 329) | def _remove_alignment_hooks(model, originals):
  class Qwen3MLXSimulStreamingOnlineProcessor (line 340) | class Qwen3MLXSimulStreamingOnlineProcessor:
    method __init__ (line 351) | def __init__(self, asr: Qwen3MLXSimulStreamingASR, logfile=sys.stderr):
    method speaker (line 361) | def speaker(self):
    method speaker (line 365) | def speaker(self, value):
    method global_time_offset (line 369) | def global_time_offset(self):
    method global_time_offset (line 373) | def global_time_offset(self, value):
    method insert_audio_chunk (line 378) | def insert_audio_chunk(self, audio: np.ndarray, audio_stream_end_time:...
    method process_iter (line 392) | def process_iter(self, is_last=False) -> Tuple[List[ASRToken], float]:
    method _infer (line 416) | def _infer(self, is_last: bool) -> List[ASRToken]:
    method _build_timestamped_words (line 625) | def _build_timestamped_words(
    method start_silence (line 697) | def start_silence(self) -> Tuple[List[ASRToken], float]:
    method end_silence (line 706) | def end_silence(self, silence_duration: float, offset: float):
    method new_speaker (line 720) | def new_speaker(self, change_speaker):
    method get_buffer (line 726) | def get_buffer(self) -> Transcript:
    method warmup (line 729) | def warmup(self, audio: np.ndarray, init_prompt: str = ""):
    method finish (line 739) | def finish(self) -> Tuple[List[ASRToken], float]:

FILE: whisperlivekit/qwen3_simul.py
  class Qwen3SimulConfig (line 52) | class Qwen3SimulConfig:
  class _AudioEmbedCache (line 70) | class _AudioEmbedCache:
    method trim_front (line 106) | def trim_front(self, trim_samples: int, sample_rate: int = 16000):
    method reset (line 119) | def reset(self):
  class Qwen3SimulState (line 128) | class Qwen3SimulState:
  class Qwen3SimulStreamingASR (line 154) | class Qwen3SimulStreamingASR:
    method __init__ (line 164) | def __init__(
    method _load_model (line 204) | def _load_model(self, model_size, model_dir, model_cache_dir, model_pa...
    method _load_alignment_heads (line 266) | def _load_alignment_heads(
    method _warmup (line 303) | def _warmup(self, audio: np.ndarray):
    method transcribe (line 330) | def transcribe(self, audio):
  class Qwen3SimulStreamingOnlineProcessor (line 335) | class Qwen3SimulStreamingOnlineProcessor:
    method __init__ (line 351) | def __init__(self, asr: Qwen3SimulStreamingASR, logfile=sys.stderr):
    method _build_prompt_template (line 363) | def _build_prompt_template(self):
    method speaker (line 382) | def speaker(self):
    method speaker (line 386) | def speaker(self, value):
    method global_time_offset (line 390) | def global_time_offset(self):
    method global_time_offset (line 394) | def global_time_offset(self, value):
    method insert_audio_chunk (line 397) | def insert_audio_chunk(self, audio: np.ndarray, audio_stream_end_time:...
    method start_silence (line 413) | def start_silence(self) -> Tuple[List[ASRToken], float]:
    method end_silence (line 427) | def end_silence(self, silence_duration: float, offset: float):
    method new_speaker (line 443) | def new_speaker(self, change_speaker: ChangeSpeaker):
    method get_buffer (line 450) | def get_buffer(self) -> Transcript:
    method _encode_audio_cached (line 454) | def _encode_audio_cached(self) -> Optional[torch.Tensor]:
    method _build_inputs_with_cached_audio (line 604) | def _build_inputs_with_cached_audio(
    method process_iter (line 697) | def process_iter(self, is_last=False) -> Tuple[List[ASRToken], float]:
    method _infer (line 737) | def _infer(self, is_last: bool) -> List[ASRToken]:
    method _build_timestamped_words (line 1085) | def _build_timestamped_words(
    method _median_frame (line 1164) | def _median_frame(frames: List[int]) -> Optional[int]:
    method warmup (line 1171) | def warmup(self, audio: np.ndarray, init_prompt: str = ""):
    method finish (line 1182) | def finish(self) -> Tuple[List[ASRToken], float]:

FILE: whisperlivekit/qwen3_simul_kv.py
  class Qwen3SimulKVConfig (line 36) | class Qwen3SimulKVConfig:
  class _AudioEmbedCache (line 52) | class _AudioEmbedCache:
    method reset (line 59) | def reset(self):
  class Qwen3SimulKVState (line 67) | class Qwen3SimulKVState:
    method reset_kv (line 98) | def reset_kv(self):
  class Qwen3SimulKVASR (line 110) | class Qwen3SimulKVASR:
    method __init__ (line 117) | def __init__(
    method _load_model (line 156) | def _load_model(self, model_size, model_dir, model_cache_dir, model_pa...
    method _load_alignment_heads (line 208) | def _load_alignment_heads(self, path):
    method _warmup (line 225) | def _warmup(self, audio):
    method transcribe (line 238) | def transcribe(self, audio):
  class Qwen3SimulKVOnlineProcessor (line 242) | class Qwen3SimulKVOnlineProcessor:
    method __init__ (line 254) | def __init__(self, asr: Qwen3SimulKVASR, logfile=sys.stderr):
    method _build_prompt_template (line 262) | def _build_prompt_template(self):
    method speaker (line 277) | def speaker(self):
    method speaker (line 281) | def speaker(self, value):
    method global_time_offset (line 285) | def global_time_offset(self):
    method global_time_offset (line 289) | def global_time_offset(self, value):
    method insert_audio_chunk (line 292) | def insert_audio_chunk(self, audio: np.ndarray, audio_stream_end_time:...
    method start_silence (line 305) | def start_silence(self) -> Tuple[List[ASRToken], float]:
    method end_silence (line 314) | def end_silence(self, silence_duration: float, offset: float):
    method new_speaker (line 327) | def new_speaker(self, change_speaker: ChangeSpeaker):
    method get_buffer (line 333) | def get_buffer(self) -> Transcript:
    method _encode_audio (line 336) | def _encode_audio(self) -> Tuple[torch.Tensor, int]:
    method _build_full_inputs (line 415) | def _build_full_inputs(self, audio_embeds: torch.Tensor) -> dict:
    method process_iter (line 475) | def process_iter(self, is_last=False) -> Tuple[List[ASRToken], float]:
    method _infer (line 500) | def _infer(self, is_last: bool) -> List[ASRToken]:
    method _build_timestamped_words (line 710) | def _build_timestamped_words(
    method warmup (line 775) | def warmup(self, audio: np.ndarray, init_prompt: str = ""):
    method finish (line 784) | def finish(self) -> Tuple[List[ASRToken], float]:

FILE: whisperlivekit/session_asr_proxy.py
  class SessionASRProxy (line 10) | class SessionASRProxy:
    method __init__ (line 22) | def __init__(self, asr, language: str):
    method __getattr__ (line 30) | def __getattr__(self, name):
    method transcribe (line 33) | def transcribe(self, audio, init_prompt=""):

FILE: whisperlivekit/silero_vad_iterator.py
  function is_onnx_available (line 11) | def is_onnx_available() -> bool:
  function init_jit_model (line 20) | def init_jit_model(model_path: str, device=torch.device('cpu')):
  class OnnxSession (line 27) | class OnnxSession():
    method __init__ (line 32) | def __init__(self, path, force_onnx_cpu=False):
  class OnnxWrapper (line 52) | class OnnxWrapper():
    method __init__ (line 57) | def __init__(self, session: OnnxSession, force_onnx_cpu=False):
    method session (line 63) | def session(self):
    method _validate_input (line 66) | def _validate_input(self, x, sr: int):
    method reset_states (line 84) | def reset_states(self, batch_size=1):
    method __call__ (line 90) | def __call__(self, x, sr: int):
  function _get_onnx_model_path (line 128) | def _get_onnx_model_path(model_path: str = None, opset_version: int = 16...
  function load_onnx_session (line 156) | def load_onnx_session(model_path: str = None, opset_version: int = 16, f...
  function load_jit_vad (line 164) | def load_jit_vad(model_path: str = None):
  class VADIterator (line 188) | class VADIterator:
    method __init__ (line 195) | def __init__(self,
    method reset_states (line 235) | def reset_states(self):
    method __call__ (line 243) | def __call__(self, x, return_seconds=False, time_resolution: int = 1):
  class FixedVADIterator (line 288) | class FixedVADIterator(VADIterator):
    method reset_states (line 293) | def reset_states(self):
    method __call__ (line 297) | def __call__(self, x, return_seconds=False):

FILE: whisperlivekit/simul_whisper/align_att_base.py
  class AlignAttBase (line 14) | class AlignAttBase(ABC):
    method speaker (line 30) | def speaker(self):
    method speaker (line 34) | def speaker(self, value):
    method global_time_offset (line 38) | def global_time_offset(self):
    method global_time_offset (line 42) | def global_time_offset(self, value):
    method _base_init (line 47) | def _base_init(self, cfg: AlignAttConfig, model):
    method _init_state_common (line 64) | def _init_state_common(self, cfg: AlignAttConfig):
    method warmup (line 75) | def warmup(self, audio):
    method create_tokenizer (line 84) | def create_tokenizer(self, language=None):
    method trim_context (line 93) | def trim_context(self):
    method refresh_segment (line 108) | def refresh_segment(self, complete=False):
    method segments_len (line 124) | def segments_len(self):
    method _apply_minseglen (line 127) | def _apply_minseglen(self):
    method _clean_cache (line 134) | def _clean_cache(self):
    method debug_print_tokens (line 137) | def debug_print_tokens(self, tokens):
    method _detect_language_if_needed (line 143) | def _detect_language_if_needed(self, encoder_feature):
    method infer (line 164) | def infer(self, is_last=False):
    method _split_tokens (line 309) | def _split_tokens(self, tokens_list, fire_detected, is_last):
    method _build_timestamped_words (line 322) | def _build_timestamped_words(self, split_words, split_tokens, l_absolu...
    method _handle_pending_tokens (line 360) | def _handle_pending_tokens(self, split_words, split_tokens):
    method _apply_dry_penalty (line 394) | def _apply_dry_penalty(self, logits, current_tokens):
    method _init_state (line 444) | def _init_state(self, cfg: AlignAttConfig):
    method init_tokens (line 449) | def init_tokens(self):
    method init_context (line 454) | def init_context(self):
    method insert_audio (line 459) | def insert_audio(self, segment=None):
    method _current_tokens (line 464) | def _current_tokens(self):
    method fire_at_boundary (line 469) | def fire_at_boundary(self, feature):
    method lang_id (line 474) | def lang_id(self, encoder_features):
    method _concat_segments (line 479) | def _concat_segments(self):
    method _encode (line 484) | def _encode(self, input_segments):
    method _init_sum_logprobs (line 489) | def _init_sum_logprobs(self):
    method _get_logits_and_cross_attn (line 494) | def _get_logits_and_cross_attn(self, tokens, encoder_feature):
    method _check_no_speech (line 499) | def _check_no_speech(self, logits):
    method _suppress_blank_tokens (line 504) | def _suppress_blank_tokens(self, logits):
    method _apply_token_suppression (line 509) | def _apply_token_suppression(self, logits):
    method _update_tokens (line 514) | def _update_tokens(self, current_tokens, logits, sum_logprobs):
    method _process_cross_attention (line 519) | def _process_cross_attention(self, accumulated_cross_attns, content_me...
    method _get_attended_frames (line 524) | def _get_attended_frames(self, attn):
    method _is_special_token (line 529) | def _is_special_token(self, current_tokens):
    method _rewind_tokens (line 534) | def _rewind_tokens(self):
    method _tokens_to_list (line 539) | def _tokens_to_list(self, current_tokens, start_col):
    method _make_new_tokens_tensor (line 544) | def _make_new_tokens_tensor(self, hypothesis):
    method _evaluate (line 549) | def _evaluate(self, tensor):

FILE: whisperlivekit/simul_whisper/backend.py
  class SimulStreamingOnlineProcessor (line 36) | class SimulStreamingOnlineProcessor:
    method __init__ (line 40) | def __init__(self, asr, logfile=sys.stderr):
    method _create_alignatt (line 51) | def _create_alignatt(self):
    method start_silence (line 63) | def start_silence(self):
    method end_silence (line 67) | def end_silence(self, silence_duration, offset):
    method insert_audio_chunk (line 83) | def insert_audio_chunk(self, audio: np.ndarray, audio_stream_end_time):
    method new_speaker (line 92) | def new_speaker(self, change_speaker: ChangeSpeaker):
    method get_buffer (line 99) | def get_buffer(self):
    method process_iter (line 103) | def process_iter(self, is_last=False) -> Tuple[List[ASRToken], float]:
    method warmup (line 125) | def warmup(self, audio, init_prompt=""):
    method __del__ (line 139) | def __del__(self):
  class SimulStreamingASR (line 148) | class SimulStreamingASR:
    method __init__ (line 152) | def __init__(self, logfile=sys.stderr, **kwargs):
    method _warmup_mlx_model (line 272) | def _warmup_mlx_model(self):
    method _resolve_encoder_backend (line 284) | def _resolve_encoder_backend(self, preferred_backend, compatible_whisp...
    method _has_custom_model_path (line 307) | def _has_custom_model_path(self):
    method _can_use_mlx (line 310) | def _can_use_mlx(self, compatible_whisper_mlx):
    method _can_use_faster (line 317) | def _can_use_faster(self, compatible_faster_whisper):
    method load_model (line 324) | def load_model(self):
    method set_translate_task (line 349) | def set_translate_task(self):
    method transcribe (line 360) | def transcribe(self, audio):

FILE: whisperlivekit/simul_whisper/beam.py
  class BeamPyTorchInference (line 6) | class BeamPyTorchInference(PyTorchInference):
    method _kv_cache_ids (line 9) | def _kv_cache_ids(self):
    method rearrange_kv_cache (line 15) | def rearrange_kv_cache(self, source_indices):
    method logits (line 21) | def logits(

FILE: whisperlivekit/simul_whisper/config.py
  class AlignAttConfig (line 6) | class AlignAttConfig():

FILE: whisperlivekit/simul_whisper/decoder_state.py
  class DecoderState (line 8) | class DecoderState:
    method clean_cache (line 50) | def clean_cache(self):
    method reset (line 73) | def reset(self, rewind_threshold: int = 200):
    method full_reset (line 86) | def full_reset(self, rewind_threshold: int = 200):

FILE: whisperlivekit/simul_whisper/eow_detection.py
  function load_cif (line 5) | def load_cif(cfg, n_audio_state, device):
  function resize (line 25) | def resize(alphas, target_lengths, threshold=0.999):
  function fire_at_boundary (line 50) | def fire_at_boundary(chunked_encoder_feature: torch.Tensor, cif_linear):

FILE: whisperlivekit/simul_whisper/mlx/decoder_state.py
  class MLXDecoderState (line 9) | class MLXDecoderState:
    method clean_cache (line 52) | def clean_cache(self):
    method reset (line 59) | def reset(self, rewind_threshold: int = 200):
    method full_reset (line 66) | def full_reset(self, rewind_threshold: int = 200):

FILE: whisperlivekit/simul_whisper/mlx/decoders.py
  class MLXGreedyDecoder (line 10) | class MLXGreedyDecoder:
    method __init__ (line 13) | def __init__(self, temperature: float, eot: int):
    method update (line 17) | def update(
    method finalize (line 50) | def finalize(self, tokens: mx.array, sum_logprobs: mx.array):
  class MLXBeamSearchDecoder (line 57) | class MLXBeamSearchDecoder:
    method __init__ (line 60) | def __init__(
    method reset (line 78) | def reset(self):
    method update (line 82) | def update(
    method finalize (line 156) | def finalize(self, preceding_tokens: mx.array, sum_logprobs: mx.array):
  class MLXInference (line 182) | class MLXInference:
    method __init__ (line 185) | def __init__(self, model, initial_token_length: int):
    method rearrange_kv_cache (line 190) | def rearrange_kv_cache(self, source_indices: List[int]):
    method logits (line 209) | def logits(

FILE: whisperlivekit/simul_whisper/mlx/simul_whisper.py
  class MLXTokenBuffer (line 20) | class MLXTokenBuffer:
    method __init__ (line 23) | def __init__(self, text="", tokenizer=None, prefix_token_ids=None):
    method as_token_ids (line 29) | def as_token_ids(self, tokenizer=None):
    method as_mlx_array (line 36) | def as_mlx_array(self) -> mx.array:
    method as_mlx_array_beam (line 40) | def as_mlx_array_beam(self, beam: int) -> mx.array:
    method as_text (line 44) | def as_text(self):
    method empty (line 48) | def empty(*a, **kw):
    method from_text (line 52) | def from_text(text, *a, **kw):
    method is_empty (line 55) | def is_empty(self):
    method trim_words (line 58) | def trim_words(self, num=1, after=0):
    method append_token_ids (line 68) | def append_token_ids(self, token_ids):
  function mlx_median_filter (line 89) | def mlx_median_filter(x: mx.array, filter_width: int) -> mx.array:
  class MLXAlignAtt (line 107) | class MLXAlignAtt(AlignAttBase):
    method __init__ (line 114) | def __init__(
    method _init_state (line 127) | def _init_state(self, cfg: AlignAttConfig):
    method _build_alignment_source (line 178) | def _build_alignment_source(self):
    method init_tokens (line 200) | def init_tokens(self):
    method init_context (line 211) | def init_context(self):
    method insert_audio (line 222) | def insert_audio(self, segment=None):
    method _current_tokens (line 245) | def _current_tokens(self) -> mx.array:
    method fire_at_boundary (line 260) | def fire_at_boundary(self, chunked_encoder_feature: mx.array) -> bool:
    method lang_id (line 267) | def lang_id(self, encoder_features: mx.array) -> Tuple[mx.array, List[...
    method _concat_segments (line 296) | def _concat_segments(self):
    method _encode (line 301) | def _encode(self, input_segments):
    method _init_sum_logprobs (line 312) | def _init_sum_logprobs(self):
    method _get_logits_and_cross_attn (line 315) | def _get_logits_and_cross_attn(self, tokens, encoder_feature):
    method _check_no_speech (line 324) | def _check_no_speech(self, logits):
    method _suppress_blank_tokens (line 335) | def _suppress_blank_tokens(self, logits):
    method _apply_token_suppression (line 340) | def _apply_token_suppression(self, logits):
    method _update_tokens (line 348) | def _update_tokens(self, current_tokens, logits, sum_logprobs):
    method _process_cross_attention (line 351) | def _process_cross_attention(
    method _get_attended_frames (line 398) | def _get_attended_frames(self, attn):
    method _is_special_token (line 403) | def _is_special_token(self, current_tokens):
    method _rewind_tokens (line 406) | def _rewind_tokens(self):
    method _tokens_to_list (line 411) | def _tokens_to_list(self, current_tokens, start_col):
    method _make_new_tokens_tensor (line 414) | def _make_new_tokens_tensor(self, hypothesis):
    method _evaluate (line 418) | def _evaluate(self, tensor):

FILE: whisperlivekit/simul_whisper/mlx_encoder.py
  function load_mlx_encoder (line 14) | def load_mlx_encoder(
  function load_mlx_model (line 62) | def load_mlx_model(

FILE: whisperlivekit/simul_whisper/simul_whisper.py
  function load_coreml_encoder (line 34) | def load_coreml_encoder():
  class AlignAtt (line 51) | class AlignAtt(AlignAttBase):
    method __init__ (line 59) | def __init__(
    method _init_state (line 86) | def _init_state(self, cfg: AlignAttConfig):
    method init_tokens (line 139) | def init_tokens(self):
    method init_context (line 150) | def init_context(self):
    method insert_audio (line 162) | def insert_audio(self, segment=None):
    method _current_tokens (line 182) | def _current_tokens(self):
    method fire_at_boundary (line 199) | def fire_at_boundary(self, chunked_encoder_feature: torch.Tensor):
    method lang_id (line 207) | def lang_id(self, encoder_features):
    method _concat_segments (line 234) | def _concat_segments(self):
    method _encode (line 239) | def _encode(self, input_segments):
    method _init_sum_logprobs (line 305) | def _init_sum_logprobs(self):
    method _get_logits_and_cross_attn (line 308) | def _get_logits_and_cross_attn(self, tokens, encoder_feature):
    method _check_no_speech (line 321) | def _check_no_speech(self, logits):
    method _suppress_blank_tokens (line 330) | def _suppress_blank_tokens(self, logits):
    method _apply_token_suppression (line 334) | def _apply_token_suppression(self, logits):
    method _update_tokens (line 338) | def _update_tokens(self, current_tokens, logits, sum_logprobs):
    method _process_cross_attention (line 341) | def _process_cross_attention(
    method _get_attended_frames (line 386) | def _get_attended_frames(self, attn):
    method _is_special_token (line 390) | def _is_special_token(self, current_tokens):
    method _rewind_tokens (line 393) | def _rewind_tokens(self):
    method _tokens_to_list (line 398) | def _tokens_to_list(self, current_tokens, start_col):
    method _make_new_tokens_tensor (line 401) | def _make_new_tokens_tensor(self, hypothesis):
    method _evaluate (line 408) | def _evaluate(self, tensor):
    method infer (line 412) | def infer(self, is_last=False):

FILE: whisperlivekit/simul_whisper/token_buffer.py
  class TokenBuffer (line 5) | class TokenBuffer:
    method __init__ (line 7) | def __init__(self, text="", tokenizer=None, device=None, prefix_token_...
    method as_token_ids (line 14) | def as_token_ids(self, tokenizer=None):
    method as_tensor (line 22) | def as_tensor(self, device=None):
    method as_tensor_beam (line 31) | def as_tensor_beam(self, beam, device=None):
    method as_text (line 36) | def as_text(self):
    method empty (line 40) | def empty(*a, **kw):
    method from_text (line 44) | def from_text(text, *a, **kw):
    method is_empty (line 47) | def is_empty(self):
    method trim_words (line 50) | def trim_words(self, num=1, after=0):
    method append_token_ids (line 67) | def append_token_ids(self, token_ids):
    method as_split_word_tokens (line 91) | def as_split_word_tokens(self):

FILE: whisperlivekit/test_client.py
  class TranscriptionResult (line 39) | class TranscriptionResult:
    method text (line 46) | def text(self) -> str:
    method committed_text (line 61) | def committed_text(self) -> str:
    method lines (line 72) | def lines(self) -> List[dict]:
    method n_updates (line 80) | def n_updates(self) -> int:
  function reconstruct_state (line 88) | def reconstruct_state(msg: dict, lines: List[dict]) -> dict:
  function load_audio_pcm (line 117) | def load_audio_pcm(audio_path: str, sample_rate: int = SAMPLE_RATE) -> b...
  function transcribe_audio (line 137) | async def transcribe_audio(
  function _print_result (line 268) | def _print_result(result: TranscriptionResult, output_json: bool = False...
  function main (line 302) | def main():

FILE: whisperlivekit/test_data.py
  class TestSample (line 46) | class TestSample:
    method has_timestamps (line 61) | def has_timestamps(self) -> bool:
  function _save_wav (line 65) | def _save_wav(path: Path, audio: np.ndarray, sample_rate: int = 16000) -...
  function _load_metadata (line 85) | def _load_metadata() -> Dict:
  function _save_metadata (line 93) | def _save_metadata(meta: Dict) -> None:
  function _ensure_datasets (line 98) | def _ensure_datasets():
  function _decode_audio (line 110) | def _decode_audio(audio_bytes: bytes) -> tuple:
  function _download_librispeech_samples (line 127) | def _download_librispeech_samples(n_samples: int = 3) -> List[Dict]:
  function _download_ami_sample (line 181) | def _download_ami_sample() -> List[Dict]:
  function download_test_samples (line 271) | def download_test_samples(force: bool = False) -> List[TestSample]:
  function get_samples (line 323) | def get_samples() -> List[TestSample]:
  function get_sample (line 328) | def get_sample(name: str) -> TestSample:
  function list_sample_names (line 345) | def list_sample_names() -> List[str]:
  function _meta_to_samples (line 350) | def _meta_to_samples(meta_list: List[Dict]) -> List[TestSample]:

FILE: whisperlivekit/test_harness.py
  function _parse_time (line 63) | def _parse_time(time_str: str) -> float:
  function load_audio_pcm (line 73) | def load_audio_pcm(audio_path: str, sample_rate: int = SAMPLE_RATE) -> b...
  class TestState (line 95) | class TestState:
    method from_front_data (line 115) | def from_front_data(cls, front_data: FrontData, audio_position: float ...
    method text (line 132) | def text(self) -> str:
    method committed_text (line 140) | def committed_text(self) -> str:
    method committed_word_count (line 145) | def committed_word_count(self) -> int:
    method buffer_word_count (line 151) | def buffer_word_count(self) -> int:
    method speakers (line 158) | def speakers(self) -> Set[int]:
    method n_speakers (line 163) | def n_speakers(self) -> int:
    method speaker_at (line 166) | def speaker_at(self, time_s: float) -> Optional[int]:
    method speakers_in (line 171) | def speakers_in(self, start_s: float, end_s: float) -> Set[int]:
    method speaker_timeline (line 180) | def speaker_timeline(self) -> List[Dict[str, Any]]:
    method n_speaker_changes (line 192) | def n_speaker_changes(self) -> int:
    method has_silence (line 203) | def has_silence(self) -> bool:
    method silence_segments (line 208) | def silence_segments(self) -> List[Dict[str, Any]]:
    method silence_at (line 212) | def silence_at(self, time_s: float) -> bool:
    method speech_lines (line 220) | def speech_lines(self) -> List[Dict[str, Any]]:
    method line_at (line 224) | def line_at(self, time_s: float) -> Optional[Dict[str, Any]]:
    method text_at (line 233) | def text_at(self, time_s: float) -> Optional[str]:
    method lines_between (line 238) | def lines_between(self, start_s: float, end_s: float) -> List[Dict[str...
    method text_between (line 248) | def text_between(self, start_s: float, end_s: float) -> str:
    method wer (line 257) | def wer(self, reference: str) -> float:
    method wer_detailed (line 267) | def wer_detailed(self, reference: str) -> Dict:
    method timestamps (line 275) | def timestamps(self) -> List[Dict[str, Any]]:
    method timing_valid (line 288) | def timing_valid(self) -> bool:
    method timing_monotonic (line 298) | def timing_monotonic(self) -> bool:
    method timing_errors (line 306) | def timing_errors(self) -> List[str]:
  class AudioPlayer (line 332) | class AudioPlayer:
    method __init__ (line 349) | def __init__(self, harness: "TestHarness", pcm_data: bytes, sample_rat...
    method position (line 357) | def position(self) -> float:
    method duration (line 362) | def duration(self) -> float:
    method remaining (line 367) | def remaining(self) -> float:
    method done (line 372) | def done(self) -> bool:
    method play (line 376) | async def play(
    method play_until (line 404) | async def play_until(
    method seek (line 421) | def seek(self, time_s: float) -> None:
    method reset (line 427) | def reset(self) -> None:
  class TestHarness (line 436) | class TestHarness:
    method __init__ (line 462) | def __init__(self, **kwargs: Any):
    method __aenter__ (line 473) | async def __aenter__(self) -> "TestHarness":
    method __aexit__ (line 493) | async def __aexit__(self, *exc: Any) -> None:
    method _collect_results (line 503) | async def _collect_results(self) -> None:
    method state (line 519) | def state(self) -> TestState:
    method history (line 524) | def history(self) -> List[TestState]:
    method audio_position (line 529) | def audio_position(self) -> float:
    method metrics (line 534) | def metrics(self):
    method on_update (line 540) | def on_update(self, callback: Callable[[TestState], None]) -> None:
    method load_audio (line 546) | def load_audio(self, source) -> AudioPlayer:
    method feed (line 559) | async def feed(
    method feed_pcm (line 577) | async def feed_pcm(
    method pause (line 603) | async def pause(self, duration_s: float, speed: float = 1.0) -> None:
    method silence (line 617) | async def silence(self, duration_s: float, speed: float = 1.0) -> None:
    method wait_for (line 623) | async def wait_for(
    method wait_for_text (line 646) | async def wait_for_text(self, timeout: float = 30.0) -> TestState:
    method wait_for_lines (line 650) | async def wait_for_lines(self, n: int = 1, timeout: float = 30.0) -> T...
    method wait_for_silence (line 654) | async def wait_for_silence(self, timeout: float = 30.0) -> TestState:
    method wait_for_speakers (line 658) | async def wait_for_speakers(self, n: int = 2, timeout: float = 30.0) -...
    method drain (line 662) | async def drain(self, seconds: float = 2.0) -> None:
    method finish (line 671) | async def finish(self, timeout: float = 30.0) -> TestState:
    method cut (line 687) | async def cut(self, timeout: float = 5.0) -> TestState:
    method snapshot_at (line 707) | def snapshot_at(self, audio_time: float) -> Optional[TestState]:
    method print_state (line 729) | def print_state(self) -> None:

FILE: whisperlivekit/thread_safety.py
  function get_model_lock (line 44) | def get_model_lock():
  function acquire_model_lock (line 49) | def acquire_model_lock(timeout=None):
  function release_model_lock (line 71) | def release_model_lock():
  class ModelLockContext (line 83) | class ModelLockContext:
    method __init__ (line 86) | def __init__(self, timeout=None):
    method __enter__ (line 90) | def __enter__(self):
    method __exit__ (line 94) | def __exit__(self, exc_type, exc_val, exc_tb):
  function print_deployment_recommendations (line 104) | def print_deployment_recommendations():

FILE: whisperlivekit/timed_objects.py
  function format_time (line 6) | def format_time(seconds: float) -> str:
  class Timed (line 18) | class Timed:
  class TimedText (line 23) | class TimedText(Timed):
    method has_punctuation (line 28) | def has_punctuation(self) -> bool:
    method is_within (line 31) | def is_within(self, other: 'TimedText') -> bool:
    method duration (line 34) | def duration(self) -> float:
    method contains_timespan (line 37) | def contains_timespan(self, other: 'TimedText') -> bool:
    method __bool__ (line 40) | def __bool__(self) -> bool:
    method __str__ (line 43) | def __str__(self) -> str:
  class ASRToken (line 47) | class ASRToken(TimedText):
    method with_offset (line 50) | def with_offset(self, offset: float) -> "ASRToken":
    method is_silence (line 54) | def is_silence(self) -> bool:
  class Sentence (line 59) | class Sentence(TimedText):
  class Transcript (line 63) | class Transcript(TimedText):
    method from_tokens (line 69) | def from_tokens(
  class SpeakerSegment (line 88) | class SpeakerSegment(Timed):
  class Translation (line 96) | class Translation(TimedText):
  class Silence (line 100) | class Silence():
    method compute_duration (line 107) | def compute_duration(self) -> Optional[float]:
    method is_silence (line 113) | def is_silence(self) -> bool:
  class Segment (line 118) | class Segment(TimedText):
    method from_tokens (line 128) | def from_tokens(
    method is_silence (line 155) | def is_silence(self) -> bool:
    method to_dict (line 159) | def to_dict(self) -> Dict[str, Any]:
  class PuncSegment (line 175) | class PuncSegment(Segment):
  class SilentSegment (line 178) | class SilentSegment(Segment):
    method __init__ (line 179) | def __init__(self, *args: Any, **kwargs: Any) -> None:
  class FrontData (line 186) | class FrontData():
    method to_dict (line 196) | def to_dict(self) -> Dict[str, Any]:
  class ChangeSpeaker (line 212) | class ChangeSpeaker:
  class State (line 217) | class State():

FILE: whisperlivekit/tokens_alignment.py
  class TokensAlignment (line 17) | class TokensAlignment:
    method __init__ (line 19) | def __init__(self, state: Any, args: Any, sep: Optional[str]) -> None:
    method update (line 45) | def update(self) -> None:
    method _prune (line 57) | def _prune(self) -> None:
    method add_translation (line 90) | def add_translation(self, segment: Segment) -> None:
    method compute_punctuations_segments (line 102) | def compute_punctuations_segments(self, tokens: Optional[List[ASRToken...
    method compute_new_punctuations_segments (line 134) | def compute_new_punctuations_segments(self) -> List[PuncSegment]:
    method concatenate_diar_segments (line 163) | def concatenate_diar_segments(self) -> List[SpeakerSegment]:
    method intersection_duration (line 177) | def intersection_duration(seg1: TimedText, seg2: TimedText) -> float:
    method get_lines_diarization (line 184) | def get_lines_diarization(self) -> Tuple[List[Segment], str]:
    method get_lines (line 217) | def get_lines(

FILE: whisperlivekit/vllm_realtime.py
  class VLLMRealtimeASR (line 27) | class VLLMRealtimeASR:
    method __init__ (line 34) | def __init__(self, vllm_url="ws://localhost:8000/v1/realtime",
    method transcribe (line 41) | def transcribe(self, audio):
  class VLLMRealtimeOnlineProcessor (line 45) | class VLLMRealtimeOnlineProcessor:
    method __init__ (line 57) | def __init__(self, asr: VLLMRealtimeASR):
    method _reset_state (line 70) | def _reset_state(self):
    method insert_audio_chunk (line 89) | def insert_audio_chunk(self, audio: np.ndarray, audio_stream_end_time:...
    method process_iter (line 94) | def process_iter(self, is_last=False) -> Tuple[List[ASRToken], float]:
    method get_buffer (line 101) | def get_buffer(self) -> Transcript:
    method start_silence (line 115) | def start_silence(self) -> Tuple[List[ASRToken], float]:
    method end_silence (line 148) | def end_silence(self, silence_duration: float, offset: float):
    method new_speaker (line 152) | def new_speaker(self, change_speaker):
    method warmup (line 155) | def warmup(self, audio, init_prompt=""):
    method finish (line 158) | def finish(self) -> Tuple[List[ASRToken], float]:
    method _connect (line 181) | def _connect(self):
    method _close_ws (line 206) | def _close_ws(self):
    method _recv_loop (line 219) | def _recv_loop(self):
    method _send_commit (line 259) | def _send_commit(self, final: bool):
    method _send_audio (line 271) | def _send_audio(self, audio: np.ndarray):
    method _send_pending_audio (line 289) | def _send_pending_audio(self):
    method _drain_deltas (line 313) | def _drain_deltas(self):
    method _wait_for_done (line 317) | def _wait_for_done(self, timeout: float = 10.0):
    method _time_for_word (line 328) | def _time_for_word(self, word_idx: int, n_words_total: int) -> Tuple[f...
    method _extract_new_words (line 338) | def _extract_new_words(self) -> List[ASRToken]:
    method _flush_all_pending_words (line 359) | def _flush_all_pending_words(self) -> List[ASRToken]:
    method _process_iter_inner (line 382) | def _process_iter_inner(self, is_last: bool) -> Tuple[List[ASRToken], ...

FILE: whisperlivekit/voxtral_hf_streaming.py
  class VoxtralHFStreamingASR (line 23) | class VoxtralHFStreamingASR:
    method __init__ (line 28) | def __init__(self, logfile=sys.stderr, **kwargs):
    method transcribe (line 63) | def transcribe(self, audio):
  class VoxtralHFStreamingOnlineProcessor (line 67) | class VoxtralHFStreamingOnlineProcessor:
    method __init__ (line 78) | def __init__(self, asr: VoxtralHFStreamingASR, logfile=sys.stderr):
    method _reset_state (line 104) | def _reset_state(self):
    method _get_pending_audio (line 135) | def _get_pending_audio(self) -> np.ndarray:
    method _set_pending_audio (line 145) | def _set_pending_audio(self, arr: np.ndarray):
    method _get_accumulated_text (line 154) | def _get_accumulated_text(self) -> str:
    method insert_audio_chunk (line 166) | def insert_audio_chunk(self, audio: np.ndarray, audio_stream_end_time:...
    method process_iter (line 172) | def process_iter(self, is_last=False) -> Tuple[List[ASRToken], float]:
    method get_buffer (line 179) | def get_buffer(self) -> Transcript:
    method start_silence (line 197) | def start_silence(self) -> Tuple[List[ASRToken], float]:
    method end_silence (line 239) | def end_silence(self, silence_duration: float, offset: float):
    method new_speaker (line 243) | def new_speaker(self, change_speaker):
    method warmup (line 246) | def warmup(self, audio, init_prompt=""):
    method finish (line 249) | def finish(self) -> Tuple[List[ASRToken], float]:
    method _start_generate_thread (line 280) | def _start_generate_thread(self):
    method _feed_pending_audio (line 356) | def _feed_pending_audio(self):
    method _append_text_fragment (line 371) | def _append_text_fragment(self, text_fragment: str):
    method _drain_streamer (line 378) | def _drain_streamer(self):
    method _drain_streamer_blocking (line 396) | def _drain_streamer_blocking(self, timeout=30.0):
    method _pos_to_time (line 445) | def _pos_to_time(self, token_position: int) -> float:
    method _audio_pos_for_char (line 449) | def _audio_pos_for_char(self, char_idx: int) -> int:
    method _word_timestamps (line 468) | def _word_timestamps(self, text: str, words: List[str], start_idx: int...
    method _extract_new_words (line 483) | def _extract_new_words(self) -> List[ASRToken]:
    method _flush_all_pending_words (line 510) | def _flush_all_pending_words(self) -> List[ASRToken]:
    method _process_iter_inner (line 538) | def _process_iter_inner(self, is_last: bool) -> Tuple[List[ASRToken], ...

FILE: whisperlivekit/voxtral_mlx/loader.py
  function download_weights (line 43) | def download_weights(model_id: str = DEFAULT_MODEL_ID) -> Path:
  function _translate_weight_name (line 113) | def _translate_weight_name(name: str) -> str | None:
  function _is_conv_weight (line 122) | def _is_conv_weight(name: str) -> bool:
  function _remap_converted_name (line 164) | def _remap_converted_name(name: str) -> str:
  function _has_converted_layout (line 180) | def _has_converted_layout(path: Path) -> bool:
  function _load_converted_weights (line 184) | def _load_converted_weights(path: Path):
  function _load_original_weights (line 219) | def _load_original_weights(path: Path):
  function _load_tokenizer (line 253) | def _load_tokenizer(model_dir: Path):
  function load_voxtral_model (line 262) | def load_voxtral_model(path_or_id: str = DEFAULT_MODEL_ID):

FILE: whisperlivekit/voxtral_mlx/model.py
  class SlidingKVCache (line 22) | class SlidingKVCache:
    method __init__ (line 32) | def __init__(self, capacity: int):
    method offset (line 40) | def offset(self) -> int:
    method _reorder (line 45) | def _reorder(self, buf):
    method _drop_oldest (line 56) | def _drop_oldest(self, buf, n_drop, tail=None):
    method _append_concat (line 64) | def _append_concat(self, k, v):
    method _write_inplace (line 79) | def _write_inplace(self, k, v):
    method update_and_fetch (line 121) | def update_and_fetch(self, k, v):
  class CausalConv (line 132) | class CausalConv(nn.Module):
    method __init__ (line 135) | def __init__(self, channels_in: int, channels_out: int, kernel: int, s...
    method __call__ (line 143) | def __call__(self, x: mx.array) -> mx.array:
  class _EncoderSelfAttention (line 149) | class _EncoderSelfAttention(nn.Module):
    method __init__ (line 150) | def __init__(self, dim: int, n_heads: int, head_dim: int, rope_theta: ...
    method __call__ (line 161) | def __call__(self, x, mask, cache=None):
  class _EncoderFFN (line 178) | class _EncoderFFN(nn.Module):
    method __init__ (line 181) | def __init__(self, dim: int, hidden: int):
    method __call__ (line 187) | def __call__(self, x):
  class _EncoderBlock (line 191) | class _EncoderBlock(nn.Module):
    method __init__ (line 192) | def __init__(self, dim, n_heads, head_dim, hidden, rope_theta):
    method __call__ (line 199) | def __call__(self, x, mask, cache=None):
  class StreamingEncoder (line 205) | class StreamingEncoder(nn.Module):
    method __init__ (line 210) | def __init__(
    method _apply_convs (line 233) | def _apply_convs(self, mel: mx.array) -> mx.array:
    method forward (line 239) | def forward(self, mel: mx.array) -> mx.array:
    method forward_conv_incremental (line 247) | def forward_conv_incremental(self, x_in, tail1, tail2):
    method forward_transformer_incremental (line 280) | def forward_transformer_incremental(self, x, cache_list):
  class _DecoderAttention (line 292) | class _DecoderAttention(nn.Module):
    method __init__ (line 295) | def __init__(self, dim, n_heads, n_kv_heads, head_dim, rope_theta):
    method __call__ (line 307) | def __call__(self, x, mask=None, cache=None):
  class _DecoderFFN (line 324) | class _DecoderFFN(nn.Module):
    method __init__ (line 327) | def __init__(self, dim, hidden):
    method __call__ (line 333) | def __call__(self, x):
  class AdaptiveScaling (line 337) | class AdaptiveScaling(nn.Module):
    method __init__ (line 341) | def __init__(self, dim, bottleneck):
    method __call__ (line 346) | def __call__(self, cond):
  class _DecoderBlock (line 350) | class _DecoderBlock(nn.Module):
    method __init__ (line 351) | def __init__(self, dim, n_heads, n_kv_heads, head_dim, hidden, rope_th...
    method __call__ (line 359) | def __call__(self, x, delay_cond, mask=None, cache=None):
  class TextDecoder (line 366) | class TextDecoder(nn.Module):
    method __init__ (line 369) | def __init__(
    method embed (line 389) | def embed(self, token_ids: mx.array) -> mx.array:
    method __call__ (line 392) | def __call__(self, x, delay_cond, mask=None, cache=None):
  class EncoderToDecoderAdapter (line 406) | class EncoderToDecoderAdapter(nn.Module):
    method __init__ (line 409) | def __init__(self, enc_dim: int, dec_dim: int):
    method __call__ (line 414) | def __call__(self, x):
  class DelayEmbedding (line 418) | class DelayEmbedding(nn.Module):
    method __init__ (line 422) | def __init__(self, dim: int = 3072, theta: float = 10000.0):
    method __call__ (line 429) | def __call__(self, delay: mx.array) -> mx.array:
  class VoxtralMLXModel (line 440) | class VoxtralMLXModel(nn.Module):
    method __init__ (line 443) | def __init__(self, config: dict):
    method encode (line 484) | def encode(self, mel: mx.array) -> mx.array:
    method encode_incremental (line 503) | def encode_incremental(self, new_mel, conv_tail1, conv_tail2, enc_cach...
    method decode (line 532) | def decode(self, embeddings, delay_cond, mask=None, cache=None):

FILE: whisperlivekit/voxtral_mlx/spectrogram.py
  function _build_slaney_filterbank (line 32) | def _build_slaney_filterbank(
  function _mel_filters (line 86) | def _mel_filters() -> mx.array:
  function _hann_window (line 102) | def _hann_window() -> mx.array:
  function _dft_matrices (line 109) | def _dft_matrices():
  function _stft_frames (line 123) | def _stft_frames(audio: mx.array, window: mx.array) -> mx.array:
  function _apply_mel_and_log (line 140) | def _apply_mel_and_log(power: mx.array) -> mx.array:
  function compute_mel (line 152) | def compute_mel(audio: np.ndarray) -> mx.array:
  function compute_mel_streaming (line 172) | def compute_mel_streaming(
  function pad_audio (line 206) | def pad_audio(

FILE: whisperlivekit/voxtral_mlx_asr.py
  function _prompt_tokens (line 43) | def _prompt_tokens(tokenizer, n_left_pad=LEFT_PAD_TOKENS, n_delay=6):
  class VoxtralMLXASR (line 55) | class VoxtralMLXASR:
    method __init__ (line 62) | def __init__(self, logfile=sys.stderr, **kwargs):
    method transcribe (line 84) | def transcribe(self, audio):
  class VoxtralMLXOnlineProcessor (line 93) | class VoxtralMLXOnlineProcessor:
    method __init__ (line 107) | def __init__(self, asr: VoxtralMLXASR, logfile=sys.stderr):
    method _reset_state (line 141) | def _reset_state(self):
    method _get_pending (line 177) | def _get_pending(self) -> np.ndarray:
    method _set_pending (line 187) | def _set_pending(self, arr: np.ndarray):
    method insert_audio_chunk (line 196) | def insert_audio_chunk(self, audio: np.ndarray, audio_stream_end_time:...
    method process_iter (line 205) | def process_iter(self, is_last=False) -> Tuple[List[ASRToken], float]:
    method _step (line 212) | def _step(self, is_last: bool) -> Tuple[List[ASRToken], float]:
    method _encode_pending (line 285) | def _encode_pending(self):
    method _do_prefill (line 323) | def _do_prefill(self):
    method _decode_positions (line 344) | def _decode_positions(self, n: int) -> bool:
    method _trim_embeds (line 396) | def _trim_embeds(self, n_consumed: int):
    method _sample (line 402) | def _sample(self, logits: mx.array) -> mx.array:
    method _audio_pos_to_time (line 407) | def _audio_pos_to_time(self, pos: int) -> float:
    method _word_time_range (line 411) | def _word_time_range(self, word_idx: int, n_words: int) -> Tuple[float...
    method _extract_committed_words (line 439) | def _extract_committed_words(self) -> List[ASRToken]:
    method _flush_all_words (line 457) | def _flush_all_words(self) -> List[ASRToken]:
    method get_buffer (line 477) | def get_buffer(self) -> Transcript:
    method _safe_decode_remaining (line 486) | def _safe_decode_remaining(self):
    method _flush_last_token_text (line 504) | def _flush_last_token_text(self):
    method _close_current_word (line 528) | def _close_current_word(self):
    method _flush_and_reset (line 535) | def _flush_and_reset(self) -> List[ASRToken]:
    method start_silence (line 585) | def start_silence(self) -> Tuple[List[ASRToken], float]:
    method end_silence (line 597) | def end_silence(self, silence_duration: float, offset: float):
    method new_speaker (line 601) | def new_speaker(self, change_speaker):
    method warmup (line 604) | def warmup(self, audio, init_prompt=""):
    method finish (line 607) | def finish(self) -> Tuple[List[ASRToken], float]:

FILE: whisperlivekit/warmup.py
  function load_file (line 6) | def load_file(warmup_file=None, timeout=5):
  function warmup_asr (line 43) | def warmup_asr(asr, warmup_file=None, timeout=5):

FILE: whisperlivekit/web/live_transcription.js
  function getWaveStroke (line 69) | function getWaveStroke() {
  function updateWaveStroke (line 76) | function updateWaveStroke() {
  function applyTheme (line 80) | function applyTheme(pref) {
  function enumerateMicrophones (line 119) | async function enumerateMicrophones() {
  function populateMicrophoneSelect (line 135) | function populateMicrophoneSelect() {
  function handleMicrophoneChange (line 154) | function handleMicrophoneChange() {
  function fmt1 (line 175) | function fmt1(x) {
  function setupWebSocket (line 215) | function setupWebSocket() {
  function renderLinesWithBuffer (line 333) | function renderLinesWithBuffer(
  function updateTimer (line 469) | function updateTimer() {
  function drawWaveform (line 478) | function drawWaveform() {
  function startRecording (line 520) | async function startRecording() {
  function stopRecording (line 639) | async function stopRecording() {
  function toggleRecording (line 725) | async function toggleRecording() {
  function updateUI (line 751) | function updateUI() {
  function checkAndRequestPermissions (line 802) | async function checkAndRequestPermissions() {

FILE: whisperlivekit/web/pcm_worklet.js
  class PCMForwarder (line 1) | class PCMForwarder extends AudioWorkletProcessor {
    method process (line 2) | process(inputs) {

FILE: whisperlivekit/web/recorder_worker.js
  function init (line 15) | function init(config) {
  function record (line 20) | function record(inputBuffer) {
  function resample (line 27) | function resample(buffer, from, to) {
  function toPCM (line 50) | function toPCM(input) {

FILE: whisperlivekit/web/web_interface.py
  function get_web_interface_html (line 7) | def get_web_interface_html():
  function get_inline_ui_html (line 16) | def get_inline_ui_html():
  function get (line 113) | async def get():

FILE: whisperlivekit/whisper/__init__.py
  function _download (line 57) | def _download(url: str, root: str, in_memory: bool) -> Union[bytes, str]:
  function available_models (line 101) | def available_models() -> List[str]:
  function _infer_dims_from_config (line 106) | def _infer_dims_from_config(path: str) -> Optional[ModelDimensions]:
  function _convert_hf_state_dict (line 163) | def _convert_hf_state_dict(state_dict: Dict[str, torch.Tensor]) -> Dict[...
  function _convert_mlx_state_dict (line 256) | def _convert_mlx_state_dict(state_dict: Dict[str, torch.Tensor]) -> Dict...
  function _load_lora_state (line 274) | def _load_lora_state(lora_path: str):
  function _collapse_hf_module_name (line 292) | def _collapse_hf_module_name(module: str):
  function _resolve_lora_path (line 302) | def _resolve_lora_path(lora_path: Optional[str]) -> Optional[str]:
  function _apply_lora_adapter (line 337) | def _apply_lora_adapter(state_dict: Dict[str, Tensor], lora_path: Option...
  function _load_checkpoint (line 397) | def _load_checkpoint(
  function _load_sharded_checkpoint (line 434) | def _load_sharded_checkpoint(
  function load_model (line 466) | def load_model(
  function convert_encoder_to_coreml (line 599) | def convert_encoder_to_coreml(

FILE: whisperlivekit/whisper/audio.py
  function load_audio (line 25) | def load_audio(file: str, sr: int = SAMPLE_RATE):
  function pad_or_trim (line 65) | def pad_or_trim(array, length: int = N_SAMPLES, *, axis: int = -1):
  function mel_filters (line 92) | def mel_filters(device, n_mels: int) -> torch.Tensor:
  function log_mel_spectrogram (line 110) | def log_mel_spectrogram(

FILE: whisperlivekit/whisper/decoding.py
  function detect_language (line 19) | def detect_language(
  class DecodingOptions (line 81) | class DecodingOptions:
  class DecodingResult (line 118) | class DecodingResult:
  class Inference (line 130) | class Inference:
    method logits (line 131) | def logits(self, tokens: Tensor, audio_features: Tensor) -> Tensor:
    method rearrange_kv_cache (line 135) | def rearrange_kv_cache(self, source_indices) -> None:
    method cleanup_caching (line 139) | def cleanup_caching(self) -> None:
  class PyTorchInference (line 144) | class PyTorchInference(Inference):
    method __init__ (line 145) | def __init__(self, model: "Whisper", initial_token_length: int):
    method logits (line 155) | def logits(self, tokens: Tensor, audio_features: Tensor) -> Tensor:
    method cleanup_caching (line 162) | def cleanup_caching(self):
    method rearrange_kv_cache (line 165) | def rearrange_kv_cache(self, source_indices):
  class SequenceRanker (line 173) | class SequenceRanker:
    method rank (line 174) | def rank(
  class MaximumLikelihoodRanker (line 184) | class MaximumLikelihoodRanker(SequenceRanker):
    method __init__ (line 190) | def __init__(self, length_penalty: Optional[float]):
    method rank (line 193) | def rank(self, tokens: List[List[Tensor]], sum_logprobs: List[List[flo...
  class TokenDecoder (line 210) | class TokenDecoder:
    method reset (line 211) | def reset(self):
    method update (line 214) | def update(
    method finalize (line 241) | def finalize(
  class GreedyDecoder (line 266) | class GreedyDecoder(TokenDecoder):
    method __init__ (line 267) | def __init__(self, temperature: float, eot: int):
    method update (line 271) | def update(
    method finalize (line 289) | def finalize(self, tokens: Tensor, sum_logprobs: Tensor):
  class BeamSearchDecoder (line 295) | class BeamSearchDecoder(TokenDecoder):
    method __init__ (line 296) | def __init__(
    method reset (line 314) | def reset(self):
    method update (line 317) | def update(
    method finalize (line 378) | def finalize(self, preceding_tokens: Tensor, sum_logprobs: Tensor):
  class LogitFilter (line 401) | class LogitFilter:
    method apply (line 402) | def apply(self, logits: Tensor, tokens: Tensor) -> None:
  class SuppressBlank (line 417) | class SuppressBlank(LogitFilter):
    method __init__ (line 418) | def __init__(self, tokenizer: Tokenizer, sample_begin: int):
    method apply (line 422) | def apply(self, logits: Tensor, tokens: Tensor):
  class SuppressTokens (line 427) | class SuppressTokens(LogitFilter):
    method __init__ (line 428) | def __init__(self, suppress_tokens: Sequence[int]):
    method apply (line 431) | def apply(self, logits: Tensor, tokens: Tensor):
  class ApplyTimestampRules (line 435) | class ApplyTimestampRules(LogitFilter):
    method __init__ (line 436) | def __init__(
    method apply (line 446) | def apply(self, logits: Tensor, tokens: Tensor):
  class DecodingTask (line 502) | class DecodingTask:
    method __init__ (line 508) | def __init__(self, model: "Whisper", options: DecodingOptions):
    method _verify_options (line 566) | def _verify_options(self, options: DecodingOptions) -> DecodingOptions:
    method _get_initial_tokens (line 581) | def _get_initial_tokens(self) -> Tuple[int]:
    method _get_suppress_tokens (line 609) | def _get_suppress_tokens(self) -> Tuple[int]:
    method _get_audio_features (line 638) | def _get_audio_features(self, mel: Tensor):
    method _detect_language (line 660) | def _detect_language(self, audio_features: Tensor, tokens: Tensor):
    method _main_loop (line 674) | def _main_loop(self, audio_features: Tensor, tokens: Tensor):
    method run (line 707) | def run(self, mel: Tensor) -> List[DecodingResult]:
  function decode (line 787) | def decode(

FILE: whisperlivekit/whisper/model.py
  class ModelDimensions (line 26) | class ModelDimensions:
  class LayerNorm (line 39) | class LayerNorm(nn.LayerNorm):
    method forward (line 40) | def forward(self, x: Tensor) -> Tensor:
  class Linear (line 44) | class Linear(nn.Linear):
    method forward (line 45) | def forward(self, x: Tensor) -> Tensor:
  class Conv1d (line 53) | class Conv1d(nn.Conv1d):
    method _conv_forward (line 54) | def _conv_forward(
  function sinusoids (line 62) | def sinusoids(length, channels, max_timescale=10000):
  function disable_sdpa (line 72) | def disable_sdpa():
  class MultiHeadAttention (line 81) | class MultiHeadAttention(nn.Module):
    method __init__ (line 84) | def __init__(self, n_state: int, n_head: int, cache_id: str = "", n_te...
    method forward (line 100) | def forward(
    method _update_self_attn_cache (line 130) | def _update_self_attn_cache(
    method qkv_attention (line 148) | def qkv_attention(
  class ResidualAttentionBlock (line 176) | class ResidualAttentionBlock(nn.Module):
    method __init__ (line 177) | def __init__(
    method forward (line 201) | def forward(
  class AudioEncoder (line 224) | class AudioEncoder(nn.Module):
    method __init__ (line 225) | def __init__(
    method forward (line 238) | def forward(self, x: Tensor):
  class TextDecoder (line 257) | class TextDecoder(nn.Module):
    method __init__ (line 258) | def __init__(
    method forward (line 281) | def forward(
  class Whisper (line 335) | class Whisper(nn.Module):
    method __init__ (line 336) | def __init__(self, dims: ModelDimensions, decoder_only: bool = False):
    method set_alignment_heads (line 363) | def set_alignment_heads(self, dump: bytes):
    method embed_audio (line 372) | def embed_audio(self, mel: torch.Tensor):
    method logits (line 375) | def logits(
    method forward (line 388) | def forward(
    method device (line 394) | def device(self):
    method is_multilingual (line 398) | def is_multilingual(self):
    method num_languages (line 402) | def num_languages(self):

FILE: whisperlivekit/whisper/normalizers/basic.py
  function remove_symbols_and_diacritics (line 27) | def remove_symbols_and_diacritics(s: str, keep=""):
  function remove_symbols (line 50) | def remove_symbols(s: str):
  class BasicTextNormalizer (line 60) | class BasicTextNormalizer:
    method __init__ (line 61) | def __init__(self, remove_diacritics: bool = False, split_letters: boo...
    method __call__ (line 67) | def __call__(self, s: str):

FILE: whisperlivekit/whisper/normalizers/english.py
  class EnglishNumberNormalizer (line 12) | class EnglishNumberNormalizer:
    method __init__ (line 23) | def __init__(self):
    method process_words (line 165) | def process_words(self, words: List[str]) -> Iterator[str]:
    method preprocess (line 388) | def preprocess(self, s: str):
    method postprocess (line 417) | def postprocess(self, s: str):
    method __call__ (line 442) | def __call__(self, s: str):
  class EnglishSpellingNormalizer (line 450) | class EnglishSpellingNormalizer:
    method __init__ (line 457) | def __init__(self):
    method __call__ (line 461) | def __call__(self, s: str):
  class EnglishTextNormalizer (line 465) | class EnglishTextNormalizer:
    method __init__ (line 466) | def __init__(self):
    method __call__ (line 526) | def __call__(self, s: str):

FILE: whisperlivekit/whisper/timing.py
  function median_filter (line 19) | def median_filter(x: torch.Tensor, filter_width: int):
  function backtrace (line 58) | def backtrace(trace: np.ndarray):
  function dtw_cpu (line 83) | def dtw_cpu(x: np.ndarray):
  function dtw_cuda (line 108) | def dtw_cuda(x, BLOCK_SIZE=1024):
  function dtw (line 141) | def dtw(x: torch.Tensor) -> np.ndarray:
  class WordTiming (line 155) | class WordTiming:
  function find_alignment (line 163) | def find_alignment(
  function merge_punctuations (line 245) | def merge_punctuations(alignment: List[WordTiming], prepended: str, appe...
  function add_word_timestamps (line 279) | def add_word_timestamps(

FILE: whisperlivekit/whisper/tokenizer.py
  class Tokenizer (line 132) | class Tokenizer:
    method __post_init__ (line 142) | def __post_init__(self):
    method encode (line 161) | def encode(self, text, **kwargs):
    method decode (line 164) | def decode(self, token_ids: List[int], **kwargs) -> str:
    method decode_with_timestamps (line 168) | def decode_with_timestamps(self, token_ids: List[int], **kwargs) -> str:
    method eot (line 176) | def eot(self) -> int:
    method transcribe (line 180) | def transcribe(self) -> int:
    method translate (line 184) | def translate(self) -> int:
    method sot (line 188) | def sot(self) -> int:
    method sot_lm (line 192) | def sot_lm(self) -> int:
    method sot_prev (line 196) | def sot_prev(self) -> int:
    method no_speech (line 200) | def no_speech(self) -> int:
    method no_timestamps (line 204) | def no_timestamps(self) -> int:
    method timestamp_begin (line 208) | def timestamp_begin(self) -> int:
    method language_token (line 212) | def language_token(self) -> int:
    method to_language_token (line 219) | def to_language_token(self, language):
    method all_language_tokens (line 226) | def all_language_tokens(self) -> Tuple[int]:
    method all_language_codes (line 234) | def all_language_codes(self) -> Tuple[str]:
    method sot_sequence_including_notimestamps (line 238) | def sot_sequence_including_notimestamps(self) -> Tuple[int]:
    method non_speech_tokens (line 242) | def non_speech_tokens(self) -> Tuple[int]:
    method split_to_word_tokens (line 277) | def split_to_word_tokens(self, tokens: List[int]):
    method split_tokens_on_unicode (line 286) | def split_tokens_on_unicode(self, tokens: List[int]):
    method split_tokens_on_spaces (line 316) | def split_tokens_on_spaces(self, tokens: List[int]):
  function get_encoding (line 336) | def get_encoding(name: str = "gpt2", num_languages: int = 99):
  function get_tokenizer (line 372) | def get_tokenizer(

FILE: whisperlivekit/whisper/transcribe.py
  function transcribe (line 21) | def transcribe(
  function cli (line 500) | def cli():

FILE: whisperlivekit/whisper/triton_ops.py
  function dtw_kernel (line 14) | def dtw_kernel(
  function median_kernel (line 44) | def median_kernel(filter_width: int):
  function median_filter_cuda (line 106) | def median_filter_cuda(x: torch.Tensor, filter_width: int):

FILE: whisperlivekit/whisper/utils.py
  function make_safe (line 12) | def make_safe(string):
  function make_safe (line 19) | def make_safe(string):
  function exact_div (line 24) | def exact_div(x, y):
  function str2bool (line 29) | def str2bool(string):
  function optional_int (line 37) | def optional_int(string):
  function optional_float (line 41) | def optional_float(string):
  function compression_ratio (line 45) | def compression_ratio(text) -> float:
  function format_timestamp (line 50) | def format_timestamp(
  function get_start (line 71) | def get_start(segments: List[dict]) -> Optional[float]:
  function get_end (line 78) | def get_end(segments: List[dict]) -> Optional[float]:
  class ResultWriter (line 85) | class ResultWriter:
    method __init__ (line 88) | def __init__(self, output_dir: str):
    method __call__ (line 91) | def __call__(
    method write_result (line 103) | def write_result(
  class WriteTXT (line 109) | class WriteTXT(ResultWriter):
    method write_result (line 112) | def write_result(
  class SubtitlesWriter (line 119) | class SubtitlesWriter(ResultWriter):
    method iterate_result (line 123) | def iterate_result(
    method format_timestamp (line 230) | def format_timestamp(self, seconds: float):
  class WriteVTT (line 238) | class WriteVTT(SubtitlesWriter):
    method write_result (line 243) | def write_result(
  class WriteSRT (line 251) | class WriteSRT(SubtitlesWriter):
    method write_result (line 256) | def write_result(
  class WriteTSV (line 265) | class WriteTSV(ResultWriter):
    method write_result (line 277) | def write_result(
  class WriteJSON (line 287) | class WriteJSON(ResultWriter):
    method write_result (line 290) | def write_result(
  function get_writer (line 296) | def get_writer(

FILE: whisperlivekit/whisper/val.py
  class Value (line 31) | class Value:
    method __init__ (line 34) | def __init__(self, data, children=(), local_grads=()):
    method __add__ (line 40) | def __add__(self, other):
    method __mul__ (line 44) | def __mul__(self, other):
    method __pow__ (line 48) | def __pow__(self, other): return Value(self.data**other, (self,), (oth...
    method log (line 49) | def log(self): return Value(math.log(self.data), (self,), (1/self.data,))
    method exp (line 50) | def exp(self): return Value(math.exp(self.data), (self,), (math.exp(se...
    method relu (line 51) | def relu(self): return Value(max(0, self.data), (self,), (float(self.d...
    method __neg__ (line 52) | def __neg__(self): return self * -1
    method __radd__ (line 53) | def __radd__(self, other): return self + other
    method __sub__ (line 54) | def __sub__(self, other): return self + (-other)
    method __rsub__ (line 55) | def __rsub__(self, other): return other + (-self)
    method __rmul__ (line 56) | def __rmul__(self, other): return self * other
    method __truediv__ (line 57) | def __truediv__(self, other): return self * other**-1
    method __rtruediv__ (line 58) | def __rtruediv__(self, other): return other * self**-1
    method backward (line 60) | def backward(self):
  function linear (line 95) | def linear(x, w):
  function softmax (line 99) | def softmax(logits):
  function rmsnorm (line 105) | def rmsnorm(x):
  function gpt (line 110) | def gpt(token_id, pos_id, keys, values):