SYMBOL INDEX (1208 symbols across 89 files) FILE: benchmark_mlx_simul.py function load_librispeech_utterances (line 63) | def load_librispeech_utterances(data_dir: str, max_utterances: int = 0): function load_librispeech_chapters (line 98) | def load_librispeech_chapters(data_dir: str): function transcribe_simul (line 153) | def transcribe_simul(asr, audio, chunk_seconds=2.0): function transcribe_single_shot (line 191) | def transcribe_single_shot(asr, audio): function normalize_text (line 216) | def normalize_text(text: str) -> str: function main (line 224) | def main(): FILE: benchmarks/h100/bench_voxtral_hf_batch.py function norm (line 14) | def norm(t): function load_audio (line 17) | def load_audio(path): function transcribe_batch (line 30) | def transcribe_batch(audio_np): FILE: benchmarks/h100/bench_voxtral_vllm_realtime.py function norm (line 12) | def norm(t): function transcribe (line 15) | async def transcribe(audio_path, max_tokens=4096): function main (line 57) | async def main(): FILE: benchmarks/h100/generate_figures.py function _save (line 38) | def _save(fig, name): function fig_scatter_clean (line 48) | def fig_scatter_clean(): function fig_scatter_acl6060 (line 101) | def fig_scatter_acl6060(): function fig_bars (line 143) | def fig_bars(): function fig_robustness (line 195) | def fig_robustness(): function fig_per_talk (line 233) | def fig_per_talk(): FILE: benchmarks/m5/generate_figures.py function _save (line 43) | def _save(fig, name): function fig_m5_vs_h100 (line 50) | def fig_m5_vs_h100(): FILE: chrome-extension/requestPermissions.js function getUserPermission (line 5) | async function getUserPermission() { FILE: chrome-extension/sidepanel.js function run (line 3) | async function run() { FILE: scripts/convert_hf_whisper.py function _load_state_dict (line 23) | def _load_state_dict(repo_path: Path) -> Dict[str, torch.Tensor]: function _load_config (line 45) | def _load_config(repo_path: Path) -> Dict: function _derive_audio_ctx (line 55) | def _derive_audio_ctx(chunk_length: float) -> Tuple[int, int]: function _build_dims (line 68) | def _build_dims(config: Dict, chunk_length: float) -> Dict: function _trim_positional_embedding (line 88) | def _trim_positional_embedding( function convert_checkpoint (line 105) | def convert_checkpoint(hf_path: Path, output_path: Path, chunk_length: f... function parse_args (line 119) | def parse_args() -> argparse.Namespace: function main (line 143) | def main(): FILE: scripts/create_long_samples.py function save_wav (line 20) | def save_wav(path, audio, sr=SR): function decode_audio (line 30) | def decode_audio(audio_bytes): function download_long_librispeech (line 36) | def download_long_librispeech(config, lang_code, target_dur=300): function download_long_mls (line 76) | def download_long_mls(config, lang_code, target_dur=300): function main (line 115) | def main(): FILE: scripts/detect_alignment_heads_qwen3.py function _apply_transformers_compat_patches (line 50) | def _apply_transformers_compat_patches(): function text_similarity (line 137) | def text_similarity(generated: str, reference: str) -> float: function load_dataset_clips (line 155) | def load_dataset_clips(name, config, split, limit): function get_device (line 177) | def get_device(): function load_qwen3_asr (line 190) | def load_qwen3_asr(model_id: str, device: torch.device, dtype: torch.dty... function find_audio_token_range (line 235) | def find_audio_token_range(input_ids: torch.Tensor, audio_token_id: int)... function timestamp_to_audio_token_position (line 244) | def timestamp_to_audio_token_position( function run_detection (line 264) | def run_detection( function main (line 536) | def main(): FILE: scripts/determine_alignment_heads.py function load_dataset_clips (line 33) | def load_dataset_clips(name, config, split, limit): function load_clips (line 53) | def load_clips(args): function _waveform_from_source (line 62) | def _waveform_from_source(source: AudioInput) -> torch.Tensor: function _parse_args (line 67) | def _parse_args(): function collect_heads (line 125) | def collect_heads( function _select_heads_for_visualization (line 181) | def _select_heads_for_visualization(selection, strengths, top_k): function _extract_heatmaps (line 193) | def _extract_heatmaps( function _plot_heatmaps (line 245) | def _plot_heatmaps( function _dump_mask (line 270) | def _dump_mask(mask: torch.Tensor, output_path: str): function main (line 277) | def main(): FILE: scripts/generate_architecture.py function box (line 36) | def box(x, y, w, h, label, color=C_BORDER, bg=C_BOX_BG, fontsize=8, bold... function arrow (line 50) | def arrow(x1, y1, x2, y2, color=C_TEXTDIM, style="->", lw=1.2): function section_box (line 55) | def section_box(x, y, w, h, title, bg=C_PANEL, border=C_BORDER, title_co... FILE: scripts/python_support_matrix.py class MatrixRow (line 35) | class MatrixRow: class CaseResult (line 87) | class CaseResult: function parse_args (line 97) | def parse_args() -> argparse.Namespace: function safe_slug (line 115) | def safe_slug(text: str) -> str: function status_style (line 119) | def status_style(status: str) -> str: function print_line (line 129) | def print_line(message: str, style: str | None = None) -> None: function tail_text (line 139) | def tail_text(text: str | None, max_chars: int = 220) -> str: function run_command (line 148) | def run_command( function detect_gpu_available (line 218) | def detect_gpu_available() -> bool: function download_sample (line 232) | def download_sample(repo_root: Path) -> Path: function sync_case_environment (line 252) | def sync_case_environment( function apply_expected_failure_policy (line 276) | def apply_expected_failure_policy(result: CaseResult) -> CaseResult: function build_offline_command (line 298) | def build_offline_command( function run_case (line 332) | def run_case( function print_summary (line 446) | def print_summary(results: list[CaseResult]) -> None: function main (line 519) | def main() -> int: FILE: scripts/run_scatter_benchmark.py function is_backend_available (line 66) | def is_backend_available(backend): function get_system_info (line 88) | def get_system_info(): function run_combo_on_samples (line 103) | async def run_combo_on_samples(combo, samples, lang="en", speed=0): function run_all (line 174) | async def run_all(combos, samples, lang="en", speed=0): function get_long_samples_for_lang (line 191) | def get_long_samples_for_lang(lang="en"): function generate_scatter (line 213) | def generate_scatter(results, system_info, output_path, n_samples, lang=... function main (line 348) | def main(): FILE: scripts/sync_extension.py function sync_extension_files (line 7) | def sync_extension_files(): FILE: tests/test_pipeline.py function backend_kwargs (line 83) | def backend_kwargs(backend: str) -> dict: function samples (line 92) | def samples(): function short_sample (line 99) | def short_sample(samples): function medium_sample (line 104) | def medium_sample(samples): function meeting_sample (line 109) | def meeting_sample(samples): function test_transcription_quality (line 119) | async def test_transcription_quality(backend, short_sample): function test_medium_clip_timing_spans_audio (line 141) | async def test_medium_clip_timing_spans_audio(backend, medium_sample): function test_text_appears_progressively (line 173) | async def test_text_appears_progressively(backend, medium_sample): function test_buffer_lifecycle (line 207) | async def test_buffer_lifecycle(backend, medium_sample): function test_silence_flushes_all_words (line 232) | async def test_silence_flushes_all_words(backend, medium_sample): function test_play_pause_resume (line 290) | async def test_play_pause_resume(backend, medium_sample): function test_multiple_pauses (line 336) | async def test_multiple_pauses(backend, medium_sample): function test_short_pause_no_silence (line 378) | async def test_short_pause_no_silence(backend, medium_sample): function test_abrupt_cutoff (line 413) | async def test_abrupt_cutoff(backend, medium_sample): function test_timing_precision_and_monotonicity (line 443) | async def test_timing_precision_and_monotonicity(backend, medium_sample): function test_silence_timing_reflects_pause (line 469) | async def test_silence_timing_reflects_pause(backend, short_sample): function test_snapshot_history (line 503) | async def test_snapshot_history(backend, medium_sample): function test_metrics_collected (line 532) | async def test_metrics_collected(backend, short_sample): FILE: whisperlivekit/audio_processor.py function get_all_from_queue (line 28) | async def get_all_from_queue(queue: asyncio.Queue) -> Union[object, Sile... class AudioProcessor (line 54) | class AudioProcessor: method __init__ (line 60) | def __init__(self, **kwargs: Any) -> None: method _push_silence_event (line 140) | async def _push_silence_event(self) -> None: method _begin_silence (line 148) | async def _begin_silence(self, at_sample: Optional[int] = None) -> None: method _end_silence (line 168) | async def _end_silence(self, at_sample: Optional[int] = None) -> None: method _enqueue_active_audio (line 188) | async def _enqueue_active_audio(self, pcm_chunk: np.ndarray) -> None: method _slice_before_silence (line 196) | def _slice_before_silence(self, pcm_array: np.ndarray, chunk_sample_st... method convert_pcm_to_float (line 207) | def convert_pcm_to_float(self, pcm_buffer: Union[bytes, bytearray]) ->... method get_current_state (line 211) | async def get_current_state(self) -> State: method ffmpeg_stdout_reader (line 230) | async def ffmpeg_stdout_reader(self) -> None: method _finish_transcription (line 280) | async def _finish_transcription(self) -> None: method transcription_processor (line 309) | async def transcription_processor(self) -> None: method diarization_processor (line 421) | async def diarization_processor(self) -> None: method translation_processor (line 444) | async def translation_processor(self) -> None: method results_formatter (line 479) | async def results_formatter(self) -> AsyncGenerator[FrontData, None]: method create_tasks (line 530) | async def create_tasks(self) -> AsyncGenerator[FrontData, None]: method watchdog (line 571) | async def watchdog(self, tasks_to_monitor: List[asyncio.Task]) -> None: method cleanup (line 598) | async def cleanup(self) -> None: method _processing_tasks_done (line 625) | def _processing_tasks_done(self) -> bool: method process_audio (line 636) | async def process_audio(self, message: Optional[bytes]) -> None: method handle_pcm_data (line 682) | async def handle_pcm_data(self) -> None: method _flush_remaining_pcm (line 734) | async def _flush_remaining_pcm(self) -> None: FILE: whisperlivekit/backend_support.py function module_available (line 8) | def module_available(module_name): function mlx_backend_available (line 13) | def mlx_backend_available(warn_on_missing = False): function voxtral_hf_backend_available (line 32) | def voxtral_hf_backend_available(): function faster_backend_available (line 38) | def faster_backend_available(warn_on_missing = False): FILE: whisperlivekit/basic_server.py function lifespan (line 22) | async def lifespan(app: FastAPI): function get (line 37) | async def get(): function health (line 42) | async def health(): function handle_websocket_results (line 53) | async def handle_websocket_results(websocket, results_generator, diff_tr... function websocket_endpoint (line 71) | async def websocket_endpoint(websocket: WebSocket): function deepgram_websocket_endpoint (line 134) | async def deepgram_websocket_endpoint(websocket: WebSocket): function _convert_to_pcm (line 145) | async def _convert_to_pcm(audio_bytes: bytes) -> bytes: function _parse_time_str (line 164) | def _parse_time_str(time_str: str) -> float: function _format_openai_response (line 174) | def _format_openai_response(front_data, response_format: str, language: ... function _srt_timestamp (line 239) | def _srt_timestamp(seconds: float, fmt: str) -> str: function create_transcription (line 250) | async def create_transcription( function list_models (line 321) | async def list_models(): function main (line 336) | def main(): FILE: whisperlivekit/benchmark/compat.py function backend_supports_language (line 30) | def backend_supports_language(backend: str, language: str) -> bool: function detect_available_backends (line 38) | def detect_available_backends() -> List[str]: function resolve_backend (line 85) | def resolve_backend(backend: str) -> str: FILE: whisperlivekit/benchmark/datasets.py class BenchmarkSample (line 33) | class BenchmarkSample: method to_dict (line 47) | def to_dict(self) -> Dict: function _save_wav (line 218) | def _save_wav(path: Path, audio: np.ndarray, sample_rate: int = 16000) -... function _decode_audio (line 234) | def _decode_audio(audio_bytes: bytes) -> tuple: function _ensure_datasets (line 241) | def _ensure_datasets(): function _download_librispeech (line 255) | def _download_librispeech(config: str, n_samples: int, skip: int, function _download_mls (line 299) | def _download_mls(config: str, n_samples: int, skip: int, function _download_fleurs (line 342) | def _download_fleurs(config: str, n_samples: int, skip: int, function _download_ami (line 385) | def _download_ami(max_duration: float = 60.0) -> List[Dict]: function _download_catalog_entry (line 444) | def _download_catalog_entry(name: str, spec: Dict) -> List[Dict]: function get_benchmark_samples (line 479) | def get_benchmark_samples( FILE: whisperlivekit/benchmark/metrics.py class SampleResult (line 11) | class SampleResult: method to_dict (line 51) | def to_dict(self) -> Dict[str, Any]: class BenchmarkReport (line 77) | class BenchmarkReport: method n_samples (line 89) | def n_samples(self) -> int: method total_audio_s (line 93) | def total_audio_s(self) -> float: method total_processing_s (line 97) | def total_processing_s(self) -> float: method avg_wer (line 101) | def avg_wer(self) -> float: method weighted_wer (line 107) | def weighted_wer(self) -> float: method avg_rtf (line 119) | def avg_rtf(self) -> float: method overall_rtf (line 125) | def overall_rtf(self) -> float: method avg_latency_ms (line 131) | def avg_latency_ms(self) -> float: method p95_latency_ms (line 136) | def p95_latency_ms(self) -> float: method _group_by (line 142) | def _group_by(self, key: str) -> Dict[str, List[SampleResult]]: method wer_by_language (line 149) | def wer_by_language(self) -> Dict[str, float]: method rtf_by_language (line 155) | def rtf_by_language(self) -> Dict[str, float]: method wer_by_category (line 161) | def wer_by_category(self) -> Dict[str, float]: method languages (line 168) | def languages(self) -> List[str]: method categories (line 172) | def categories(self) -> List[str]: method to_dict (line 175) | def to_dict(self) -> Dict[str, Any]: function get_system_info (line 208) | def get_system_info() -> Dict[str, Any]: FILE: whisperlivekit/benchmark/report.py function _wer_color (line 20) | def _wer_color(wer: float) -> str: function _rtf_color (line 28) | def _rtf_color(rtf: float) -> str: function _lat_color (line 36) | def _lat_color(ms: float) -> str: function print_report (line 44) | def print_report(report: BenchmarkReport, out: TextIO = sys.stderr) -> N... function print_transcriptions (line 143) | def print_transcriptions(report: BenchmarkReport, out: TextIO = sys.stde... function write_json (line 159) | def write_json(report: BenchmarkReport, path: str) -> None: FILE: whisperlivekit/benchmark/runner.py class BenchmarkRunner (line 15) | class BenchmarkRunner: method __init__ (line 28) | def __init__( method run (line 46) | async def run(self) -> BenchmarkReport: method _run_sample (line 105) | async def _run_sample( FILE: whisperlivekit/cascade_bridge.py class CascadeBridge (line 24) | class CascadeBridge: method __init__ (line 27) | def __init__(self, output_file: TextIO = None): method emit_tokens (line 32) | def emit_tokens(self, tokens: List[ASRToken], is_final: bool = False): method get_entries (line 48) | def get_entries(self) -> List[dict]: method get_text (line 51) | def get_text(self) -> str: method save (line 55) | def save(self, path: str): function run_stt_to_jsonl (line 62) | def run_stt_to_jsonl( FILE: whisperlivekit/cli.py function _module_available (line 28) | def _module_available(name: str) -> bool: function _gpu_info (line 32) | def _gpu_info() -> str: function _check_platform (line 212) | def _check_platform(backend: dict) -> bool: function _is_installed (line 222) | def _is_installed(backend: dict) -> bool: function _check_ffmpeg (line 226) | def _check_ffmpeg() -> bool: function _scan_downloaded_models (line 232) | def _scan_downloaded_models() -> dict: function print_banner (line 266) | def print_banner(config, host: str, port: int, ssl: bool = False): function _model_is_downloaded (line 306) | def _model_is_downloaded(model_entry: dict, downloaded: dict) -> bool: function _best_backend_for_model (line 332) | def _best_backend_for_model(model_entry: dict) -> str: function cmd_models (line 357) | def cmd_models(): function _hf_download (line 438) | def _hf_download(repo_id: str, label: str): function _resolve_pull_target (line 447) | def _resolve_pull_target(spec: str): function cmd_pull (line 544) | def cmd_pull(spec: str): function cmd_transcribe (line 568) | def cmd_transcribe(args: list): function _transcribe_files_quiet (line 602) | async def _transcribe_files_quiet(parsed): function _transcribe_files (line 618) | async def _transcribe_files(parsed): function _format_subtitle (line 679) | def _format_subtitle(result, fmt: str) -> str: function _subtitle_timestamp (line 710) | def _subtitle_timestamp(seconds: float, fmt: str) -> str: function cmd_bench (line 724) | def cmd_bench(args: list): function _suppress_logging (line 777) | def _suppress_logging(): function _run_bench_new (line 788) | async def _run_bench_new(parsed, languages, categories): function cmd_listen (line 828) | def cmd_listen(args: list): function _listen_quiet (line 863) | async def _listen_quiet(parsed): function _listen_main (line 875) | async def _listen_main(parsed): function _resolve_run_spec (line 1005) | def _resolve_run_spec(spec: str): function cmd_run (line 1030) | def cmd_run(args: list): function cmd_rm (line 1098) | def cmd_rm(spec: str): function cmd_check (line 1158) | def cmd_check(): function cmd_diagnose (line 1192) | def cmd_diagnose(args: list): function _probe_backend_state (line 1225) | def _probe_backend_state(processor) -> dict: function _probe_pipeline_state (line 1295) | def _probe_pipeline_state(processor) -> dict: function _diagnose_main (line 1313) | async def _diagnose_main(parsed): function _print_version (line 1582) | def _print_version(): function _print_help (line 1592) | def _print_help(): function main (line 1630) | def main(): FILE: whisperlivekit/config.py class WhisperLiveKitConfig (line 10) | class WhisperLiveKitConfig: method __post_init__ (line 79) | def __post_init__(self): method from_namespace (line 94) | def from_namespace(cls, ns) -> "WhisperLiveKitConfig": method from_kwargs (line 100) | def from_kwargs(cls, **kwargs) -> "WhisperLiveKitConfig": FILE: whisperlivekit/core.py class TranscriptionEngine (line 13) | class TranscriptionEngine: method __new__ (line 18) | def __new__(cls, *args, **kwargs): method reset (line 28) | def reset(cls): method __init__ (line 38) | def __init__(self, config=None, **kwargs): method _do_init (line 56) | def _do_init(self, config=None, **kwargs): function online_factory (line 237) | def online_factory(args, asr, language=None): function online_diarization_factory (line 282) | def online_diarization_factory(args, diarization_backend): function online_translation_factory (line 294) | def online_translation_factory(args, translation_model): FILE: whisperlivekit/deepgram_compat.py function _parse_time_str (line 28) | def _parse_time_str(time_str: str) -> float: function _line_to_words (line 38) | def _line_to_words(line: dict) -> list: function _lines_to_result (line 74) | def _lines_to_result(lines: list, is_final: bool, speech_final: bool, class DeepgramAdapter (line 120) | class DeepgramAdapter: method __init__ (line 123) | def __init__(self, websocket: WebSocket): method send_metadata (line 132) | async def send_metadata(self, config): method process_update (line 152) | async def process_update(self, front_data_dict: dict): function handle_deepgram_websocket (line 219) | async def handle_deepgram_websocket(websocket: WebSocket, transcription_... FILE: whisperlivekit/diarization/diart_backend.py class DiarizationObserver (line 21) | class DiarizationObserver(Observer): method __init__ (line 24) | def __init__(self): method on_next (line 30) | def on_next(self, value: Tuple[Annotation, Any]): method get_segments (line 55) | def get_segments(self) -> List[SpeakerSegment]: method clear_old_segments (line 60) | def clear_old_segments(self, older_than: float = 30.0): method on_error (line 69) | def on_error(self, error): method on_completed (line 73) | def on_completed(self): class WebSocketAudioSource (line 78) | class WebSocketAudioSource(AudioSource): method __init__ (line 82) | def __init__(self, uri: str = "websocket", sample_rate: int = 16000, b... method read (line 94) | def read(self): method _process_chunks (line 104) | def _process_chunks(self): method close (line 150) | def close(self): method push_audio (line 155) | def push_audio(self, chunk: np.ndarray): class DiartDiarization (line 164) | class DiartDiarization: method __init__ (line 165) | def __init__(self, sample_rate: int = 16000, config : SpeakerDiarizati... method insert_silence (line 198) | def insert_silence(self, silence_duration): method insert_audio_chunk (line 201) | def insert_audio_chunk(self, pcm_array: np.ndarray): method diarize (line 206) | async def diarize(self): method close (line 210) | def close(self): function concatenate_speakers (line 216) | def concatenate_speakers(segments): function add_speaker_to_tokens (line 230) | def add_speaker_to_tokens(segments, tokens): function visualize_tokens (line 274) | def visualize_tokens(tokens): FILE: whisperlivekit/diarization/sortformer_backend.py class StreamingSortformerState (line 20) | class StreamingSortformerState: method __init__ (line 37) | def __init__(self): class SortformerDiarization (line 49) | class SortformerDiarization: method __init__ (line 50) | def __init__(self, model_name: str = "nvidia/diar_streaming_sortformer... method _load_model (line 56) | def _load_model(self, model_name: str): class SortformerDiarizationOnline (line 86) | class SortformerDiarizationOnline: method __init__ (line 87) | def __init__(self, shared_model, sample_rate: int = 16000): method _init_streaming_state (line 136) | def _init_streaming_state(self): method insert_silence (line 160) | def insert_silence(self, silence_duration: Optional[float]): method insert_audio_chunk (line 171) | def insert_audio_chunk(self, pcm_array: np.ndarray): method diarize (line 177) | async def diarize(self): method _process_predictions (line 230) | def _process_predictions(self): method get_segments (line 266) | def get_segments(self) -> List[SpeakerSegment]: method close (line 271) | def close(self): function main (line 295) | async def main(): FILE: whisperlivekit/diarization/utils.py function extract_number (line 4) | def extract_number(s: str) -> int: FILE: whisperlivekit/diff_protocol.py class DiffTracker (line 32) | class DiffTracker: method to_message (line 39) | def to_message(self, front_data: FrontData) -> Dict[str, Any]: method reset (line 101) | def reset(self) -> None: FILE: whisperlivekit/ffmpeg_manager.py class FFmpegState (line 32) | class FFmpegState(Enum): class FFmpegManager (line 39) | class FFmpegManager: method __init__ (line 40) | def __init__(self, sample_rate: int = 16000, channels: int = 1): method start (line 52) | async def start(self) -> bool: method stop (line 103) | async def stop(self): method write_data (line 123) | async def write_data(self, data: bytes) -> bool: method read_data (line 139) | async def read_data(self, size: int) -> Optional[bytes]: method get_state (line 160) | async def get_state(self) -> FFmpegState: method restart (line 164) | async def restart(self) -> bool: method _drain_stderr (line 185) | async def _drain_stderr(self): FILE: whisperlivekit/local_agreement/backends.py class ASRBase (line 15) | class ASRBase: method __init__ (line 19) | def __init__(self, lan, model_size=None, cache_dir=None, model_dir=Non... method load_model (line 29) | def load_model(self, model_size, cache_dir, model_dir): method transcribe (line 32) | def transcribe(self, audio, init_prompt=""): method use_vad (line 35) | def use_vad(self): class WhisperASR (line 39) | class WhisperASR(ASRBase): method load_model (line 43) | def load_model(self, model_size=None, cache_dir=None, model_dir=None): method transcribe (line 62) | def transcribe(self, audio, init_prompt=""): method ts_words (line 79) | def ts_words(self, r) -> List[ASRToken]: method segments_end_ts (line 95) | def segments_end_ts(self, res) -> List[float]: method use_vad (line 98) | def use_vad(self): class FasterWhisperASR (line 101) | class FasterWhisperASR(ASRBase): method load_model (line 105) | def load_model(self, model_size=None, cache_dir=None, model_dir=None): method transcribe (line 129) | def transcribe(self, audio: np.ndarray, init_prompt: str = "") -> list: method ts_words (line 141) | def ts_words(self, segments) -> List[ASRToken]: method segments_end_ts (line 151) | def segments_end_ts(self, segments) -> List[float]: method use_vad (line 154) | def use_vad(self): class MLXWhisper (line 157) | class MLXWhisper(ASRBase): method load_model (line 163) | def load_model(self, model_size=None, cache_dir=None, model_dir=None): method translate_model_name (line 182) | def translate_model_name(self, model_name): method transcribe (line 190) | def transcribe(self, audio, init_prompt=""): method ts_words (line 203) | def ts_words(self, segments) -> List[ASRToken]: method segments_end_ts (line 213) | def segments_end_ts(self, res) -> List[float]: method use_vad (line 216) | def use_vad(self): class OpenaiApiASR (line 220) | class OpenaiApiASR(ASRBase): method __init__ (line 222) | def __init__(self, lan=None, temperature=0, logfile=sys.stderr): method load_model (line 233) | def load_model(self, *args, **kwargs): method ts_words (line 238) | def ts_words(self, segments) -> List[ASRToken]: method segments_end_ts (line 257) | def segments_end_ts(self, res) -> List[float]: method transcribe (line 260) | def transcribe(self, audio_data, prompt=None, *args, **kwargs): method use_vad (line 283) | def use_vad(self): FILE: whisperlivekit/local_agreement/online_asr.py class HypothesisBuffer (line 11) | class HypothesisBuffer: method __init__ (line 20) | def __init__(self, logfile=sys.stderr, confidence_validation=False): method insert (line 29) | def insert(self, new_tokens: List[ASRToken], offset: float): method flush (line 59) | def flush(self) -> List[ASRToken]: method pop_committed (line 88) | def pop_committed(self, time: float): class OnlineASRProcessor (line 97) | class OnlineASRProcessor: method __init__ (line 108) | def __init__( method new_speaker (line 139) | def new_speaker(self, change_speaker): method init (line 144) | def init(self, offset: Optional[float] = None): method get_audio_buffer_end_time (line 153) | def get_audio_buffer_end_time(self) -> float: method insert_audio_chunk (line 157) | def insert_audio_chunk(self, audio: np.ndarray, audio_stream_end_time:... method start_silence (line 161) | def start_silence(self): method end_silence (line 166) | def end_silence(self, silence_duration: Optional[float], offset: float): method insert_silence (line 181) | def insert_silence(self, silence_duration, offset): method prompt (line 187) | def prompt(self) -> Tuple[str, str]: method get_buffer (line 211) | def get_buffer(self): method process_iter (line 218) | def process_iter(self) -> Tuple[List[ASRToken], float]: method chunk_completed_sentence (line 267) | def chunk_completed_sentence(self): method chunk_completed_segment (line 300) | def chunk_completed_segment(self, res): method chunk_at (line 338) | def chunk_at(self, time: float): method words_to_sentences (line 354) | def words_to_sentences(self, tokens: List[ASRToken]) -> List[Sentence]: method finish (line 399) | def finish(self) -> Tuple[List[ASRToken], float]: method concatenate_tokens (line 410) | def concatenate_tokens( FILE: whisperlivekit/local_agreement/whisper_online.py function create_tokenizer (line 20) | def create_tokenizer(lan): function backend_factory (line 67) | def backend_factory( function _normalize_backend_choice (line 163) | def _normalize_backend_choice( FILE: whisperlivekit/metrics.py function normalize_text (line 12) | def normalize_text(text: str) -> str: function compute_wer (line 24) | def compute_wer(reference: str, hypothesis: str) -> Dict: function compute_timestamp_accuracy (line 85) | def compute_timestamp_accuracy( FILE: whisperlivekit/metrics_collector.py class SessionMetrics (line 16) | class SessionMetrics: method rtf (line 39) | def rtf(self) -> float: method avg_latency_ms (line 46) | def avg_latency_ms(self) -> float: method p95_latency_ms (line 53) | def p95_latency_ms(self) -> float: method to_dict (line 62) | def to_dict(self) -> Dict: method log_summary (line 79) | def log_summary(self) -> None: FILE: whisperlivekit/model_paths.py class ModelInfo (line 9) | class ModelInfo: method has_pytorch (line 17) | def has_pytorch(self) -> bool: method is_sharded (line 21) | def is_sharded(self) -> bool: method primary_pytorch_file (line 25) | def primary_pytorch_file(self) -> Optional[Path]: function _is_ct2_model_bin (line 40) | def _is_ct2_model_bin(directory: Path, filename: str) -> bool: function _collect_pytorch_files (line 68) | def _collect_pytorch_files(directory: Path) -> List[Path]: function detect_model_format (line 135) | def detect_model_format(model_path: Union[str, Path]) -> ModelInfo: function model_path_and_type (line 180) | def model_path_and_type(model_path: Union[str, Path]) -> Tuple[Optional[... function resolve_model_path (line 195) | def resolve_model_path(model_path: Union[str, Path]) -> Path: FILE: whisperlivekit/parse_args.py function parse_args (line 5) | def parse_args(): FILE: whisperlivekit/qwen3_asr.py function _patch_transformers_compat (line 14) | def _patch_transformers_compat(): class Qwen3ASR (line 126) | class Qwen3ASR(ASRBase): method __init__ (line 132) | def __init__(self, lan="auto", model_size=None, cache_dir=None, method load_model (line 139) | def load_model(self, model_size=None, cache_dir=None, model_dir=None): method _qwen3_language (line 168) | def _qwen3_language(self) -> Optional[str]: method transcribe (line 173) | def transcribe(self, audio: np.ndarray, init_prompt: str = ""): method _detected_language (line 200) | def _detected_language(result) -> Optional[str]: method ts_words (line 211) | def ts_words(self, result) -> List[ASRToken]: method segments_end_ts (line 245) | def segments_end_ts(self, result) -> List[float]: method use_vad (line 259) | def use_vad(self): FILE: whisperlivekit/qwen3_mlx_asr.py class Qwen3MLXASR (line 60) | class Qwen3MLXASR: method __init__ (line 67) | def __init__(self, logfile=sys.stderr, **kwargs): method transcribe (line 96) | def transcribe(self, audio): class Qwen3MLXOnlineProcessor (line 105) | class Qwen3MLXOnlineProcessor: method __init__ (line 123) | def __init__(self, asr: Qwen3MLXASR, logfile=sys.stderr): method insert_audio_chunk (line 155) | def insert_audio_chunk(self, audio: np.ndarray, audio_stream_end_time:... method _transcribe_buffer (line 162) | def _transcribe_buffer(self) -> List[ASRToken]: method _local_agreement (line 209) | def _local_agreement(self, new_tokens: List[ASRToken]) -> List[ASRToken]: method _trim_buffer_if_needed (line 260) | def _trim_buffer_if_needed(self): method process_iter (line 292) | def process_iter(self, is_last=False) -> Tuple[List[ASRToken], float]: method get_buffer (line 324) | def get_buffer(self) -> Transcript: method _flush_all (line 335) | def _flush_all(self) -> List[ASRToken]: method _reset_for_new_utterance (line 355) | def _reset_for_new_utterance(self): method start_silence (line 368) | def start_silence(self) -> Tuple[List[ASRToken], float]: method end_silence (line 379) | def end_silence(self, silence_duration: float, offset: float): method new_speaker (line 383) | def new_speaker(self, change_speaker): method warmup (line 386) | def warmup(self, audio, init_prompt=""): method finish (line 389) | def finish(self) -> Tuple[List[ASRToken], float]: FILE: whisperlivekit/qwen3_mlx_simul.py class Qwen3MLXSimulConfig (line 67) | class Qwen3MLXSimulConfig: class _SessionState (line 84) | class _SessionState: class Qwen3MLXSimulStreamingASR (line 104) | class Qwen3MLXSimulStreamingASR: method __init__ (line 111) | def __init__( method _load_alignment_heads (line 187) | def _load_alignment_heads( method _warmup (line 216) | def _warmup(self, audio: np.ndarray): method transcribe (line 236) | def transcribe(self, audio): class _AttnCaptureWrapper (line 245) | class _AttnCaptureWrapper: method __init__ (line 259) | def __init__(self, original, layer_idx, head_indices, gqa_ratio, method __call__ (line 270) | def __call__(self, x, cos, sin, mask=None, cache=None, layer_idx=0): method __getattr__ (line 305) | def __getattr__(self, name): function _install_alignment_hooks (line 309) | def _install_alignment_hooks(model, heads_by_layer, gqa_ratio, audio_sta... function _remove_alignment_hooks (line 329) | def _remove_alignment_hooks(model, originals): class Qwen3MLXSimulStreamingOnlineProcessor (line 340) | class Qwen3MLXSimulStreamingOnlineProcessor: method __init__ (line 351) | def __init__(self, asr: Qwen3MLXSimulStreamingASR, logfile=sys.stderr): method speaker (line 361) | def speaker(self): method speaker (line 365) | def speaker(self, value): method global_time_offset (line 369) | def global_time_offset(self): method global_time_offset (line 373) | def global_time_offset(self, value): method insert_audio_chunk (line 378) | def insert_audio_chunk(self, audio: np.ndarray, audio_stream_end_time:... method process_iter (line 392) | def process_iter(self, is_last=False) -> Tuple[List[ASRToken], float]: method _infer (line 416) | def _infer(self, is_last: bool) -> List[ASRToken]: method _build_timestamped_words (line 625) | def _build_timestamped_words( method start_silence (line 697) | def start_silence(self) -> Tuple[List[ASRToken], float]: method end_silence (line 706) | def end_silence(self, silence_duration: float, offset: float): method new_speaker (line 720) | def new_speaker(self, change_speaker): method get_buffer (line 726) | def get_buffer(self) -> Transcript: method warmup (line 729) | def warmup(self, audio: np.ndarray, init_prompt: str = ""): method finish (line 739) | def finish(self) -> Tuple[List[ASRToken], float]: FILE: whisperlivekit/qwen3_simul.py class Qwen3SimulConfig (line 52) | class Qwen3SimulConfig: class _AudioEmbedCache (line 70) | class _AudioEmbedCache: method trim_front (line 106) | def trim_front(self, trim_samples: int, sample_rate: int = 16000): method reset (line 119) | def reset(self): class Qwen3SimulState (line 128) | class Qwen3SimulState: class Qwen3SimulStreamingASR (line 154) | class Qwen3SimulStreamingASR: method __init__ (line 164) | def __init__( method _load_model (line 204) | def _load_model(self, model_size, model_dir, model_cache_dir, model_pa... method _load_alignment_heads (line 266) | def _load_alignment_heads( method _warmup (line 303) | def _warmup(self, audio: np.ndarray): method transcribe (line 330) | def transcribe(self, audio): class Qwen3SimulStreamingOnlineProcessor (line 335) | class Qwen3SimulStreamingOnlineProcessor: method __init__ (line 351) | def __init__(self, asr: Qwen3SimulStreamingASR, logfile=sys.stderr): method _build_prompt_template (line 363) | def _build_prompt_template(self): method speaker (line 382) | def speaker(self): method speaker (line 386) | def speaker(self, value): method global_time_offset (line 390) | def global_time_offset(self): method global_time_offset (line 394) | def global_time_offset(self, value): method insert_audio_chunk (line 397) | def insert_audio_chunk(self, audio: np.ndarray, audio_stream_end_time:... method start_silence (line 413) | def start_silence(self) -> Tuple[List[ASRToken], float]: method end_silence (line 427) | def end_silence(self, silence_duration: float, offset: float): method new_speaker (line 443) | def new_speaker(self, change_speaker: ChangeSpeaker): method get_buffer (line 450) | def get_buffer(self) -> Transcript: method _encode_audio_cached (line 454) | def _encode_audio_cached(self) -> Optional[torch.Tensor]: method _build_inputs_with_cached_audio (line 604) | def _build_inputs_with_cached_audio( method process_iter (line 697) | def process_iter(self, is_last=False) -> Tuple[List[ASRToken], float]: method _infer (line 737) | def _infer(self, is_last: bool) -> List[ASRToken]: method _build_timestamped_words (line 1085) | def _build_timestamped_words( method _median_frame (line 1164) | def _median_frame(frames: List[int]) -> Optional[int]: method warmup (line 1171) | def warmup(self, audio: np.ndarray, init_prompt: str = ""): method finish (line 1182) | def finish(self) -> Tuple[List[ASRToken], float]: FILE: whisperlivekit/qwen3_simul_kv.py class Qwen3SimulKVConfig (line 36) | class Qwen3SimulKVConfig: class _AudioEmbedCache (line 52) | class _AudioEmbedCache: method reset (line 59) | def reset(self): class Qwen3SimulKVState (line 67) | class Qwen3SimulKVState: method reset_kv (line 98) | def reset_kv(self): class Qwen3SimulKVASR (line 110) | class Qwen3SimulKVASR: method __init__ (line 117) | def __init__( method _load_model (line 156) | def _load_model(self, model_size, model_dir, model_cache_dir, model_pa... method _load_alignment_heads (line 208) | def _load_alignment_heads(self, path): method _warmup (line 225) | def _warmup(self, audio): method transcribe (line 238) | def transcribe(self, audio): class Qwen3SimulKVOnlineProcessor (line 242) | class Qwen3SimulKVOnlineProcessor: method __init__ (line 254) | def __init__(self, asr: Qwen3SimulKVASR, logfile=sys.stderr): method _build_prompt_template (line 262) | def _build_prompt_template(self): method speaker (line 277) | def speaker(self): method speaker (line 281) | def speaker(self, value): method global_time_offset (line 285) | def global_time_offset(self): method global_time_offset (line 289) | def global_time_offset(self, value): method insert_audio_chunk (line 292) | def insert_audio_chunk(self, audio: np.ndarray, audio_stream_end_time:... method start_silence (line 305) | def start_silence(self) -> Tuple[List[ASRToken], float]: method end_silence (line 314) | def end_silence(self, silence_duration: float, offset: float): method new_speaker (line 327) | def new_speaker(self, change_speaker: ChangeSpeaker): method get_buffer (line 333) | def get_buffer(self) -> Transcript: method _encode_audio (line 336) | def _encode_audio(self) -> Tuple[torch.Tensor, int]: method _build_full_inputs (line 415) | def _build_full_inputs(self, audio_embeds: torch.Tensor) -> dict: method process_iter (line 475) | def process_iter(self, is_last=False) -> Tuple[List[ASRToken], float]: method _infer (line 500) | def _infer(self, is_last: bool) -> List[ASRToken]: method _build_timestamped_words (line 710) | def _build_timestamped_words( method warmup (line 775) | def warmup(self, audio: np.ndarray, init_prompt: str = ""): method finish (line 784) | def finish(self) -> Tuple[List[ASRToken], float]: FILE: whisperlivekit/session_asr_proxy.py class SessionASRProxy (line 10) | class SessionASRProxy: method __init__ (line 22) | def __init__(self, asr, language: str): method __getattr__ (line 30) | def __getattr__(self, name): method transcribe (line 33) | def transcribe(self, audio, init_prompt=""): FILE: whisperlivekit/silero_vad_iterator.py function is_onnx_available (line 11) | def is_onnx_available() -> bool: function init_jit_model (line 20) | def init_jit_model(model_path: str, device=torch.device('cpu')): class OnnxSession (line 27) | class OnnxSession(): method __init__ (line 32) | def __init__(self, path, force_onnx_cpu=False): class OnnxWrapper (line 52) | class OnnxWrapper(): method __init__ (line 57) | def __init__(self, session: OnnxSession, force_onnx_cpu=False): method session (line 63) | def session(self): method _validate_input (line 66) | def _validate_input(self, x, sr: int): method reset_states (line 84) | def reset_states(self, batch_size=1): method __call__ (line 90) | def __call__(self, x, sr: int): function _get_onnx_model_path (line 128) | def _get_onnx_model_path(model_path: str = None, opset_version: int = 16... function load_onnx_session (line 156) | def load_onnx_session(model_path: str = None, opset_version: int = 16, f... function load_jit_vad (line 164) | def load_jit_vad(model_path: str = None): class VADIterator (line 188) | class VADIterator: method __init__ (line 195) | def __init__(self, method reset_states (line 235) | def reset_states(self): method __call__ (line 243) | def __call__(self, x, return_seconds=False, time_resolution: int = 1): class FixedVADIterator (line 288) | class FixedVADIterator(VADIterator): method reset_states (line 293) | def reset_states(self): method __call__ (line 297) | def __call__(self, x, return_seconds=False): FILE: whisperlivekit/simul_whisper/align_att_base.py class AlignAttBase (line 14) | class AlignAttBase(ABC): method speaker (line 30) | def speaker(self): method speaker (line 34) | def speaker(self, value): method global_time_offset (line 38) | def global_time_offset(self): method global_time_offset (line 42) | def global_time_offset(self, value): method _base_init (line 47) | def _base_init(self, cfg: AlignAttConfig, model): method _init_state_common (line 64) | def _init_state_common(self, cfg: AlignAttConfig): method warmup (line 75) | def warmup(self, audio): method create_tokenizer (line 84) | def create_tokenizer(self, language=None): method trim_context (line 93) | def trim_context(self): method refresh_segment (line 108) | def refresh_segment(self, complete=False): method segments_len (line 124) | def segments_len(self): method _apply_minseglen (line 127) | def _apply_minseglen(self): method _clean_cache (line 134) | def _clean_cache(self): method debug_print_tokens (line 137) | def debug_print_tokens(self, tokens): method _detect_language_if_needed (line 143) | def _detect_language_if_needed(self, encoder_feature): method infer (line 164) | def infer(self, is_last=False): method _split_tokens (line 309) | def _split_tokens(self, tokens_list, fire_detected, is_last): method _build_timestamped_words (line 322) | def _build_timestamped_words(self, split_words, split_tokens, l_absolu... method _handle_pending_tokens (line 360) | def _handle_pending_tokens(self, split_words, split_tokens): method _apply_dry_penalty (line 394) | def _apply_dry_penalty(self, logits, current_tokens): method _init_state (line 444) | def _init_state(self, cfg: AlignAttConfig): method init_tokens (line 449) | def init_tokens(self): method init_context (line 454) | def init_context(self): method insert_audio (line 459) | def insert_audio(self, segment=None): method _current_tokens (line 464) | def _current_tokens(self): method fire_at_boundary (line 469) | def fire_at_boundary(self, feature): method lang_id (line 474) | def lang_id(self, encoder_features): method _concat_segments (line 479) | def _concat_segments(self): method _encode (line 484) | def _encode(self, input_segments): method _init_sum_logprobs (line 489) | def _init_sum_logprobs(self): method _get_logits_and_cross_attn (line 494) | def _get_logits_and_cross_attn(self, tokens, encoder_feature): method _check_no_speech (line 499) | def _check_no_speech(self, logits): method _suppress_blank_tokens (line 504) | def _suppress_blank_tokens(self, logits): method _apply_token_suppression (line 509) | def _apply_token_suppression(self, logits): method _update_tokens (line 514) | def _update_tokens(self, current_tokens, logits, sum_logprobs): method _process_cross_attention (line 519) | def _process_cross_attention(self, accumulated_cross_attns, content_me... method _get_attended_frames (line 524) | def _get_attended_frames(self, attn): method _is_special_token (line 529) | def _is_special_token(self, current_tokens): method _rewind_tokens (line 534) | def _rewind_tokens(self): method _tokens_to_list (line 539) | def _tokens_to_list(self, current_tokens, start_col): method _make_new_tokens_tensor (line 544) | def _make_new_tokens_tensor(self, hypothesis): method _evaluate (line 549) | def _evaluate(self, tensor): FILE: whisperlivekit/simul_whisper/backend.py class SimulStreamingOnlineProcessor (line 36) | class SimulStreamingOnlineProcessor: method __init__ (line 40) | def __init__(self, asr, logfile=sys.stderr): method _create_alignatt (line 51) | def _create_alignatt(self): method start_silence (line 63) | def start_silence(self): method end_silence (line 67) | def end_silence(self, silence_duration, offset): method insert_audio_chunk (line 83) | def insert_audio_chunk(self, audio: np.ndarray, audio_stream_end_time): method new_speaker (line 92) | def new_speaker(self, change_speaker: ChangeSpeaker): method get_buffer (line 99) | def get_buffer(self): method process_iter (line 103) | def process_iter(self, is_last=False) -> Tuple[List[ASRToken], float]: method warmup (line 125) | def warmup(self, audio, init_prompt=""): method __del__ (line 139) | def __del__(self): class SimulStreamingASR (line 148) | class SimulStreamingASR: method __init__ (line 152) | def __init__(self, logfile=sys.stderr, **kwargs): method _warmup_mlx_model (line 272) | def _warmup_mlx_model(self): method _resolve_encoder_backend (line 284) | def _resolve_encoder_backend(self, preferred_backend, compatible_whisp... method _has_custom_model_path (line 307) | def _has_custom_model_path(self): method _can_use_mlx (line 310) | def _can_use_mlx(self, compatible_whisper_mlx): method _can_use_faster (line 317) | def _can_use_faster(self, compatible_faster_whisper): method load_model (line 324) | def load_model(self): method set_translate_task (line 349) | def set_translate_task(self): method transcribe (line 360) | def transcribe(self, audio): FILE: whisperlivekit/simul_whisper/beam.py class BeamPyTorchInference (line 6) | class BeamPyTorchInference(PyTorchInference): method _kv_cache_ids (line 9) | def _kv_cache_ids(self): method rearrange_kv_cache (line 15) | def rearrange_kv_cache(self, source_indices): method logits (line 21) | def logits( FILE: whisperlivekit/simul_whisper/config.py class AlignAttConfig (line 6) | class AlignAttConfig(): FILE: whisperlivekit/simul_whisper/decoder_state.py class DecoderState (line 8) | class DecoderState: method clean_cache (line 50) | def clean_cache(self): method reset (line 73) | def reset(self, rewind_threshold: int = 200): method full_reset (line 86) | def full_reset(self, rewind_threshold: int = 200): FILE: whisperlivekit/simul_whisper/eow_detection.py function load_cif (line 5) | def load_cif(cfg, n_audio_state, device): function resize (line 25) | def resize(alphas, target_lengths, threshold=0.999): function fire_at_boundary (line 50) | def fire_at_boundary(chunked_encoder_feature: torch.Tensor, cif_linear): FILE: whisperlivekit/simul_whisper/mlx/decoder_state.py class MLXDecoderState (line 9) | class MLXDecoderState: method clean_cache (line 52) | def clean_cache(self): method reset (line 59) | def reset(self, rewind_threshold: int = 200): method full_reset (line 66) | def full_reset(self, rewind_threshold: int = 200): FILE: whisperlivekit/simul_whisper/mlx/decoders.py class MLXGreedyDecoder (line 10) | class MLXGreedyDecoder: method __init__ (line 13) | def __init__(self, temperature: float, eot: int): method update (line 17) | def update( method finalize (line 50) | def finalize(self, tokens: mx.array, sum_logprobs: mx.array): class MLXBeamSearchDecoder (line 57) | class MLXBeamSearchDecoder: method __init__ (line 60) | def __init__( method reset (line 78) | def reset(self): method update (line 82) | def update( method finalize (line 156) | def finalize(self, preceding_tokens: mx.array, sum_logprobs: mx.array): class MLXInference (line 182) | class MLXInference: method __init__ (line 185) | def __init__(self, model, initial_token_length: int): method rearrange_kv_cache (line 190) | def rearrange_kv_cache(self, source_indices: List[int]): method logits (line 209) | def logits( FILE: whisperlivekit/simul_whisper/mlx/simul_whisper.py class MLXTokenBuffer (line 20) | class MLXTokenBuffer: method __init__ (line 23) | def __init__(self, text="", tokenizer=None, prefix_token_ids=None): method as_token_ids (line 29) | def as_token_ids(self, tokenizer=None): method as_mlx_array (line 36) | def as_mlx_array(self) -> mx.array: method as_mlx_array_beam (line 40) | def as_mlx_array_beam(self, beam: int) -> mx.array: method as_text (line 44) | def as_text(self): method empty (line 48) | def empty(*a, **kw): method from_text (line 52) | def from_text(text, *a, **kw): method is_empty (line 55) | def is_empty(self): method trim_words (line 58) | def trim_words(self, num=1, after=0): method append_token_ids (line 68) | def append_token_ids(self, token_ids): function mlx_median_filter (line 89) | def mlx_median_filter(x: mx.array, filter_width: int) -> mx.array: class MLXAlignAtt (line 107) | class MLXAlignAtt(AlignAttBase): method __init__ (line 114) | def __init__( method _init_state (line 127) | def _init_state(self, cfg: AlignAttConfig): method _build_alignment_source (line 178) | def _build_alignment_source(self): method init_tokens (line 200) | def init_tokens(self): method init_context (line 211) | def init_context(self): method insert_audio (line 222) | def insert_audio(self, segment=None): method _current_tokens (line 245) | def _current_tokens(self) -> mx.array: method fire_at_boundary (line 260) | def fire_at_boundary(self, chunked_encoder_feature: mx.array) -> bool: method lang_id (line 267) | def lang_id(self, encoder_features: mx.array) -> Tuple[mx.array, List[... method _concat_segments (line 296) | def _concat_segments(self): method _encode (line 301) | def _encode(self, input_segments): method _init_sum_logprobs (line 312) | def _init_sum_logprobs(self): method _get_logits_and_cross_attn (line 315) | def _get_logits_and_cross_attn(self, tokens, encoder_feature): method _check_no_speech (line 324) | def _check_no_speech(self, logits): method _suppress_blank_tokens (line 335) | def _suppress_blank_tokens(self, logits): method _apply_token_suppression (line 340) | def _apply_token_suppression(self, logits): method _update_tokens (line 348) | def _update_tokens(self, current_tokens, logits, sum_logprobs): method _process_cross_attention (line 351) | def _process_cross_attention( method _get_attended_frames (line 398) | def _get_attended_frames(self, attn): method _is_special_token (line 403) | def _is_special_token(self, current_tokens): method _rewind_tokens (line 406) | def _rewind_tokens(self): method _tokens_to_list (line 411) | def _tokens_to_list(self, current_tokens, start_col): method _make_new_tokens_tensor (line 414) | def _make_new_tokens_tensor(self, hypothesis): method _evaluate (line 418) | def _evaluate(self, tensor): FILE: whisperlivekit/simul_whisper/mlx_encoder.py function load_mlx_encoder (line 14) | def load_mlx_encoder( function load_mlx_model (line 62) | def load_mlx_model( FILE: whisperlivekit/simul_whisper/simul_whisper.py function load_coreml_encoder (line 34) | def load_coreml_encoder(): class AlignAtt (line 51) | class AlignAtt(AlignAttBase): method __init__ (line 59) | def __init__( method _init_state (line 86) | def _init_state(self, cfg: AlignAttConfig): method init_tokens (line 139) | def init_tokens(self): method init_context (line 150) | def init_context(self): method insert_audio (line 162) | def insert_audio(self, segment=None): method _current_tokens (line 182) | def _current_tokens(self): method fire_at_boundary (line 199) | def fire_at_boundary(self, chunked_encoder_feature: torch.Tensor): method lang_id (line 207) | def lang_id(self, encoder_features): method _concat_segments (line 234) | def _concat_segments(self): method _encode (line 239) | def _encode(self, input_segments): method _init_sum_logprobs (line 305) | def _init_sum_logprobs(self): method _get_logits_and_cross_attn (line 308) | def _get_logits_and_cross_attn(self, tokens, encoder_feature): method _check_no_speech (line 321) | def _check_no_speech(self, logits): method _suppress_blank_tokens (line 330) | def _suppress_blank_tokens(self, logits): method _apply_token_suppression (line 334) | def _apply_token_suppression(self, logits): method _update_tokens (line 338) | def _update_tokens(self, current_tokens, logits, sum_logprobs): method _process_cross_attention (line 341) | def _process_cross_attention( method _get_attended_frames (line 386) | def _get_attended_frames(self, attn): method _is_special_token (line 390) | def _is_special_token(self, current_tokens): method _rewind_tokens (line 393) | def _rewind_tokens(self): method _tokens_to_list (line 398) | def _tokens_to_list(self, current_tokens, start_col): method _make_new_tokens_tensor (line 401) | def _make_new_tokens_tensor(self, hypothesis): method _evaluate (line 408) | def _evaluate(self, tensor): method infer (line 412) | def infer(self, is_last=False): FILE: whisperlivekit/simul_whisper/token_buffer.py class TokenBuffer (line 5) | class TokenBuffer: method __init__ (line 7) | def __init__(self, text="", tokenizer=None, device=None, prefix_token_... method as_token_ids (line 14) | def as_token_ids(self, tokenizer=None): method as_tensor (line 22) | def as_tensor(self, device=None): method as_tensor_beam (line 31) | def as_tensor_beam(self, beam, device=None): method as_text (line 36) | def as_text(self): method empty (line 40) | def empty(*a, **kw): method from_text (line 44) | def from_text(text, *a, **kw): method is_empty (line 47) | def is_empty(self): method trim_words (line 50) | def trim_words(self, num=1, after=0): method append_token_ids (line 67) | def append_token_ids(self, token_ids): method as_split_word_tokens (line 91) | def as_split_word_tokens(self): FILE: whisperlivekit/test_client.py class TranscriptionResult (line 39) | class TranscriptionResult: method text (line 46) | def text(self) -> str: method committed_text (line 61) | def committed_text(self) -> str: method lines (line 72) | def lines(self) -> List[dict]: method n_updates (line 80) | def n_updates(self) -> int: function reconstruct_state (line 88) | def reconstruct_state(msg: dict, lines: List[dict]) -> dict: function load_audio_pcm (line 117) | def load_audio_pcm(audio_path: str, sample_rate: int = SAMPLE_RATE) -> b... function transcribe_audio (line 137) | async def transcribe_audio( function _print_result (line 268) | def _print_result(result: TranscriptionResult, output_json: bool = False... function main (line 302) | def main(): FILE: whisperlivekit/test_data.py class TestSample (line 46) | class TestSample: method has_timestamps (line 61) | def has_timestamps(self) -> bool: function _save_wav (line 65) | def _save_wav(path: Path, audio: np.ndarray, sample_rate: int = 16000) -... function _load_metadata (line 85) | def _load_metadata() -> Dict: function _save_metadata (line 93) | def _save_metadata(meta: Dict) -> None: function _ensure_datasets (line 98) | def _ensure_datasets(): function _decode_audio (line 110) | def _decode_audio(audio_bytes: bytes) -> tuple: function _download_librispeech_samples (line 127) | def _download_librispeech_samples(n_samples: int = 3) -> List[Dict]: function _download_ami_sample (line 181) | def _download_ami_sample() -> List[Dict]: function download_test_samples (line 271) | def download_test_samples(force: bool = False) -> List[TestSample]: function get_samples (line 323) | def get_samples() -> List[TestSample]: function get_sample (line 328) | def get_sample(name: str) -> TestSample: function list_sample_names (line 345) | def list_sample_names() -> List[str]: function _meta_to_samples (line 350) | def _meta_to_samples(meta_list: List[Dict]) -> List[TestSample]: FILE: whisperlivekit/test_harness.py function _parse_time (line 63) | def _parse_time(time_str: str) -> float: function load_audio_pcm (line 73) | def load_audio_pcm(audio_path: str, sample_rate: int = SAMPLE_RATE) -> b... class TestState (line 95) | class TestState: method from_front_data (line 115) | def from_front_data(cls, front_data: FrontData, audio_position: float ... method text (line 132) | def text(self) -> str: method committed_text (line 140) | def committed_text(self) -> str: method committed_word_count (line 145) | def committed_word_count(self) -> int: method buffer_word_count (line 151) | def buffer_word_count(self) -> int: method speakers (line 158) | def speakers(self) -> Set[int]: method n_speakers (line 163) | def n_speakers(self) -> int: method speaker_at (line 166) | def speaker_at(self, time_s: float) -> Optional[int]: method speakers_in (line 171) | def speakers_in(self, start_s: float, end_s: float) -> Set[int]: method speaker_timeline (line 180) | def speaker_timeline(self) -> List[Dict[str, Any]]: method n_speaker_changes (line 192) | def n_speaker_changes(self) -> int: method has_silence (line 203) | def has_silence(self) -> bool: method silence_segments (line 208) | def silence_segments(self) -> List[Dict[str, Any]]: method silence_at (line 212) | def silence_at(self, time_s: float) -> bool: method speech_lines (line 220) | def speech_lines(self) -> List[Dict[str, Any]]: method line_at (line 224) | def line_at(self, time_s: float) -> Optional[Dict[str, Any]]: method text_at (line 233) | def text_at(self, time_s: float) -> Optional[str]: method lines_between (line 238) | def lines_between(self, start_s: float, end_s: float) -> List[Dict[str... method text_between (line 248) | def text_between(self, start_s: float, end_s: float) -> str: method wer (line 257) | def wer(self, reference: str) -> float: method wer_detailed (line 267) | def wer_detailed(self, reference: str) -> Dict: method timestamps (line 275) | def timestamps(self) -> List[Dict[str, Any]]: method timing_valid (line 288) | def timing_valid(self) -> bool: method timing_monotonic (line 298) | def timing_monotonic(self) -> bool: method timing_errors (line 306) | def timing_errors(self) -> List[str]: class AudioPlayer (line 332) | class AudioPlayer: method __init__ (line 349) | def __init__(self, harness: "TestHarness", pcm_data: bytes, sample_rat... method position (line 357) | def position(self) -> float: method duration (line 362) | def duration(self) -> float: method remaining (line 367) | def remaining(self) -> float: method done (line 372) | def done(self) -> bool: method play (line 376) | async def play( method play_until (line 404) | async def play_until( method seek (line 421) | def seek(self, time_s: float) -> None: method reset (line 427) | def reset(self) -> None: class TestHarness (line 436) | class TestHarness: method __init__ (line 462) | def __init__(self, **kwargs: Any): method __aenter__ (line 473) | async def __aenter__(self) -> "TestHarness": method __aexit__ (line 493) | async def __aexit__(self, *exc: Any) -> None: method _collect_results (line 503) | async def _collect_results(self) -> None: method state (line 519) | def state(self) -> TestState: method history (line 524) | def history(self) -> List[TestState]: method audio_position (line 529) | def audio_position(self) -> float: method metrics (line 534) | def metrics(self): method on_update (line 540) | def on_update(self, callback: Callable[[TestState], None]) -> None: method load_audio (line 546) | def load_audio(self, source) -> AudioPlayer: method feed (line 559) | async def feed( method feed_pcm (line 577) | async def feed_pcm( method pause (line 603) | async def pause(self, duration_s: float, speed: float = 1.0) -> None: method silence (line 617) | async def silence(self, duration_s: float, speed: float = 1.0) -> None: method wait_for (line 623) | async def wait_for( method wait_for_text (line 646) | async def wait_for_text(self, timeout: float = 30.0) -> TestState: method wait_for_lines (line 650) | async def wait_for_lines(self, n: int = 1, timeout: float = 30.0) -> T... method wait_for_silence (line 654) | async def wait_for_silence(self, timeout: float = 30.0) -> TestState: method wait_for_speakers (line 658) | async def wait_for_speakers(self, n: int = 2, timeout: float = 30.0) -... method drain (line 662) | async def drain(self, seconds: float = 2.0) -> None: method finish (line 671) | async def finish(self, timeout: float = 30.0) -> TestState: method cut (line 687) | async def cut(self, timeout: float = 5.0) -> TestState: method snapshot_at (line 707) | def snapshot_at(self, audio_time: float) -> Optional[TestState]: method print_state (line 729) | def print_state(self) -> None: FILE: whisperlivekit/thread_safety.py function get_model_lock (line 44) | def get_model_lock(): function acquire_model_lock (line 49) | def acquire_model_lock(timeout=None): function release_model_lock (line 71) | def release_model_lock(): class ModelLockContext (line 83) | class ModelLockContext: method __init__ (line 86) | def __init__(self, timeout=None): method __enter__ (line 90) | def __enter__(self): method __exit__ (line 94) | def __exit__(self, exc_type, exc_val, exc_tb): function print_deployment_recommendations (line 104) | def print_deployment_recommendations(): FILE: whisperlivekit/timed_objects.py function format_time (line 6) | def format_time(seconds: float) -> str: class Timed (line 18) | class Timed: class TimedText (line 23) | class TimedText(Timed): method has_punctuation (line 28) | def has_punctuation(self) -> bool: method is_within (line 31) | def is_within(self, other: 'TimedText') -> bool: method duration (line 34) | def duration(self) -> float: method contains_timespan (line 37) | def contains_timespan(self, other: 'TimedText') -> bool: method __bool__ (line 40) | def __bool__(self) -> bool: method __str__ (line 43) | def __str__(self) -> str: class ASRToken (line 47) | class ASRToken(TimedText): method with_offset (line 50) | def with_offset(self, offset: float) -> "ASRToken": method is_silence (line 54) | def is_silence(self) -> bool: class Sentence (line 59) | class Sentence(TimedText): class Transcript (line 63) | class Transcript(TimedText): method from_tokens (line 69) | def from_tokens( class SpeakerSegment (line 88) | class SpeakerSegment(Timed): class Translation (line 96) | class Translation(TimedText): class Silence (line 100) | class Silence(): method compute_duration (line 107) | def compute_duration(self) -> Optional[float]: method is_silence (line 113) | def is_silence(self) -> bool: class Segment (line 118) | class Segment(TimedText): method from_tokens (line 128) | def from_tokens( method is_silence (line 155) | def is_silence(self) -> bool: method to_dict (line 159) | def to_dict(self) -> Dict[str, Any]: class PuncSegment (line 175) | class PuncSegment(Segment): class SilentSegment (line 178) | class SilentSegment(Segment): method __init__ (line 179) | def __init__(self, *args: Any, **kwargs: Any) -> None: class FrontData (line 186) | class FrontData(): method to_dict (line 196) | def to_dict(self) -> Dict[str, Any]: class ChangeSpeaker (line 212) | class ChangeSpeaker: class State (line 217) | class State(): FILE: whisperlivekit/tokens_alignment.py class TokensAlignment (line 17) | class TokensAlignment: method __init__ (line 19) | def __init__(self, state: Any, args: Any, sep: Optional[str]) -> None: method update (line 45) | def update(self) -> None: method _prune (line 57) | def _prune(self) -> None: method add_translation (line 90) | def add_translation(self, segment: Segment) -> None: method compute_punctuations_segments (line 102) | def compute_punctuations_segments(self, tokens: Optional[List[ASRToken... method compute_new_punctuations_segments (line 134) | def compute_new_punctuations_segments(self) -> List[PuncSegment]: method concatenate_diar_segments (line 163) | def concatenate_diar_segments(self) -> List[SpeakerSegment]: method intersection_duration (line 177) | def intersection_duration(seg1: TimedText, seg2: TimedText) -> float: method get_lines_diarization (line 184) | def get_lines_diarization(self) -> Tuple[List[Segment], str]: method get_lines (line 217) | def get_lines( FILE: whisperlivekit/vllm_realtime.py class VLLMRealtimeASR (line 27) | class VLLMRealtimeASR: method __init__ (line 34) | def __init__(self, vllm_url="ws://localhost:8000/v1/realtime", method transcribe (line 41) | def transcribe(self, audio): class VLLMRealtimeOnlineProcessor (line 45) | class VLLMRealtimeOnlineProcessor: method __init__ (line 57) | def __init__(self, asr: VLLMRealtimeASR): method _reset_state (line 70) | def _reset_state(self): method insert_audio_chunk (line 89) | def insert_audio_chunk(self, audio: np.ndarray, audio_stream_end_time:... method process_iter (line 94) | def process_iter(self, is_last=False) -> Tuple[List[ASRToken], float]: method get_buffer (line 101) | def get_buffer(self) -> Transcript: method start_silence (line 115) | def start_silence(self) -> Tuple[List[ASRToken], float]: method end_silence (line 148) | def end_silence(self, silence_duration: float, offset: float): method new_speaker (line 152) | def new_speaker(self, change_speaker): method warmup (line 155) | def warmup(self, audio, init_prompt=""): method finish (line 158) | def finish(self) -> Tuple[List[ASRToken], float]: method _connect (line 181) | def _connect(self): method _close_ws (line 206) | def _close_ws(self): method _recv_loop (line 219) | def _recv_loop(self): method _send_commit (line 259) | def _send_commit(self, final: bool): method _send_audio (line 271) | def _send_audio(self, audio: np.ndarray): method _send_pending_audio (line 289) | def _send_pending_audio(self): method _drain_deltas (line 313) | def _drain_deltas(self): method _wait_for_done (line 317) | def _wait_for_done(self, timeout: float = 10.0): method _time_for_word (line 328) | def _time_for_word(self, word_idx: int, n_words_total: int) -> Tuple[f... method _extract_new_words (line 338) | def _extract_new_words(self) -> List[ASRToken]: method _flush_all_pending_words (line 359) | def _flush_all_pending_words(self) -> List[ASRToken]: method _process_iter_inner (line 382) | def _process_iter_inner(self, is_last: bool) -> Tuple[List[ASRToken], ... FILE: whisperlivekit/voxtral_hf_streaming.py class VoxtralHFStreamingASR (line 23) | class VoxtralHFStreamingASR: method __init__ (line 28) | def __init__(self, logfile=sys.stderr, **kwargs): method transcribe (line 63) | def transcribe(self, audio): class VoxtralHFStreamingOnlineProcessor (line 67) | class VoxtralHFStreamingOnlineProcessor: method __init__ (line 78) | def __init__(self, asr: VoxtralHFStreamingASR, logfile=sys.stderr): method _reset_state (line 104) | def _reset_state(self): method _get_pending_audio (line 135) | def _get_pending_audio(self) -> np.ndarray: method _set_pending_audio (line 145) | def _set_pending_audio(self, arr: np.ndarray): method _get_accumulated_text (line 154) | def _get_accumulated_text(self) -> str: method insert_audio_chunk (line 166) | def insert_audio_chunk(self, audio: np.ndarray, audio_stream_end_time:... method process_iter (line 172) | def process_iter(self, is_last=False) -> Tuple[List[ASRToken], float]: method get_buffer (line 179) | def get_buffer(self) -> Transcript: method start_silence (line 197) | def start_silence(self) -> Tuple[List[ASRToken], float]: method end_silence (line 239) | def end_silence(self, silence_duration: float, offset: float): method new_speaker (line 243) | def new_speaker(self, change_speaker): method warmup (line 246) | def warmup(self, audio, init_prompt=""): method finish (line 249) | def finish(self) -> Tuple[List[ASRToken], float]: method _start_generate_thread (line 280) | def _start_generate_thread(self): method _feed_pending_audio (line 356) | def _feed_pending_audio(self): method _append_text_fragment (line 371) | def _append_text_fragment(self, text_fragment: str): method _drain_streamer (line 378) | def _drain_streamer(self): method _drain_streamer_blocking (line 396) | def _drain_streamer_blocking(self, timeout=30.0): method _pos_to_time (line 445) | def _pos_to_time(self, token_position: int) -> float: method _audio_pos_for_char (line 449) | def _audio_pos_for_char(self, char_idx: int) -> int: method _word_timestamps (line 468) | def _word_timestamps(self, text: str, words: List[str], start_idx: int... method _extract_new_words (line 483) | def _extract_new_words(self) -> List[ASRToken]: method _flush_all_pending_words (line 510) | def _flush_all_pending_words(self) -> List[ASRToken]: method _process_iter_inner (line 538) | def _process_iter_inner(self, is_last: bool) -> Tuple[List[ASRToken], ... FILE: whisperlivekit/voxtral_mlx/loader.py function download_weights (line 43) | def download_weights(model_id: str = DEFAULT_MODEL_ID) -> Path: function _translate_weight_name (line 113) | def _translate_weight_name(name: str) -> str | None: function _is_conv_weight (line 122) | def _is_conv_weight(name: str) -> bool: function _remap_converted_name (line 164) | def _remap_converted_name(name: str) -> str: function _has_converted_layout (line 180) | def _has_converted_layout(path: Path) -> bool: function _load_converted_weights (line 184) | def _load_converted_weights(path: Path): function _load_original_weights (line 219) | def _load_original_weights(path: Path): function _load_tokenizer (line 253) | def _load_tokenizer(model_dir: Path): function load_voxtral_model (line 262) | def load_voxtral_model(path_or_id: str = DEFAULT_MODEL_ID): FILE: whisperlivekit/voxtral_mlx/model.py class SlidingKVCache (line 22) | class SlidingKVCache: method __init__ (line 32) | def __init__(self, capacity: int): method offset (line 40) | def offset(self) -> int: method _reorder (line 45) | def _reorder(self, buf): method _drop_oldest (line 56) | def _drop_oldest(self, buf, n_drop, tail=None): method _append_concat (line 64) | def _append_concat(self, k, v): method _write_inplace (line 79) | def _write_inplace(self, k, v): method update_and_fetch (line 121) | def update_and_fetch(self, k, v): class CausalConv (line 132) | class CausalConv(nn.Module): method __init__ (line 135) | def __init__(self, channels_in: int, channels_out: int, kernel: int, s... method __call__ (line 143) | def __call__(self, x: mx.array) -> mx.array: class _EncoderSelfAttention (line 149) | class _EncoderSelfAttention(nn.Module): method __init__ (line 150) | def __init__(self, dim: int, n_heads: int, head_dim: int, rope_theta: ... method __call__ (line 161) | def __call__(self, x, mask, cache=None): class _EncoderFFN (line 178) | class _EncoderFFN(nn.Module): method __init__ (line 181) | def __init__(self, dim: int, hidden: int): method __call__ (line 187) | def __call__(self, x): class _EncoderBlock (line 191) | class _EncoderBlock(nn.Module): method __init__ (line 192) | def __init__(self, dim, n_heads, head_dim, hidden, rope_theta): method __call__ (line 199) | def __call__(self, x, mask, cache=None): class StreamingEncoder (line 205) | class StreamingEncoder(nn.Module): method __init__ (line 210) | def __init__( method _apply_convs (line 233) | def _apply_convs(self, mel: mx.array) -> mx.array: method forward (line 239) | def forward(self, mel: mx.array) -> mx.array: method forward_conv_incremental (line 247) | def forward_conv_incremental(self, x_in, tail1, tail2): method forward_transformer_incremental (line 280) | def forward_transformer_incremental(self, x, cache_list): class _DecoderAttention (line 292) | class _DecoderAttention(nn.Module): method __init__ (line 295) | def __init__(self, dim, n_heads, n_kv_heads, head_dim, rope_theta): method __call__ (line 307) | def __call__(self, x, mask=None, cache=None): class _DecoderFFN (line 324) | class _DecoderFFN(nn.Module): method __init__ (line 327) | def __init__(self, dim, hidden): method __call__ (line 333) | def __call__(self, x): class AdaptiveScaling (line 337) | class AdaptiveScaling(nn.Module): method __init__ (line 341) | def __init__(self, dim, bottleneck): method __call__ (line 346) | def __call__(self, cond): class _DecoderBlock (line 350) | class _DecoderBlock(nn.Module): method __init__ (line 351) | def __init__(self, dim, n_heads, n_kv_heads, head_dim, hidden, rope_th... method __call__ (line 359) | def __call__(self, x, delay_cond, mask=None, cache=None): class TextDecoder (line 366) | class TextDecoder(nn.Module): method __init__ (line 369) | def __init__( method embed (line 389) | def embed(self, token_ids: mx.array) -> mx.array: method __call__ (line 392) | def __call__(self, x, delay_cond, mask=None, cache=None): class EncoderToDecoderAdapter (line 406) | class EncoderToDecoderAdapter(nn.Module): method __init__ (line 409) | def __init__(self, enc_dim: int, dec_dim: int): method __call__ (line 414) | def __call__(self, x): class DelayEmbedding (line 418) | class DelayEmbedding(nn.Module): method __init__ (line 422) | def __init__(self, dim: int = 3072, theta: float = 10000.0): method __call__ (line 429) | def __call__(self, delay: mx.array) -> mx.array: class VoxtralMLXModel (line 440) | class VoxtralMLXModel(nn.Module): method __init__ (line 443) | def __init__(self, config: dict): method encode (line 484) | def encode(self, mel: mx.array) -> mx.array: method encode_incremental (line 503) | def encode_incremental(self, new_mel, conv_tail1, conv_tail2, enc_cach... method decode (line 532) | def decode(self, embeddings, delay_cond, mask=None, cache=None): FILE: whisperlivekit/voxtral_mlx/spectrogram.py function _build_slaney_filterbank (line 32) | def _build_slaney_filterbank( function _mel_filters (line 86) | def _mel_filters() -> mx.array: function _hann_window (line 102) | def _hann_window() -> mx.array: function _dft_matrices (line 109) | def _dft_matrices(): function _stft_frames (line 123) | def _stft_frames(audio: mx.array, window: mx.array) -> mx.array: function _apply_mel_and_log (line 140) | def _apply_mel_and_log(power: mx.array) -> mx.array: function compute_mel (line 152) | def compute_mel(audio: np.ndarray) -> mx.array: function compute_mel_streaming (line 172) | def compute_mel_streaming( function pad_audio (line 206) | def pad_audio( FILE: whisperlivekit/voxtral_mlx_asr.py function _prompt_tokens (line 43) | def _prompt_tokens(tokenizer, n_left_pad=LEFT_PAD_TOKENS, n_delay=6): class VoxtralMLXASR (line 55) | class VoxtralMLXASR: method __init__ (line 62) | def __init__(self, logfile=sys.stderr, **kwargs): method transcribe (line 84) | def transcribe(self, audio): class VoxtralMLXOnlineProcessor (line 93) | class VoxtralMLXOnlineProcessor: method __init__ (line 107) | def __init__(self, asr: VoxtralMLXASR, logfile=sys.stderr): method _reset_state (line 141) | def _reset_state(self): method _get_pending (line 177) | def _get_pending(self) -> np.ndarray: method _set_pending (line 187) | def _set_pending(self, arr: np.ndarray): method insert_audio_chunk (line 196) | def insert_audio_chunk(self, audio: np.ndarray, audio_stream_end_time:... method process_iter (line 205) | def process_iter(self, is_last=False) -> Tuple[List[ASRToken], float]: method _step (line 212) | def _step(self, is_last: bool) -> Tuple[List[ASRToken], float]: method _encode_pending (line 285) | def _encode_pending(self): method _do_prefill (line 323) | def _do_prefill(self): method _decode_positions (line 344) | def _decode_positions(self, n: int) -> bool: method _trim_embeds (line 396) | def _trim_embeds(self, n_consumed: int): method _sample (line 402) | def _sample(self, logits: mx.array) -> mx.array: method _audio_pos_to_time (line 407) | def _audio_pos_to_time(self, pos: int) -> float: method _word_time_range (line 411) | def _word_time_range(self, word_idx: int, n_words: int) -> Tuple[float... method _extract_committed_words (line 439) | def _extract_committed_words(self) -> List[ASRToken]: method _flush_all_words (line 457) | def _flush_all_words(self) -> List[ASRToken]: method get_buffer (line 477) | def get_buffer(self) -> Transcript: method _safe_decode_remaining (line 486) | def _safe_decode_remaining(self): method _flush_last_token_text (line 504) | def _flush_last_token_text(self): method _close_current_word (line 528) | def _close_current_word(self): method _flush_and_reset (line 535) | def _flush_and_reset(self) -> List[ASRToken]: method start_silence (line 585) | def start_silence(self) -> Tuple[List[ASRToken], float]: method end_silence (line 597) | def end_silence(self, silence_duration: float, offset: float): method new_speaker (line 601) | def new_speaker(self, change_speaker): method warmup (line 604) | def warmup(self, audio, init_prompt=""): method finish (line 607) | def finish(self) -> Tuple[List[ASRToken], float]: FILE: whisperlivekit/warmup.py function load_file (line 6) | def load_file(warmup_file=None, timeout=5): function warmup_asr (line 43) | def warmup_asr(asr, warmup_file=None, timeout=5): FILE: whisperlivekit/web/live_transcription.js function getWaveStroke (line 69) | function getWaveStroke() { function updateWaveStroke (line 76) | function updateWaveStroke() { function applyTheme (line 80) | function applyTheme(pref) { function enumerateMicrophones (line 119) | async function enumerateMicrophones() { function populateMicrophoneSelect (line 135) | function populateMicrophoneSelect() { function handleMicrophoneChange (line 154) | function handleMicrophoneChange() { function fmt1 (line 175) | function fmt1(x) { function setupWebSocket (line 215) | function setupWebSocket() { function renderLinesWithBuffer (line 333) | function renderLinesWithBuffer( function updateTimer (line 469) | function updateTimer() { function drawWaveform (line 478) | function drawWaveform() { function startRecording (line 520) | async function startRecording() { function stopRecording (line 639) | async function stopRecording() { function toggleRecording (line 725) | async function toggleRecording() { function updateUI (line 751) | function updateUI() { function checkAndRequestPermissions (line 802) | async function checkAndRequestPermissions() { FILE: whisperlivekit/web/pcm_worklet.js class PCMForwarder (line 1) | class PCMForwarder extends AudioWorkletProcessor { method process (line 2) | process(inputs) { FILE: whisperlivekit/web/recorder_worker.js function init (line 15) | function init(config) { function record (line 20) | function record(inputBuffer) { function resample (line 27) | function resample(buffer, from, to) { function toPCM (line 50) | function toPCM(input) { FILE: whisperlivekit/web/web_interface.py function get_web_interface_html (line 7) | def get_web_interface_html(): function get_inline_ui_html (line 16) | def get_inline_ui_html(): function get (line 113) | async def get(): FILE: whisperlivekit/whisper/__init__.py function _download (line 57) | def _download(url: str, root: str, in_memory: bool) -> Union[bytes, str]: function available_models (line 101) | def available_models() -> List[str]: function _infer_dims_from_config (line 106) | def _infer_dims_from_config(path: str) -> Optional[ModelDimensions]: function _convert_hf_state_dict (line 163) | def _convert_hf_state_dict(state_dict: Dict[str, torch.Tensor]) -> Dict[... function _convert_mlx_state_dict (line 256) | def _convert_mlx_state_dict(state_dict: Dict[str, torch.Tensor]) -> Dict... function _load_lora_state (line 274) | def _load_lora_state(lora_path: str): function _collapse_hf_module_name (line 292) | def _collapse_hf_module_name(module: str): function _resolve_lora_path (line 302) | def _resolve_lora_path(lora_path: Optional[str]) -> Optional[str]: function _apply_lora_adapter (line 337) | def _apply_lora_adapter(state_dict: Dict[str, Tensor], lora_path: Option... function _load_checkpoint (line 397) | def _load_checkpoint( function _load_sharded_checkpoint (line 434) | def _load_sharded_checkpoint( function load_model (line 466) | def load_model( function convert_encoder_to_coreml (line 599) | def convert_encoder_to_coreml( FILE: whisperlivekit/whisper/audio.py function load_audio (line 25) | def load_audio(file: str, sr: int = SAMPLE_RATE): function pad_or_trim (line 65) | def pad_or_trim(array, length: int = N_SAMPLES, *, axis: int = -1): function mel_filters (line 92) | def mel_filters(device, n_mels: int) -> torch.Tensor: function log_mel_spectrogram (line 110) | def log_mel_spectrogram( FILE: whisperlivekit/whisper/decoding.py function detect_language (line 19) | def detect_language( class DecodingOptions (line 81) | class DecodingOptions: class DecodingResult (line 118) | class DecodingResult: class Inference (line 130) | class Inference: method logits (line 131) | def logits(self, tokens: Tensor, audio_features: Tensor) -> Tensor: method rearrange_kv_cache (line 135) | def rearrange_kv_cache(self, source_indices) -> None: method cleanup_caching (line 139) | def cleanup_caching(self) -> None: class PyTorchInference (line 144) | class PyTorchInference(Inference): method __init__ (line 145) | def __init__(self, model: "Whisper", initial_token_length: int): method logits (line 155) | def logits(self, tokens: Tensor, audio_features: Tensor) -> Tensor: method cleanup_caching (line 162) | def cleanup_caching(self): method rearrange_kv_cache (line 165) | def rearrange_kv_cache(self, source_indices): class SequenceRanker (line 173) | class SequenceRanker: method rank (line 174) | def rank( class MaximumLikelihoodRanker (line 184) | class MaximumLikelihoodRanker(SequenceRanker): method __init__ (line 190) | def __init__(self, length_penalty: Optional[float]): method rank (line 193) | def rank(self, tokens: List[List[Tensor]], sum_logprobs: List[List[flo... class TokenDecoder (line 210) | class TokenDecoder: method reset (line 211) | def reset(self): method update (line 214) | def update( method finalize (line 241) | def finalize( class GreedyDecoder (line 266) | class GreedyDecoder(TokenDecoder): method __init__ (line 267) | def __init__(self, temperature: float, eot: int): method update (line 271) | def update( method finalize (line 289) | def finalize(self, tokens: Tensor, sum_logprobs: Tensor): class BeamSearchDecoder (line 295) | class BeamSearchDecoder(TokenDecoder): method __init__ (line 296) | def __init__( method reset (line 314) | def reset(self): method update (line 317) | def update( method finalize (line 378) | def finalize(self, preceding_tokens: Tensor, sum_logprobs: Tensor): class LogitFilter (line 401) | class LogitFilter: method apply (line 402) | def apply(self, logits: Tensor, tokens: Tensor) -> None: class SuppressBlank (line 417) | class SuppressBlank(LogitFilter): method __init__ (line 418) | def __init__(self, tokenizer: Tokenizer, sample_begin: int): method apply (line 422) | def apply(self, logits: Tensor, tokens: Tensor): class SuppressTokens (line 427) | class SuppressTokens(LogitFilter): method __init__ (line 428) | def __init__(self, suppress_tokens: Sequence[int]): method apply (line 431) | def apply(self, logits: Tensor, tokens: Tensor): class ApplyTimestampRules (line 435) | class ApplyTimestampRules(LogitFilter): method __init__ (line 436) | def __init__( method apply (line 446) | def apply(self, logits: Tensor, tokens: Tensor): class DecodingTask (line 502) | class DecodingTask: method __init__ (line 508) | def __init__(self, model: "Whisper", options: DecodingOptions): method _verify_options (line 566) | def _verify_options(self, options: DecodingOptions) -> DecodingOptions: method _get_initial_tokens (line 581) | def _get_initial_tokens(self) -> Tuple[int]: method _get_suppress_tokens (line 609) | def _get_suppress_tokens(self) -> Tuple[int]: method _get_audio_features (line 638) | def _get_audio_features(self, mel: Tensor): method _detect_language (line 660) | def _detect_language(self, audio_features: Tensor, tokens: Tensor): method _main_loop (line 674) | def _main_loop(self, audio_features: Tensor, tokens: Tensor): method run (line 707) | def run(self, mel: Tensor) -> List[DecodingResult]: function decode (line 787) | def decode( FILE: whisperlivekit/whisper/model.py class ModelDimensions (line 26) | class ModelDimensions: class LayerNorm (line 39) | class LayerNorm(nn.LayerNorm): method forward (line 40) | def forward(self, x: Tensor) -> Tensor: class Linear (line 44) | class Linear(nn.Linear): method forward (line 45) | def forward(self, x: Tensor) -> Tensor: class Conv1d (line 53) | class Conv1d(nn.Conv1d): method _conv_forward (line 54) | def _conv_forward( function sinusoids (line 62) | def sinusoids(length, channels, max_timescale=10000): function disable_sdpa (line 72) | def disable_sdpa(): class MultiHeadAttention (line 81) | class MultiHeadAttention(nn.Module): method __init__ (line 84) | def __init__(self, n_state: int, n_head: int, cache_id: str = "", n_te... method forward (line 100) | def forward( method _update_self_attn_cache (line 130) | def _update_self_attn_cache( method qkv_attention (line 148) | def qkv_attention( class ResidualAttentionBlock (line 176) | class ResidualAttentionBlock(nn.Module): method __init__ (line 177) | def __init__( method forward (line 201) | def forward( class AudioEncoder (line 224) | class AudioEncoder(nn.Module): method __init__ (line 225) | def __init__( method forward (line 238) | def forward(self, x: Tensor): class TextDecoder (line 257) | class TextDecoder(nn.Module): method __init__ (line 258) | def __init__( method forward (line 281) | def forward( class Whisper (line 335) | class Whisper(nn.Module): method __init__ (line 336) | def __init__(self, dims: ModelDimensions, decoder_only: bool = False): method set_alignment_heads (line 363) | def set_alignment_heads(self, dump: bytes): method embed_audio (line 372) | def embed_audio(self, mel: torch.Tensor): method logits (line 375) | def logits( method forward (line 388) | def forward( method device (line 394) | def device(self): method is_multilingual (line 398) | def is_multilingual(self): method num_languages (line 402) | def num_languages(self): FILE: whisperlivekit/whisper/normalizers/basic.py function remove_symbols_and_diacritics (line 27) | def remove_symbols_and_diacritics(s: str, keep=""): function remove_symbols (line 50) | def remove_symbols(s: str): class BasicTextNormalizer (line 60) | class BasicTextNormalizer: method __init__ (line 61) | def __init__(self, remove_diacritics: bool = False, split_letters: boo... method __call__ (line 67) | def __call__(self, s: str): FILE: whisperlivekit/whisper/normalizers/english.py class EnglishNumberNormalizer (line 12) | class EnglishNumberNormalizer: method __init__ (line 23) | def __init__(self): method process_words (line 165) | def process_words(self, words: List[str]) -> Iterator[str]: method preprocess (line 388) | def preprocess(self, s: str): method postprocess (line 417) | def postprocess(self, s: str): method __call__ (line 442) | def __call__(self, s: str): class EnglishSpellingNormalizer (line 450) | class EnglishSpellingNormalizer: method __init__ (line 457) | def __init__(self): method __call__ (line 461) | def __call__(self, s: str): class EnglishTextNormalizer (line 465) | class EnglishTextNormalizer: method __init__ (line 466) | def __init__(self): method __call__ (line 526) | def __call__(self, s: str): FILE: whisperlivekit/whisper/timing.py function median_filter (line 19) | def median_filter(x: torch.Tensor, filter_width: int): function backtrace (line 58) | def backtrace(trace: np.ndarray): function dtw_cpu (line 83) | def dtw_cpu(x: np.ndarray): function dtw_cuda (line 108) | def dtw_cuda(x, BLOCK_SIZE=1024): function dtw (line 141) | def dtw(x: torch.Tensor) -> np.ndarray: class WordTiming (line 155) | class WordTiming: function find_alignment (line 163) | def find_alignment( function merge_punctuations (line 245) | def merge_punctuations(alignment: List[WordTiming], prepended: str, appe... function add_word_timestamps (line 279) | def add_word_timestamps( FILE: whisperlivekit/whisper/tokenizer.py class Tokenizer (line 132) | class Tokenizer: method __post_init__ (line 142) | def __post_init__(self): method encode (line 161) | def encode(self, text, **kwargs): method decode (line 164) | def decode(self, token_ids: List[int], **kwargs) -> str: method decode_with_timestamps (line 168) | def decode_with_timestamps(self, token_ids: List[int], **kwargs) -> str: method eot (line 176) | def eot(self) -> int: method transcribe (line 180) | def transcribe(self) -> int: method translate (line 184) | def translate(self) -> int: method sot (line 188) | def sot(self) -> int: method sot_lm (line 192) | def sot_lm(self) -> int: method sot_prev (line 196) | def sot_prev(self) -> int: method no_speech (line 200) | def no_speech(self) -> int: method no_timestamps (line 204) | def no_timestamps(self) -> int: method timestamp_begin (line 208) | def timestamp_begin(self) -> int: method language_token (line 212) | def language_token(self) -> int: method to_language_token (line 219) | def to_language_token(self, language): method all_language_tokens (line 226) | def all_language_tokens(self) -> Tuple[int]: method all_language_codes (line 234) | def all_language_codes(self) -> Tuple[str]: method sot_sequence_including_notimestamps (line 238) | def sot_sequence_including_notimestamps(self) -> Tuple[int]: method non_speech_tokens (line 242) | def non_speech_tokens(self) -> Tuple[int]: method split_to_word_tokens (line 277) | def split_to_word_tokens(self, tokens: List[int]): method split_tokens_on_unicode (line 286) | def split_tokens_on_unicode(self, tokens: List[int]): method split_tokens_on_spaces (line 316) | def split_tokens_on_spaces(self, tokens: List[int]): function get_encoding (line 336) | def get_encoding(name: str = "gpt2", num_languages: int = 99): function get_tokenizer (line 372) | def get_tokenizer( FILE: whisperlivekit/whisper/transcribe.py function transcribe (line 21) | def transcribe( function cli (line 500) | def cli(): FILE: whisperlivekit/whisper/triton_ops.py function dtw_kernel (line 14) | def dtw_kernel( function median_kernel (line 44) | def median_kernel(filter_width: int): function median_filter_cuda (line 106) | def median_filter_cuda(x: torch.Tensor, filter_width: int): FILE: whisperlivekit/whisper/utils.py function make_safe (line 12) | def make_safe(string): function make_safe (line 19) | def make_safe(string): function exact_div (line 24) | def exact_div(x, y): function str2bool (line 29) | def str2bool(string): function optional_int (line 37) | def optional_int(string): function optional_float (line 41) | def optional_float(string): function compression_ratio (line 45) | def compression_ratio(text) -> float: function format_timestamp (line 50) | def format_timestamp( function get_start (line 71) | def get_start(segments: List[dict]) -> Optional[float]: function get_end (line 78) | def get_end(segments: List[dict]) -> Optional[float]: class ResultWriter (line 85) | class ResultWriter: method __init__ (line 88) | def __init__(self, output_dir: str): method __call__ (line 91) | def __call__( method write_result (line 103) | def write_result( class WriteTXT (line 109) | class WriteTXT(ResultWriter): method write_result (line 112) | def write_result( class SubtitlesWriter (line 119) | class SubtitlesWriter(ResultWriter): method iterate_result (line 123) | def iterate_result( method format_timestamp (line 230) | def format_timestamp(self, seconds: float): class WriteVTT (line 238) | class WriteVTT(SubtitlesWriter): method write_result (line 243) | def write_result( class WriteSRT (line 251) | class WriteSRT(SubtitlesWriter): method write_result (line 256) | def write_result( class WriteTSV (line 265) | class WriteTSV(ResultWriter): method write_result (line 277) | def write_result( class WriteJSON (line 287) | class WriteJSON(ResultWriter): method write_result (line 290) | def write_result( function get_writer (line 296) | def get_writer( FILE: whisperlivekit/whisper/val.py class Value (line 31) | class Value: method __init__ (line 34) | def __init__(self, data, children=(), local_grads=()): method __add__ (line 40) | def __add__(self, other): method __mul__ (line 44) | def __mul__(self, other): method __pow__ (line 48) | def __pow__(self, other): return Value(self.data**other, (self,), (oth... method log (line 49) | def log(self): return Value(math.log(self.data), (self,), (1/self.data,)) method exp (line 50) | def exp(self): return Value(math.exp(self.data), (self,), (math.exp(se... method relu (line 51) | def relu(self): return Value(max(0, self.data), (self,), (float(self.d... method __neg__ (line 52) | def __neg__(self): return self * -1 method __radd__ (line 53) | def __radd__(self, other): return self + other method __sub__ (line 54) | def __sub__(self, other): return self + (-other) method __rsub__ (line 55) | def __rsub__(self, other): return other + (-self) method __rmul__ (line 56) | def __rmul__(self, other): return self * other method __truediv__ (line 57) | def __truediv__(self, other): return self * other**-1 method __rtruediv__ (line 58) | def __rtruediv__(self, other): return other * self**-1 method backward (line 60) | def backward(self): function linear (line 95) | def linear(x, w): function softmax (line 99) | def softmax(logits): function rmsnorm (line 105) | def rmsnorm(x): function gpt (line 110) | def gpt(token_id, pos_id, keys, values):