SYMBOL INDEX (298 symbols across 26 files) FILE: app.py function process_json_and_generate_audio (line 12) | def process_json_and_generate_audio(prompt_audio_role0_file, prompt_text... function update_ui_language (line 137) | def update_ui_language(language): FILE: inference.py class Model (line 18) | class Model(object): method __init__ (line 19) | def __init__(self): method _clean_text (line 52) | def _clean_text(self, text): method _process_text (line 66) | def _process_text(self, js): method inference (line 76) | def inference(self, js, streaming=False): method infer_with_prompt (line 90) | def infer_with_prompt(self, js): method infer_with_prompt_streaming (line 175) | def infer_with_prompt_streaming(self, js): method infer_without_prompt (line 255) | def infer_without_prompt(self, js): method infer_without_prompt_streaming (line 312) | def infer_without_prompt_streaming(self, js): FILE: modules/audio_detokenizer/audio_detokenizer.py class PrefixStreamingFlowMatchingDetokenizer (line 8) | class PrefixStreamingFlowMatchingDetokenizer: method __init__ (line 9) | def __init__(self, vocoder: BigVGANWrapper, fm: StreamingSemanticFMWra... method from_pretrained (line 34) | def from_pretrained(cls, vocoder_config, vocoder_ckpt, fm_config, fm_c... method prefill (line 44) | def prefill(self, timbre_speech, timbre_semantic_token, chunk_size: in... method detokenize_streaming (line 76) | def detokenize_streaming(self, semantic_token, ode_step=30, verbose=Fa... method clear_states (line 174) | def clear_states(self): function get_audio_detokenizer (line 180) | def get_audio_detokenizer(): function detokenize (line 201) | def detokenize(detokenizer, tokens, ref_wav, ref_tokens): function detokenize_streaming (line 220) | def detokenize_streaming(detokenizer, tokens, ref_wav, ref_tokens): function detokenize_noref (line 236) | def detokenize_noref(detokenizer, tokens): function detokenize_noref_streaming (line 255) | def detokenize_noref_streaming(detokenizer, tokens): FILE: modules/audio_detokenizer/bigvgan_wrapper.py class BigVGANWrapper (line 14) | class BigVGANWrapper: method __init__ (line 15) | def __init__(self, vocoder: BigVGAN, device: torch.device, h: AttrDict... method to_dtype (line 23) | def to_dtype(self, dtype): method extract_mel_from_wav (line 26) | def extract_mel_from_wav(self, wav_path=None, wav_data=None): method extract_mel_from_wav_batch (line 44) | def extract_mel_from_wav_batch(self, wav_data): method decode_mel (line 58) | def decode_mel(self, mel): method decode_mel_batch (line 69) | def decode_mel_batch(self, mel): method from_pretrained (line 81) | def from_pretrained(cls, model_config, ckpt_path, device): FILE: modules/audio_detokenizer/flow_matching/dit_block.py function reshape_for_broadcast (line 11) | def reshape_for_broadcast(freqs_cis: torch.Tensor, x: torch.Tensor): function apply_rotary_emb (line 24) | def apply_rotary_emb( class Attention (line 39) | class Attention(nn.Module): method __init__ (line 41) | def __init__( method forward (line 67) | def forward(self, x: torch.Tensor, seq_len, cu_seqlens, max_seqlen, cu... function modulate (line 160) | def modulate(x, shift, scale): class FinalLayer (line 164) | class FinalLayer(nn.Module): method __init__ (line 168) | def __init__(self, hidden_size, out_channels): method forward (line 177) | def forward(self, x, c): class DiTBlock (line 184) | class DiTBlock(nn.Module): method __init__ (line 188) | def __init__(self, hidden_size, num_heads, mlp_ratio=4.0, ffn_type="co... method forward (line 209) | def forward(self, x, c, seq_len, cu_seqlens, cu_maxlen, cu_seqlens_k, ... FILE: modules/audio_detokenizer/flow_matching/model.py function precompute_freqs_cis (line 6) | def precompute_freqs_cis(dim: int, end: int, theta: float = 10000.0, class TimestepEmbedder (line 28) | class TimestepEmbedder(nn.Module): method __init__ (line 32) | def __init__(self, hidden_size, frequency_embedding_size=256): method timestep_embedding (line 42) | def timestep_embedding(t, dim, max_period=10000): method forward (line 62) | def forward(self, t): class SinusoidalPositionalEmbedding (line 68) | class SinusoidalPositionalEmbedding(nn.Module): method __init__ (line 74) | def __init__(self, embedding_dim, padding_idx, init_size=1024): method get_embedding (line 86) | def get_embedding(num_embeddings, embedding_dim, padding_idx=None): method forward (line 104) | def forward(self, input, incremental_state=None, timestep=None, **kwar... method max_positions (line 125) | def max_positions(self): method make_positions (line 129) | def make_positions(self, tensor, padding_idx): class DiTPrefix (line 144) | class DiTPrefix(nn.Module): method __init__ (line 148) | def __init__( method initialize_weights (line 218) | def initialize_weights(self): method forward (line 243) | def forward(self, x, position_ids, t, condition, seq_len, cu_seqlens, ... FILE: modules/audio_detokenizer/flow_matching/ode_wrapper.py function get_cached_zeros (line 8) | def get_cached_zeros(numel, device="cpu", dtype=torch.float32): class StreamingODEWrapperForPrefix (line 11) | class StreamingODEWrapperForPrefix(nn.Module): method __init__ (line 12) | def __init__(self, net, x_mask, x_cond, use_cfg=False, use_cfg_rescale... method clear_all_states (line 38) | def clear_all_states(self): method state_dict (line 48) | def state_dict(self): method load_state_dict (line 59) | def load_state_dict(self, state_dict): method set_conditions (line 68) | def set_conditions(self, x_mask, x_cond, start_position_id, cache={}): method update_incremental_state (line 108) | def update_incremental_state(self, reserve_kv_cache_tokens=0, max_kv_c... method forward (line 151) | def forward(self, t, x, args=None): FILE: modules/audio_detokenizer/flow_matching/scheduler.py class SchedulerBase (line 9) | class SchedulerBase(ABC): method __init__ (line 10) | def __init__(self) -> None: method set_timesteps (line 14) | def set_timesteps(self): method step (line 18) | def step(self): method add_noise (line 22) | def add_noise(self): class StreamingFlowMatchingScheduler (line 26) | class StreamingFlowMatchingScheduler(SchedulerBase): method __init__ (line 27) | def __init__(self, timesteps=1000, sigma_min=1e-4, method set_timesteps (line 39) | def set_timesteps(self, timesteps=15): method step (line 42) | def step(self, xt, predicted_v): method sample (line 50) | def sample(self, ode_wrapper, time_steps, xt, verbose=False, x0=None): method sample_by_neuralode (line 65) | def sample_by_neuralode(self, ode_wrapper, time_steps, xt, verbose=Fal... method add_noise (line 76) | def add_noise(self, original_samples: torch.FloatTensor, FILE: modules/audio_detokenizer/semantic_fm_prefix_streaming.py class StreamingSemanticFMWrapper (line 16) | class StreamingSemanticFMWrapper: method __init__ (line 17) | def __init__(self, speech_model: DiTPrefix, max_kv_cache_tokens=900, m... method infer_chunk (line 49) | def infer_chunk(self, xt_chunk, semantic_tokens_chunk, start_position_id, method infer_mel (line 105) | def infer_mel(self, semantic_tokens, ode_steps=15, chunk_size=150, ver... method clear_all_states (line 150) | def clear_all_states(self): method state_dict (line 155) | def state_dict(self): method load_state_dict (line 162) | def load_state_dict(self, state_dict): method update_incremental_state (line 168) | def update_incremental_state(self): method prefill (line 172) | def prefill(self, mel, semantic_token, chunk_size=150, verbose=False): method prefill_chunk (line 212) | def prefill_chunk(self, mel_chunk, semantic_tokens_chunk, start_positi... method from_pretrained (line 236) | def from_pretrained(cls, model_config, ckpt_path, device, max_prompt_c... FILE: modules/audio_detokenizer/vocoder/activations.py class Snake (line 6) | class Snake(nn.Module): method __init__ (line 23) | def __init__( method forward (line 48) | def forward(self, x): class SnakeBeta (line 62) | class SnakeBeta(nn.Module): method __init__ (line 80) | def __init__( method forward (line 110) | def forward(self, x): FILE: modules/audio_detokenizer/vocoder/alias_free_activation/cuda/activation1d.py class FusedAntiAliasActivation (line 14) | class FusedAntiAliasActivation(torch.autograd.Function): method forward (line 22) | def forward(ctx, inputs, up_ftr, down_ftr, alpha, beta): method backward (line 30) | def backward(ctx, output_grads): class Activation1d (line 35) | class Activation1d(nn.Module): method __init__ (line 36) | def __init__( method forward (line 54) | def forward(self, x): FILE: modules/audio_detokenizer/vocoder/alias_free_activation/cuda/anti_alias_activation.cpp function PYBIND11_MODULE (line 21) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: modules/audio_detokenizer/vocoder/alias_free_activation/cuda/load.py function load (line 17) | def load(): function _get_cuda_bare_metal_version (line 68) | def _get_cuda_bare_metal_version(cuda_dir): function _create_build_dir (line 81) | def _create_build_dir(buildpath): FILE: modules/audio_detokenizer/vocoder/alias_free_activation/torch/act.py class Activation1d (line 8) | class Activation1d(nn.Module): method __init__ (line 9) | def __init__( method forward (line 25) | def forward(self, x): FILE: modules/audio_detokenizer/vocoder/alias_free_activation/torch/filter.py function sinc (line 15) | def sinc(x: torch.Tensor): function kaiser_sinc_filter1d (line 30) | def kaiser_sinc_filter1d( class LowPassFilter1d (line 65) | class LowPassFilter1d(nn.Module): method __init__ (line 66) | def __init__( method forward (line 94) | def forward(self, x): FILE: modules/audio_detokenizer/vocoder/alias_free_activation/torch/resample.py class UpSample1d (line 10) | class UpSample1d(nn.Module): method __init__ (line 11) | def __init__(self, ratio=2, kernel_size=None): method forward (line 29) | def forward(self, x): class DownSample1d (line 41) | class DownSample1d(nn.Module): method __init__ (line 42) | def __init__(self, ratio=2, kernel_size=None): method forward (line 55) | def forward(self, x): FILE: modules/audio_detokenizer/vocoder/bigvgan.py function load_hparams_from_json (line 25) | def load_hparams_from_json(path) -> AttrDict: class AMPBlock1 (line 31) | class AMPBlock1(torch.nn.Module): method __init__ (line 44) | def __init__( method forward (line 132) | def forward(self, x): method remove_weight_norm (line 143) | def remove_weight_norm(self): class AMPBlock2 (line 150) | class AMPBlock2(torch.nn.Module): method __init__ (line 163) | def __init__( method forward (line 232) | def forward(self, x): method remove_weight_norm (line 238) | def remove_weight_norm(self): class BigVGAN (line 243) | class BigVGAN( method __init__ (line 266) | def __init__(self, h: AttrDict, use_cuda_kernel: bool = False): method forward (line 360) | def forward(self, x): method remove_weight_norm (line 388) | def remove_weight_norm(self): method _save_pretrained (line 403) | def _save_pretrained(self, save_directory: Path) -> None: method _from_pretrained (line 414) | def _from_pretrained( FILE: modules/audio_detokenizer/vocoder/utils.py function dynamic_range_compression_torch (line 7) | def dynamic_range_compression_torch(x, C=1, clip_val=1e-5): function spectral_normalize_torch (line 11) | def spectral_normalize_torch(magnitudes): function get_melspec (line 14) | def get_melspec( class AttrDict (line 86) | class AttrDict(dict): method __init__ (line 87) | def __init__(self, *args, **kwargs): function load_checkpoint (line 91) | def load_checkpoint(filepath, device): function init_weights (line 98) | def init_weights(m, mean=0.0, std=0.01): function get_padding (line 104) | def get_padding(kernel_size, dilation=1): FILE: modules/audio_tokenizer/audio_tokenizer.py class AudioTokenizer (line 13) | class AudioTokenizer(object): method __init__ (line 14) | def __init__(self, **kwargs): method tokenize (line 38) | def tokenize(self, speech): function get_audio_tokenizer (line 67) | def get_audio_tokenizer(): FILE: modules/audio_tokenizer/quantize/factorized_vector_quantize.py function WNConv1d (line 9) | def WNConv1d(*args, **kwargs): function WNConvTranspose1d (line 13) | def WNConvTranspose1d(*args, **kwargs): class FactorizedVectorQuantize (line 17) | class FactorizedVectorQuantize(nn.Module): method __init__ (line 18) | def __init__( method forward (line 47) | def forward(self, z): method embed_code (line 91) | def embed_code(self, embed_id): method decode_code (line 94) | def decode_code(self, embed_id): method decode_latents (line 97) | def decode_latents(self, latents): method vq2emb (line 118) | def vq2emb(self, vq, out_proj=True): method latent2dist (line 124) | def latent2dist(self, latents): FILE: modules/audio_tokenizer/quantize/residual_vq.py class ResidualVQ (line 15) | class ResidualVQ(nn.Module): method __init__ (line 21) | def __init__( method forward (line 59) | def forward(self, z, n_quantizers: int = None): method vq2emb (line 135) | def vq2emb(self, vq, n_quantizers=None): method latent2dist (line 145) | def latent2dist(self, z, n_quantizers=None): FILE: modules/audio_tokenizer/quantize/vector_quantize.py function WNConv1d (line 9) | def WNConv1d(*args, **kwargs): function WNConvTranspose1d (line 13) | def WNConvTranspose1d(*args, **kwargs): function l2norm (line 17) | def l2norm(t): function ema_inplace (line 21) | def ema_inplace(moving_avg, new, decay): function laplace_smoothing (line 25) | def laplace_smoothing(x, n_categories, eps=1e-5): function sample_vectors (line 29) | def sample_vectors(samples, num): function kmeans (line 40) | def kmeans(samples, num_clusters, num_iters=10, use_cosine_sim=False): class EuclideanCodebook (line 71) | class EuclideanCodebook(nn.Module): method __init__ (line 72) | def __init__( method init_embed_ (line 104) | def init_embed_(self, data): method replace (line 111) | def replace(self, samples, mask): method expire_codes_ (line 117) | def expire_codes_(self, batch_samples): method forward (line 127) | def forward(self, x): method vq2emb (line 162) | def vq2emb(self, vq): method latent2dist (line 166) | def latent2dist(self, x): class SimpleCodebook (line 189) | class SimpleCodebook(nn.Module): method __init__ (line 190) | def __init__( method forward (line 204) | def forward(self, x): method vq2emb (line 225) | def vq2emb(self, vq): method latent2dist (line 229) | def latent2dist(self, x): class VectorQuantize (line 253) | class VectorQuantize(nn.Module): method __init__ (line 273) | def __init__( method forward (line 336) | def forward(self, z): method decode_latents (line 380) | def decode_latents(self, latents): method vq2emb (line 386) | def vq2emb(self, vq, out_proj=True): method latent2dist (line 393) | def latent2dist(self, latents): FILE: modules/audio_tokenizer/rep_codec.py function init_weights (line 9) | def init_weights(m): class RepCodec (line 17) | class RepCodec(nn.Module): method __init__ (line 18) | def __init__( method forward (line 136) | def forward(self, x): method quantize (line 177) | def quantize(self, x): method reset_parameters (line 196) | def reset_parameters(self): FILE: modules/audio_tokenizer/transformer.py class StyleAdaptiveLayerNorm (line 8) | class StyleAdaptiveLayerNorm(nn.Module): method __init__ (line 9) | def __init__(self, normalized_shape, eps=1e-5): method forward (line 17) | def forward(self, x, condition): class PositionalEncoding (line 30) | class PositionalEncoding(nn.Module): method __init__ (line 31) | def __init__(self, d_model, dropout, max_len=5000): method forward (line 44) | def forward(self, x): class TransformerFFNLayer (line 49) | class TransformerFFNLayer(nn.Module): method __init__ (line 50) | def __init__( method forward (line 70) | def forward(self, x): class TransformerEncoderLayer (line 81) | class TransformerEncoderLayer(nn.Module): method __init__ (line 82) | def __init__( method forward (line 117) | def forward(self, x, key_padding_mask, conditon=None): class TransformerEncoder (line 149) | class TransformerEncoder(nn.Module): method __init__ (line 150) | def __init__( method forward (line 217) | def forward(self, x, key_padding_mask, condition=None): FILE: modules/audio_tokenizer/vocos.py function safe_log (line 12) | def safe_log(x: torch.Tensor, clip_val: float = 1e-7) -> torch.Tensor: function symlog (line 26) | def symlog(x: torch.Tensor) -> torch.Tensor: function symexp (line 30) | def symexp(x: torch.Tensor) -> torch.Tensor: class STFT (line 34) | class STFT(nn.Module): method __init__ (line 35) | def __init__( method forward (line 50) | def forward(self, x: torch.Tensor) -> torch.Tensor: class ISTFT (line 78) | class ISTFT(nn.Module): method __init__ (line 93) | def __init__( method forward (line 106) | def forward(self, spec: torch.Tensor) -> torch.Tensor: class MDCT (line 164) | class MDCT(nn.Module): method __init__ (line 173) | def __init__(self, frame_len: int, padding: str = "same"): method forward (line 191) | def forward(self, audio: torch.Tensor) -> torch.Tensor: class IMDCT (line 225) | class IMDCT(nn.Module): method __init__ (line 234) | def __init__(self, frame_len: int, padding: str = "same"): method forward (line 250) | def forward(self, X: torch.Tensor) -> torch.Tensor: class FourierHead (line 293) | class FourierHead(nn.Module): method forward (line 296) | def forward(self, x: torch.Tensor) -> torch.Tensor: class ISTFTHead (line 308) | class ISTFTHead(FourierHead): method __init__ (line 320) | def __init__(self, dim: int, n_fft: int, hop_length: int, padding: str... method forward (line 328) | def forward(self, x: torch.Tensor) -> torch.Tensor: class IMDCTSymExpHead (line 358) | class IMDCTSymExpHead(FourierHead): method __init__ (line 371) | def __init__( method forward (line 395) | def forward(self, x: torch.Tensor) -> torch.Tensor: class IMDCTCosHead (line 418) | class IMDCTCosHead(FourierHead): method __init__ (line 429) | def __init__( method forward (line 441) | def forward(self, x: torch.Tensor) -> torch.Tensor: class ConvNeXtBlock (line 463) | class ConvNeXtBlock(nn.Module): method __init__ (line 475) | def __init__( method forward (line 502) | def forward( class AdaLayerNorm (line 524) | class AdaLayerNorm(nn.Module): method __init__ (line 533) | def __init__(self, num_embeddings: int, embedding_dim: int, eps: float... method forward (line 546) | def forward(self, x: torch.Tensor, cond_embedding_id: torch.Tensor) ->... class ResBlock1 (line 554) | class ResBlock1(nn.Module): method __init__ (line 570) | def __init__( method forward (line 676) | def forward(self, x: torch.Tensor) -> torch.Tensor: method remove_weight_norm (line 687) | def remove_weight_norm(self): method get_padding (line 694) | def get_padding(kernel_size: int, dilation: int = 1) -> int: class Backbone (line 698) | class Backbone(nn.Module): method forward (line 701) | def forward(self, x: torch.Tensor, **kwargs) -> torch.Tensor: class VocosBackbone (line 714) | class VocosBackbone(Backbone): method __init__ (line 728) | def __init__( method _init_weights (line 760) | def _init_weights(self, m): method forward (line 765) | def forward(self, x: torch.Tensor, **kwargs) -> torch.Tensor: class VocosResNetBackbone (line 780) | class VocosResNetBackbone(Backbone): method __init__ (line 791) | def __init__( method forward (line 811) | def forward(self, x: torch.Tensor, **kwargs) -> torch.Tensor: class Vocos (line 818) | class Vocos(nn.Module): method __init__ (line 819) | def __init__( method forward (line 841) | def forward(self, x): FILE: modules/tokenizer/tokenizer.py function encode_pieces (line 9) | def encode_pieces(sp_model: spm.SentencePieceProcessor, text: str, sampl... class AbstractTokenizer (line 20) | class AbstractTokenizer(ABC): method __init__ (line 23) | def __init__(self, name): method vocab_size (line 29) | def vocab_size(self): method vocab (line 34) | def vocab(self): method inv_vocab (line 40) | def inv_vocab(self): method tokenize (line 45) | def tokenize(self, text): method detokenize (line 48) | def detokenize(self, token_ids): method cls (line 53) | def cls(self): method sep (line 58) | def sep(self): method pad (line 63) | def pad(self): method eod (line 68) | def eod(self): method mask (line 73) | def mask(self): class SPieceTokenizer (line 78) | class SPieceTokenizer(AbstractTokenizer): method __init__ (line 79) | def __init__(self, spm_file: str): method encode_pieces (line 96) | def encode_pieces(self, text: str, sample=False): method _initialize_index_2_bytes (line 103) | def _initialize_index_2_bytes(self): method set_add_dummy_prefix (line 111) | def set_add_dummy_prefix(self, add_dummy_prefix: bool = False): method add_special_id (line 119) | def add_special_id(self, token_id): method has_dummy_prefix (line 123) | def has_dummy_prefix(self): method vocab_size (line 128) | def vocab_size(self): method vocab (line 132) | def vocab(self): method get_array_bytes (line 136) | def get_array_bytes(self, array): method tokenize (line 139) | def tokenize(self, text): method encode (line 143) | def encode(self, text: str, bos: bool=False, eos: bool=False, **kwargs... method convert_tokens_to_ids (line 152) | def convert_tokens_to_ids(self, tokens: str | list[str]) -> int | list... method detokenize (line 157) | def detokenize(self, token_ids): method decode (line 164) | def decode(self, token_ids: Union[int, list[int]], skip_special_tokens... method get_token_id (line 170) | def get_token_id(self, token): method inv_vocab (line 173) | def inv_vocab(self): method decode_pieces (line 177) | def decode_pieces(self, pieces): method eod (line 181) | def eod(self): method pad_id (line 185) | def pad_id(self): method eos_id (line 189) | def eos_id(self): method bos_id (line 193) | def bos_id(self): method unk_id (line 197) | def unk_id(self): method pad_token_id (line 201) | def pad_token_id(self): method eos_token_id (line 205) | def eos_token_id(self): class ExtraTokens (line 210) | class ExtraTokens: function instantiate_extra_tokens (line 221) | def instantiate_extra_tokens(tokenizer: AbstractTokenizer): function get_tokenizer_and_extra_tokens (line 238) | def get_tokenizer_and_extra_tokens():