SYMBOL INDEX (3295 symbols across 224 files) FILE: NeuralSeq/configs/tts/emotion/pre_align.py class EmoPreAlign (line 7) | class EmoPreAlign(BasePreprocessor): method meta_data (line 9) | def meta_data(self): FILE: NeuralSeq/configs/tts/libritts/pre_align.py class LibrittsPreAlign (line 7) | class LibrittsPreAlign(BasePreprocessor): method meta_data (line 8) | def meta_data(self): FILE: NeuralSeq/data_gen/tts/base_binarizer.py class BinarizationError (line 18) | class BinarizationError(Exception): class BaseBinarizer (line 22) | class BaseBinarizer: method __init__ (line 23) | def __init__(self, processed_data_dir=None): method train_item_names (line 60) | def train_item_names(self): method valid_item_names (line 64) | def valid_item_names(self): method test_item_names (line 68) | def test_item_names(self): method build_spk_map (line 71) | def build_spk_map(self): method item_name2spk_id (line 80) | def item_name2spk_id(self, item_name): method _phone_encoder (line 83) | def _phone_encoder(self): method meta_data (line 96) | def meta_data(self, prefix): method process (line 111) | def process(self): method process_data (line 123) | def process_data(self, prefix): method process_item (line 160) | def process_item(cls, item_name, ph, txt, tg_fn, wav_fn, spk_id, encod... method get_align (line 188) | def get_align(tg_fn, ph, mel, phone_encoded, res): method get_pitch (line 200) | def get_pitch(wav, mel, res): method get_f0cwt (line 208) | def get_f0cwt(f0, res): FILE: NeuralSeq/data_gen/tts/base_binarizer_emotion.py class BinarizationError (line 24) | class BinarizationError(Exception): class EmotionBinarizer (line 28) | class EmotionBinarizer: method __init__ (line 29) | def __init__(self, processed_data_dir=None): method load_meta_data (line 42) | def load_meta_data(self): method train_item_names (line 64) | def train_item_names(self): method valid_item_names (line 68) | def valid_item_names(self): method test_item_names (line 72) | def test_item_names(self): method build_spk_map (line 75) | def build_spk_map(self): method build_emo_map (line 85) | def build_emo_map(self): method item_name2spk_id (line 94) | def item_name2spk_id(self, item_name): method item_name2emo_id (line 97) | def item_name2emo_id(self, item_name): method _phone_encoder (line 100) | def _phone_encoder(self): method _word_encoder (line 114) | def _word_encoder(self): method meta_data (line 133) | def meta_data(self, prefix): method process (line 149) | def process(self): method process_data (line 172) | def process_data(self, prefix): method process_item (line 215) | def process_item(cls, item_name, ph, txt, tg_fn, wav_fn, spk_id, emoti... method get_align (line 262) | def get_align(tg_fn, res): method get_pitch (line 277) | def get_pitch(res): method get_f0cwt (line 286) | def get_f0cwt(res): method get_word (line 301) | def get_word(res, word_encoder): method num_workers (line 346) | def num_workers(self): FILE: NeuralSeq/data_gen/tts/base_preprocess.py class BasePreprocessor (line 19) | class BasePreprocessor: method __init__ (line 20) | def __init__(self): method meta_data (line 28) | def meta_data(self): method process (line 34) | def process(self): method preprocess_first_pass (line 117) | def preprocess_first_pass(cls, item_name, txt_raw, txt_processor, method txt_to_ph (line 147) | def txt_to_ph(txt_processor, txt_raw, preprocess_args): method process_wav (line 157) | def process_wav(item_name, wav_fn, processed_dir, wav_processed_tmp, p... method _phone_encoder (line 179) | def _phone_encoder(self, ph_set): method _word_encoder (line 190) | def _word_encoder(self, word_set): method preprocess_second_pass (line 208) | def preprocess_second_pass(cls, word, ph, spk_name, word_encoder, ph_e... method build_spk_map (line 214) | def build_spk_map(self, spk_names): method build_mfa_inputs (line 222) | def build_mfa_inputs(cls, item, mfa_input_dir, mfa_group, wav_processe... method load_spk_map (line 238) | def load_spk_map(self, base_dir): method load_dict (line 243) | def load_dict(self, base_dir): method meta_csv_filename (line 249) | def meta_csv_filename(self): method wav_processed_dirname (line 253) | def wav_processed_dirname(self): FILE: NeuralSeq/data_gen/tts/binarizer_zh.py class ZhBinarizer (line 12) | class ZhBinarizer(BaseBinarizer): method get_align (line 14) | def get_align(tg_fn, ph, mel, phone_encoded, res): FILE: NeuralSeq/data_gen/tts/data_gen_utils.py function trim_long_silences (line 27) | def trim_long_silences(path, sr=None, return_raw_wav=False, norm=True, v... function process_utterance (line 93) | def process_utterance(wav_path, function get_pitch (line 150) | def get_pitch(wav_data, mel, hparams): function remove_empty_lines (line 187) | def remove_empty_lines(text): class TextGrid (line 197) | class TextGrid(object): method __init__ (line 198) | def __init__(self, text): method _extract_pattern (line 208) | def _extract_pattern(self, pattern, inc): method _get_type (line 225) | def _get_type(self): method _get_time_intval (line 228) | def _get_time_intval(self): method _get_size (line 232) | def _get_size(self): method _get_item_list (line 235) | def _get_item_list(self): method toJson (line 264) | def toJson(self): function get_mel2ph (line 274) | def get_mel2ph(tg_fn, ph, mel, hparams): function build_phone_encoder (line 340) | def build_phone_encoder(data_dir): function build_word_encoder (line 346) | def build_word_encoder(data_dir): function is_sil_phoneme (line 351) | def is_sil_phoneme(p): function build_token_encoder (line 355) | def build_token_encoder(token_list_file): FILE: NeuralSeq/data_gen/tts/emotion/audio.py function preprocess_wav (line 13) | def preprocess_wav(fpath_or_wav: Union[str, Path, np.ndarray], function wav_to_mel_spectrogram (line 43) | def wav_to_mel_spectrogram(wav): function trim_long_silences (line 58) | def trim_long_silences(wav): function normalize_volume (line 101) | def normalize_volume(wav, target_dBFS, increase_only=False, decrease_onl... FILE: NeuralSeq/data_gen/tts/emotion/inference.py function load_model (line 15) | def load_model(weights_fpath: Path, device=None): function is_loaded (line 39) | def is_loaded(): function embed_frames_batch (line 43) | def embed_frames_batch(frames_batch): function compute_partial_slices (line 59) | def compute_partial_slices(n_samples, partial_utterance_n_frames=partial... function embed_utterance (line 111) | def embed_utterance(wav, using_partials=True, return_partials=False, **k... function embed_speaker (line 158) | def embed_speaker(wavs, **kwargs): function plot_embedding_as_heatmap (line 162) | def plot_embedding_as_heatmap(embed, ax=None, title="", shape=None, colo... FILE: NeuralSeq/data_gen/tts/emotion/model.py class EmotionEncoder (line 11) | class EmotionEncoder(nn.Module): method __init__ (line 12) | def __init__(self, device, loss_device): method do_gradient_ops (line 33) | def do_gradient_ops(self): method forward (line 41) | def forward(self, utterances, hidden_init=None): method inference (line 63) | def inference(self, utterances, hidden_init=None): FILE: NeuralSeq/data_gen/tts/emotion/test_emotion.py function tuneThresholdfromScore (line 32) | def tuneThresholdfromScore(scores, labels, target_fa, target_fr=None): function loadWAV (line 55) | def loadWAV(filename, max_frames, evalmode=True, num_eval=10): function evaluateFromList (line 84) | def evaluateFromList(listfilename, print_interval=100, test_path='', mul... FILE: NeuralSeq/data_gen/tts/txt_processors/base_text_processor.py function register_txt_processors (line 5) | def register_txt_processors(name): function get_txt_processor_cls (line 13) | def get_txt_processor_cls(name): class BaseTxtProcessor (line 17) | class BaseTxtProcessor: method sp_phonemes (line 19) | def sp_phonemes(): method process (line 23) | def process(cls, txt, preprocess_args): method postprocess (line 27) | def postprocess(cls, txt_struct, preprocess_args): method add_bdr (line 40) | def add_bdr(cls, txt_struct): FILE: NeuralSeq/data_gen/tts/txt_processors/en.py class EnG2p (line 12) | class EnG2p(G2p): method __call__ (line 15) | def __call__(self, text): class TxtProcessor (line 44) | class TxtProcessor(BaseTxtProcessor): method preprocess_text (line 48) | def preprocess_text(text): method process (line 66) | def process(cls, txt, preprocess_args): FILE: NeuralSeq/data_gen/tts/txt_processors/zh.py class TxtProcessor (line 9) | class TxtProcessor(BaseTxtProcessor): method preprocess_text (line 15) | def preprocess_text(text): method process (line 28) | def process(cls, txt, pre_align_args): FILE: NeuralSeq/data_gen/tts/txt_processors/zh_g2pM.py class TxtProcessor (line 15) | class TxtProcessor(zh.TxtProcessor): method sp_phonemes (line 19) | def sp_phonemes(): method process (line 23) | def process(cls, txt, pre_align_args): FILE: NeuralSeq/data_gen/tts/wav_processors/base_processor.py function register_wav_processors (line 4) | def register_wav_processors(name): function get_wav_processor_cls (line 12) | def get_wav_processor_cls(name): class BaseWavProcessor (line 16) | class BaseWavProcessor: method name (line 18) | def name(self): method output_fn (line 21) | def output_fn(self, input_fn): method process (line 24) | def process(self, input_fn, sr, tmp_dir, processed_dir, item_name, pre... FILE: NeuralSeq/data_gen/tts/wav_processors/common_processors.py class ConvertToWavProcessor (line 12) | class ConvertToWavProcessor(BaseWavProcessor): method name (line 14) | def name(self): method process (line 17) | def process(self, input_fn, sr, tmp_dir, processed_dir, item_name, pre... class ResampleProcessor (line 27) | class ResampleProcessor(BaseWavProcessor): method name (line 29) | def name(self): method process (line 32) | def process(self, input_fn, sr, tmp_dir, processed_dir, item_name, pre... class TrimSILProcessor (line 46) | class TrimSILProcessor(BaseWavProcessor): method name (line 48) | def name(self): method process (line 51) | def process(self, input_fn, sr, tmp_dir, processed_dir, item_name, pre... class TrimAllSILProcessor (line 60) | class TrimAllSILProcessor(BaseWavProcessor): method name (line 62) | def name(self): method process (line 65) | def process(self, input_fn, sr, tmp_dir, processed_dir, item_name, pre... class DenoiseProcessor (line 77) | class DenoiseProcessor(BaseWavProcessor): method name (line 79) | def name(self): method process (line 82) | def process(self, input_fn, sr, tmp_dir, processed_dir, item_name, pre... FILE: NeuralSeq/egs/datasets/audio/emotion/pre_align.py class EmoPreAlign (line 7) | class EmoPreAlign(BasePreprocessor): method meta_data (line 9) | def meta_data(self): FILE: NeuralSeq/egs/datasets/audio/libritts/pre_align.py class LibrittsPreAlign (line 7) | class LibrittsPreAlign(BasePreprocessor): method meta_data (line 8) | def meta_data(self): FILE: NeuralSeq/egs/datasets/audio/lj/preprocess.py class LJPreprocess (line 4) | class LJPreprocess(BasePreprocessor): method meta_data (line 5) | def meta_data(self): FILE: NeuralSeq/egs/datasets/audio/vctk/pre_align.py class VCTKPreAlign (line 7) | class VCTKPreAlign(BasePreAlign): method meta_data (line 8) | def meta_data(self): FILE: NeuralSeq/inference/svs/base_svs_infer.py class BaseSVSInfer (line 18) | class BaseSVSInfer: method __init__ (line 19) | def __init__(self, hparams, device=None): method build_model (line 40) | def build_model(self): method forward_model (line 43) | def forward_model(self, inp): method build_vocoder (line 46) | def build_vocoder(self): method run_vocoder (line 61) | def run_vocoder(self, c, **kwargs): method preprocess_word_level_input (line 72) | def preprocess_word_level_input(self, inp): method preprocess_phoneme_level_input (line 141) | def preprocess_phoneme_level_input(self, inp): method preprocess_input (line 155) | def preprocess_input(self, inp, input_type='word'): method input_to_batch (line 200) | def input_to_batch(self, item): method postprocess_output (line 225) | def postprocess_output(self, output): method infer_once (line 228) | def infer_once(self, inp): method example_run (line 235) | def example_run(cls, inp): FILE: NeuralSeq/inference/svs/ds_cascade.py class DiffSingerCascadeInfer (line 8) | class DiffSingerCascadeInfer(BaseSVSInfer): method build_model (line 9) | def build_model(self): method forward_model (line 22) | def forward_model(self, inp): FILE: NeuralSeq/inference/svs/ds_e2e.py class DiffSingerE2EInfer (line 13) | class DiffSingerE2EInfer(BaseSVSInfer): method build_model (line 14) | def build_model(self): method forward_model (line 32) | def forward_model(self, inp): FILE: NeuralSeq/inference/svs/opencpop/map.py function cpop_pinyin2ph_func (line 1) | def cpop_pinyin2ph_func(): FILE: NeuralSeq/inference/tts/GenerSpeech.py class GenerSpeechInfer (line 13) | class GenerSpeechInfer(BaseTTSInfer): method build_model (line 14) | def build_model(self): method preprocess_input (line 20) | def preprocess_input(self, inp): method input_to_batch (line 69) | def input_to_batch(self, item): method forward_model (line 104) | def forward_model(self, inp): FILE: NeuralSeq/inference/tts/PortaSpeech.py class TTSInference (line 6) | class TTSInference(BaseTTSInfer): method __init__ (line 7) | def __init__(self, hparams, device=None): method build_model (line 13) | def build_model(self): method forward_model (line 20) | def forward_model(self, inp): method preprocess_input (line 37) | def preprocess_input(self, inp): method input_to_batch (line 58) | def input_to_batch(self, item): method postprocess_output (line 81) | def postprocess_output(self, output): FILE: NeuralSeq/inference/tts/base_tts_infer.py class BaseTTSInfer (line 14) | class BaseTTSInfer: method __init__ (line 15) | def __init__(self, hparams, device=None): method build_model (line 32) | def build_model(self): method forward_model (line 35) | def forward_model(self, inp): method build_asr (line 38) | def build_asr(self): method build_vocoder (line 44) | def build_vocoder(self): method run_vocoder (line 52) | def run_vocoder(self, c): method preprocess_input (line 57) | def preprocess_input(self, inp): method input_to_batch (line 60) | def input_to_batch(self, item): method postprocess_output (line 63) | def postprocess_output(self, output): method infer_once (line 66) | def infer_once(self, inp): method example_run (line 73) | def example_run(cls, inp): method asr (line 83) | def asr(self, file): FILE: NeuralSeq/modules/GenerSpeech/model/generspeech.py class GenerSpeech (line 15) | class GenerSpeech(FastSpeech2): method __init__ (line 20) | def __init__(self, dictionary, out_dims=None): method forward (line 75) | def forward(self, txt_tokens, mel2ph=None, ref_mel2ph=None, ref_mel2wo... method get_prosody_ph (line 121) | def get_prosody_ph(self, encoder_out, ref_mels, ret, infer=False, glob... method get_prosody_word (line 149) | def get_prosody_word(self, encoder_out, ref_mels, ret, infer=False, gl... method get_prosody_utter (line 176) | def get_prosody_utter(self, encoder_out, ref_mels, ret, infer=False, g... method inpaint_pitch (line 205) | def inpaint_pitch(self, pitch_inp_domain_agnostic, pitch_inp_domain_sp... method run_post_glow (line 233) | def run_post_glow(self, tgt_mels, infer, is_training, ret): FILE: NeuralSeq/modules/GenerSpeech/model/glow_modules.py class LayerNorm (line 11) | class LayerNorm(nn.Module): method __init__ (line 12) | def __init__(self, channels, eps=1e-4): method forward (line 20) | def forward(self, x): class ConvReluNorm (line 32) | class ConvReluNorm(nn.Module): method __init__ (line 33) | def __init__(self, in_channels, hidden_channels, out_channels, kernel_... method forward (line 57) | def forward(self, x, x_mask): class ActNorm (line 68) | class ActNorm(nn.Module): # glow中的线性变换层 method __init__ (line 69) | def __init__(self, channels, ddi=False, **kwargs): method forward (line 77) | def forward(self, x, x_mask=None, reverse=False, **kwargs): method store_inverse (line 93) | def store_inverse(self): method set_ddi (line 96) | def set_ddi(self, ddi): method initialize (line 99) | def initialize(self, x, x_mask): class InvConvNear (line 114) | class InvConvNear(nn.Module): # 可逆卷积 method __init__ (line 115) | def __init__(self, channels, n_split=4, no_jacobian=False, lu=True, n_... method forward (line 147) | def forward(self, x, x_mask=None, reverse=False, **kwargs): method _get_weight (line 184) | def _get_weight(self): method store_inverse (line 191) | def store_inverse(self): class InvConv (line 196) | class InvConv(nn.Module): method __init__ (line 197) | def __init__(self, channels, no_jacobian=False, lu=True, **kwargs): method get_weight (line 225) | def get_weight(self, device, reverse): method forward (line 242) | def forward(self, x, x_mask=None, reverse=False, **kwargs): method store_inverse (line 268) | def store_inverse(self): class Flip (line 272) | class Flip(nn.Module): method forward (line 273) | def forward(self, x, *args, reverse=False, **kwargs): method store_inverse (line 278) | def store_inverse(self): class CouplingBlock (line 282) | class CouplingBlock(nn.Module): # 仿射耦合层 method __init__ (line 283) | def __init__(self, in_channels, hidden_channels, kernel_size, dilation... method forward (line 311) | def forward(self, x, x_mask=None, reverse=False, g=None, **kwargs): method store_inverse (line 334) | def store_inverse(self): class GlowFFTBlocks (line 338) | class GlowFFTBlocks(FFTBlocks): method __init__ (line 339) | def __init__(self, hidden_size=128, gin_channels=256, num_layers=2, ff... method forward (line 346) | def forward(self, x, x_mask=None, g=None): class TransformerCouplingBlock (line 361) | class TransformerCouplingBlock(nn.Module): method __init__ (line 362) | def __init__(self, in_channels, hidden_channels, n_layers, method forward (line 386) | def forward(self, x, x_mask=None, reverse=False, g=None, **kwargs): method store_inverse (line 409) | def store_inverse(self): class FreqFFTCouplingBlock (line 413) | class FreqFFTCouplingBlock(nn.Module): method __init__ (line 414) | def __init__(self, in_channels, hidden_channels, n_layers, method forward (line 452) | def forward(self, x, x_mask=None, reverse=False, g=None, **kwargs): method store_inverse (line 492) | def store_inverse(self): class Glow (line 496) | class Glow(nn.Module): method __init__ (line 497) | def __init__(self, method forward (line 556) | def forward(self, x, x_mask=None, g=None, reverse=False, return_hidden... method store_inverse (line 582) | def store_inverse(self): class GlowV2 (line 594) | class GlowV2(nn.Module): method __init__ (line 595) | def __init__(self, method forward (line 648) | def forward(self, x=None, x_mask=None, g=None, reverse=False, concat_z... method store_inverse (line 717) | def store_inverse(self): method get_prior (line 730) | def get_prior(self, B, T, device, noise_scale=0.66): function squeeze (line 742) | def squeeze(x, x_mask=None, n_sqz=2): function unsqueeze (line 757) | def unsqueeze(x, x_mask=None, n_sqz=2): FILE: NeuralSeq/modules/GenerSpeech/model/mixstyle.py class MixStyle (line 5) | class MixStyle(nn.Module): method __init__ (line 11) | def __init__(self, p=0.5, alpha=0.1, eps=1e-6, hidden_size=256): method __repr__ (line 31) | def __repr__(self): method set_activation_status (line 34) | def set_activation_status(self, status=True): method forward (line 37) | def forward(self, x, spk_embed): FILE: NeuralSeq/modules/GenerSpeech/model/prosody_util.py class VQEmbeddingEMA (line 16) | class VQEmbeddingEMA(nn.Module): method __init__ (line 17) | def __init__(self, n_embeddings, embedding_dim, commitment_cost=0.25, ... method encode (line 33) | def encode(self, x): method forward (line 47) | def forward(self, x): class CrossAttenLayer (line 95) | class CrossAttenLayer(nn.Module): method __init__ (line 96) | def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1): method forward (line 107) | def forward(self, src, local_emotion, emotion_key_padding_mask=None, f... class ProsodyAligner (line 129) | class ProsodyAligner(nn.Module): method __init__ (line 130) | def __init__(self, num_layers, guided_sigma=0.3, guided_layers=None, n... method forward (line 138) | def forward(self, src, local_emotion, src_key_padding_mask=None, emoti... function _make_guided_attention_mask (line 162) | def _make_guided_attention_mask(ilen, rilen, olen, rolen, sigma): class LocalStyleAdaptor (line 172) | class LocalStyleAdaptor(nn.Module): method __init__ (line 173) | def __init__(self, hidden_size, num_vq_codes=64, padding_idx=0): method forward (line 182) | def forward(self, ref_mels, mel2ph=None, no_vq=False): class LambdaLayer (line 204) | class LambdaLayer(nn.Module): method __init__ (line 205) | def __init__(self, lambd): method forward (line 209) | def forward(self, x): class Conv1d (line 213) | class Conv1d(nn.Conv1d): method __init__ (line 216) | def __init__(self, in_channels, out_channels, kernel_size=1, stride=1,... method forward (line 221) | def forward(self, x): function init_weights_func (line 225) | def init_weights_func(m): class ResidualBlock (line 231) | class ResidualBlock(nn.Module): method __init__ (line 234) | def __init__(self, channels, kernel_size, dilation, n=2, norm_type='bn... method forward (line 264) | def forward(self, x): class Pad (line 275) | class Pad(nn.ZeroPad2d): method __init__ (line 276) | def __init__(self, kernel_size, dilation): class ZeroTemporalPad (line 284) | class ZeroTemporalPad(nn.ZeroPad2d): method __init__ (line 287) | def __init__(self, kernel_size, dilation, causal=False): class ConvBlocks (line 298) | class ConvBlocks(nn.Module): method __init__ (line 301) | def __init__(self, channels, out_dims, dilations, kernel_size, method forward (line 324) | def forward(self, x): class TextConvEncoder (line 338) | class TextConvEncoder(ConvBlocks): method __init__ (line 339) | def __init__(self, embed_tokens, channels, out_dims, dilations, kernel... method forward (line 348) | def forward(self, txt_tokens): class ConditionalConvBlocks (line 360) | class ConditionalConvBlocks(ConvBlocks): method __init__ (line 361) | def __init__(self, channels, g_channels, out_dims, dilations, kernel_s... method forward (line 372) | def forward(self, x, g, x_mask): FILE: NeuralSeq/modules/GenerSpeech/model/wavenet.py function fused_add_tanh_sigmoid_multiply (line 5) | def fused_add_tanh_sigmoid_multiply(input_a, input_b, n_channels): class WN (line 14) | class WN(torch.nn.Module): method __init__ (line 15) | def __init__(self, hidden_channels, kernel_size, dilation_rate, n_laye... method forward (line 54) | def forward(self, x, x_mask=None, g=None, **kwargs): method remove_weight_norm (line 80) | def remove_weight_norm(self): FILE: NeuralSeq/modules/GenerSpeech/task/dataset.py class GenerSpeech_dataset (line 30) | class GenerSpeech_dataset(BaseTTSDataset): method __init__ (line 31) | def __init__(self, prefix, shuffle=False, test_items=None, test_sizes=... method load_test_inputs (line 55) | def load_test_inputs(self, test_input_dir): method _get_item (line 89) | def _get_item(self, index): method __getitem__ (line 96) | def __getitem__(self, index): method collater (line 146) | def collater(self, samples): FILE: NeuralSeq/modules/GenerSpeech/task/generspeech.py class GenerSpeechTask (line 25) | class GenerSpeechTask(FastSpeech2Task): method __init__ (line 26) | def __init__(self): method build_tts_model (line 30) | def build_tts_model(self): method build_model (line 33) | def build_model(self): method run_model (line 40) | def run_model(self, model, sample, return_output=False): method validation_step (line 72) | def validation_step(self, sample, batch_idx): method test_step (line 125) | def test_step(self, sample, batch_idx): method after_infer (line 155) | def after_infer(self, predictions, sil_start_frame=0): method save_result (line 232) | def save_result(wav_out, mel, base_fn, gen_dir, str_phs=None, mel2ph=N... FILE: NeuralSeq/modules/commons/align_ops.py function build_word_mask (line 5) | def build_word_mask(x2word, y2word): function mel2ph_to_mel2word (line 9) | def mel2ph_to_mel2word(mel2ph, ph2word): function clip_mel2token_to_multiple (line 15) | def clip_mel2token_to_multiple(mel2token, frames_multiple): function expand_states (line 21) | def expand_states(h, mel2token): FILE: NeuralSeq/modules/commons/common_layers.py class Reshape (line 10) | class Reshape(nn.Module): method __init__ (line 11) | def __init__(self, *args): method forward (line 15) | def forward(self, x): class Permute (line 19) | class Permute(nn.Module): method __init__ (line 20) | def __init__(self, *args): method forward (line 24) | def forward(self, x): class LinearNorm (line 28) | class LinearNorm(torch.nn.Module): method __init__ (line 29) | def __init__(self, in_dim, out_dim, bias=True, w_init_gain='linear'): method forward (line 37) | def forward(self, x): class ConvNorm (line 41) | class ConvNorm(torch.nn.Module): method __init__ (line 42) | def __init__(self, in_channels, out_channels, kernel_size=1, stride=1, method forward (line 57) | def forward(self, signal): function Embedding (line 62) | def Embedding(num_embeddings, embedding_dim, padding_idx=None): function LayerNorm (line 70) | def LayerNorm(normalized_shape, eps=1e-5, elementwise_affine=True, expor... function Linear (line 79) | def Linear(in_features, out_features, bias=True): class SinusoidalPositionalEmbedding (line 87) | class SinusoidalPositionalEmbedding(nn.Module): method __init__ (line 93) | def __init__(self, embedding_dim, padding_idx, init_size=1024): method get_embedding (line 105) | def get_embedding(num_embeddings, embedding_dim, padding_idx=None): method forward (line 123) | def forward(self, input, incremental_state=None, timestep=None, positi... method max_positions (line 144) | def max_positions(self): class ConvTBC (line 149) | class ConvTBC(nn.Module): method __init__ (line 150) | def __init__(self, in_channels, out_channels, kernel_size, padding=0): method forward (line 161) | def forward(self, input): class MultiheadAttention (line 165) | class MultiheadAttention(nn.Module): method __init__ (line 166) | def __init__(self, embed_dim, num_heads, kdim=None, vdim=None, dropout... method reset_parameters (line 218) | def reset_parameters(self): method forward (line 235) | def forward( method in_proj_qkv (line 421) | def in_proj_qkv(self, query): method in_proj_q (line 424) | def in_proj_q(self, query): method in_proj_k (line 433) | def in_proj_k(self, key): method in_proj_v (line 443) | def in_proj_v(self, value): method _in_proj (line 453) | def _in_proj(self, input, start=0, end=None): method apply_sparse_mask (line 462) | def apply_sparse_mask(self, attn_weights, tgt_len, src_len, bsz): class Swish (line 466) | class Swish(torch.autograd.Function): method forward (line 468) | def forward(ctx, i): method backward (line 474) | def backward(ctx, grad_output): class CustomSwish (line 480) | class CustomSwish(nn.Module): method forward (line 481) | def forward(self, input_tensor): class TransformerFFNLayer (line 485) | class TransformerFFNLayer(nn.Module): method __init__ (line 486) | def __init__(self, hidden_size, filter_size, padding="SAME", kernel_si... method forward (line 502) | def forward(self, x, incremental_state=None): class BatchNorm1dTBC (line 524) | class BatchNorm1dTBC(nn.Module): method __init__ (line 525) | def __init__(self, c): method forward (line 529) | def forward(self, x): class EncSALayer (line 541) | class EncSALayer(nn.Module): method __init__ (line 542) | def __init__(self, c, num_heads, dropout, attention_dropout=0.1, method forward (line 563) | def forward(self, x, encoder_padding_mask=None, **kwargs): class DecSALayer (line 590) | class DecSALayer(nn.Module): method __init__ (line 591) | def __init__(self, c, num_heads, dropout, attention_dropout=0.1, relu_... method forward (line 607) | def forward( FILE: NeuralSeq/modules/commons/conv.py class LambdaLayer (line 10) | class LambdaLayer(nn.Module): method __init__ (line 11) | def __init__(self, lambd): method forward (line 15) | def forward(self, x): function init_weights_func (line 19) | def init_weights_func(m): class ResidualBlock (line 25) | class ResidualBlock(nn.Module): method __init__ (line 28) | def __init__(self, channels, kernel_size, dilation, n=2, norm_type='bn... method forward (line 58) | def forward(self, x): class ConvBlocks (line 69) | class ConvBlocks(nn.Module): method __init__ (line 72) | def __init__(self, hidden_size, out_dims, dilations, kernel_size, method forward (line 100) | def forward(self, x, nonpadding=None): class TextConvEncoder (line 120) | class TextConvEncoder(ConvBlocks): method __init__ (line 121) | def __init__(self, dict_size, hidden_size, out_dims, dilations, kernel... method forward (line 131) | def forward(self, txt_tokens): class ConditionalConvBlocks (line 143) | class ConditionalConvBlocks(ConvBlocks): method __init__ (line 144) | def __init__(self, hidden_size, c_cond, c_out, dilations, kernel_size, method forward (line 155) | def forward(self, x, cond, nonpadding=None): FILE: NeuralSeq/modules/commons/espnet_positional_embedding.py class PositionalEncoding (line 5) | class PositionalEncoding(torch.nn.Module): method __init__ (line 14) | def __init__(self, d_model, dropout_rate, max_len=5000, reverse=False): method extend_pe (line 24) | def extend_pe(self, x): method forward (line 47) | def forward(self, x: torch.Tensor): class ScaledPositionalEncoding (line 59) | class ScaledPositionalEncoding(PositionalEncoding): method __init__ (line 68) | def __init__(self, d_model, dropout_rate, max_len=5000): method reset_parameters (line 73) | def reset_parameters(self): method forward (line 77) | def forward(self, x): class RelPositionalEncoding (line 89) | class RelPositionalEncoding(PositionalEncoding): method __init__ (line 98) | def __init__(self, d_model, dropout_rate, max_len=5000): method forward (line 102) | def forward(self, x): FILE: NeuralSeq/modules/commons/normalizing_flow/glow_modules.py class ActNorm (line 10) | class ActNorm(nn.Module): method __init__ (line 11) | def __init__(self, channels, ddi=False, **kwargs): method forward (line 19) | def forward(self, x, x_mask=None, reverse=False, **kwargs): method store_inverse (line 35) | def store_inverse(self): method set_ddi (line 38) | def set_ddi(self, ddi): method initialize (line 41) | def initialize(self, x, x_mask): class InvConvNear (line 56) | class InvConvNear(nn.Module): method __init__ (line 57) | def __init__(self, channels, n_split=4, no_jacobian=False, lu=True, n_... method forward (line 89) | def forward(self, x, x_mask=None, reverse=False, **kwargs): method _get_weight (line 126) | def _get_weight(self): method store_inverse (line 133) | def store_inverse(self): class InvConv (line 138) | class InvConv(nn.Module): method __init__ (line 139) | def __init__(self, channels, no_jacobian=False, lu=True, **kwargs): method get_weight (line 167) | def get_weight(self, device, reverse): method forward (line 184) | def forward(self, x, x_mask=None, reverse=False, **kwargs): method store_inverse (line 210) | def store_inverse(self): class CouplingBlock (line 214) | class CouplingBlock(nn.Module): method __init__ (line 215) | def __init__(self, in_channels, hidden_channels, kernel_size, dilation... method forward (line 241) | def forward(self, x, x_mask=None, reverse=False, g=None, **kwargs): method store_inverse (line 264) | def store_inverse(self): class Glow (line 268) | class Glow(nn.Module): method __init__ (line 269) | def __init__(self, method forward (line 327) | def forward(self, x, x_mask=None, g=None, reverse=False, return_hidden... method store_inverse (line 353) | def store_inverse(self): FILE: NeuralSeq/modules/commons/normalizing_flow/res_flow.py class FlipLayer (line 7) | class FlipLayer(nn.Module): method forward (line 8) | def forward(self, x, nonpadding, cond=None, reverse=False): class CouplingLayer (line 13) | class CouplingLayer(nn.Module): method __init__ (line 14) | def __init__(self, c_in, hidden_size, kernel_size, n_layers, p_dropout... method forward (line 32) | def forward(self, x, nonpadding, cond=None, reverse=False): class ResFlow (line 42) | class ResFlow(nn.Module): method __init__ (line 43) | def __init__(self, method forward (line 58) | def forward(self, x, nonpadding, cond=None, reverse=False): FILE: NeuralSeq/modules/commons/normalizing_flow/utils.py function squeeze (line 4) | def squeeze(x, x_mask=None, n_sqz=2): function unsqueeze (line 19) | def unsqueeze(x, x_mask=None, n_sqz=2): FILE: NeuralSeq/modules/commons/rel_transformer.py function convert_pad_shape (line 11) | def convert_pad_shape(pad_shape): function shift_1d (line 17) | def shift_1d(x): function sequence_mask (line 22) | def sequence_mask(length, max_length=None): class Encoder (line 29) | class Encoder(nn.Module): method __init__ (line 30) | def __init__(self, hidden_channels, filter_channels, n_heads, n_layers... method forward (line 59) | def forward(self, x, x_mask): class MultiHeadAttention (line 86) | class MultiHeadAttention(nn.Module): method __init__ (line 87) | def __init__(self, channels, out_channels, n_heads, window_size=None, ... method forward (line 121) | def forward(self, x, c, attn_mask=None): method attention (line 131) | def attention(self, query, key, value, mask=None): method _matmul_with_relative_values (line 164) | def _matmul_with_relative_values(self, x, y): method _matmul_with_relative_keys (line 173) | def _matmul_with_relative_keys(self, x, y): method _get_relative_embeddings (line 182) | def _get_relative_embeddings(self, relative_embeddings, length): method _relative_position_to_absolute_position (line 197) | def _relative_position_to_absolute_position(self, x): method _absolute_position_to_relative_position (line 214) | def _absolute_position_to_relative_position(self, x): method _attention_bias_proximal (line 228) | def _attention_bias_proximal(self, length): class FFN (line 240) | class FFN(nn.Module): method __init__ (line 241) | def __init__(self, in_channels, out_channels, filter_channels, kernel_... method forward (line 254) | def forward(self, x, x_mask): class LayerNorm (line 265) | class LayerNorm(nn.Module): method __init__ (line 266) | def __init__(self, channels, eps=1e-4): method forward (line 274) | def forward(self, x): class ConvReluNorm (line 286) | class ConvReluNorm(nn.Module): method __init__ (line 287) | def __init__(self, in_channels, hidden_channels, out_channels, kernel_... method forward (line 311) | def forward(self, x, x_mask): class RelTransformerEncoder (line 321) | class RelTransformerEncoder(nn.Module): method __init__ (line 322) | def __init__(self, method forward (line 368) | def forward(self, x, x_mask=None): class Pooler (line 383) | class Pooler(nn.Module): method __init__ (line 392) | def __init__(self, pooler_type): method forward (line 397) | def forward(self, attention_mask, outputs): class Similarity (line 420) | class Similarity(nn.Module): method __init__ (line 425) | def __init__(self, temp): method forward (line 433) | def forward(self, x, y): class BertPredictionHeadTransform (line 451) | class BertPredictionHeadTransform(nn.Module): method __init__ (line 452) | def __init__(self, hidden_size): method forward (line 458) | def forward(self, hidden_states): class BertLMPredictionHead (line 465) | class BertLMPredictionHead(nn.Module): method __init__ (line 466) | def __init__(self, hid_dim, out_dim): method forward (line 473) | def forward(self, hidden_states): class BERTRelTransformerEncoder (line 483) | class BERTRelTransformerEncoder(nn.Module): method __init__ (line 484) | def __init__(self, method forward (line 582) | def forward(self, x, x_mask=None, bert_feats=None, ph2word=None, **kwa... FILE: NeuralSeq/modules/commons/ssim.py function gaussian (line 319) | def gaussian(window_size, sigma): function create_window (line 324) | def create_window(window_size, channel): function _ssim (line 331) | def _ssim(img1, img2, window, window_size, channel, size_average=True): class SSIM (line 354) | class SSIM(torch.nn.Module): method __init__ (line 355) | def __init__(self, window_size=11, size_average=True): method forward (line 362) | def forward(self, img1, img2): function ssim (line 383) | def ssim(img1, img2, window_size=11, size_average=True): FILE: NeuralSeq/modules/commons/transformer.py class SinusoidalPositionalEmbedding (line 13) | class SinusoidalPositionalEmbedding(nn.Module): method __init__ (line 19) | def __init__(self, embedding_dim, padding_idx, init_size=1024): method get_embedding (line 31) | def get_embedding(num_embeddings, embedding_dim, padding_idx=None): method forward (line 49) | def forward(self, input, incremental_state=None, timestep=None, positi... method max_positions (line 70) | def max_positions(self): class TransformerFFNLayer (line 75) | class TransformerFFNLayer(nn.Module): method __init__ (line 76) | def __init__(self, hidden_size, filter_size, padding="SAME", kernel_si... method forward (line 90) | def forward(self, x, incremental_state=None): method _get_input_buffer (line 114) | def _get_input_buffer(self, incremental_state): method _set_input_buffer (line 121) | def _set_input_buffer(self, incremental_state, buffer): method clear_buffer (line 129) | def clear_buffer(self, incremental_state): class MultiheadAttention (line 137) | class MultiheadAttention(nn.Module): method __init__ (line 138) | def __init__(self, embed_dim, num_heads, kdim=None, vdim=None, dropout... method reset_parameters (line 190) | def reset_parameters(self): method forward (line 207) | def forward( method in_proj_qkv (line 420) | def in_proj_qkv(self, query): method in_proj_q (line 423) | def in_proj_q(self, query): method in_proj_k (line 432) | def in_proj_k(self, key): method in_proj_v (line 442) | def in_proj_v(self, value): method _in_proj (line 452) | def _in_proj(self, input, start=0, end=None): method _get_input_buffer (line 460) | def _get_input_buffer(self, incremental_state): method _set_input_buffer (line 467) | def _set_input_buffer(self, incremental_state, buffer): method apply_sparse_mask (line 475) | def apply_sparse_mask(self, attn_weights, tgt_len, src_len, bsz): method clear_buffer (line 478) | def clear_buffer(self, incremental_state=None): class EncSALayer (line 488) | class EncSALayer(nn.Module): method __init__ (line 489) | def __init__(self, c, num_heads, dropout, attention_dropout=0.1, method forward (line 503) | def forward(self, x, encoder_padding_mask=None, **kwargs): class DecSALayer (line 530) | class DecSALayer(nn.Module): method __init__ (line 531) | def __init__(self, c, num_heads, dropout, attention_dropout=0.1, relu_... method forward (line 548) | def forward( method clear_buffer (line 608) | def clear_buffer(self, input, encoder_out=None, encoder_padding_mask=N... method set_buffer (line 612) | def set_buffer(self, name, tensor, incremental_state): class TransformerEncoderLayer (line 616) | class TransformerEncoderLayer(nn.Module): method __init__ (line 617) | def __init__(self, hidden_size, dropout, kernel_size=9, num_heads=2): method forward (line 627) | def forward(self, x, **kwargs): class TransformerDecoderLayer (line 631) | class TransformerDecoderLayer(nn.Module): method __init__ (line 632) | def __init__(self, hidden_size, dropout, kernel_size=9, num_heads=2): method forward (line 642) | def forward(self, x, **kwargs): method clear_buffer (line 645) | def clear_buffer(self, *args): method set_buffer (line 648) | def set_buffer(self, *args): class FFTBlocks (line 652) | class FFTBlocks(nn.Module): method __init__ (line 653) | def __init__(self, hidden_size, num_layers, ffn_kernel_size=9, dropout... method forward (line 681) | def forward(self, x, padding_mask=None, attn_mask=None, return_hiddens... class FastSpeechEncoder (line 709) | class FastSpeechEncoder(FFTBlocks): method __init__ (line 710) | def __init__(self, dict_size, hidden_size=256, num_layers=4, kernel_si... method forward (line 721) | def forward(self, txt_tokens, attn_mask=None): method forward_embedding (line 735) | def forward_embedding(self, txt_tokens): class FastSpeechDecoder (line 745) | class FastSpeechDecoder(FFTBlocks): method __init__ (line 746) | def __init__(self, hidden_size=256, num_layers=4, kernel_size=9, num_h... FILE: NeuralSeq/modules/commons/wavenet.py function fused_add_tanh_sigmoid_multiply (line 5) | def fused_add_tanh_sigmoid_multiply(input_a, input_b, n_channels): class WN (line 14) | class WN(torch.nn.Module): method __init__ (line 15) | def __init__(self, hidden_size, kernel_size, dilation_rate, n_layers, ... method forward (line 55) | def forward(self, x, nonpadding=None, cond=None): method remove_weight_norm (line 90) | def remove_weight_norm(self): FILE: NeuralSeq/modules/diff/candidate_decoder.py class SinusoidalPosEmb (line 14) | class SinusoidalPosEmb(nn.Module): method __init__ (line 15) | def __init__(self, dim): method forward (line 19) | def forward(self, x): function Conv1d (line 29) | def Conv1d(*args, **kwargs): class FFT (line 35) | class FFT(FastspeechDecoder): method __init__ (line 36) | def __init__(self, hidden_size=None, num_layers=None, kernel_size=None... method forward (line 50) | def forward(self, spec, diffusion_step, cond, padding_mask=None, attn_... FILE: NeuralSeq/modules/diff/diffusion.py function exists (line 19) | def exists(x): function default (line 23) | def default(val, d): function cycle (line 29) | def cycle(dl): function num_to_groups (line 35) | def num_to_groups(num, divisor): class Residual (line 44) | class Residual(nn.Module): method __init__ (line 45) | def __init__(self, fn): method forward (line 49) | def forward(self, x, *args, **kwargs): class SinusoidalPosEmb (line 53) | class SinusoidalPosEmb(nn.Module): method __init__ (line 54) | def __init__(self, dim): method forward (line 58) | def forward(self, x): class Mish (line 68) | class Mish(nn.Module): method forward (line 69) | def forward(self, x): class Upsample (line 73) | class Upsample(nn.Module): method __init__ (line 74) | def __init__(self, dim): method forward (line 78) | def forward(self, x): class Downsample (line 82) | class Downsample(nn.Module): method __init__ (line 83) | def __init__(self, dim): method forward (line 87) | def forward(self, x): class Rezero (line 91) | class Rezero(nn.Module): method __init__ (line 92) | def __init__(self, fn): method forward (line 97) | def forward(self, x): class Block (line 103) | class Block(nn.Module): method __init__ (line 104) | def __init__(self, dim, dim_out, groups=8): method forward (line 112) | def forward(self, x): class ResnetBlock (line 116) | class ResnetBlock(nn.Module): method __init__ (line 117) | def __init__(self, dim, dim_out, *, time_emb_dim, groups=8): method forward (line 128) | def forward(self, x, time_emb): class LinearAttention (line 135) | class LinearAttention(nn.Module): method __init__ (line 136) | def __init__(self, dim, heads=4, dim_head=32): method forward (line 143) | def forward(self, x): function extract (line 156) | def extract(a, t, x_shape): function noise_like (line 162) | def noise_like(shape, device, repeat=False): function cosine_beta_schedule (line 168) | def cosine_beta_schedule(timesteps, s=0.008): class GaussianDiffusion (line 181) | class GaussianDiffusion(nn.Module): method __init__ (line 182) | def __init__(self, phone_encoder, out_dims, denoise_fn, method q_mean_variance (line 233) | def q_mean_variance(self, x_start, t): method predict_start_from_noise (line 239) | def predict_start_from_noise(self, x_t, t, noise): method q_posterior (line 245) | def q_posterior(self, x_start, x_t, t): method p_mean_variance (line 254) | def p_mean_variance(self, x, t, cond, clip_denoised: bool): method p_sample (line 265) | def p_sample(self, x, t, cond, clip_denoised=True, repeat_noise=False): method q_sample (line 273) | def q_sample(self, x_start, t, noise=None): method p_losses (line 280) | def p_losses(self, x_start, t, cond, noise=None, nonpadding=None): method forward (line 300) | def forward(self, txt_tokens, mel2ph=None, spk_embed=None, method norm_spec (line 324) | def norm_spec(self, x): method denorm_spec (line 327) | def denorm_spec(self, x): method cwt2f0_norm (line 330) | def cwt2f0_norm(self, cwt_spec, mean, std, mel2ph): method out2mel (line 333) | def out2mel(self, x): FILE: NeuralSeq/modules/diff/net.py class AttrDict (line 16) | class AttrDict(dict): method __init__ (line 17) | def __init__(self, *args, **kwargs): method override (line 21) | def override(self, attrs): class SinusoidalPosEmb (line 32) | class SinusoidalPosEmb(nn.Module): method __init__ (line 33) | def __init__(self, dim): method forward (line 37) | def forward(self, x): function Conv1d (line 47) | def Conv1d(*args, **kwargs): function silu (line 54) | def silu(x): class ResidualBlock (line 58) | class ResidualBlock(nn.Module): method __init__ (line 59) | def __init__(self, encoder_hidden, residual_channels, dilation): method forward (line 66) | def forward(self, x, conditioner, diffusion_step): class DiffNet (line 81) | class DiffNet(nn.Module): method __init__ (line 82) | def __init__(self, in_dims=80): method forward (line 107) | def forward(self, spec, diffusion_step, cond): FILE: NeuralSeq/modules/diff/shallow_diffusion_tts.py function exists (line 20) | def exists(x): function default (line 24) | def default(val, d): function extract (line 32) | def extract(a, t, x_shape): function noise_like (line 38) | def noise_like(shape, device, repeat=False): function linear_beta_schedule (line 44) | def linear_beta_schedule(timesteps, max_beta=hparams.get('max_beta', 0.0... function cosine_beta_schedule (line 52) | def cosine_beta_schedule(timesteps, s=0.008): class GaussianDiffusion (line 71) | class GaussianDiffusion(nn.Module): method __init__ (line 72) | def __init__(self, phone_encoder, out_dims, denoise_fn, method q_mean_variance (line 128) | def q_mean_variance(self, x_start, t): method predict_start_from_noise (line 134) | def predict_start_from_noise(self, x_t, t, noise): method q_posterior (line 140) | def q_posterior(self, x_start, x_t, t): method p_mean_variance (line 149) | def p_mean_variance(self, x, t, cond, clip_denoised: bool): method p_sample (line 160) | def p_sample(self, x, t, cond, clip_denoised=True, repeat_noise=False): method p_sample_plms (line 169) | def p_sample_plms(self, x, t, interval, cond, clip_denoised=True, repe... method q_sample (line 206) | def q_sample(self, x_start, t, noise=None): method p_losses (line 213) | def p_losses(self, x_start, t, cond, noise=None, nonpadding=None): method forward (line 233) | def forward(self, txt_tokens, mel2ph=None, spk_embed=None, method norm_spec (line 279) | def norm_spec(self, x): method denorm_spec (line 282) | def denorm_spec(self, x): method cwt2f0_norm (line 285) | def cwt2f0_norm(self, cwt_spec, mean, std, mel2ph): method out2mel (line 288) | def out2mel(self, x): class OfflineGaussianDiffusion (line 292) | class OfflineGaussianDiffusion(GaussianDiffusion): method forward (line 293) | def forward(self, txt_tokens, mel2ph=None, spk_embed=None, FILE: NeuralSeq/modules/diffsinger_midi/fs2.py class FastspeechMIDIEncoder (line 11) | class FastspeechMIDIEncoder(FastspeechEncoder): method forward_embedding (line 12) | def forward_embedding(self, txt_tokens, midi_embedding, midi_dur_embed... method forward (line 25) | def forward(self, txt_tokens, midi_embedding, midi_dur_embedding, slur... class FastSpeech2MIDI (line 46) | class FastSpeech2MIDI(FastSpeech2): method __init__ (line 47) | def __init__(self, dictionary, out_dims=None): method forward (line 55) | def forward(self, txt_tokens, mel2ph=None, spk_embed=None, FILE: NeuralSeq/modules/fastspeech/fs2.py class FastSpeech2 (line 22) | class FastSpeech2(nn.Module): method __init__ (line 23) | def __init__(self, dictionary, out_dims=None): method build_embedding (line 74) | def build_embedding(self, dictionary, embed_dim): method forward (line 79) | def forward(self, txt_tokens, mel2ph=None, spk_embed=None, method add_dur (line 140) | def add_dur(self, dur_input, mel2ph, txt_tokens, ret): method add_energy (line 165) | def add_energy(self, decoder_inp, energy, ret): method add_pitch (line 174) | def add_pitch(self, decoder_inp, f0, uv, mel2ph, ret, encoder_out=None): method run_decoder (line 222) | def run_decoder(self, decoder_inp, tgt_nonpadding, ret, infer, **kwargs): method cwt2f0_norm (line 228) | def cwt2f0_norm(self, cwt_spec, mean, std, mel2ph): method out2mel (line 235) | def out2mel(self, out): method mel_norm (line 239) | def mel_norm(x): method mel_denorm (line 243) | def mel_denorm(x): method expand_states (line 246) | def expand_states(self, h, mel2ph): FILE: NeuralSeq/modules/fastspeech/pe.py class Prenet (line 7) | class Prenet(nn.Module): method __init__ (line 8) | def __init__(self, in_dim=80, out_dim=256, kernel=5, n_layers=3, strid... method forward (line 23) | def forward(self, x): class ConvBlock (line 44) | class ConvBlock(nn.Module): method __init__ (line 45) | def __init__(self, idim=80, n_chans=256, kernel_size=3, stride=1, norm... method forward (line 62) | def forward(self, x): class ConvStacks (line 81) | class ConvStacks(nn.Module): method __init__ (line 82) | def __init__(self, idim=80, n_layers=5, n_chans=256, odim=32, kernel_s... method forward (line 98) | def forward(self, x, return_hiddens=False): class PitchExtractor (line 119) | class PitchExtractor(nn.Module): method __init__ (line 120) | def __init__(self, n_mel_bins=80, conv_layers=2): method forward (line 135) | def forward(self, mel_input=None): FILE: NeuralSeq/modules/fastspeech/tts_modules.py class TransformerEncoderLayer (line 16) | class TransformerEncoderLayer(nn.Module): method __init__ (line 17) | def __init__(self, hidden_size, dropout, kernel_size=None, num_heads=2... method forward (line 30) | def forward(self, x, **kwargs): class LayerNorm (line 37) | class LayerNorm(torch.nn.LayerNorm): method __init__ (line 43) | def __init__(self, nout, dim=-1, eps=1e-5): method forward (line 48) | def forward(self, x): class DurationPredictor (line 59) | class DurationPredictor(torch.nn.Module): method __init__ (line 70) | def __init__(self, idim, odims = 1, n_layers=2, n_chans=384, kernel_si... method _forward (line 98) | def _forward(self, xs, x_masks=None, is_inference=False): method out2dur (line 114) | def out2dur(self, xs): method forward (line 125) | def forward(self, xs, x_masks=None): method inference (line 135) | def inference(self, xs, x_masks=None): class SyntaDurationPredictor (line 145) | class SyntaDurationPredictor(torch.nn.Module): method __init__ (line 146) | def __init__(self, idim, n_layers=2, n_chans=384, kernel_size=3, dropo... method forward (line 163) | def forward(self, x, x_padding=None, ph2word=None, graph_lst=None, ety... class LengthRegulator (line 179) | class LengthRegulator(torch.nn.Module): method __init__ (line 180) | def __init__(self, pad_value=0.0): method forward (line 184) | def forward(self, dur, dur_padding=None, alpha=1.0): class PitchPredictor (line 217) | class PitchPredictor(torch.nn.Module): method __init__ (line 218) | def __init__(self, idim, n_layers=5, n_chans=384, odim=2, kernel_size=5, method forward (line 247) | def forward(self, xs): class EnergyPredictor (line 263) | class EnergyPredictor(PitchPredictor): function mel2ph_to_dur (line 267) | def mel2ph_to_dur(mel2ph, T_txt, max_dur=None): class FFTBlocks (line 276) | class FFTBlocks(nn.Module): method __init__ (line 277) | def __init__(self, hidden_size, num_layers, ffn_kernel_size=9, dropout... method forward (line 307) | def forward(self, x, padding_mask=None, attn_mask=None, return_hiddens... class FastspeechEncoder (line 335) | class FastspeechEncoder(FFTBlocks): method __init__ (line 336) | def __init__(self, embed_tokens, hidden_size=None, num_layers=None, ke... method forward (line 352) | def forward(self, txt_tokens): method forward_embedding (line 365) | def forward_embedding(self, txt_tokens): class FastspeechDecoder (line 378) | class FastspeechDecoder(FFTBlocks): method __init__ (line 379) | def __init__(self, hidden_size=None, num_layers=None, kernel_size=None... FILE: NeuralSeq/modules/hifigan/hifigan.py function init_weights (line 14) | def init_weights(m, mean=0.0, std=0.01): function apply_weight_norm (line 20) | def apply_weight_norm(m): function get_padding (line 26) | def get_padding(kernel_size, dilation=1): class ResBlock1 (line 30) | class ResBlock1(torch.nn.Module): method __init__ (line 31) | def __init__(self, h, channels, kernel_size=3, dilation=(1, 3, 5)): method forward (line 54) | def forward(self, x): method remove_weight_norm (line 63) | def remove_weight_norm(self): class ResBlock2 (line 70) | class ResBlock2(torch.nn.Module): method __init__ (line 71) | def __init__(self, h, channels, kernel_size=3, dilation=(1, 3)): method forward (line 82) | def forward(self, x): method remove_weight_norm (line 89) | def remove_weight_norm(self): class Conv1d1x1 (line 94) | class Conv1d1x1(Conv1d): method __init__ (line 97) | def __init__(self, in_channels, out_channels, bias): class HifiGanGenerator (line 104) | class HifiGanGenerator(torch.nn.Module): method __init__ (line 105) | def __init__(self, h, c_out=1): method forward (line 144) | def forward(self, x, f0=None): method remove_weight_norm (line 171) | def remove_weight_norm(self): class DiscriminatorP (line 181) | class DiscriminatorP(torch.nn.Module): method __init__ (line 182) | def __init__(self, period, kernel_size=5, stride=3, use_spectral_norm=... method forward (line 202) | def forward(self, x, mel): class MultiPeriodDiscriminator (line 226) | class MultiPeriodDiscriminator(torch.nn.Module): method __init__ (line 227) | def __init__(self, use_cond=False, c_in=1): method forward (line 237) | def forward(self, y, y_hat, mel=None): class DiscriminatorS (line 253) | class DiscriminatorS(torch.nn.Module): method __init__ (line 254) | def __init__(self, use_spectral_norm=False, use_cond=False, upsample_r... method forward (line 273) | def forward(self, x, mel): class MultiScaleDiscriminator (line 289) | class MultiScaleDiscriminator(torch.nn.Module): method __init__ (line 290) | def __init__(self, use_cond=False, c_in=1): method forward (line 309) | def forward(self, y, y_hat, mel=None): function feature_loss (line 328) | def feature_loss(fmap_r, fmap_g): function discriminator_loss (line 337) | def discriminator_loss(disc_real_outputs, disc_generated_outputs): function cond_discriminator_loss (line 350) | def cond_discriminator_loss(outputs): function generator_loss (line 359) | def generator_loss(disc_outputs): FILE: NeuralSeq/modules/hifigan/mel_utils.py function load_wav (line 10) | def load_wav(full_path): function dynamic_range_compression (line 15) | def dynamic_range_compression(x, C=1, clip_val=1e-5): function dynamic_range_decompression (line 19) | def dynamic_range_decompression(x, C=1): function dynamic_range_compression_torch (line 23) | def dynamic_range_compression_torch(x, C=1, clip_val=1e-5): function dynamic_range_decompression_torch (line 27) | def dynamic_range_decompression_torch(x, C=1): function spectral_normalize_torch (line 31) | def spectral_normalize_torch(magnitudes): function spectral_de_normalize_torch (line 36) | def spectral_de_normalize_torch(magnitudes): function mel_spectrogram (line 45) | def mel_spectrogram(y, hparams, center=False, complex=False): FILE: NeuralSeq/modules/parallel_wavegan/layers/causal_conv.py class CausalConv1d (line 12) | class CausalConv1d(torch.nn.Module): method __init__ (line 15) | def __init__(self, in_channels, out_channels, kernel_size, method forward (line 23) | def forward(self, x): class CausalConvTranspose1d (line 36) | class CausalConvTranspose1d(torch.nn.Module): method __init__ (line 39) | def __init__(self, in_channels, out_channels, kernel_size, stride, bia... method forward (line 46) | def forward(self, x): FILE: NeuralSeq/modules/parallel_wavegan/layers/pqmf.py function design_prototype_filter (line 15) | def design_prototype_filter(taps=62, cutoff_ratio=0.15, beta=9.0): class PQMF (line 51) | class PQMF(torch.nn.Module): method __init__ (line 61) | def __init__(self, subbands=4, taps=62, cutoff_ratio=0.15, beta=9.0): method analysis (line 105) | def analysis(self, x): method synthesis (line 118) | def synthesis(self, x): FILE: NeuralSeq/modules/parallel_wavegan/layers/residual_block.py class Conv1d (line 15) | class Conv1d(torch.nn.Conv1d): method __init__ (line 18) | def __init__(self, *args, **kwargs): method reset_parameters (line 22) | def reset_parameters(self): class Conv1d1x1 (line 29) | class Conv1d1x1(Conv1d): method __init__ (line 32) | def __init__(self, in_channels, out_channels, bias): class ResidualBlock (line 39) | class ResidualBlock(torch.nn.Module): method __init__ (line 42) | def __init__(self, method forward (line 91) | def forward(self, x, c): FILE: NeuralSeq/modules/parallel_wavegan/layers/residual_stack.py class ResidualStack (line 13) | class ResidualStack(torch.nn.Module): method __init__ (line 16) | def __init__(self, method forward (line 65) | def forward(self, c): FILE: NeuralSeq/modules/parallel_wavegan/layers/tf_layers.py class TFReflectionPad1d (line 11) | class TFReflectionPad1d(tf.keras.layers.Layer): method __init__ (line 14) | def __init__(self, padding_size): method call (line 25) | def call(self, x): class TFConvTranspose1d (line 38) | class TFConvTranspose1d(tf.keras.layers.Layer): method __init__ (line 41) | def __init__(self, channels, kernel_size, stride, padding): method call (line 60) | def call(self, x): class TFResidualStack (line 74) | class TFResidualStack(tf.keras.layers.Layer): method __init__ (line 77) | def __init__(self, method call (line 115) | def call(self, x): FILE: NeuralSeq/modules/parallel_wavegan/layers/upsample.py class Stretch2d (line 16) | class Stretch2d(torch.nn.Module): method __init__ (line 19) | def __init__(self, x_scale, y_scale, mode="nearest"): method forward (line 33) | def forward(self, x): class Conv2d (line 47) | class Conv2d(torch.nn.Conv2d): method __init__ (line 50) | def __init__(self, *args, **kwargs): method reset_parameters (line 54) | def reset_parameters(self): class UpsampleNetwork (line 61) | class UpsampleNetwork(torch.nn.Module): method __init__ (line 64) | def __init__(self, method forward (line 106) | def forward(self, c): class ConvInUpsampleNetwork (line 125) | class ConvInUpsampleNetwork(torch.nn.Module): method __init__ (line 128) | def __init__(self, method forward (line 167) | def forward(self, c): FILE: NeuralSeq/modules/parallel_wavegan/losses/stft_loss.py function stft (line 12) | def stft(x, fft_size, hop_size, win_length, window): class SpectralConvergengeLoss (line 34) | class SpectralConvergengeLoss(torch.nn.Module): method __init__ (line 37) | def __init__(self): method forward (line 41) | def forward(self, x_mag, y_mag): class LogSTFTMagnitudeLoss (line 55) | class LogSTFTMagnitudeLoss(torch.nn.Module): method __init__ (line 58) | def __init__(self): method forward (line 62) | def forward(self, x_mag, y_mag): class STFTLoss (line 76) | class STFTLoss(torch.nn.Module): method __init__ (line 79) | def __init__(self, fft_size=1024, shift_size=120, win_length=600, wind... method forward (line 89) | def forward(self, x, y): class MultiResolutionSTFTLoss (line 109) | class MultiResolutionSTFTLoss(torch.nn.Module): method __init__ (line 112) | def __init__(self, method forward (line 132) | def forward(self, x, y): FILE: NeuralSeq/modules/parallel_wavegan/models/melgan.py class MelGANGenerator (line 18) | class MelGANGenerator(torch.nn.Module): method __init__ (line 21) | def __init__(self, method forward (line 147) | def forward(self, c): method remove_weight_norm (line 159) | def remove_weight_norm(self): method apply_weight_norm (line 170) | def apply_weight_norm(self): method reset_parameters (line 179) | def reset_parameters(self): class MelGANDiscriminator (line 194) | class MelGANDiscriminator(torch.nn.Module): method __init__ (line 197) | def __init__(self, method forward (line 285) | def forward(self, x): class MelGANMultiScaleDiscriminator (line 303) | class MelGANMultiScaleDiscriminator(torch.nn.Module): method __init__ (line 306) | def __init__(self, method forward (line 378) | def forward(self, x): method remove_weight_norm (line 395) | def remove_weight_norm(self): method apply_weight_norm (line 406) | def apply_weight_norm(self): method reset_parameters (line 415) | def reset_parameters(self): FILE: NeuralSeq/modules/parallel_wavegan/models/parallel_wavegan.py class ParallelWaveGANGenerator (line 21) | class ParallelWaveGANGenerator(torch.nn.Module): method __init__ (line 24) | def __init__(self, method forward (line 138) | def forward(self, x, c=None, pitch=None, **kwargs): method remove_weight_norm (line 173) | def remove_weight_norm(self): method apply_weight_norm (line 184) | def apply_weight_norm(self): method _get_receptive_field_size (line 194) | def _get_receptive_field_size(layers, stacks, kernel_size, method receptive_field_size (line 202) | def receptive_field_size(self): class ParallelWaveGANDiscriminator (line 207) | class ParallelWaveGANDiscriminator(torch.nn.Module): method __init__ (line 210) | def __init__(self, method forward (line 268) | def forward(self, x): method apply_weight_norm (line 282) | def apply_weight_norm(self): method remove_weight_norm (line 291) | def remove_weight_norm(self): class ResidualParallelWaveGANDiscriminator (line 303) | class ResidualParallelWaveGANDiscriminator(torch.nn.Module): method __init__ (line 306) | def __init__(self, method forward (line 392) | def forward(self, x): method apply_weight_norm (line 416) | def apply_weight_norm(self): method remove_weight_norm (line 425) | def remove_weight_norm(self): FILE: NeuralSeq/modules/parallel_wavegan/models/source.py class SineGen (line 7) | class SineGen(torch.nn.Module): method __init__ (line 25) | def __init__(self, samp_rate, harmonic_num=0, method _f02uv (line 38) | def _f02uv(self, f0): method _f02sine (line 44) | def _f02sine(self, f0_values): method forward (line 104) | def forward(self, f0): method __init__ (line 329) | def __init__(self, samp_rate, harmonic_num=0, method _f02uv (line 342) | def _f02uv(self, f0): method _f02sine (line 348) | def _f02sine(self, f0_values): method forward (line 408) | def forward(self, f0): class PulseGen (line 140) | class PulseGen(torch.nn.Module): method __init__ (line 146) | def __init__(self, samp_rate, pulse_amp = 0.1, method forward (line 158) | def forward(self, f0): class SignalsConv1d (line 205) | class SignalsConv1d(torch.nn.Module): method __init__ (line 213) | def __init__(self): method forward (line 216) | def forward(self, signal, system_ir): class CyclicNoiseGen_v1 (line 246) | class CyclicNoiseGen_v1(torch.nn.Module): method __init__ (line 252) | def __init__(self, samp_rate, method noise_decay (line 264) | def noise_decay(self, beta, f0mean): method forward (line 288) | def forward(self, f0s, beta): class SineGen (line 311) | class SineGen(torch.nn.Module): method __init__ (line 25) | def __init__(self, samp_rate, harmonic_num=0, method _f02uv (line 38) | def _f02uv(self, f0): method _f02sine (line 44) | def _f02sine(self, f0_values): method forward (line 104) | def forward(self, f0): method __init__ (line 329) | def __init__(self, samp_rate, harmonic_num=0, method _f02uv (line 342) | def _f02uv(self, f0): method _f02sine (line 348) | def _f02sine(self, f0_values): method forward (line 408) | def forward(self, f0): class SourceModuleCycNoise_v1 (line 444) | class SourceModuleCycNoise_v1(torch.nn.Module): method __init__ (line 460) | def __init__(self, sampling_rate, noise_std=0.003, voiced_threshod=0): method forward (line 467) | def forward(self, f0_upsamped, beta): class SourceModuleHnNSF (line 484) | class SourceModuleHnNSF(torch.nn.Module): method __init__ (line 503) | def __init__(self, sampling_rate, harmonic_num=0, sine_amp=0.1, method forward (line 518) | def forward(self, x): FILE: NeuralSeq/modules/parallel_wavegan/optimizers/radam.py class RAdam (line 14) | class RAdam(Optimizer): method __init__ (line 17) | def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weig... method __setstate__ (line 23) | def __setstate__(self, state): method step (line 27) | def step(self, closure=None): FILE: NeuralSeq/modules/parallel_wavegan/stft_loss.py class STFTLoss (line 13) | class STFTLoss(torch.nn.Module): method __init__ (line 16) | def __init__(self, fft_size=1024, shift_size=120, win_length=600, wind... method forward (line 29) | def forward(self, x, y): class MultiResolutionSTFTLoss (line 55) | class MultiResolutionSTFTLoss(torch.nn.Module): method __init__ (line 58) | def __init__(self, method forward (line 79) | def forward(self, x, y): FILE: NeuralSeq/modules/parallel_wavegan/utils/utils.py function find_files (line 17) | def find_files(root_dir, query="*.wav", include_root_dir=True): function read_hdf5 (line 39) | def read_hdf5(hdf5_name, hdf5_path): function write_hdf5 (line 66) | def write_hdf5(hdf5_name, hdf5_path, write_data, is_overwrite=True): class HDF5ScpLoader (line 109) | class HDF5ScpLoader(object): method __init__ (line 131) | def __init__(self, feats_scp, default_hdf5_path="feats"): method get_path (line 147) | def get_path(self, key): method __getitem__ (line 151) | def __getitem__(self, key): method __len__ (line 159) | def __len__(self): method __iter__ (line 163) | def __iter__(self): method keys (line 167) | def keys(self): FILE: NeuralSeq/modules/syntaspeech/multi_window_disc.py class SingleWindowDisc (line 6) | class SingleWindowDisc(nn.Module): method __init__ (line 7) | def __init__(self, time_length, freq_length=80, kernel=(3, 3), c_in=1,... method forward (line 32) | def forward(self, x): class MultiWindowDiscriminator (line 46) | class MultiWindowDiscriminator(nn.Module): method __init__ (line 47) | def __init__(self, time_lengths, freq_length=80, kernel=(3, 3), c_in=1... method forward (line 55) | def forward(self, x, x_len, start_frames_wins=None): method clip (line 81) | def clip(self, x, x_len, win_length, start_frames=None): class Discriminator (line 107) | class Discriminator(nn.Module): method __init__ (line 108) | def __init__(self, time_lengths=[32, 64, 128], freq_length=80, kernel=... method forward (line 120) | def forward(self, x, start_frames_wins=None): FILE: NeuralSeq/modules/syntaspeech/syntactic_graph_buider.py class Sentence2GraphParser (line 7) | class Sentence2GraphParser: method __init__ (line 8) | def __init__(self, language='zh', use_gpu=False, download=False): method parse (line 15) | def parse(self, clean_sentence=None, words=None, ph_words=None): method _parse_zh (line 26) | def _parse_zh(self, words, ph_words, enable_backward_edge=True, enable... method _parse_en (line 180) | def _parse_en(self, clean_sentence, enable_backward_edge=True, enable_... function plot_dgl_sentence_graph (line 267) | def plot_dgl_sentence_graph(dgl_graph, labels): FILE: NeuralSeq/modules/syntaspeech/syntactic_graph_encoder.py function sequence_mask (line 8) | def sequence_mask(lengths, maxlen, dtype=torch.bool): function group_hidden_by_segs (line 16) | def group_hidden_by_segs(h, seg_ids, max_len): class GraphAuxEnc (line 32) | class GraphAuxEnc(nn.Module): method __init__ (line 33) | def __init__(self, in_dim, hid_dim, out_dim, n_iterations=5, n_edge_ty... method ph_encoding_to_word_encoding (line 48) | def ph_encoding_to_word_encoding(ph_encoding, ph2word, word_len): method pad_word_encoding_to_phoneme (line 61) | def pad_word_encoding_to_phoneme(self, word_encoding, ph2word, t_p): method _process_ph_to_word_encoding (line 65) | def _process_ph_to_word_encoding(ph_encoding, ph2word, word_len=None): method _postprocess_word2ph (line 85) | def _postprocess_word2ph(word_encoding, ph2word, t_p): method _repeat_one_sequence (line 92) | def _repeat_one_sequence(x, d, T): method word_forward (line 103) | def word_forward(self, graph_lst, word_encoding, etypes_lst): method forward (line 135) | def forward(self, graph_lst, ph_encoding, ph2word, etypes_lst, return_... FILE: NeuralSeq/modules/syntaspeech/syntaspeech.py class SinusoidalPosEmb (line 17) | class SinusoidalPosEmb(nn.Module): method __init__ (line 18) | def __init__(self, dim): method forward (line 21) | def forward(self, x): class SyntaSpeech (line 36) | class SyntaSpeech(FastSpeech): method __init__ (line 37) | def __init__(self, ph_dict_size, word_dict_size, out_dims=None): method build_embedding (line 117) | def build_embedding(self, dictionary, embed_dim): method forward (line 122) | def forward(self, txt_tokens, word_tokens, ph2word, word_len, mel2word... method run_text_encoder (line 149) | def run_text_encoder(self, txt_tokens, word_tokens, ph2word, word_len,... method attention (line 194) | def attention(self, ph_encoder_out, enc_pos, word_encoder_out, dec_pos... method run_decoder (line 211) | def run_decoder(self, x, tgt_nonpadding, ret, infer, tgt_mels=None, gl... method forward_dur (line 234) | def forward_dur(self, dur_input, mel2word, ret, **kwargs): method get_pos_embed (line 259) | def get_pos_embed(self, word2word, x2word): method store_inverse_all (line 265) | def store_inverse_all(self): FILE: NeuralSeq/tasks/base_task.py class BaseDataset (line 30) | class BaseDataset(torch.utils.data.Dataset): method __init__ (line 31) | def __init__(self, shuffle): method _sizes (line 39) | def _sizes(self): method __getitem__ (line 42) | def __getitem__(self, index): method collater (line 45) | def collater(self, samples): method __len__ (line 48) | def __len__(self): method num_tokens (line 51) | def num_tokens(self, index): method size (line 54) | def size(self, index): method ordered_indices (line 60) | def ordered_indices(self): method num_workers (line 73) | def num_workers(self): class BaseTask (line 77) | class BaseTask(nn.Module): method __init__ (line 78) | def __init__(self, *args, **kwargs): method build_model (line 106) | def build_model(self): method load_ckpt (line 109) | def load_ckpt(self, ckpt_base_dir, current_model_name=None, model_name... method on_epoch_start (line 115) | def on_epoch_start(self): method _training_step (line 118) | def _training_step(self, sample, batch_idx, optimizer_idx): method training_step (line 127) | def training_step(self, sample, batch_idx, optimizer_idx=-1): method optimizer_step (line 157) | def optimizer_step(self, epoch, batch_idx, optimizer, optimizer_idx): method on_epoch_end (line 163) | def on_epoch_end(self): method validation_step (line 169) | def validation_step(self, sample, batch_idx): method _validation_end (line 178) | def _validation_end(self, outputs): method validation_end (line 186) | def validation_end(self, outputs): method build_scheduler (line 196) | def build_scheduler(self, optimizer): method build_optimizer (line 199) | def build_optimizer(self, model): method configure_optimizers (line 202) | def configure_optimizers(self): method test_start (line 207) | def test_start(self): method test_step (line 210) | def test_step(self, sample, batch_idx): method test_end (line 213) | def test_end(self, outputs): method start (line 221) | def start(cls): method configure_ddp (line 261) | def configure_ddp(self, model, device_ids): method training_end (line 274) | def training_end(self, *args, **kwargs): method init_ddp_connection (line 277) | def init_ddp_connection(self, proc_rank, world_size): method train_dataloader (line 294) | def train_dataloader(self): method test_dataloader (line 298) | def test_dataloader(self): method val_dataloader (line 302) | def val_dataloader(self): method on_load_checkpoint (line 305) | def on_load_checkpoint(self, checkpoint): method on_save_checkpoint (line 308) | def on_save_checkpoint(self, checkpoint): method on_sanity_check_start (line 311) | def on_sanity_check_start(self): method on_train_start (line 314) | def on_train_start(self): method on_train_end (line 317) | def on_train_end(self): method on_batch_start (line 320) | def on_batch_start(self, batch): method on_batch_end (line 323) | def on_batch_end(self): method on_pre_performance_check (line 326) | def on_pre_performance_check(self): method on_post_performance_check (line 329) | def on_post_performance_check(self): method on_before_zero_grad (line 332) | def on_before_zero_grad(self, optimizer): method on_after_backward (line 335) | def on_after_backward(self): method backward (line 338) | def backward(self, loss, optimizer): method grad_norm (line 341) | def grad_norm(self, norm_type): FILE: NeuralSeq/tasks/run.py function run_task (line 5) | def run_task(): FILE: NeuralSeq/tasks/svs/diffsinger_task.py class DiffSingerTask (line 30) | class DiffSingerTask(DiffSpeechTask): method __init__ (line 31) | def __init__(self): method build_tts_model (line 40) | def build_tts_model(self): method validation_step (line 66) | def validation_step(self, sample, batch_idx): class ShallowDiffusionOfflineDataset (line 102) | class ShallowDiffusionOfflineDataset(FastSpeechDataset): method __getitem__ (line 103) | def __getitem__(self, index): method collater (line 114) | def collater(self, samples): class DiffSingerOfflineTask (line 121) | class DiffSingerOfflineTask(DiffSingerTask): method __init__ (line 122) | def __init__(self): method build_tts_model (line 126) | def build_tts_model(self): method run_model (line 140) | def run_model(self, model, sample, return_output=False, infer=False): method validation_step (line 172) | def validation_step(self, sample, batch_idx): method test_step (line 208) | def test_step(self, sample, batch_idx): class MIDIDataset (line 237) | class MIDIDataset(FastSpeechDataset): method __getitem__ (line 238) | def __getitem__(self, index): method collater (line 246) | def collater(self, samples): class OpencpopDataset (line 254) | class OpencpopDataset(FastSpeechDataset): method __getitem__ (line 255) | def __getitem__(self, index): method collater (line 264) | def collater(self, samples): class DiffSingerMIDITask (line 273) | class DiffSingerMIDITask(DiffSingerTask): method __init__ (line 274) | def __init__(self): method run_model (line 279) | def run_model(self, model, sample, return_output=False, infer=False): method validation_step (line 316) | def validation_step(self, sample, batch_idx): method add_dur_loss (line 351) | def add_dur_loss(self, dur_pred, mel2ph, txt_tokens, wdb, losses=None): class AuxDecoderMIDITask (line 392) | class AuxDecoderMIDITask(FastSpeech2Task): method __init__ (line 393) | def __init__(self): method build_tts_model (line 398) | def build_tts_model(self): method run_model (line 404) | def run_model(self, model, sample, return_output=False): method add_dur_loss (line 435) | def add_dur_loss(self, dur_pred, mel2ph, txt_tokens, wdb, losses=None): method validation_step (line 475) | def validation_step(self, sample, batch_idx): FILE: NeuralSeq/tasks/svs/diffspeech_task.py class DiffSpeechTask (line 17) | class DiffSpeechTask(DiffFsTask): method __init__ (line 18) | def __init__(self): method build_tts_model (line 23) | def build_tts_model(self): method build_optimizer (line 40) | def build_optimizer(self, model): method run_model (line 48) | def run_model(self, model, sample, return_output=False, infer=False): method validation_step (line 80) | def validation_step(self, sample, batch_idx): method plot_wav (line 112) | def plot_wav(self, batch_idx, gt_wav, wav_out, is_mel=False, gt_f0=Non... FILE: NeuralSeq/tasks/svs/task.py class DiffFsTask (line 15) | class DiffFsTask(FastSpeech2Task): method build_tts_model (line 16) | def build_tts_model(self): method run_model (line 26) | def run_model(self, model, sample, return_output=False, infer=False): method _training_step (line 56) | def _training_step(self, sample, batch_idx, _): method validation_step (line 63) | def validation_step(self, sample, batch_idx): method build_scheduler (line 75) | def build_scheduler(self, optimizer): method optimizer_step (line 78) | def optimizer_step(self, epoch, batch_idx, optimizer, optimizer_idx): FILE: NeuralSeq/tasks/tts/dataset_utils.py class BaseTTSDataset (line 19) | class BaseTTSDataset(BaseDataset): method __init__ (line 20) | def __init__(self, prefix, shuffle=False, test_items=None, test_sizes=... method _get_item (line 52) | def _get_item(self, index): method __getitem__ (line 59) | def __getitem__(self, index): method collater (line 82) | def collater(self, samples): class FastSpeechDataset (line 114) | class FastSpeechDataset(BaseTTSDataset): method __init__ (line 115) | def __init__(self, prefix, shuffle=False, test_items=None, test_sizes=... method __getitem__ (line 137) | def __getitem__(self, index): method collater (line 187) | def collater(self, samples): method load_test_inputs (line 212) | def load_test_inputs(self): class FastSpeechWordDataset (line 238) | class FastSpeechWordDataset(FastSpeechDataset): method __getitem__ (line 239) | def __getitem__(self, index): method collater (line 249) | def collater(self, samples): FILE: NeuralSeq/tasks/tts/fs2.py class FastSpeech2Task (line 27) | class FastSpeech2Task(TtsTask): method __init__ (line 28) | def __init__(self): method train_dataloader (line 47) | def train_dataloader(self): method val_dataloader (line 53) | def val_dataloader(self): method test_dataloader (line 58) | def test_dataloader(self): method build_tts_model (line 63) | def build_tts_model(self): method build_model (line 66) | def build_model(self): method _training_step (line 73) | def _training_step(self, sample, batch_idx, _): method validation_step (line 79) | def validation_step(self, sample, batch_idx): method _validation_end (line 96) | def _validation_end(self, outputs): method run_model (line 109) | def run_model(self, model, sample, return_output=False): method add_mel_loss (line 141) | def add_mel_loss(self, mel_out, target, losses, postfix='', mel_mix_lo... method l1_loss (line 156) | def l1_loss(self, decoder_output, target): method ssim_loss (line 164) | def ssim_loss(self, decoder_output, target, bias=6.0): method add_dur_loss (line 175) | def add_dur_loss(self, dur_pred, mel2ph, txt_tokens, losses=None): method add_pitch_loss (line 219) | def add_pitch_loss(self, output, sample, losses): method add_f0_loss (line 252) | def add_f0_loss(self, p_pred, f0, uv, losses, nonpadding): method cwt_loss (line 269) | def cwt_loss(self, cwt_p, cwt_g): method add_energy_loss (line 277) | def add_energy_loss(self, energy_pred, energy, losses): method plot_mel (line 287) | def plot_mel(self, batch_idx, spec, spec_out, name=None): method plot_dur (line 294) | def plot_dur(self, batch_idx, sample, model_out): method plot_pitch (line 303) | def plot_pitch(self, batch_idx, sample, model_out): method test_step (line 338) | def test_step(self, sample, batch_idx): method after_infer (line 369) | def after_infer(self, predictions): method save_result (line 459) | def save_result(wav_out, mel, prefix, item_name, text, gen_dir, str_ph... method expand_f0_ph (line 501) | def expand_f0_ph(f0, mel2ph): FILE: NeuralSeq/tasks/tts/fs2_adv.py class FastSpeech2AdvTask (line 11) | class FastSpeech2AdvTask(FastSpeech2Task): method build_model (line 12) | def build_model(self): method build_disc_model (line 22) | def build_disc_model(self): method _training_step (line 32) | def _training_step(self, sample, batch_idx, optimizer_idx): method configure_optimizers (line 95) | def configure_optimizers(self): method build_scheduler (line 111) | def build_scheduler(self, optimizer): method on_before_optimization (line 119) | def on_before_optimization(self, opt_idx): method on_after_optimization (line 125) | def on_after_optimization(self, epoch, batch_idx, optimizer, optimizer... FILE: NeuralSeq/tasks/tts/fs2_utils.py class FastSpeechDataset (line 23) | class FastSpeechDataset(BaseDataset): method __init__ (line 24) | def __init__(self, prefix, shuffle=False): method _get_item (line 53) | def _get_item(self, index): method __getitem__ (line 60) | def __getitem__(self, index): method collater (line 105) | def collater(self, samples): method load_test_inputs (line 154) | def load_test_inputs(self, test_input_dir, spk_id=0): FILE: NeuralSeq/tasks/tts/pe.py class PeDataset (line 18) | class PeDataset(BaseDataset): method __init__ (line 19) | def __init__(self, prefix, shuffle=False): method _get_item (line 41) | def _get_item(self, index): method __getitem__ (line 48) | def __getitem__(self, index): method collater (line 70) | def collater(self, samples): class PitchExtractionTask (line 101) | class PitchExtractionTask(FastSpeech2Task): method __init__ (line 102) | def __init__(self): method build_tts_model (line 106) | def build_tts_model(self): method _training_step (line 111) | def _training_step(self, sample, batch_idx, _): method validation_step (line 117) | def validation_step(self, sample, batch_idx): method run_model (line 128) | def run_model(self, model, sample, return_output=False, infer=False): method plot_pitch (line 139) | def plot_pitch(self, batch_idx, model_out, sample): method add_pitch_loss (line 146) | def add_pitch_loss(self, output, sample, losses): FILE: NeuralSeq/tasks/tts/ps.py class PortaSpeechTask (line 18) | class PortaSpeechTask(FastSpeech2Task): method __init__ (line 19) | def __init__(self): method build_tts_model (line 24) | def build_tts_model(self): method on_train_start (line 29) | def on_train_start(self): method run_model (line 37) | def run_model(self, sample, infer=False, *args, **kwargs): method add_dur_loss (line 85) | def add_dur_loss(self, dur_pred, mel2token, word_len, txt_tokens, loss... method validation_step (line 101) | def validation_step(self, sample, batch_idx): method save_valid_result (line 104) | def save_valid_result(self, sample, batch_idx, model_out): method get_attn_stats (line 109) | def get_attn_stats(self, attn, sample, logging_outputs, prefix=''): method get_plot_dur_info (line 127) | def get_plot_dur_info(self, sample, model_out): method build_optimizer (line 141) | def build_optimizer(self, model): method build_scheduler (line 149) | def build_scheduler(self, optimizer): method test_start (line 155) | def test_start(self): method test_step (line 161) | def test_step(self, sample, batch_idx): FILE: NeuralSeq/tasks/tts/ps_adv.py class PortaSpeechAdvTask (line 21) | class PortaSpeechAdvTask(FastSpeech2Task): method __init__ (line 22) | def __init__(self): method build_tts_model (line 29) | def build_tts_model(self): method build_disc_model (line 42) | def build_disc_model(self): method on_train_start (line 51) | def on_train_start(self): method _training_step (line 59) | def _training_step(self, sample, batch_idx, optimizer_idx): method run_model (line 104) | def run_model(self, sample, infer=False, *args, **kwargs): method add_dur_loss (line 162) | def add_dur_loss(self, dur_pred, mel2token, word_len, txt_tokens, loss... method validation_step (line 192) | def validation_step(self, sample, batch_idx): method save_valid_result (line 213) | def save_valid_result(self, sample, batch_idx, model_out): method get_attn_stats (line 253) | def get_attn_stats(self, attn, sample, logging_outputs, prefix=''): method get_plot_dur_info (line 271) | def get_plot_dur_info(self, sample, model_out): method build_optimizer (line 285) | def build_optimizer(self, model): method build_scheduler (line 301) | def build_scheduler(self, optimizer): method on_before_optimization (line 308) | def on_before_optimization(self, opt_idx): method on_after_optimization (line 319) | def on_after_optimization(self, epoch, batch_idx, optimizer, optimizer... method test_start (line 327) | def test_start(self): method test_step (line 333) | def test_step(self, sample, batch_idx): FILE: NeuralSeq/tasks/tts/ps_flow.py class PortaSpeechFlowTask (line 9) | class PortaSpeechFlowTask(PortaSpeechTask): method __init__ (line 10) | def __init__(self): method build_tts_model (line 14) | def build_tts_model(self): method _training_step (line 19) | def _training_step(self, sample, batch_idx, opt_idx): method run_model (line 32) | def run_model(self, sample, infer=False, *args, **kwargs): method validation_step (line 94) | def validation_step(self, sample, batch_idx): method save_valid_result (line 99) | def save_valid_result(self, sample, batch_idx, model_out): method build_optimizer (line 113) | def build_optimizer(self, model): method build_scheduler (line 134) | def build_scheduler(self, optimizer): FILE: NeuralSeq/tasks/tts/synta.py class SyntaSpeechTask (line 11) | class SyntaSpeechTask(PortaSpeechAdvTask): method build_tts_model (line 12) | def build_tts_model(self): FILE: NeuralSeq/tasks/tts/tts.py class TtsTask (line 28) | class TtsTask(BaseTask): method __init__ (line 29) | def __init__(self, *args, **kwargs): method build_scheduler (line 40) | def build_scheduler(self, optimizer): method build_optimizer (line 43) | def build_optimizer(self, model): method build_dataloader (line 49) | def build_dataloader(self, dataset, shuffle, max_tokens=None, max_sent... method build_phone_encoder (line 95) | def build_phone_encoder(self, data_dir): method build_optimizer (line 101) | def build_optimizer(self, model): method test_start (line 107) | def test_start(self): method test_end (line 115) | def test_end(self, outputs): method weights_nonzero_speech (line 124) | def weights_nonzero_speech(self, target): FILE: NeuralSeq/tasks/tts/tts_base.py class TTSBaseTask (line 34) | class TTSBaseTask(BaseTask): method __init__ (line 35) | def __init__(self, *args, **kwargs): method train_dataloader (line 56) | def train_dataloader(self): method val_dataloader (line 80) | def val_dataloader(self): method test_dataloader (line 85) | def test_dataloader(self): method build_dataloader (line 92) | def build_dataloader(self, dataset, shuffle, max_tokens=None, max_sent... method build_phone_encoder (line 138) | def build_phone_encoder(self, data_dir): method build_scheduler (line 143) | def build_scheduler(self, optimizer): method build_optimizer (line 149) | def build_optimizer(self, model): method plot_mel (line 157) | def plot_mel(self, batch_idx, spec, spec_out, name=None): method test_start (line 164) | def test_start(self): method after_infer (line 173) | def after_infer(self, predictions, sil_start_frame=0): method save_result (line 247) | def save_result(wav_out, mel, base_fn, gen_dir, str_phs=None, mel2ph=N... method test_end (line 284) | def test_end(self, outputs): method weights_nonzero_speech (line 294) | def weights_nonzero_speech(self, target): method make_stop_target (line 300) | def make_stop_target(self, target): FILE: NeuralSeq/tasks/tts/tts_utils.py function parse_dataset_configs (line 9) | def parse_dataset_configs(): function parse_mel_losses (line 21) | def parse_mel_losses(): function load_data_preprocessor (line 37) | def load_data_preprocessor(): function load_data_binarizer (line 47) | def load_data_binarizer(): FILE: NeuralSeq/tasks/vocoder/dataset_utils.py class EndlessDistributedSampler (line 15) | class EndlessDistributedSampler(DistributedSampler): method __init__ (line 16) | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True): method __iter__ (line 42) | def __iter__(self): method __len__ (line 45) | def __len__(self): class VocoderDataset (line 49) | class VocoderDataset(BaseDataset): method __init__ (line 50) | def __init__(self, prefix, shuffle=False): method _get_item (line 74) | def _get_item(self, index): method __getitem__ (line 80) | def __getitem__(self, index): method collater (line 100) | def collater(self, batch): method _assert_ready_for_upsampling (line 163) | def _assert_ready_for_upsampling(x, c, hop_size, context_window): method load_test_inputs (line 167) | def load_test_inputs(self, test_input_dir, spk_id=0): method load_mel_inputs (line 186) | def load_mel_inputs(self, test_input_dir, spk_id=0): FILE: NeuralSeq/tasks/vocoder/vocoder_base.py class VocoderBaseTask (line 13) | class VocoderBaseTask(BaseTask): method __init__ (line 14) | def __init__(self): method train_dataloader (line 23) | def train_dataloader(self): method val_dataloader (line 28) | def val_dataloader(self): method test_dataloader (line 33) | def test_dataloader(self): method build_dataloader (line 37) | def build_dataloader(self, dataset, shuffle, max_sentences, endless=Fa... method test_start (line 60) | def test_start(self): method test_end (line 65) | def test_end(self, outputs): FILE: NeuralSeq/utils/__init__.py function tensors_to_scalars (line 17) | def tensors_to_scalars(metrics): class AvgrageMeter (line 28) | class AvgrageMeter(object): method __init__ (line 30) | def __init__(self): method reset (line 33) | def reset(self): method update (line 38) | def update(self, val, n=1): function collate_1d (line 44) | def collate_1d(values, pad_idx=0, left_pad=False, shift_right=False, max... function collate_2d (line 62) | def collate_2d(values, pad_idx=0, left_pad=False, shift_right=False, max... function _is_batch_full (line 79) | def _is_batch_full(batch, num_tokens, max_tokens, max_sentences): function batch_by_size (line 89) | def batch_by_size( function make_positions (line 145) | def make_positions(tensor, padding_idx): function softmax (line 160) | def softmax(x, dim): function unpack_dict_to_list (line 164) | def unpack_dict_to_list(samples): function load_ckpt (line 178) | def load_ckpt(cur_model, ckpt_base_dir, prefix_in_ckpt='model', force=Tr... function remove_padding (line 212) | def remove_padding(x, padding_idx=0): class Timer (line 222) | class Timer: method __init__ (line 225) | def __init__(self, name, print_time=False): method __enter__ (line 231) | def __enter__(self): method __exit__ (line 234) | def __exit__(self, exc_type, exc_val, exc_tb): function print_arch (line 240) | def print_arch(model, model_name='model'): function num_params (line 245) | def num_params(model, print_out=True, model_name="model"): FILE: NeuralSeq/utils/audio.py function save_wav (line 12) | def save_wav(wav, path, sr, norm=False): function get_hop_size (line 20) | def get_hop_size(hparams): function _stft (line 29) | def _stft(y, hparams): function _istft (line 34) | def _istft(y, hparams): function librosa_pad_lr (line 38) | def librosa_pad_lr(x, fsize, fshift, pad_sides=1): function amp_to_db (line 51) | def amp_to_db(x): function normalize (line 55) | def normalize(S, hparams): function denormalize (line 58) | def denormalize(D, hparams): function rnnoise (line 60) | def rnnoise(filename, out_fn=None, verbose=False, out_sample_rate=22050): FILE: NeuralSeq/utils/ckpt_utils.py function get_last_checkpoint (line 8) | def get_last_checkpoint(work_dir, steps=None): function get_all_ckpts (line 19) | def get_all_ckpts(work_dir, steps=None): function load_ckpt (line 28) | def load_ckpt(cur_model, ckpt_base_dir, model_name='model', force=True, ... FILE: NeuralSeq/utils/cwt.py function load_wav (line 7) | def load_wav(wav_file, sr): function convert_continuos_f0 (line 12) | def convert_continuos_f0(f0): function get_cont_lf0 (line 46) | def get_cont_lf0(f0, frame_period=5.0): function get_lf0_cwt (line 53) | def get_lf0_cwt(lf0): function norm_scale (line 72) | def norm_scale(Wavelet_lf0): function normalize_cwt_lf0 (line 80) | def normalize_cwt_lf0(f0, mean, std): function get_lf0_cwt_norm (line 89) | def get_lf0_cwt_norm(f0s, mean, std): function inverse_cwt_torch (line 118) | def inverse_cwt_torch(Wavelet_lf0, scales): function inverse_cwt (line 127) | def inverse_cwt(Wavelet_lf0, scales): function cwt2f0 (line 135) | def cwt2f0(cwt_spec, mean, std, cwt_scales): FILE: NeuralSeq/utils/dtw.py function dtw (line 6) | def dtw(x, y, dist, warp=1, w=inf, s=1.0): function accelerated_dtw (line 58) | def accelerated_dtw(x, y, dist, warp=1): function _traceback (line 100) | def _traceback(D): FILE: NeuralSeq/utils/hparams.py class Args (line 9) | class Args: method __init__ (line 10) | def __init__(self, **kwargs): function override_config (line 15) | def override_config(old_config: dict, new_config: dict): function set_hparams (line 23) | def set_hparams(config='', exp_name='', hparams_str='', print_hparams=Tr... FILE: NeuralSeq/utils/indexed_datasets.py class IndexedDataset (line 7) | class IndexedDataset: method __init__ (line 8) | def __init__(self, path, num_cache=1): method check_index (line 17) | def check_index(self, i): method __del__ (line 21) | def __del__(self): method __getitem__ (line 25) | def __getitem__(self, i): method __len__ (line 38) | def __len__(self): class IndexedDatasetBuilder (line 41) | class IndexedDatasetBuilder: method __init__ (line 42) | def __init__(self, path): method add_item (line 47) | def add_item(self, item): method finalize (line 52) | def finalize(self): FILE: NeuralSeq/utils/multiprocess_utils.py function chunked_worker (line 6) | def chunked_worker(worker_id, map_func, args, results_queue=None, init_c... function chunked_multiprocess_run (line 19) | def chunked_multiprocess_run(map_func, args, num_workers=None, ordered=T... function multiprocess_run_tqdm (line 49) | def multiprocess_run_tqdm(map_func, args, num_workers=None, ordered=True... FILE: NeuralSeq/utils/os_utils.py function link_file (line 5) | def link_file(from_file, to_file): function move_file (line 10) | def move_file(from_file, to_file): function copy_file (line 14) | def copy_file(from_file, to_file): function remove_file (line 18) | def remove_file(*fns): FILE: NeuralSeq/utils/pitch_utils.py function f0_to_coarse (line 22) | def f0_to_coarse(f0): function norm_f0 (line 34) | def norm_f0(f0, uv, hparams): function norm_interp_f0 (line 45) | def norm_interp_f0(f0, hparams): function denorm_f0 (line 63) | def denorm_f0(f0, uv, hparams, pitch_padding=None, min=None, max=None): FILE: NeuralSeq/utils/pl_utils.py function get_a_var (line 32) | def get_a_var(obj): # pragma: no cover function data_loader (line 47) | def data_loader(fn): function parallel_apply (line 80) | def parallel_apply(modules, inputs, kwargs_tup=None, devices=None): # p... function _find_tensors (line 166) | def _find_tensors(obj): # pragma: no cover class DDP (line 179) | class DDP(DistributedDataParallel): method parallel_apply (line 184) | def parallel_apply(self, replicas, inputs, kwargs): method forward (line 187) | def forward(self, *inputs, **kwargs): # pragma: no cover class DP (line 224) | class DP(DataParallel): method forward (line 229) | def forward(self, *inputs, **kwargs): method parallel_apply (line 253) | def parallel_apply(self, replicas, inputs, kwargs): class GradientAccumulationScheduler (line 257) | class GradientAccumulationScheduler: method __init__ (line 258) | def __init__(self, scheduling: dict): method on_epoch_begin (line 276) | def on_epoch_begin(self, epoch, trainer): class LatestModelCheckpoint (line 284) | class LatestModelCheckpoint(ModelCheckpoint): method __init__ (line 285) | def __init__(self, filepath, monitor='val_loss', verbose=0, num_ckpt_k... method get_all_ckpts (line 323) | def get_all_ckpts(self): method on_epoch_end (line 327) | def on_epoch_end(self, epoch, logs=None): class BaseTrainer (line 354) | class BaseTrainer: method __init__ (line 355) | def __init__( method num_gpus (line 458) | def num_gpus(self): method data_parallel (line 466) | def data_parallel(self): method get_model (line 469) | def get_model(self): method fit (line 477) | def fit(self, model): method init_optimizers (line 492) | def init_optimizers(self, optimizers): method run_pretrain_routine (line 507) | def run_pretrain_routine(self, model): method test (line 584) | def test(self, model): method training_tqdm_dict (line 589) | def training_tqdm_dict(self): method restore_weights (line 599) | def restore_weights(self, model): method restore_state_if_checkpoint_exists (line 628) | def restore_state_if_checkpoint_exists(self, model): method restore (line 661) | def restore(self, checkpoint_path, on_gpu): method restore_training_state (line 683) | def restore_training_state(self, checkpoint): method _atomic_save (line 722) | def _atomic_save(self, checkpoint, filepath): method save_checkpoint (line 739) | def save_checkpoint(self, filepath): method dump_checkpoint (line 743) | def dump_checkpoint(self): method copy_trainer_model_properties (line 776) | def copy_trainer_model_properties(self, model): method transfer_batch_to_gpu (line 792) | def transfer_batch_to_gpu(self, batch, gpu_id): method set_distributed_mode (line 823) | def set_distributed_mode(self, distributed_backend): method ddp_train (line 847) | def ddp_train(self, gpu_idx, model): method resolve_root_node_address (line 905) | def resolve_root_node_address(self, root_node): method log_metrics (line 917) | def log_metrics(self, metrics, grad_norm_dic, step=None): method add_tqdm_metrics (line 938) | def add_tqdm_metrics(self, metrics): method metrics_to_scalars (line 945) | def metrics_to_scalars(self, metrics): method process_output (line 958) | def process_output(self, output, train=False): method reduce_distributed_output (line 1050) | def reduce_distributed_output(self, output, num_gpus): method clip_gradients (line 1074) | def clip_gradients(self): method print_nan_gradients (line 1079) | def print_nan_gradients(self): method configure_accumulated_gradients (line 1085) | def configure_accumulated_gradients(self, accumulate_grad_batches): method get_dataloaders (line 1096) | def get_dataloaders(self, model): method init_train_dataloader (line 1111) | def init_train_dataloader(self, model): method init_val_dataloader (line 1127) | def init_val_dataloader(self, model): method init_test_dataloader (line 1137) | def init_test_dataloader(self, model): method evaluate (line 1146) | def evaluate(self, model, dataloaders, max_batches, test=False): method run_evaluation (line 1221) | def run_evaluation(self, test=False): method evaluation_forward (line 1281) | def evaluation_forward(self, model, batch, batch_idx, dataloader_idx, ... method train (line 1313) | def train(self): method run_training_epoch (line 1371) | def run_training_epoch(self): method run_training_batch (line 1436) | def run_training_batch(self, batch, batch_idx): method training_forward (line 1564) | def training_forward(self, batch, batch_idx, opt_idx, hiddens): method is_function_implemented (line 1606) | def is_function_implemented(self, f_name): method _percent_range_check (line 1611) | def _percent_range_check(self, name): FILE: NeuralSeq/utils/plot.py function spec_to_figure (line 8) | def spec_to_figure(spec, vmin=None, vmax=None): function spec_f0_to_figure (line 16) | def spec_f0_to_figure(spec, f0s, figsize=None): function dur_to_figure (line 30) | def dur_to_figure(dur_gt, dur_pred, txt): function f0_to_figure (line 45) | def f0_to_figure(f0_gt, f0_cwt=None, f0_pred=None): FILE: NeuralSeq/utils/text_encoder.py function strip_ids (line 28) | def strip_ids(ids, ids_to_strip): class TextEncoder (line 36) | class TextEncoder(object): method __init__ (line 39) | def __init__(self, num_reserved_ids=NUM_RESERVED_TOKENS): method num_reserved_ids (line 43) | def num_reserved_ids(self): method encode (line 46) | def encode(self, s): method decode (line 62) | def decode(self, ids, strip_extraneous=False): method decode_list (line 79) | def decode_list(self, ids): method vocab_size (line 101) | def vocab_size(self): class ByteTextEncoder (line 105) | class ByteTextEncoder(TextEncoder): method encode (line 108) | def encode(self, s): method decode (line 117) | def decode(self, ids, strip_extraneous=False): method decode_list (line 133) | def decode_list(self, ids): method vocab_size (line 146) | def vocab_size(self): class ByteTextEncoderWithEos (line 150) | class ByteTextEncoderWithEos(ByteTextEncoder): method encode (line 153) | def encode(self, s): class TokenTextEncoder (line 157) | class TokenTextEncoder(TextEncoder): method __init__ (line 160) | def __init__(self, method encode (line 197) | def encode(self, s): method decode (line 207) | def decode(self, ids, strip_eos=False, strip_padding=False): method decode_list (line 216) | def decode_list(self, ids): method vocab_size (line 221) | def vocab_size(self): method __len__ (line 224) | def __len__(self): method _safe_id_to_token (line 227) | def _safe_id_to_token(self, idx): method _init_vocab_from_file (line 230) | def _init_vocab_from_file(self, filename): method _init_vocab_from_list (line 245) | def _init_vocab_from_list(self, vocab_list): method _init_vocab (line 261) | def _init_vocab(self, token_generator, add_reserved_tokens=True): method pad (line 278) | def pad(self): method eos (line 281) | def eos(self): method unk (line 284) | def unk(self): method seg (line 287) | def seg(self): method store_to_file (line 290) | def store_to_file(self, filename): method sil_phonemes (line 303) | def sil_phonemes(self): FILE: NeuralSeq/utils/text_norm.py class ChineseChar (line 57) | class ChineseChar(object): method __init__ (line 65) | def __init__(self, simplified, traditional): method __str__ (line 70) | def __str__(self): method __repr__ (line 73) | def __repr__(self): class ChineseNumberUnit (line 77) | class ChineseNumberUnit(ChineseChar): method __init__ (line 84) | def __init__(self, power, simplified, traditional, big_s, big_t): method __str__ (line 90) | def __str__(self): method create (line 94) | def create(cls, index, value, numbering_type=NUMBERING_TYPES[1], small... class ChineseNumberDigit (line 113) | class ChineseNumberDigit(ChineseChar): method __init__ (line 118) | def __init__(self, value, simplified, traditional, big_s, big_t, alt_s... method __str__ (line 126) | def __str__(self): method create (line 130) | def create(cls, i, v): class ChineseMath (line 134) | class ChineseMath(ChineseChar): method __init__ (line 139) | def __init__(self, simplified, traditional, symbol, expression=None): class NumberSystem (line 150) | class NumberSystem(object): class MathSymbol (line 157) | class MathSymbol(object): method __init__ (line 165) | def __init__(self, positive, negative, point): method __iter__ (line 170) | def __iter__(self): function create_system (line 191) | def create_system(numbering_type=NUMBERING_TYPES[1]): function chn2num (line 233) | def chn2num(chinese_string, numbering_type=NUMBERING_TYPES[1]): function num2chn (line 319) | def num2chn(number_string, numbering_type=NUMBERING_TYPES[1], big=False, class Cardinal (line 419) | class Cardinal: method __init__ (line 424) | def __init__(self, cardinal=None, chntext=None): method chntext2cardinal (line 428) | def chntext2cardinal(self): method cardinal2chntext (line 431) | def cardinal2chntext(self): class Digit (line 435) | class Digit: method __init__ (line 440) | def __init__(self, digit=None, chntext=None): method digit2chntext (line 447) | def digit2chntext(self): class TelePhone (line 451) | class TelePhone: method __init__ (line 456) | def __init__(self, telephone=None, raw_chntext=None, chntext=None): method telephone2chntext (line 468) | def telephone2chntext(self, fixed=False): class Fraction (line 485) | class Fraction: method __init__ (line 490) | def __init__(self, fraction=None, chntext=None): method chntext2fraction (line 494) | def chntext2fraction(self): method fraction2chntext (line 498) | def fraction2chntext(self): class Date (line 503) | class Date: method __init__ (line 508) | def __init__(self, date=None, chntext=None): method date2chntext (line 536) | def date2chntext(self): class Money (line 561) | class Money: method __init__ (line 566) | def __init__(self, money=None, chntext=None): method money2chntext (line 573) | def money2chntext(self): class Percentage (line 584) | class Percentage: method __init__ (line 589) | def __init__(self, percentage=None, chntext=None): method chntext2percentage (line 593) | def chntext2percentage(self): method percentage2chntext (line 596) | def percentage2chntext(self): class NSWNormalizer (line 603) | class NSWNormalizer: method __init__ (line 604) | def __init__(self, raw_text): method _particular (line 608) | def _particular(self): method normalize (line 619) | def normalize(self, remove_punc=True): function nsw_test_case (line 712) | def nsw_test_case(raw_text): function nsw_test (line 718) | def nsw_test(): FILE: NeuralSeq/utils/training_utils.py class RSQRTSchedule (line 4) | class RSQRTSchedule(object): method __init__ (line 5) | def __init__(self, optimizer): method step (line 16) | def step(self, num_updates): method get_lr (line 26) | def get_lr(self): FILE: NeuralSeq/utils/tts_utils.py function make_positions (line 6) | def make_positions(tensor, padding_idx): function softmax (line 21) | def softmax(x, dim): function sequence_mask (line 25) | def sequence_mask(lengths, maxlen, dtype=torch.bool): function _get_full_incremental_state_key (line 36) | def _get_full_incremental_state_key(module_instance, key): function get_incremental_state (line 48) | def get_incremental_state(module, incremental_state, key): function set_incremental_state (line 56) | def set_incremental_state(module, incremental_state, key, value): function fill_with_neg_inf (line 63) | def fill_with_neg_inf(t): function fill_with_neg_inf2 (line 68) | def fill_with_neg_inf2(t): function get_focus_rate (line 73) | def get_focus_rate(attn, src_padding_mask=None, tgt_padding_mask=None): function get_phone_coverage_rate (line 88) | def get_phone_coverage_rate(attn, src_padding_mask=None, src_seg_mask=No... function get_diagonal_focus_rate (line 108) | def get_diagonal_focus_rate(attn, attn_ks, target_len, src_padding_mask=... function select_attn (line 146) | def select_attn(attn_logits, type='best'): function make_pad_mask (line 164) | def make_pad_mask(lengths, xs=None, length_dim=-1): function make_non_pad_mask (line 270) | def make_non_pad_mask(lengths, xs=None, length_dim=-1): function get_mask_from_lengths (line 350) | def get_mask_from_lengths(lengths): function group_hidden_by_segs (line 357) | def group_hidden_by_segs(h, seg_ids, max_len): function mel2token_to_dur (line 373) | def mel2token_to_dur(mel2token, T_txt=None, max_dur=None): function expand_word2ph (line 394) | def expand_word2ph(word_encoding, ph2word): FILE: NeuralSeq/vocoders/base_vocoder.py function register_vocoder (line 5) | def register_vocoder(cls): function get_vocoder_cls (line 11) | def get_vocoder_cls(hparams): class BaseVocoder (line 22) | class BaseVocoder: method spec2wav (line 23) | def spec2wav(self, mel): method wav2spec (line 33) | def wav2spec(wav_fn): FILE: NeuralSeq/vocoders/hifigan.py function load_model (line 17) | def load_model(config_path, checkpoint_path): class HifiGAN (line 40) | class HifiGAN(PWG): method __init__ (line 41) | def __init__(self): method spec2wav (line 55) | def spec2wav(self, mel, **kwargs): FILE: NeuralSeq/vocoders/pwg.py function load_pwg_model (line 16) | def load_pwg_model(config_path, checkpoint_path, stats_path): class PWG (line 54) | class PWG(BaseVocoder): method __init__ (line 55) | def __init__(self): method spec2wav (line 82) | def spec2wav(self, mel, **kwargs): method wav2spec (line 106) | def wav2spec(wav_fn, return_linear=False): method wav2mfcc (line 125) | def wav2mfcc(wav_fn): FILE: NeuralSeq/vocoders/vocoder_utils.py function denoise (line 7) | def denoise(wav, v=0.1): FILE: audio-chatgpt.py function cut_dialogue_history (line 77) | def cut_dialogue_history(history_memory, keep_last_n_words = 500): function merge_audio (line 92) | def merge_audio(audio_path_1, audio_path_2): class T2I (line 104) | class T2I: method __init__ (line 105) | def __init__(self, device): method inference (line 117) | def inference(self, text): class ImageCaptioning (line 126) | class ImageCaptioning: method __init__ (line 127) | def __init__(self, device): method inference (line 134) | def inference(self, image_path): class T2A (line 140) | class T2A: method __init__ (line 141) | def __init__(self, device): method _initialize_model (line 147) | def _initialize_model(self, config, ckpt, device): method txt2audio (line 158) | def txt2audio(self, text, seed = 55, scale = 1.5, ddim_steps = 100, n_... method select_best_audio (line 185) | def select_best_audio(self, prompt, wav_list): method inference (line 201) | def inference(self, text, seed = 55, scale = 1.5, ddim_steps = 100, n_... class I2A (line 214) | class I2A: method __init__ (line 215) | def __init__(self, device): method _initialize_model (line 221) | def _initialize_model(self, config, ckpt, device): method img2audio (line 232) | def img2audio(self, image, seed = 55, scale = 3, ddim_steps = 100, W =... method inference (line 262) | def inference(self, image, seed = 55, scale = 3, ddim_steps = 100, W =... class TTS (line 275) | class TTS: method __init__ (line 276) | def __init__(self, device=None): method set_model_hparams (line 286) | def set_model_hparams(self): method inference (line 290) | def inference(self, text): class T2S (line 298) | class T2S: method __init__ (line 299) | def __init__(self, device= None): method set_model_hparams (line 315) | def set_model_hparams(self): method inference (line 319) | def inference(self, inputs): class t2s_VISinger (line 341) | class t2s_VISinger: method __init__ (line 342) | def __init__(self, device=None): method inference (line 368) | def inference(self, inputs): class TTS_OOD (line 383) | class TTS_OOD: method __init__ (line 384) | def __init__(self, device): method set_model_hparams (line 395) | def set_model_hparams(self): method inference (line 405) | def inference(self, inputs): class Inpaint (line 418) | class Inpaint: method __init__ (line 419) | def __init__(self, device): method _initialize_model_inpaint (line 426) | def _initialize_model_inpaint(self, config, ckpt): method make_batch_sd (line 436) | def make_batch_sd(self, mel, mask, num_samples=1): method gen_mel (line 452) | def gen_mel(self, input_audio_path): method gen_mel_audio (line 472) | def gen_mel_audio(self, input_audio): method show_mel_fn (line 492) | def show_mel_fn(self, input_audio_path): method inpaint (line 500) | def inpaint(self, batch, seed, ddim_steps, num_samples=1, W=512, H=512): method inference (line 529) | def inference(self, input_audio, mel_and_mask, seed = 55, ddim_steps =... class ASR (line 560) | class ASR: method __init__ (line 561) | def __init__(self, device): method inference (line 566) | def inference(self, audio_path): method translate_english (line 574) | def translate_english(self, audio_path): class A2T (line 578) | class A2T: method __init__ (line 579) | def __init__(self, device): method inference (line 584) | def inference(self, audio_path): class GeneFace (line 589) | class GeneFace: method __init__ (line 590) | def __init__(self, device=None): method inference (line 599) | def inference(self, audio_path): class SoundDetection (line 612) | class SoundDetection: method __init__ (line 613) | def __init__(self, device): method inference (line 634) | def inference(self, audio_path): class SoundExtraction (line 675) | class SoundExtraction: method __init__ (line 676) | def __init__(self, device): method inference (line 689) | def inference(self, inputs): class Binaural (line 713) | class Binaural: method __init__ (line 714) | def __init__(self, device): method inference (line 729) | def inference(self, audio_path): class TargetSoundDetection (line 775) | class TargetSoundDetection: method __init__ (line 776) | def __init__(self, device): method extract_feature (line 807) | def extract_feature(self, fname): method build_clip (line 818) | def build_clip(self, text): method cal_similarity (line 823) | def cal_similarity(self, target, retrievals): method inference (line 833) | def inference(self, text, audio_path): class Speech_Enh_SS_SC (line 957) | class Speech_Enh_SS_SC: method __init__ (line 963) | def __init__(self, device="cuda", model_name="espnet/Wangyou_Zhang_chi... method _initialize_model (line 969) | def _initialize_model(self): method inference (line 989) | def inference(self, speech_path, ref_channel=0): class Speech_SS (line 1009) | class Speech_SS: method __init__ (line 1010) | def __init__(self, device="cuda", model_name="lichenda/wsj0_2mix_skim_... method _initialize_model (line 1016) | def _initialize_model(self): method inference (line 1036) | def inference(self, speech_path): class ConversationBot (line 1051) | class ConversationBot: method __init__ (line 1052) | def __init__(self): method init_tools (line 1075) | def init_tools(self, interaction_type): method run_text (line 1197) | def run_text(self, text, state): method run_image_or_audio (line 1250) | def run_image_or_audio(self, file, state, txt): method speech (line 1294) | def speech(self, speech_input, state): method inpainting (line 1351) | def inpainting(self, state, audio_filename, image_filename): method clear_audio (line 1364) | def clear_audio(self): method clear_input_audio (line 1366) | def clear_input_audio(self): method clear_image (line 1368) | def clear_image(self): method clear_video (line 1370) | def clear_video(self): method clear_button (line 1372) | def clear_button(self): FILE: audio_detection/audio_infer/pytorch/evaluate.py class Evaluator (line 6) | class Evaluator(object): method __init__ (line 7) | def __init__(self, model): method evaluate (line 15) | def evaluate(self, data_loader): FILE: audio_detection/audio_infer/pytorch/finetune_template.py class Transfer_Cnn14 (line 25) | class Transfer_Cnn14(nn.Module): method __init__ (line 26) | def __init__(self, sample_rate, window_size, hop_size, mel_bins, fmin, method init_weights (line 46) | def init_weights(self): method load_from_pretrain (line 49) | def load_from_pretrain(self, pretrained_checkpoint_path): method forward (line 53) | def forward(self, input, mixup_lambda=None): function train (line 65) | def train(args): FILE: audio_detection/audio_infer/pytorch/inference.py function audio_tagging (line 15) | def audio_tagging(args): function sound_event_detection (line 80) | def sound_event_detection(args): FILE: audio_detection/audio_infer/pytorch/losses.py function clip_bce (line 5) | def clip_bce(output_dict, target_dict): function get_loss_func (line 12) | def get_loss_func(loss_type): FILE: audio_detection/audio_infer/pytorch/main.py function train (line 50) | def train(args): FILE: audio_detection/audio_infer/pytorch/models.py function load_checkpoint (line 43) | def load_checkpoint(model, function init_layer (line 97) | def init_layer(layer): function init_bn (line 106) | def init_bn(bn): class TimeShift (line 114) | class TimeShift(nn.Module): method __init__ (line 115) | def __init__(self, mean, std): method forward (line 120) | def forward(self, x): class LinearSoftPool (line 126) | class LinearSoftPool(nn.Module): method __init__ (line 133) | def __init__(self, pooldim=1): method forward (line 137) | def forward(self, logits, time_decision): class PVT (line 141) | class PVT(nn.Module): method __init__ (line 142) | def __init__(self, sample_rate, window_size, hop_size, mel_bins, fmin, method init_weights (line 195) | def init_weights(self): method forward (line 199) | def forward(self, input, mixup_lambda=None): class PVT2 (line 239) | class PVT2(nn.Module): method __init__ (line 240) | def __init__(self, sample_rate, window_size, hop_size, mel_bins, fmin, method init_weights (line 292) | def init_weights(self): method forward (line 296) | def forward(self, input, mixup_lambda=None): class PVT_2layer (line 333) | class PVT_2layer(nn.Module): method __init__ (line 334) | def __init__(self, sample_rate, window_size, hop_size, mel_bins, fmin, method init_weights (line 387) | def init_weights(self): method forward (line 391) | def forward(self, input, mixup_lambda=None): class PVT_lr (line 431) | class PVT_lr(nn.Module): method __init__ (line 432) | def __init__(self, sample_rate, window_size, hop_size, mel_bins, fmin, method init_weights (line 484) | def init_weights(self): method forward (line 488) | def forward(self, input, mixup_lambda=None): class PVT_nopretrain (line 525) | class PVT_nopretrain(nn.Module): method __init__ (line 526) | def __init__(self, sample_rate, window_size, hop_size, mel_bins, fmin, method init_weights (line 578) | def init_weights(self): method forward (line 582) | def forward(self, input, mixup_lambda=None): class Mlp (line 619) | class Mlp(nn.Module): method __init__ (line 620) | def __init__(self, in_features, hidden_features=None, out_features=Non... method _init_weights (line 634) | def _init_weights(self, m): method forward (line 649) | def forward(self, x, H, W): class Attention (line 661) | class Attention(nn.Module): method __init__ (line 662) | def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, at... method _init_weights (line 690) | def _init_weights(self, m): method forward (line 705) | def forward(self, x, H, W): class Pooling (line 736) | class Pooling(nn.Module): method __init__ (line 741) | def __init__(self, pool_size=3): method forward (line 746) | def forward(self, x): class Block (line 749) | class Block(nn.Module): method __init__ (line 751) | def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_sc... method _init_weights (line 768) | def _init_weights(self, m): method forward (line 783) | def forward(self, x, H, W): class OverlapPatchEmbed (line 789) | class OverlapPatchEmbed(nn.Module): method __init__ (line 793) | def __init__(self, tdim, fdim, patch_size=7, stride=4, in_chans=3, emb... method _init_weights (line 808) | def _init_weights(self, m): method forward (line 823) | def forward(self, x): class PyramidVisionTransformerV2 (line 832) | class PyramidVisionTransformerV2(nn.Module): method __init__ (line 833) | def __init__(self, tdim=1001, fdim=64, patch_size=16, stride=4, in_cha... method _init_weights (line 871) | def _init_weights(self, m): method init_weights (line 886) | def init_weights(self, pretrained=None): method freeze_patch_emb (line 891) | def freeze_patch_emb(self): method no_weight_decay (line 895) | def no_weight_decay(self): method get_classifier (line 898) | def get_classifier(self): method reset_classifier (line 901) | def reset_classifier(self, num_classes, global_pool=''): method forward_features (line 905) | def forward_features(self, x): method forward (line 923) | def forward(self, x): class DWConv (line 929) | class DWConv(nn.Module): method __init__ (line 930) | def __init__(self, dim=768): method forward (line 934) | def forward(self, x, H, W): function _conv_filter (line 943) | def _conv_filter(state_dict, patch_size=16): FILE: audio_detection/audio_infer/pytorch/pytorch_utils.py function move_data_to_device (line 7) | def move_data_to_device(x, device): function do_mixup (line 18) | def do_mixup(x, mixup_lambda): function append_to_dict (line 34) | def append_to_dict(dict, key, value): function forward (line 41) | def forward(model, generator, return_input=False, function interpolate (line 103) | def interpolate(x, ratio): function pad_framewise_output (line 120) | def pad_framewise_output(framewise_output, frames_num): function count_parameters (line 140) | def count_parameters(model): function count_flops (line 144) | def count_flops(model, audio_length): FILE: audio_detection/audio_infer/utils/crash.py class ExceptionHook (line 3) | class ExceptionHook: method __call__ (line 5) | def __call__(self, *args, **kwargs): FILE: audio_detection/audio_infer/utils/create_black_list.py function dcase2017task4 (line 8) | def dcase2017task4(args): FILE: audio_detection/audio_infer/utils/create_indexes.py function create_indexes (line 16) | def create_indexes(args): function combine_full_indexes (line 40) | def combine_full_indexes(args): FILE: audio_detection/audio_infer/utils/data_generator.py function read_black_list (line 10) | def read_black_list(black_list_csv): class AudioSetDataset (line 21) | class AudioSetDataset(object): method __init__ (line 22) | def __init__(self, sample_rate=32000): method __getitem__ (line 28) | def __getitem__(self, meta): method resample (line 55) | def resample(self, waveform): class Base (line 74) | class Base(object): method __init__ (line 75) | def __init__(self, indexes_hdf5_path, batch_size, black_list_csv, rand... class TrainSampler (line 109) | class TrainSampler(Base): method __init__ (line 110) | def __init__(self, indexes_hdf5_path, batch_size, black_list_csv=None, method __iter__ (line 130) | def __iter__(self): method state_dict (line 163) | def state_dict(self): method load_state_dict (line 169) | def load_state_dict(self, state): class BalancedTrainSampler (line 174) | class BalancedTrainSampler(Base): method __init__ (line 175) | def __init__(self, indexes_hdf5_path, batch_size, black_list_csv=None, method expand_queue (line 208) | def expand_queue(self, queue): method __iter__ (line 214) | def __iter__(self): method state_dict (line 252) | def state_dict(self): method load_state_dict (line 259) | def load_state_dict(self, state): class AlternateTrainSampler (line 265) | class AlternateTrainSampler(Base): method __init__ (line 266) | def __init__(self, indexes_hdf5_path, batch_size, black_list_csv=None, method __iter__ (line 286) | def __iter__(self): method state_dict (line 348) | def state_dict(self): method load_state_dict (line 354) | def load_state_dict(self, state): class EvaluateSampler (line 359) | class EvaluateSampler(object): method __init__ (line 360) | def __init__(self, indexes_hdf5_path, batch_size): method __iter__ (line 377) | def __iter__(self): function collate_fn (line 406) | def collate_fn(list_data_dict): FILE: audio_detection/audio_infer/utils/dataset.py function split_unbalanced_csv_to_partial_csvs (line 17) | def split_unbalanced_csv_to_partial_csvs(args): function download_wavs (line 51) | def download_wavs(args): function pack_waveforms_to_hdf5 (line 125) | def pack_waveforms_to_hdf5(args): FILE: audio_detection/audio_infer/utils/plot_for_paper.py function load_statistics (line 16) | def load_statistics(statistics_path): function crop_label (line 27) | def crop_label(label): function add_comma (line 42) | def add_comma(integer): function plot_classwise_iteration_map (line 52) | def plot_classwise_iteration_map(args): function plot_six_figures (line 92) | def plot_six_figures(args): function plot_complexity_map (line 298) | def plot_complexity_map(args): function plot_long_fig (line 345) | def plot_long_fig(args): function prepare_plot_long_4_rows (line 436) | def prepare_plot_long_4_rows(sorted_lbs): function _scatter_4_rows (line 523) | def _scatter_4_rows(x, ax, ax2, ax3, ax4, s, c, marker='.', alpha=1.): function _plot_4_rows (line 531) | def _plot_4_rows(x, ax, ax2, ax3, ax4, c, linewidth=1.0, alpha=1.0, labe... FILE: audio_detection/audio_infer/utils/plot_statistics.py function _load_metrics0 (line 17) | def _load_metrics0(filename, sample_rate, window_size, hop_size, mel_bin... function _load_metrics0_classwise (line 40) | def _load_metrics0_classwise(filename, sample_rate, window_size, hop_siz... function _load_metrics0_classwise2 (line 56) | def _load_metrics0_classwise2(filename, sample_rate, window_size, hop_si... function _load_metrics_classwise (line 76) | def _load_metrics_classwise(filename, sample_rate, window_size, hop_size... function plot (line 96) | def plot(args): function plot_for_paper (line 705) | def plot_for_paper(args): function plot_for_paper2 (line 965) | def plot_for_paper2(args): function table_values (line 1260) | def table_values(args): function crop_label (line 1410) | def crop_label(label): function add_comma (line 1424) | def add_comma(integer): function plot_class_iteration (line 1432) | def plot_class_iteration(args): function _load_old_metrics (line 1490) | def _load_old_metrics(workspace, filename, iteration, data_type): function _sort (line 1510) | def _sort(ys): function load_data (line 1517) | def load_data(hdf5_path): function get_avg_stats (line 1524) | def get_avg_stats(workspace, bgn_iter, fin_iter, interval_iter, filename... function _samples_num_per_class (line 1576) | def _samples_num_per_class(): function get_label_quality (line 1593) | def get_label_quality(): function summary_stats (line 1614) | def summary_stats(args): function prepare_plot_long_4_rows (line 1679) | def prepare_plot_long_4_rows(sorted_lbs): function _scatter_4_rows (line 1767) | def _scatter_4_rows(x, ax, ax2, ax3, ax4, s, c, marker='.', alpha=1.): function _plot_4_rows (line 1774) | def _plot_4_rows(x, ax, ax2, ax3, ax4, c, linewidth=1.0, alpha=1.0, labe... function plot_long_fig (line 1782) | def plot_long_fig(args): function plot_flops (line 1852) | def plot_flops(args): function spearman (line 1900) | def spearman(args): function print_results (line 1924) | def print_results(args): FILE: audio_detection/audio_infer/utils/utilities.py function create_folder (line 13) | def create_folder(fd): function get_filename (line 18) | def get_filename(path): function get_sub_filepaths (line 25) | def get_sub_filepaths(folder): function create_logging (line 34) | def create_logging(log_dir, filemode): function read_metadata (line 59) | def read_metadata(csv_path, classes_num, id_to_ix): function float32_to_int16 (line 95) | def float32_to_int16(x): function int16_to_float32 (line 100) | def int16_to_float32(x): function pad_or_truncate (line 104) | def pad_or_truncate(x, audio_length): function d_prime (line 112) | def d_prime(auc): class Mixup (line 117) | class Mixup(object): method __init__ (line 118) | def __init__(self, mixup_alpha, random_seed=1234): method get_lambda (line 124) | def get_lambda(self, batch_size): class StatisticsContainer (line 140) | class StatisticsContainer(object): method __init__ (line 141) | def __init__(self, statistics_path): method append (line 152) | def append(self, iteration, statistics, data_type): method dump (line 156) | def dump(self): method load_state_dict (line 162) | def load_state_dict(self, resume_iteration): FILE: audio_detection/target_sound_detection/src/models.py function load_checkpoint (line 61) | def load_checkpoint(model, function init_weights (line 124) | def init_weights(m): function init_layer (line 137) | def init_layer(layer): function init_bn (line 145) | def init_bn(bn): class MaxPool (line 150) | class MaxPool(nn.Module): method __init__ (line 151) | def __init__(self, pooldim=1): method forward (line 155) | def forward(self, logits, decision): class LinearSoftPool (line 159) | class LinearSoftPool(nn.Module): method __init__ (line 167) | def __init__(self, pooldim=1): method forward (line 171) | def forward(self, logits, time_decision): class ConvBlock (line 175) | class ConvBlock(nn.Module): method __init__ (line 176) | def __init__(self, in_channels, out_channels): method init_weight (line 195) | def init_weight(self): method forward (line 202) | def forward(self, input, pool_size=(2, 2), pool_type='avg'): class ConvBlock_GLU (line 220) | class ConvBlock_GLU(nn.Module): method __init__ (line 221) | def __init__(self, in_channels, out_channels,kernel_size=(3,3)): method init_weight (line 231) | def init_weight(self): method forward (line 235) | def forward(self, input, pool_size=(2, 2), pool_type='avg'): class Mul_scale_GLU (line 258) | class Mul_scale_GLU(nn.Module): method __init__ (line 259) | def __init__(self): method forward (line 273) | def forward(self, input, fi=None): class Cnn14 (line 304) | class Cnn14(nn.Module): method __init__ (line 305) | def __init__(self, sample_rate=32000, window_size=1024, hop_size=320, ... method init_weight (line 345) | def init_weight(self): method forward (line 349) | def forward(self, input_, mixup_lambda=None): class Cnn10_fi (line 379) | class Cnn10_fi(nn.Module): method __init__ (line 380) | def __init__(self): method forward (line 392) | def forward(self, input, fi=None): class Cnn10_mul_scale (line 422) | class Cnn10_mul_scale(nn.Module): method __init__ (line 423) | def __init__(self,scale=8): method forward (line 433) | def forward(self, input, pool_size=(2, 2), pool_type='avg'): class Cnn10 (line 482) | class Cnn10(nn.Module): method __init__ (line 483) | def __init__(self,scale=8): method forward (line 490) | def forward(self, input, pool_size=(2, 2), pool_type='avg'): class MeanPool (line 523) | class MeanPool(nn.Module): method __init__ (line 524) | def __init__(self, pooldim=1): method forward (line 528) | def forward(self, logits, decision): class ResPool (line 531) | class ResPool(nn.Module): method __init__ (line 532) | def __init__(self, pooldim=1): class AutoExpPool (line 537) | class AutoExpPool(nn.Module): method __init__ (line 538) | def __init__(self, outputdim=10, pooldim=1): method forward (line 544) | def forward(self, logits, decision): class SoftPool (line 550) | class SoftPool(nn.Module): method __init__ (line 551) | def __init__(self, T=1, pooldim=1): method forward (line 556) | def forward(self, logits, decision): class AutoPool (line 561) | class AutoPool(nn.Module): method __init__ (line 563) | def __init__(self, outputdim=10, pooldim=1): method forward (line 569) | def forward(self, logits, decision): class ExtAttentionPool (line 575) | class ExtAttentionPool(nn.Module): method __init__ (line 576) | def __init__(self, inputdim, outputdim=10, pooldim=1, **kwargs): method forward (line 586) | def forward(self, logits, decision): class AttentionPool (line 594) | class AttentionPool(nn.Module): method __init__ (line 596) | def __init__(self, inputdim, outputdim=10, pooldim=1, **kwargs): method forward (line 605) | def forward(self, logits, decision): class Block2D (line 614) | class Block2D(nn.Module): method __init__ (line 615) | def __init__(self, cin, cout, kernel_size=3, padding=1): method forward (line 626) | def forward(self, x): class AudioCNN (line 629) | class AudioCNN(nn.Module): method __init__ (line 630) | def __init__(self, classes_num): method init_weights (line 640) | def init_weights(self): method forward (line 643) | def forward(self, input): method extract (line 660) | def extract(self,input): function parse_poolingfunction (line 673) | def parse_poolingfunction(poolingfunction_name='mean', **kwargs): class conv1d (line 698) | class conv1d(nn.Module): method __init__ (line 699) | def __init__(self, nin, nout, kernel_size=3, stride=1, padding='VALID'... method init_layer (line 711) | def init_layer(self, layer, nonlinearity='relu'): method forward (line 716) | def forward(self, x): class Atten_1 (line 720) | class Atten_1(nn.Module): method __init__ (line 721) | def __init__(self, input_dim, context=2, dropout_rate=0.2): method init_layer (line 731) | def init_layer(self, layer, nonlinearity='leaky_relu'): method forward (line 738) | def forward(self, input_x): class Fusion (line 770) | class Fusion(nn.Module): method __init__ (line 771) | def __init__(self, inputdim, inputdim2, n_fac): method forward (line 777) | def forward(self,embedding,mix_embed): class CDur_fusion (line 790) | class CDur_fusion(nn.Module): method __init__ (line 791) | def __init__(self, inputdim, outputdim, **kwargs): method forward (line 815) | def forward(self, x, embedding): # class CDur (line 836) | class CDur(nn.Module): method __init__ (line 837) | def __init__(self, inputdim, outputdim,time_resolution, **kwargs): method forward (line 860) | def forward(self, x, embedding,one_hot=None): # class CDur_big (line 880) | class CDur_big(nn.Module): method __init__ (line 881) | def __init__(self, inputdim, outputdim, **kwargs): method forward (line 906) | def forward(self, x, embedding): # class CDur_GLU (line 926) | class CDur_GLU(nn.Module): method __init__ (line 927) | def __init__(self, inputdim, outputdim, **kwargs): method forward (line 940) | def forward(self, x, embedding,one_hot=None): # class CDur_CNN14 (line 964) | class CDur_CNN14(nn.Module): method __init__ (line 965) | def __init__(self, inputdim, outputdim,time_resolution,**kwargs): method forward (line 987) | def forward(self, x, embedding,one_hot=None): class CDur_CNN_mul_scale (line 1010) | class CDur_CNN_mul_scale(nn.Module): method __init__ (line 1011) | def __init__(self, inputdim, outputdim,time_resolution,**kwargs): method forward (line 1033) | def forward(self, x, embedding,one_hot=None): class CDur_CNN_mul_scale_fusion (line 1058) | class CDur_CNN_mul_scale_fusion(nn.Module): method __init__ (line 1059) | def __init__(self, inputdim, outputdim, time_resolution,**kwargs): method forward (line 1082) | def forward(self, x, embedding,one_hot=None): class RaDur_fusion (line 1109) | class RaDur_fusion(nn.Module): method __init__ (line 1110) | def __init__(self, model_config, inputdim, outputdim, time_resolution,... method get_w (line 1132) | def get_w(self,q,k): method get_w_ee (line 1141) | def get_w_ee(self,q,k): method attention_pooling (line 1150) | def attention_pooling(self, embeddings, mean_embedding): method select_topk_embeddings (line 1159) | def select_topk_embeddings(self, scores, embeddings, k): method sum_with_attention (line 1170) | def sum_with_attention(self, embedding, top_k, selected_embeddings): method orcal_EE (line 1188) | def orcal_EE(self, x, embedding, label): method forward (line 1250) | def forward(self, x, ref, label=None): FILE: audio_detection/target_sound_detection/src/utils.py function parse_config_or_kwargs (line 23) | def parse_config_or_kwargs(config_file, **kwargs): function find_contiguous_regions (line 34) | def find_contiguous_regions(activity_array): # in this part, if you cann... function split_train_cv (line 55) | def split_train_cv( function pprint_dict (line 95) | def pprint_dict(in_dict, outputfun=sys.stdout.write, formatter='yaml'): ... function getfile_outlogger (line 108) | def getfile_outlogger(outputfile): function train_labelencoder (line 116) | def train_labelencoder(labels: pd.Series, sparse=True): function encode_labels (line 139) | def encode_labels(labels: pd.Series, encoder=None, sparse=True): function decode_with_timestamps (line 169) | def decode_with_timestamps(events,labels: np.array): function median_filter (line 189) | def median_filter(x, window_size, threshold=0.5): function _decode_with_timestamps (line 210) | def _decode_with_timestamps(events,labels): function inverse_transform_labels (line 222) | def inverse_transform_labels(encoder, pred): function binarize (line 229) | def binarize(pred, threshold=0.5): function double_threshold (line 238) | def double_threshold(x, high_thres, low_thres, n_connect=1): function _double_threshold (line 263) | def _double_threshold(x, high_thres, low_thres, n_connect=1, return_arr=... function connect_clusters (line 293) | def connect_clusters(x, n=1): function connect_clusters_ (line 300) | def connect_clusters_(x, n=1): function connect_ (line 316) | def connect_(pairs, n=1): function predictions_to_time (line 338) | def predictions_to_time(df, ratio): function upgrade_resolution (line 343) | def upgrade_resolution(arr, scale): FILE: audio_to_text/captioning/models/base_model.py class CaptionModel (line 11) | class CaptionModel(nn.Module): method __init__ (line 21) | def __init__(self, encoder: nn.Module, decoder: nn.Module, **kwargs): method check_decoder_compatibility (line 34) | def check_decoder_compatibility(self): method set_index (line 41) | def set_index(cls, start_idx, end_idx): method forward (line 45) | def forward(self, input_dict: Dict): method prepare_output (line 108) | def prepare_output(self, input_dict): method train_forward (line 127) | def train_forward(self, input_dict): method seq_forward (line 135) | def seq_forward(self, input_dict): method train_process (line 138) | def train_process(self, output, input_dict): method inference_forward (line 141) | def inference_forward(self, input_dict): method stepwise_forward (line 148) | def stepwise_forward(self, input_dict): method decode_step (line 168) | def decode_step(self, input_dict, output): method prepare_decoder_input (line 194) | def prepare_decoder_input(self, input_dict, output): method stepwise_process_step (line 198) | def stepwise_process_step(self, output, output_t): method stepwise_process (line 206) | def stepwise_process(self, output): method sample_next_word (line 210) | def sample_next_word(self, logit, method, temp): method beam_search (line 250) | def beam_search(self, input_dict): method prepare_beamsearch_output (line 320) | def prepare_beamsearch_output(self, input_dict): method beamsearch_step (line 332) | def beamsearch_step(self, input_dict, output_i): method prepare_beamsearch_decoder_input (line 338) | def prepare_beamsearch_decoder_input(self, input_dict, output_i): method beamsearch_process_step (line 341) | def beamsearch_process_step(self, output_i, output_t): method beamsearch_process (line 344) | def beamsearch_process(self, output, output_i, input_dict): method diverse_beam_search (line 356) | def diverse_beam_search(self, input_dict): method prepare_dbs_decoder_input (line 466) | def prepare_dbs_decoder_input(self, input_dict, output_i): method dbs_process_step (line 469) | def dbs_process_step(self, output_i, output_t): class CaptionSequenceModel (line 473) | class CaptionSequenceModel(nn.Module): method __init__ (line 475) | def __init__(self, model, seq_output_size): method forward (line 483) | def forward(self, input_dict): FILE: audio_to_text/captioning/models/decoder.py class BaseDecoder (line 13) | class BaseDecoder(nn.Module): method __init__ (line 20) | def __init__(self, emb_dim, vocab_size, fc_emb_dim, method forward (line 30) | def forward(self, x): method load_word_embedding (line 33) | def load_word_embedding(self, weight, freeze=True): class RnnDecoder (line 46) | class RnnDecoder(BaseDecoder): method __init__ (line 48) | def __init__(self, emb_dim, vocab_size, fc_emb_dim, attn_emb_dim, method forward (line 59) | def forward(self, x): method init_hidden (line 62) | def init_hidden(self, bs, device): class RnnFcDecoder (line 73) | class RnnFcDecoder(RnnDecoder): method __init__ (line 75) | def __init__(self, emb_dim, vocab_size, fc_emb_dim, attn_emb_dim, drop... method forward (line 86) | def forward(self, input_dict): class Seq2SeqAttention (line 110) | class Seq2SeqAttention(nn.Module): method __init__ (line 112) | def __init__(self, hs_enc, hs_dec, attn_size): method forward (line 124) | def forward(self, h_dec, h_enc, src_lens): class AttentionProj (line 151) | class AttentionProj(nn.Module): method __init__ (line 153) | def __init__(self, hs_enc, hs_dec, embed_dim, attn_size): method init (line 160) | def init(self, m): method forward (line 166) | def forward(self, h_dec, h_enc, src_lens): class BahAttnDecoder (line 195) | class BahAttnDecoder(RnnDecoder): method __init__ (line 197) | def __init__(self, emb_dim, vocab_size, fc_emb_dim, attn_emb_dim, method forward (line 219) | def forward(self, input_dict): class BahAttnDecoder2 (line 254) | class BahAttnDecoder2(RnnDecoder): method __init__ (line 256) | def __init__(self, emb_dim, vocab_size, fc_emb_dim, attn_emb_dim, method forward (line 278) | def forward(self, input_dict): class ConditionalBahAttnDecoder (line 312) | class ConditionalBahAttnDecoder(RnnDecoder): method __init__ (line 314) | def __init__(self, emb_dim, vocab_size, fc_emb_dim, attn_emb_dim, method forward (line 336) | def forward(self, input_dict): class StructBahAttnDecoder (line 375) | class StructBahAttnDecoder(RnnDecoder): method __init__ (line 377) | def __init__(self, emb_dim, vocab_size, fc_emb_dim, struct_vocab_size, method forward (line 399) | def forward(self, input_dict): class StyleBahAttnDecoder (line 436) | class StyleBahAttnDecoder(RnnDecoder): method __init__ (line 438) | def __init__(self, emb_dim, vocab_size, fc_emb_dim, attn_emb_dim, method forward (line 459) | def forward(self, input_dict): class BahAttnDecoder3 (line 494) | class BahAttnDecoder3(RnnDecoder): method __init__ (line 496) | def __init__(self, emb_dim, vocab_size, fc_emb_dim, attn_emb_dim, method forward (line 517) | def forward(self, input_dict): class SpecificityBahAttnDecoder (line 555) | class SpecificityBahAttnDecoder(RnnDecoder): method __init__ (line 557) | def __init__(self, emb_dim, vocab_size, fc_emb_dim, attn_emb_dim, method forward (line 578) | def forward(self, input_dict): class TransformerDecoder (line 614) | class TransformerDecoder(BaseDecoder): method __init__ (line 616) | def __init__(self, emb_dim, vocab_size, fc_emb_dim, attn_emb_dim, drop... method init_params (line 640) | def init_params(self): method generate_square_subsequent_mask (line 645) | def generate_square_subsequent_mask(self, max_length): method forward (line 650) | def forward(self, input_dict): class EventTransformerDecoder (line 678) | class EventTransformerDecoder(TransformerDecoder): method forward (line 680) | def forward(self, input_dict): class KeywordProbTransformerDecoder (line 709) | class KeywordProbTransformerDecoder(TransformerDecoder): method __init__ (line 711) | def __init__(self, emb_dim, vocab_size, fc_emb_dim, attn_emb_dim, method forward (line 718) | def forward(self, input_dict): FILE: audio_to_text/captioning/models/encoder.py function init_layer (line 16) | def init_layer(layer): function init_bn (line 25) | def init_bn(bn): class BaseEncoder (line 31) | class BaseEncoder(nn.Module): method __init__ (line 39) | def __init__(self, spec_dim, fc_feat_dim, attn_feat_dim): method forward (line 46) | def forward(self, x): class Block2D (line 58) | class Block2D(nn.Module): method __init__ (line 60) | def __init__(self, cin, cout, kernel_size=3, padding=1): method forward (line 71) | def forward(self, x): class LinearSoftPool (line 75) | class LinearSoftPool(nn.Module): method __init__ (line 82) | def __init__(self, pooldim=1): method forward (line 86) | def forward(self, logits, time_decision): class MeanPool (line 91) | class MeanPool(nn.Module): method __init__ (line 93) | def __init__(self, pooldim=1): method forward (line 97) | def forward(self, logits, decision): class AttentionPool (line 101) | class AttentionPool(nn.Module): method __init__ (line 103) | def __init__(self, inputdim, outputdim=10, pooldim=1, **kwargs): method forward (line 112) | def forward(self, logits, decision): class MMPool (line 122) | class MMPool(nn.Module): method __init__ (line 124) | def __init__(self, dims): method forward (line 129) | def forward(self, x): function parse_poolingfunction (line 133) | def parse_poolingfunction(poolingfunction_name='mean', **kwargs): function embedding_pooling (line 150) | def embedding_pooling(x, lens, pooling="mean"): class Cdur5Encoder (line 168) | class Cdur5Encoder(BaseEncoder): method __init__ (line 170) | def __init__(self, spec_dim, fc_feat_dim, attn_feat_dim, pooling="mean"): method forward (line 195) | def forward(self, input_dict): function conv_conv_block (line 223) | def conv_conv_block(in_channel, out_channel): class Cdur8Encoder (line 242) | class Cdur8Encoder(BaseEncoder): method __init__ (line 244) | def __init__(self, spec_dim, fc_feat_dim, attn_feat_dim, pooling="mean"): method forward (line 267) | def forward(self, input_dict): class Cnn10Encoder (line 290) | class Cnn10Encoder(BaseEncoder): method __init__ (line 292) | def __init__(self, spec_dim, fc_feat_dim, attn_feat_dim): method forward (line 313) | def forward(self, input_dict): class ConvBlock (line 336) | class ConvBlock(nn.Module): method __init__ (line 337) | def __init__(self, in_channels, out_channels): method init_weight (line 356) | def init_weight(self): method forward (line 363) | def forward(self, input, pool_size=(2, 2), pool_type='avg'): class Cnn14Encoder (line 382) | class Cnn14Encoder(nn.Module): method __init__ (line 383) | def __init__(self, sample_rate=32000): method init_weight (line 422) | def init_weight(self): method load_pretrained (line 426) | def load_pretrained(self, pretrained): method forward (line 464) | def forward(self, input_dict): class RnnEncoder (line 519) | class RnnEncoder(BaseEncoder): method __init__ (line 521) | def __init__(self, spec_dim, fc_feat_dim, attn_feat_dim, method forward (line 543) | def forward(self, input_dict): class Cnn14RnnEncoder (line 561) | class Cnn14RnnEncoder(nn.Module): method __init__ (line 562) | def __init__(self, sample_rate=32000, pretrained=None, method train (line 576) | def train(self, mode): method forward (line 586) | def forward(self, input_dict): class TransformerEncoder (line 595) | class TransformerEncoder(BaseEncoder): method __init__ (line 597) | def __init__(self, spec_dim, fc_feat_dim, attn_feat_dim, d_model, **kw... method init_params (line 619) | def init_params(self): method forward (line 624) | def forward(self, input_dict): class Cnn14TransformerEncoder (line 650) | class Cnn14TransformerEncoder(nn.Module): method __init__ (line 651) | def __init__(self, sample_rate=32000, pretrained=None, method train (line 665) | def train(self, mode): method forward (line 675) | def forward(self, input_dict): FILE: audio_to_text/captioning/models/transformer_model.py class TransformerModel (line 11) | class TransformerModel(CaptionModel): method __init__ (line 13) | def __init__(self, encoder: nn.Module, decoder: nn.Module, **kwargs): method seq_forward (line 20) | def seq_forward(self, input_dict): method prepare_decoder_input (line 34) | def prepare_decoder_input(self, input_dict, output): method prepare_beamsearch_decoder_input (line 59) | def prepare_beamsearch_decoder_input(self, input_dict, output_i): class M2TransformerModel (line 89) | class M2TransformerModel(CaptionModel): method __init__ (line 91) | def __init__(self, encoder: nn.Module, decoder: nn.Module, **kwargs): method check_encoder_compatibility (line 99) | def check_encoder_compatibility(self): method seq_forward (line 104) | def seq_forward(self, input_dict): method prepare_decoder_input (line 115) | def prepare_decoder_input(self, input_dict, output): method prepare_beamsearch_decoder_input (line 138) | def prepare_beamsearch_decoder_input(self, input_dict, output_i): class EventEncoder (line 166) | class EventEncoder(nn.Module): method __init__ (line 170) | def __init__(self, emb_dim, vocab_size=527): method forward (line 175) | def forward(self, word_idxs): class EventCondTransformerModel (line 181) | class EventCondTransformerModel(TransformerModel): method __init__ (line 183) | def __init__(self, encoder: nn.Module, decoder: nn.Module, **kwargs): method prepare_decoder_input (line 207) | def prepare_decoder_input(self, input_dict, output): method prepare_beamsearch_decoder_input (line 212) | def prepare_beamsearch_decoder_input(self, input_dict, output_i): class KeywordCondTransformerModel (line 223) | class KeywordCondTransformerModel(TransformerModel): method __init__ (line 225) | def __init__(self, encoder: nn.Module, decoder: nn.Module, **kwargs): method seq_forward (line 234) | def seq_forward(self, input_dict): method prepare_decoder_input (line 250) | def prepare_decoder_input(self, input_dict, output): method prepare_beamsearch_decoder_input (line 255) | def prepare_beamsearch_decoder_input(self, input_dict, output_i): FILE: audio_to_text/captioning/models/utils.py function sort_pack_padded_sequence (line 10) | def sort_pack_padded_sequence(input, lengths): function pad_unsort_packed_sequence (line 17) | def pad_unsort_packed_sequence(input, inv_ix): function pack_wrapper (line 22) | def pack_wrapper(module, attn_feats, attn_feat_lens): function generate_length_mask (line 29) | def generate_length_mask(lens, max_length=None): function mean_with_lens (line 39) | def mean_with_lens(features, lens): function max_with_lens (line 63) | def max_with_lens(features, lens): function repeat_tensor (line 76) | def repeat_tensor(x, n): function init (line 79) | def init(m, method="kaiming"): class PositionalEncoding (line 113) | class PositionalEncoding(nn.Module): method __init__ (line 115) | def __init__(self, d_model, dropout=0.1, max_len=100): method forward (line 129) | def forward(self, x): FILE: audio_to_text/captioning/utils/bert/create_sent_embedding.py class EmbeddingExtractor (line 8) | class EmbeddingExtractor(object): method extract_sentbert (line 10) | def extract_sentbert(self, caption_file: str, output: str, dev: bool=T... method extract_originbert (line 21) | def extract_originbert(self, caption_file: str, output: str, dev: bool... method extract (line 27) | def extract(self, caption_file: str, model, output, dev: bool): method extract_sbert (line 66) | def extract_sbert(self, FILE: audio_to_text/captioning/utils/bert/create_word_embedding.py function main (line 15) | def main(vocab_file: str, output: str, server_hostname: str): FILE: audio_to_text/captioning/utils/build_vocab.py class Vocabulary (line 10) | class Vocabulary(object): method __init__ (line 12) | def __init__(self): method add_word (line 17) | def add_word(self, word): method __call__ (line 23) | def __call__(self, word): method __getitem__ (line 28) | def __getitem__(self, word_id): method __len__ (line 31) | def __len__(self): function build_vocab (line 35) | def build_vocab(input_json: str, function process (line 134) | def process(input_json: str, FILE: audio_to_text/captioning/utils/build_vocab_ltp.py class Vocabulary (line 9) | class Vocabulary(object): method __init__ (line 11) | def __init__(self): method add_word (line 16) | def add_word(self, word): method __call__ (line 22) | def __call__(self, word): method __len__ (line 27) | def __len__(self): function build_vocab (line 30) | def build_vocab(input_json: str, function process (line 131) | def process(input_json: str, FILE: audio_to_text/captioning/utils/build_vocab_spacy.py class Vocabulary (line 9) | class Vocabulary(object): method __init__ (line 11) | def __init__(self): method add_word (line 16) | def add_word(self, word): method __call__ (line 22) | def __call__(self, word): method __len__ (line 27) | def __len__(self): function build_vocab (line 31) | def build_vocab(input_json: str, function process (line 130) | def process(input_json: str, FILE: audio_to_text/captioning/utils/eval_round_robin.py function evaluate_annotation (line 8) | def evaluate_annotation(key2refs, scorer): function evaluate_prediction (line 30) | def evaluate_prediction(key2pred, key2refs, scorer): class Evaluator (line 52) | class Evaluator(object): method eval_annotation (line 54) | def eval_annotation(self, annotation, output): method eval_prediction (line 108) | def eval_prediction(self, prediction, annotation, output): FILE: audio_to_text/captioning/utils/fasttext/create_word_embedding.py function create_embedding (line 16) | def create_embedding(caption_file: str, FILE: audio_to_text/captioning/utils/lr_scheduler.py class ExponentialDecayScheduler (line 5) | class ExponentialDecayScheduler(torch.optim.lr_scheduler._LRScheduler): method __init__ (line 7) | def __init__(self, optimizer, total_iters, final_lrs, method _get_closed_form_lr (line 22) | def _get_closed_form_lr(self): method get_lr (line 44) | def get_lr(self): class NoamScheduler (line 48) | class NoamScheduler(torch.optim.lr_scheduler._LRScheduler): method __init__ (line 50) | def __init__(self, optimizer, model_size=512, factor=1, warmup_iters=3... method _get_closed_form_lr (line 58) | def _get_closed_form_lr(self): method get_lr (line 68) | def get_lr(self): class CosineWithWarmup (line 72) | class CosineWithWarmup(torch.optim.lr_scheduler._LRScheduler): method __init__ (line 74) | def __init__(self, optimizer, total_iters, warmup_iters, method lr_lambda (line 81) | def lr_lambda(self, iteration): method _get_closed_form_lr (line 89) | def _get_closed_form_lr(self): method get_lr (line 97) | def get_lr(self): FILE: audio_to_text/captioning/utils/model_eval_diff.py function coco_score (line 13) | def coco_score(refs, pred, scorer): function embedding_score (line 40) | def embedding_score(refs, pred, scorer): function main (line 58) | def main(output_file, eval_caption_file, eval_embedding_file, output, zh... FILE: audio_to_text/captioning/utils/remove_optimizer.py function main (line 5) | def main(checkpoint): FILE: audio_to_text/captioning/utils/tokenize_caption.py function tokenize_caption (line 7) | def tokenize_caption(input_json: str, FILE: audio_to_text/captioning/utils/train_util.py function load_dict_from_csv (line 15) | def load_dict_from_csv(csv, cols): function init_logger (line 21) | def init_logger(filename, level="INFO"): function init_obj (line 37) | def init_obj(module, config, **kwargs):# 'captioning.models.encoder' function pprint_dict (line 43) | def pprint_dict(in_dict, outputfun=sys.stdout.write, formatter='yaml'): function merge_a_into_b (line 57) | def merge_a_into_b(a, b): function load_config (line 69) | def load_config(config_file): function parse_config_or_kwargs (line 86) | def parse_config_or_kwargs(config_file, **kwargs): function store_yaml (line 93) | def store_yaml(config, config_file): class MetricImprover (line 98) | class MetricImprover: method __init__ (line 100) | def __init__(self, mode): method compare (line 106) | def compare(self, x, best_x): method __call__ (line 109) | def __call__(self, x): method state_dict (line 115) | def state_dict(self): method load_state_dict (line 118) | def load_state_dict(self, state_dict): function fix_batchnorm (line 122) | def fix_batchnorm(model: torch.nn.Module): function load_pretrained_model (line 130) | def load_pretrained_model(model: torch.nn.Module, class AveragedModel (line 158) | class AveragedModel(torch_average_model): method update_parameters (line 160) | def update_parameters(self, model): FILE: audio_to_text/captioning/utils/word2vec/create_word_embedding.py function create_embedding (line 17) | def create_embedding(vocab_file: str, FILE: audio_to_text/inference_waveform.py function load_model (line 12) | def load_model(config, checkpoint): function decode_caption (line 48) | def decode_caption(word_ids, vocabulary): class AudioCapModel (line 61) | class AudioCapModel(object): method __init__ (line 62) | def __init__(self,weight_dir,device='cuda'): method caption (line 72) | def caption(self,audio_list): method __call__ (line 98) | def __call__(self, audio_list): FILE: mono2binaural/src/models.py class GeometricWarper (line 11) | class GeometricWarper(nn.Module): method __init__ (line 12) | def __init__(self, sampling_rate=48000): method _transmitter_mouth (line 16) | def _transmitter_mouth(self, view): method _3d_displacements (line 31) | def _3d_displacements(self, view): method _warpfield (line 42) | def _warpfield(self, view, seq_length): method forward (line 45) | def forward(self, mono, view): class Warpnet (line 54) | class Warpnet(nn.Module): method __init__ (line 55) | def __init__(self, layers=4, channels=64, view_dim=7): method neural_warpfield (line 63) | def neural_warpfield(self, view, seq_length): method forward (line 72) | def forward(self, mono, view): class BinauralNetwork (line 86) | class BinauralNetwork(Net): method __init__ (line 87) | def __init__(self, method forward (line 98) | def forward(self, mono, view): FILE: mono2binaural/src/utils.py class Net (line 14) | class Net(th.nn.Module): method __init__ (line 16) | def __init__(self, model_name="network", use_cuda=True): method save (line 21) | def save(self, model_dir, suffix=''): method load_from_file (line 39) | def load_from_file(self, model_file): method load (line 54) | def load(self, model_dir, suffix=''): method num_trainable_parameters (line 66) | def num_trainable_parameters(self): FILE: mono2binaural/src/warping.py class TimeWarperFunction (line 14) | class TimeWarperFunction(th.autograd.Function): method forward (line 17) | def forward(ctx, input, warpfield): method backward (line 35) | def backward(ctx, grad_output): class TimeWarper (line 51) | class TimeWarper(nn.Module): method __init__ (line 53) | def __init__(self): method _to_absolute_positions (line 57) | def _to_absolute_positions(self, warpfield, seq_length): method forward (line 63) | def forward(self, input, warpfield): class MonotoneTimeWarper (line 74) | class MonotoneTimeWarper(TimeWarper): method forward (line 76) | def forward(self, input, warpfield): class GeometricTimeWarper (line 91) | class GeometricTimeWarper(TimeWarper): method __init__ (line 93) | def __init__(self, sampling_rate=48000): method displacements2warpfield (line 97) | def displacements2warpfield(self, displacements, seq_length): method forward (line 103) | def forward(self, input, displacements): FILE: sound_extraction/model/LASSNet.py class LASSNet (line 7) | class LASSNet(nn.Module): method __init__ (line 8) | def __init__(self, device='cuda'): method forward (line 13) | def forward(self, x, caption): method get_tokenizer (line 24) | def get_tokenizer(self): FILE: sound_extraction/model/film.py class Film (line 4) | class Film(nn.Module): method __init__ (line 5) | def __init__(self, channels, cond_embedding_dim): method forward (line 14) | def forward(self, data, cond_vec): FILE: sound_extraction/model/modules.py class ConvBlock (line 7) | class ConvBlock(nn.Module): method __init__ (line 8) | def __init__(self, in_channels, out_channels, kernel_size, activation,... method init_weights (line 40) | def init_weights(self): method forward (line 46) | def forward(self, x): class EncoderBlock (line 52) | class EncoderBlock(nn.Module): method __init__ (line 53) | def __init__(self, in_channels, out_channels, kernel_size, downsample,... method forward (line 61) | def forward(self, x): class DecoderBlock (line 67) | class DecoderBlock(nn.Module): method __init__ (line 68) | def __init__(self, in_channels, out_channels, kernel_size, upsample, a... method init_weights (line 90) | def init_weights(self): method prune (line 94) | def prune(self, x): method forward (line 104) | def forward(self, input_tensor, concat_tensor): class EncoderBlockRes1B (line 113) | class EncoderBlockRes1B(nn.Module): method __init__ (line 114) | def __init__(self, in_channels, out_channels, downsample, activation, ... method forward (line 124) | def forward(self, x): class DecoderBlockRes1B (line 132) | class DecoderBlockRes1B(nn.Module): method __init__ (line 133) | def __init__(self, in_channels, out_channels, stride, activation, mome... method init_weights (line 148) | def init_weights(self): method prune (line 151) | def prune(self, x, both=False): method forward (line 158) | def forward(self, input_tensor, concat_tensor,both=False): class EncoderBlockRes2BCond (line 169) | class EncoderBlockRes2BCond(nn.Module): method __init__ (line 170) | def __init__(self, in_channels, out_channels, downsample, activation, ... method forward (line 178) | def forward(self, x, cond_vec): class DecoderBlockRes2BCond (line 184) | class DecoderBlockRes2BCond(nn.Module): method __init__ (line 185) | def __init__(self, in_channels, out_channels, stride, activation, mome... method init_weights (line 198) | def init_weights(self): method prune (line 201) | def prune(self, x, both=False): method forward (line 208) | def forward(self, input_tensor, concat_tensor, cond_vec, both=False): class EncoderBlockRes4BCond (line 216) | class EncoderBlockRes4BCond(nn.Module): method __init__ (line 217) | def __init__(self, in_channels, out_channels, downsample, activation, ... method forward (line 227) | def forward(self, x, cond_vec): class DecoderBlockRes4BCond (line 235) | class DecoderBlockRes4BCond(nn.Module): method __init__ (line 236) | def __init__(self, in_channels, out_channels, stride, activation, mome... method init_weights (line 251) | def init_weights(self): method prune (line 254) | def prune(self, x, both=False): method forward (line 261) | def forward(self, input_tensor, concat_tensor, cond_vec, both=False): class EncoderBlockRes4B (line 271) | class EncoderBlockRes4B(nn.Module): method __init__ (line 272) | def __init__(self, in_channels, out_channels, downsample, activation, ... method forward (line 282) | def forward(self, x): class DecoderBlockRes4B (line 290) | class DecoderBlockRes4B(nn.Module): method __init__ (line 291) | def __init__(self, in_channels, out_channels, stride, activation, mome... method init_weights (line 306) | def init_weights(self): method prune (line 309) | def prune(self, x, both=False): method forward (line 316) | def forward(self, input_tensor, concat_tensor,both=False): class ConvBlockResCond (line 326) | class ConvBlockResCond(nn.Module): method __init__ (line 327) | def __init__(self, in_channels, out_channels, kernel_size, activation,... method init_weights (line 359) | def init_weights(self): method forward (line 368) | def forward(self, x, cond_vec): class ConvBlockRes (line 381) | class ConvBlockRes(nn.Module): method __init__ (line 382) | def __init__(self, in_channels, out_channels, kernel_size, activation,... method init_weights (line 412) | def init_weights(self): method forward (line 421) | def forward(self, x): function init_layer (line 431) | def init_layer(layer): function init_bn (line 439) | def init_bn(bn): function init_gru (line 444) | def init_gru(rnn): function act (line 472) | def act(x, activation): FILE: sound_extraction/model/resunet_film.py class UNetRes_FiLM (line 4) | class UNetRes_FiLM(nn.Module): method __init__ (line 5) | def __init__(self, channels, cond_embedding_dim, nsrc=1): method init_weights (line 63) | def init_weights(self): method forward (line 66) | def forward(self, sp, cond_vec, dec_cond_vec): FILE: sound_extraction/model/text_encoder.py class Text_Encoder (line 11) | class Text_Encoder(nn.Module): method __init__ (line 12) | def __init__(self, device): method tokenize (line 29) | def tokenize(self, caption): method forward (line 39) | def forward(self, input_ids, attns_mask): FILE: sound_extraction/utils/create_mixtures.py function add_noise_and_scale (line 4) | def add_noise_and_scale(front, noise, snr_l=0, snr_h=0, scale_lower=1.0,... function _random_scale (line 34) | def _random_scale(lower=0.3, upper=0.9): function _random_noise (line 37) | def _random_noise(clean, noise, snr_l=None, snr_h=None): function _to_numpy (line 42) | def _to_numpy(wav): function normalize_energy (line 45) | def normalize_energy(audio, alpha = 1): function normalize_energy_torch (line 54) | def normalize_energy_torch(audio, alpha = 1): function unify_energy (line 64) | def unify_energy(*args): function unify_energy_torch (line 69) | def unify_energy_torch(*args): function activelev (line 74) | def activelev(*args): function activelev_torch (line 80) | def activelev_torch(*args): function uniform_torch (line 90) | def uniform_torch(lower, upper): FILE: sound_extraction/utils/stft.py function window_sumsquare (line 10) | def window_sumsquare(window, n_frames, hop_length=512, win_length=1024, class STFT (line 53) | class STFT(torch.nn.Module): method __init__ (line 55) | def __init__(self, filter_length=1024, hop_length=512, win_length=1024, method transform (line 88) | def transform(self, input_data): method inverse (line 118) | def inverse(self, magnitude, phase): method forward (line 149) | def forward(self, input_data): FILE: sound_extraction/utils/wav_io.py function load_wav (line 7) | def load_wav(path): function save_wav (line 21) | def save_wav(wav, path): FILE: text_to_audio/Make_An_Audio/ldm/data/extract_mel_spectrogram.py class MelSpectrogram (line 15) | class MelSpectrogram(object): method __init__ (line 16) | def __init__(self, sr, nfft, fmin, fmax, nmels, hoplen, spec_power, in... method __call__ (line 28) | def __call__(self, x): class LowerThresh (line 40) | class LowerThresh(object): method __init__ (line 41) | def __init__(self, min_val, inverse=False): method __call__ (line 45) | def __call__(self, x): class Add (line 51) | class Add(object): method __init__ (line 52) | def __init__(self, val, inverse=False): method __call__ (line 56) | def __call__(self, x): class Subtract (line 62) | class Subtract(Add): method __init__ (line 63) | def __init__(self, val, inverse=False): method __call__ (line 67) | def __call__(self, x): class Multiply (line 73) | class Multiply(object): method __init__ (line 74) | def __init__(self, val, inverse=False) -> None: method __call__ (line 78) | def __call__(self, x): class Divide (line 84) | class Divide(Multiply): method __init__ (line 85) | def __init__(self, val, inverse=False): method __call__ (line 89) | def __call__(self, x): class Log10 (line 95) | class Log10(object): method __init__ (line 96) | def __init__(self, inverse=False): method __call__ (line 99) | def __call__(self, x): class Clip (line 105) | class Clip(object): method __init__ (line 106) | def __init__(self, min_val, max_val, inverse=False): method __call__ (line 111) | def __call__(self, x): class TrimSpec (line 117) | class TrimSpec(object): method __init__ (line 118) | def __init__(self, max_len, inverse=False): method __call__ (line 122) | def __call__(self, x): class MaxNorm (line 128) | class MaxNorm(object): method __init__ (line 129) | def __init__(self, inverse=False): method __call__ (line 133) | def __call__(self, x): FILE: text_to_audio/Make_An_Audio/ldm/lr_scheduler.py class LambdaWarmUpCosineScheduler (line 4) | class LambdaWarmUpCosineScheduler: method __init__ (line 8) | def __init__(self, warm_up_steps, lr_min, lr_max, lr_start, max_decay_... method schedule (line 17) | def schedule(self, n, **kwargs): method __call__ (line 32) | def __call__(self, n, **kwargs): class LambdaWarmUpCosineScheduler2 (line 36) | class LambdaWarmUpCosineScheduler2: method __init__ (line 41) | def __init__(self, warm_up_steps, f_min, f_max, f_start, cycle_lengths... method find_in_interval (line 52) | def find_in_interval(self, n): method schedule (line 59) | def schedule(self, n, **kwargs): method __call__ (line 77) | def __call__(self, n, **kwargs): class LambdaLinearScheduler (line 81) | class LambdaLinearScheduler(LambdaWarmUpCosineScheduler2): method schedule (line 83) | def schedule(self, n, **kwargs): FILE: text_to_audio/Make_An_Audio/ldm/models/autoencoder.py class VQModel (line 14) | class VQModel(pl.LightningModule): method __init__ (line 15) | def __init__(self, method ema_scope (line 64) | def ema_scope(self, context=None): method init_from_ckpt (line 78) | def init_from_ckpt(self, path, ignore_keys=list()): method on_train_batch_end (line 92) | def on_train_batch_end(self, *args, **kwargs): method encode (line 96) | def encode(self, x): method encode_to_prequant (line 102) | def encode_to_prequant(self, x): method decode (line 107) | def decode(self, quant): method decode_code (line 112) | def decode_code(self, code_b): method forward (line 117) | def forward(self, input, return_pred_indices=False): method get_input (line 124) | def get_input(self, batch, k): method training_step (line 142) | def training_step(self, batch, batch_idx, optimizer_idx): method validation_step (line 164) | def validation_step(self, batch, batch_idx): method _validation_step (line 170) | def _validation_step(self, batch, batch_idx, suffix=""): method test_step (line 197) | def test_step(self, batch, batch_idx): method configure_optimizers (line 217) | def configure_optimizers(self): method get_last_layer (line 250) | def get_last_layer(self): method log_images (line 253) | def log_images(self, batch, only_inputs=False, plot_ema=False, **kwargs): method to_rgb (line 275) | def to_rgb(self, x): class VQModelInterface (line 284) | class VQModelInterface(VQModel): method __init__ (line 285) | def __init__(self, embed_dim, *args, **kwargs): method encode (line 289) | def encode(self, x):# VQModel的quantize写在encoder里,VQModelInterface则将其写在... method decode (line 294) | def decode(self, h, force_not_quantize=False): class AutoencoderKL (line 305) | class AutoencoderKL(pl.LightningModule): method __init__ (line 306) | def __init__(self, method init_from_ckpt (line 334) | def init_from_ckpt(self, path, ignore_keys=list()): method encode (line 345) | def encode(self, x): method decode (line 351) | def decode(self, z): method forward (line 356) | def forward(self, input, sample_posterior=True): method get_input (line 365) | def get_input(self, batch, k): method training_step (line 372) | def training_step(self, batch, batch_idx, optimizer_idx): method validation_step (line 393) | def validation_step(self, batch, batch_idx): method test_step (line 397) | def test_step(self, batch, batch_idx): method configure_optimizers (line 417) | def configure_optimizers(self): method get_last_layer (line 428) | def get_last_layer(self): method log_images (line 432) | def log_images(self, batch, only_inputs=False,save_dir = 'mel_result_a... method to_rgb (line 448) | def to_rgb(self, x): class IdentityFirstStage (line 457) | class IdentityFirstStage(torch.nn.Module): method __init__ (line 458) | def __init__(self, *args, vq_interface=False, **kwargs): method encode (line 462) | def encode(self, x, *args, **kwargs): method decode (line 465) | def decode(self, x, *args, **kwargs): method quantize (line 468) | def quantize(self, x, *args, **kwargs): method forward (line 473) | def forward(self, x, *args, **kwargs): FILE: text_to_audio/Make_An_Audio/ldm/models/autoencoder_multi.py class AutoencoderKL (line 23) | class AutoencoderKL(pl.LightningModule): method __init__ (line 24) | def __init__(self, method init_from_ckpt (line 51) | def init_from_ckpt(self, path, ignore_keys=list()): method encode (line 62) | def encode(self, x): method decode (line 68) | def decode(self, z): method forward (line 73) | def forward(self, input, sample_posterior=True): method get_input (line 82) | def get_input(self, batch, k): method training_step (line 89) | def training_step(self, batch, batch_idx, optimizer_idx): method validation_step (line 110) | def validation_step(self, batch, batch_idx): method test_step (line 124) | def test_step(self, batch, batch_idx): method configure_optimizers (line 144) | def configure_optimizers(self): method get_last_layer (line 155) | def get_last_layer(self): method log_images (line 159) | def log_images(self, batch, only_inputs=False, **kwargs): method to_rgb (line 175) | def to_rgb(self, x): class IdentityFirstStage (line 184) | class IdentityFirstStage(torch.nn.Module): method __init__ (line 185) | def __init__(self, *args, vq_interface=False, **kwargs): method encode (line 189) | def encode(self, x, *args, **kwargs): method decode (line 192) | def decode(self, x, *args, **kwargs): method quantize (line 195) | def quantize(self, x, *args, **kwargs): method forward (line 200) | def forward(self, x, *args, **kwargs): FILE: text_to_audio/Make_An_Audio/ldm/models/diffusion/classifier.py function disabled_train (line 22) | def disabled_train(self, mode=True): class NoisyLatentImageClassifier (line 28) | class NoisyLatentImageClassifier(pl.LightningModule): method __init__ (line 30) | def __init__(self, method init_from_ckpt (line 70) | def init_from_ckpt(self, path, ignore_keys=list(), only_model=False): method load_diffusion (line 88) | def load_diffusion(self): method load_classifier (line 95) | def load_classifier(self, ckpt_path, pool): method get_x_noisy (line 110) | def get_x_noisy(self, x, t, noise=None): method forward (line 120) | def forward(self, x_noisy, t, *args, **kwargs): method get_input (line 124) | def get_input(self, batch, k): method get_conditioning (line 133) | def get_conditioning(self, batch, k=None): method compute_top_k (line 150) | def compute_top_k(self, logits, labels, k, reduction="mean"): method on_train_epoch_start (line 157) | def on_train_epoch_start(self): method write_logs (line 162) | def write_logs(self, loss, logits, targets): method shared_step (line 179) | def shared_step(self, batch, t=None): method training_step (line 198) | def training_step(self, batch, batch_idx): method reset_noise_accs (line 202) | def reset_noise_accs(self): method on_validation_start (line 206) | def on_validation_start(self): method validation_step (line 210) | def validation_step(self, batch, batch_idx): method configure_optimizers (line 220) | def configure_optimizers(self): method log_images (line 238) | def log_images(self, batch, N=8, *args, **kwargs): FILE: text_to_audio/Make_An_Audio/ldm/models/diffusion/ddim.py class DDIMSampler (line 12) | class DDIMSampler(object): method __init__ (line 13) | def __init__(self, model, schedule="linear", **kwargs): method register_buffer (line 20) | def register_buffer(self, name, attr): method make_schedule (line 27) | def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddi... method sample (line 59) | def sample(self, method ddim_sampling (line 118) | def ddim_sampling(self, cond, shape, method p_sample_ddim (line 169) | def p_sample_ddim(self, x, c, t, index, repeat_noise=False, use_origin... method stochastic_encode (line 228) | def stochastic_encode(self, x0, t, use_original_steps=False, noise=None): method decode (line 244) | def decode(self, x_latent, cond, t_start, unconditional_guidance_scale... FILE: text_to_audio/Make_An_Audio/ldm/models/diffusion/ddpm.py function disabled_train (line 33) | def disabled_train(self, mode=True): function uniform_on_device (line 39) | def uniform_on_device(r1, r2, shape, device): class DDPM (line 43) | class DDPM(pl.LightningModule): method __init__ (line 45) | def __init__(self, method register_schedule (line 115) | def register_schedule(self, given_betas=None, beta_schedule="linear", ... method ema_scope (line 170) | def ema_scope(self, context=None): method init_from_ckpt (line 184) | def init_from_ckpt(self, path, ignore_keys=list(), only_model=False): method q_mean_variance (line 202) | def q_mean_variance(self, x_start, t): method predict_start_from_noise (line 214) | def predict_start_from_noise(self, x_t, t, noise): method q_posterior (line 220) | def q_posterior(self, x_start, x_t, t): method p_mean_variance (line 229) | def p_mean_variance(self, x, t, clip_denoised: bool): method p_sample (line 242) | def p_sample(self, x, t, clip_denoised=True, repeat_noise=False): method p_sample_loop (line 251) | def p_sample_loop(self, shape, return_intermediates=False): method sample (line 266) | def sample(self, batch_size=16, return_intermediates=False): method q_sample (line 272) | def q_sample(self, x_start, t, noise=None): method get_loss (line 277) | def get_loss(self, pred, target, mean=True): method p_losses (line 292) | def p_losses(self, x_start, t, noise=None): method forward (line 321) | def forward(self, x, *args, **kwargs): method get_input (line 327) | def get_input(self, batch, k): method shared_step (line 335) | def shared_step(self, batch): method training_step (line 340) | def training_step(self, batch, batch_idx): method validation_step (line 356) | def validation_step(self, batch, batch_idx): method on_train_batch_end (line 364) | def on_train_batch_end(self, *args, **kwargs): method _get_rows_from_list (line 368) | def _get_rows_from_list(self, samples): method log_images (line 376) | def log_images(self, batch, N=8, n_row=2, sample=True, return_keys=Non... method configure_optimizers (line 413) | def configure_optimizers(self): class LatentDiffusion (line 422) | class LatentDiffusion(DDPM): method __init__ (line 424) | def __init__(self, method make_cond_schedule (line 469) | def make_cond_schedule(self, ): method on_train_batch_start (line 476) | def on_train_batch_start(self, batch, batch_idx, dataloader_idx): method register_schedule (line 491) | def register_schedule(self, method instantiate_first_stage (line 500) | def instantiate_first_stage(self, config): method instantiate_cond_stage (line 507) | def instantiate_cond_stage(self, config): method _get_denoise_row_from_list (line 528) | def _get_denoise_row_from_list(self, samples, desc='', force_no_decode... method get_first_stage_encoding (line 540) | def get_first_stage_encoding(self, encoder_posterior): method get_learned_conditioning (line 549) | def get_learned_conditioning(self, c): method meshgrid (line 562) | def meshgrid(self, h, w): method delta_border (line 569) | def delta_border(self, h, w): method get_weighting (line 583) | def get_weighting(self, h, w, Ly, Lx, device): method get_fold_unfold (line 599) | def get_fold_unfold(self, x, kernel_size, stride, uf=1, df=1): # todo... method get_input (line 652) | def get_input(self, batch, k, return_first_stage_outputs=False, force_... method decode_first_stage (line 704) | def decode_first_stage(self, z, predict_cids=False, force_not_quantize... method differentiable_decode_first_stage (line 764) | def differentiable_decode_first_stage(self, z, predict_cids=False, for... method encode_first_stage (line 824) | def encode_first_stage(self, x): method shared_step (line 863) | def shared_step(self, batch, **kwargs): method forward (line 868) | def forward(self, x, c, *args, **kwargs): method _rescale_annotations (line 879) | def _rescale_annotations(self, bboxes, crop_coordinates): # TODO: mov... method apply_model (line 889) | def apply_model(self, x_noisy, t, cond, return_ids=False): method _predict_eps_from_xstart (line 992) | def _predict_eps_from_xstart(self, x_t, t, pred_xstart): method _prior_bpd (line 996) | def _prior_bpd(self, x_start): method p_losses (line 1010) | def p_losses(self, x_start, cond, t, noise=None): method p_mean_variance (line 1045) | def p_mean_variance(self, x, c, t, clip_denoised: bool, return_codeboo... method p_sample (line 1077) | def p_sample(self, x, c, t, clip_denoised=False, repeat_noise=False, method progressive_denoising (line 1108) | def progressive_denoising(self, cond, shape, verbose=True, callback=No... method p_sample_loop (line 1164) | def p_sample_loop(self, cond, shape, return_intermediates=False, method sample (line 1215) | def sample(self, cond, batch_size=16, return_intermediates=False, x_T=... method sample_log (line 1233) | def sample_log(self,cond,batch_size,ddim, ddim_steps,**kwargs): method log_images (line 1249) | def log_images(self, batch, N=8, n_row=4, sample=True, ddim_steps=200,... method configure_optimizers (line 1359) | def configure_optimizers(self): method to_rgb (line 1384) | def to_rgb(self, x): class DiffusionWrapper (line 1393) | class DiffusionWrapper(pl.LightningModule): method __init__ (line 1394) | def __init__(self, diff_model_config, conditioning_key): method forward (line 1400) | def forward(self, x, t, c_concat: list = None, c_crossattn: list = None): class Layout2ImgDiffusion (line 1423) | class Layout2ImgDiffusion(LatentDiffusion): method __init__ (line 1425) | def __init__(self, cond_stage_key, *args, **kwargs): method log_images (line 1429) | def log_images(self, batch, N=8, *args, **kwargs): FILE: text_to_audio/Make_An_Audio/ldm/models/diffusion/ddpm_audio.py class LatentDiffusion_audio (line 35) | class LatentDiffusion_audio(DDPM): method __init__ (line 37) | def __init__(self, method make_cond_schedule (line 86) | def make_cond_schedule(self, ): method on_train_batch_start (line 93) | def on_train_batch_start(self, batch, batch_idx, dataloader_idx): method register_schedule (line 108) | def register_schedule(self, method instantiate_first_stage (line 117) | def instantiate_first_stage(self, config): method instantiate_cond_stage (line 124) | def instantiate_cond_stage(self, config): method _get_denoise_row_from_list (line 145) | def _get_denoise_row_from_list(self, samples, desc='', force_no_decode... method get_first_stage_encoding (line 157) | def get_first_stage_encoding(self, encoder_posterior): method get_learned_conditioning (line 166) | def get_learned_conditioning(self, c): method get_unconditional_conditioning (line 181) | def get_unconditional_conditioning(self, batch_size, null_label=None): method meshgrid (line 205) | def meshgrid(self, h, w): method delta_border (line 212) | def delta_border(self, h, w): method get_weighting (line 226) | def get_weighting(self, h, w, Ly, Lx, device): method get_fold_unfold (line 242) | def get_fold_unfold(self, x, kernel_size, stride, uf=1, df=1): # todo... method get_input (line 295) | def get_input(self, batch, k, return_first_stage_outputs=False, force_... method decode_first_stage (line 352) | def decode_first_stage(self, z, predict_cids=False, force_not_quantize... method differentiable_decode_first_stage (line 412) | def differentiable_decode_first_stage(self, z, predict_cids=False, for... method encode_first_stage (line 472) | def encode_first_stage(self, x): method shared_step (line 511) | def shared_step(self, batch, **kwargs): method test_step (line 516) | def test_step(self,batch,batch_idx): method forward (line 540) | def forward(self, x, c, *args, **kwargs): method _rescale_annotations (line 551) | def _rescale_annotations(self, bboxes, crop_coordinates): # TODO: mov... method apply_model (line 561) | def apply_model(self, x_noisy, t, cond, return_ids=False): method _predict_eps_from_xstart (line 664) | def _predict_eps_from_xstart(self, x_t, t, pred_xstart): method _prior_bpd (line 668) | def _prior_bpd(self, x_start): method p_losses (line 682) | def p_losses(self, x_start, cond, t, noise=None): method p_mean_variance (line 717) | def p_mean_variance(self, x, c, t, clip_denoised: bool, return_codeboo... method p_sample (line 749) | def p_sample(self, x, c, t, clip_denoised=False, repeat_noise=False, method progressive_denoising (line 780) | def progressive_denoising(self, cond, shape, verbose=True, callback=No... method p_sample_loop (line 836) | def p_sample_loop(self, cond, shape, return_intermediates=False, method sample (line 887) | def sample(self, cond, batch_size=16, return_intermediates=False, x_T=... method sample_log (line 905) | def sample_log(self,cond,batch_size,ddim, ddim_steps,**kwargs): method log_images (line 921) | def log_images(self, batch, N=8, n_row=4, sample=True, ddim_steps=200,... method configure_optimizers (line 1037) | def configure_optimizers(self): method to_rgb (line 1062) | def to_rgb(self, x): class LatentFinetuneDiffusion (line 1071) | class LatentFinetuneDiffusion(LatentDiffusion_audio): method __init__ (line 1077) | def __init__(self, method init_from_ckpt (line 1101) | def init_from_ckpt(self, path, ignore_keys=list(), only_model=False): method log_images (line 1133) | def log_images(self, batch, N=8, n_row=4, sample=True, ddim_steps=200,... class LatentInpaintDiffusion (line 1213) | class LatentInpaintDiffusion(LatentFinetuneDiffusion): method __init__ (line 1220) | def __init__(self, method get_input (line 1230) | def get_input(self, batch, k, cond_key=None, bs=None, return_first_sta... method log_images (line 1258) | def log_images(self, *args, **kwargs): FILE: text_to_audio/Make_An_Audio/ldm/models/diffusion/ddpm_audio_inpaint.py class LatentDiffusion_audioinpaint (line 34) | class LatentDiffusion_audioinpaint(DDPM): method __init__ (line 36) | def __init__(self, method make_cond_schedule (line 90) | def make_cond_schedule(self, ): method on_train_batch_start (line 97) | def on_train_batch_start(self, batch, batch_idx, dataloader_idx): method register_schedule (line 112) | def register_schedule(self, method instantiate_first_stage (line 121) | def instantiate_first_stage(self, config): method instantiate_cond_stage (line 128) | def instantiate_cond_stage(self, config): method _get_denoise_row_from_list (line 149) | def _get_denoise_row_from_list(self, samples, desc='', force_no_decode... method get_first_stage_encoding (line 161) | def get_first_stage_encoding(self, encoder_posterior):# encode_emb fro... method get_learned_conditioning (line 170) | def get_learned_conditioning(self, c): method meshgrid (line 183) | def meshgrid(self, h, w): method delta_border (line 190) | def delta_border(self, h, w): method get_weighting (line 204) | def get_weighting(self, h, w, Ly, Lx, device): method get_fold_unfold (line 220) | def get_fold_unfold(self, x, kernel_size, stride, uf=1, df=1): # todo... method get_input (line 273) | def get_input(self, batch, k, return_first_stage_outputs=False, force_... method decode_first_stage (line 348) | def decode_first_stage(self, z, predict_cids=False, force_not_quantize... method differentiable_decode_first_stage (line 408) | def differentiable_decode_first_stage(self, z, predict_cids=False, for... method encode_first_stage (line 468) | def encode_first_stage(self, x): method shared_step (line 507) | def shared_step(self, batch, **kwargs): method test_step (line 512) | def test_step(self,batch,batch_idx): method forward (line 546) | def forward(self, x, c, *args, **kwargs): method _rescale_annotations (line 561) | def _rescale_annotations(self, bboxes, crop_coordinates): # TODO: mov... method apply_model (line 571) | def apply_model(self, x_noisy, t, cond, return_ids=False): method _predict_eps_from_xstart (line 682) | def _predict_eps_from_xstart(self, x_t, t, pred_xstart): method _prior_bpd (line 686) | def _prior_bpd(self, x_start): method p_losses (line 700) | def p_losses(self, x_start, cond, t, noise=None): method p_mean_variance (line 735) | def p_mean_variance(self, x, c, t, clip_denoised: bool, return_codeboo... method p_sample (line 767) | def p_sample(self, x, c, t, clip_denoised=False, repeat_noise=False, method progressive_denoising (line 798) | def progressive_denoising(self, cond, shape, verbose=True, callback=No... method p_sample_loop (line 854) | def p_sample_loop(self, cond, shape, return_intermediates=False, method sample (line 905) | def sample(self, cond, batch_size=16, return_intermediates=False, x_T=... method sample_log (line 923) | def sample_log(self,cond,batch_size,ddim, ddim_steps,**kwargs): method log_images (line 937) | def log_images(self, batch, N=8, n_row=4, sample=True, ddim_steps=200,... method configure_optimizers (line 1049) | def configure_optimizers(self): method to_rgb (line 1074) | def to_rgb(self, x): FILE: text_to_audio/Make_An_Audio/ldm/models/diffusion/plms.py class PLMSSampler (line 11) | class PLMSSampler(object): method __init__ (line 12) | def __init__(self, model, schedule="linear", **kwargs): method register_buffer (line 18) | def register_buffer(self, name, attr): method make_schedule (line 24) | def make_schedule(self, ddim_num_steps, ddim_discretize="uniform", ddi... method sample (line 58) | def sample(self, method plms_sampling (line 115) | def plms_sampling(self, cond, shape, method p_sample_plms (line 173) | def p_sample_plms(self, x, c, t, index, repeat_noise=False, use_origin... FILE: text_to_audio/Make_An_Audio/ldm/modules/attention.py function exists (line 11) | def exists(val): function uniq (line 15) | def uniq(arr): function default (line 19) | def default(val, d): function max_neg_value (line 25) | def max_neg_value(t): function init_ (line 29) | def init_(tensor): class GEGLU (line 37) | class GEGLU(nn.Module): method __init__ (line 38) | def __init__(self, dim_in, dim_out): method forward (line 42) | def forward(self, x): class FeedForward (line 47) | class FeedForward(nn.Module): method __init__ (line 48) | def __init__(self, dim, dim_out=None, mult=4, glu=False, dropout=0.): method forward (line 63) | def forward(self, x): function zero_module (line 67) | def zero_module(module): function Normalize (line 76) | def Normalize(in_channels): class LinearAttention (line 80) | class LinearAttention(nn.Module): method __init__ (line 81) | def __init__(self, dim, heads=4, dim_head=32): method forward (line 88) | def forward(self, x): class SpatialSelfAttention (line 99) | class SpatialSelfAttention(nn.Module): method __init__ (line 100) | def __init__(self, in_channels): method forward (line 126) | def forward(self, x): class CrossAttention (line 152) | class CrossAttention(nn.Module): method __init__ (line 153) | def __init__(self, query_dim, context_dim=None, heads=8, dim_head=64, ... method forward (line 170) | def forward(self, x, context=None, mask=None):# x:(b,h*w,c), context:(... class BasicTransformerBlock (line 196) | class BasicTransformerBlock(nn.Module): method __init__ (line 197) | def __init__(self, dim, n_heads, d_head, dropout=0., context_dim=None,... method forward (line 208) | def forward(self, x, context=None): method _forward (line 211) | def _forward(self, x, context=None): class SpatialTransformer (line 218) | class SpatialTransformer(nn.Module): method __init__ (line 226) | def __init__(self, in_channels, n_heads, d_head, method forward (line 250) | def forward(self, x, context=None): FILE: text_to_audio/Make_An_Audio/ldm/modules/diffusionmodules/custom_openaimodel.py class UNetModel (line 26) | class UNetModel(nn.Module): method __init__ (line 56) | def __init__( method convert_to_fp16 (line 315) | def convert_to_fp16(self): method convert_to_fp32 (line 323) | def convert_to_fp32(self): method forward (line 331) | def forward(self, x, timesteps=None, context=None, y=None,**kwargs): FILE: text_to_audio/Make_An_Audio/ldm/modules/diffusionmodules/model.py function get_timestep_embedding (line 12) | def get_timestep_embedding(timesteps, embedding_dim): function nonlinearity (line 33) | def nonlinearity(x): function Normalize (line 38) | def Normalize(in_channels, num_groups=32): class Upsample (line 42) | class Upsample(nn.Module): method __init__ (line 43) | def __init__(self, in_channels, with_conv): method forward (line 53) | def forward(self, x): class Downsample (line 60) | class Downsample(nn.Module): method __init__ (line 61) | def __init__(self, in_channels, with_conv): method forward (line 72) | def forward(self, x): class ResnetBlock (line 82) | class ResnetBlock(nn.Module): method __init__ (line 83) | def __init__(self, *, in_channels, out_channels=None, conv_shortcut=Fa... method forward (line 121) | def forward(self, x, temb): class LinAttnBlock (line 144) | class LinAttnBlock(LinearAttention): method __init__ (line 146) | def __init__(self, in_channels): class AttnBlock (line 150) | class AttnBlock(nn.Module): method __init__ (line 151) | def __init__(self, in_channels): method forward (line 178) | def forward(self, x): function make_attn (line 205) | def make_attn(in_channels, attn_type="vanilla"): class Model (line 216) | class Model(nn.Module): method __init__ (line 217) | def __init__(self, *, ch, out_ch, ch_mult=(1,2,4,8), num_res_blocks, method forward (line 316) | def forward(self, x, t=None, context=None): method get_last_layer (line 364) | def get_last_layer(self): class Encoder (line 368) | class Encoder(nn.Module): method __init__ (line 369) | def __init__(self, *, ch, out_ch, ch_mult=(1,2,4,8), num_res_blocks, method forward (line 434) | def forward(self, x): class Decoder (line 462) | class Decoder(nn.Module): method __init__ (line 463) | def __init__(self, *, ch, out_ch, ch_mult=(1,2,4,8), num_res_blocks, method forward (line 535) | def forward(self, z): class SimpleDecoder (line 571) | class SimpleDecoder(nn.Module): method __init__ (line 572) | def __init__(self, in_channels, out_channels, *args, **kwargs): method forward (line 594) | def forward(self, x): class UpsampleDecoder (line 607) | class UpsampleDecoder(nn.Module): method __init__ (line 608) | def __init__(self, in_channels, out_channels, ch, num_res_blocks, reso... method forward (line 641) | def forward(self, x): class LatentRescaler (line 655) | class LatentRescaler(nn.Module): method __init__ (line 656) | def __init__(self, factor, in_channels, mid_channels, out_channels, de... method forward (line 680) | def forward(self, x): class MergedRescaleEncoder (line 692) | class MergedRescaleEncoder(nn.Module): method __init__ (line 693) | def __init__(self, in_channels, ch, resolution, out_ch, num_res_blocks, method forward (line 705) | def forward(self, x): class MergedRescaleDecoder (line 711) | class MergedRescaleDecoder(nn.Module): method __init__ (line 712) | def __init__(self, z_channels, out_ch, resolution, num_res_blocks, att... method forward (line 722) | def forward(self, x): class Upsampler (line 728) | class Upsampler(nn.Module): method __init__ (line 729) | def __init__(self, in_size, out_size, in_channels, out_channels, ch_mu... method forward (line 741) | def forward(self, x): class Resize (line 747) | class Resize(nn.Module): method __init__ (line 748) | def __init__(self, in_channels=None, learned=False, mode="bilinear"): method forward (line 763) | def forward(self, x, scale_factor=1.0): class FirstStagePostProcessor (line 770) | class FirstStagePostProcessor(nn.Module): method __init__ (line 772) | def __init__(self, ch_mult:list, in_channels, method instantiate_pretrained (line 807) | def instantiate_pretrained(self, config): method encode_with_pretrained (line 816) | def encode_with_pretrained(self,x): method forward (line 822) | def forward(self,x): FILE: text_to_audio/Make_An_Audio/ldm/modules/diffusionmodules/openaimodel.py function convert_module_to_f16 (line 24) | def convert_module_to_f16(x): function convert_module_to_f32 (line 27) | def convert_module_to_f32(x): class AttentionPool2d (line 32) | class AttentionPool2d(nn.Module): method __init__ (line 37) | def __init__( method forward (line 51) | def forward(self, x): class TimestepBlock (line 62) | class TimestepBlock(nn.Module): method forward (line 68) | def forward(self, x, emb): class TimestepEmbedSequential (line 74) | class TimestepEmbedSequential(nn.Sequential, TimestepBlock): method forward (line 80) | def forward(self, x, emb, context=None): class Upsample (line 91) | class Upsample(nn.Module): method __init__ (line 100) | def __init__(self, channels, use_conv, dims=2, out_channels=None, padd... method forward (line 109) | def forward(self, x): class TransposedUpsample (line 121) | class TransposedUpsample(nn.Module): method __init__ (line 123) | def __init__(self, channels, out_channels=None, ks=5): method forward (line 130) | def forward(self,x): class Downsample (line 134) | class Downsample(nn.Module): method __init__ (line 143) | def __init__(self, channels, use_conv, dims=2, out_channels=None,paddi... method forward (line 158) | def forward(self, x): class ResBlock (line 163) | class ResBlock(TimestepBlock): method __init__ (line 179) | def __init__( method forward (line 243) | def forward(self, x, emb): method _forward (line 255) | def _forward(self, x, emb): class AttentionBlock (line 278) | class AttentionBlock(nn.Module): method __init__ (line 285) | def __init__( method forward (line 314) | def forward(self, x): method _forward (line 318) | def _forward(self, x): function count_flops_attn (line 327) | def count_flops_attn(model, _x, y): class QKVAttentionLegacy (line 347) | class QKVAttentionLegacy(nn.Module): method __init__ (line 352) | def __init__(self, n_heads): method forward (line 356) | def forward(self, qkv): method count_flops (line 375) | def count_flops(model, _x, y): class QKVAttention (line 379) | class QKVAttention(nn.Module): method __init__ (line 384) | def __init__(self, n_heads): method forward (line 388) | def forward(self, qkv): method count_flops (line 409) | def count_flops(model, _x, y): class UNetModel (line 413) | class UNetModel(nn.Module): method __init__ (line 443) | def __init__( method convert_to_fp16 (line 695) | def convert_to_fp16(self): method convert_to_fp32 (line 703) | def convert_to_fp32(self): method forward (line 711) | def forward(self, x, timesteps=None, context=None, y=None,**kwargs): class EncoderUNetModel (line 747) | class EncoderUNetModel(nn.Module): method __init__ (line 753) | def __init__( method convert_to_fp16 (line 926) | def convert_to_fp16(self): method convert_to_fp32 (line 933) | def convert_to_fp32(self): method forward (line 940) | def forward(self, x, timesteps): FILE: text_to_audio/Make_An_Audio/ldm/modules/diffusionmodules/util.py function make_beta_schedule (line 21) | def make_beta_schedule(schedule, n_timestep, linear_start=1e-4, linear_e... function make_ddim_timesteps (line 46) | def make_ddim_timesteps(ddim_discr_method, num_ddim_timesteps, num_ddpm_... function make_ddim_sampling_parameters (line 63) | def make_ddim_sampling_parameters(alphacums, ddim_timesteps, eta, verbos... function betas_for_alpha_bar (line 77) | def betas_for_alpha_bar(num_diffusion_timesteps, alpha_bar, max_beta=0.9... function extract_into_tensor (line 96) | def extract_into_tensor(a, t, x_shape): function checkpoint (line 102) | def checkpoint(func, inputs, params, flag): class CheckpointFunction (line 119) | class CheckpointFunction(torch.autograd.Function): method forward (line 121) | def forward(ctx, run_function, length, *args): method backward (line 131) | def backward(ctx, *output_grads): function timestep_embedding (line 151) | def timestep_embedding(timesteps, dim, max_period=10000, repeat_only=Fal... function zero_module (line 174) | def zero_module(module): function scale_module (line 183) | def scale_module(module, scale): function mean_flat (line 192) | def mean_flat(tensor): function normalization (line 199) | def normalization(channels): class SiLU (line 209) | class SiLU(nn.Module): method forward (line 210) | def forward(self, x): class GroupNorm32 (line 214) | class GroupNorm32(nn.GroupNorm): method forward (line 215) | def forward(self, x): function conv_nd (line 218) | def conv_nd(dims, *args, **kwargs): function linear (line 231) | def linear(*args, **kwargs): function avg_pool_nd (line 238) | def avg_pool_nd(dims, *args, **kwargs): class HybridConditioner (line 251) | class HybridConditioner(nn.Module): method __init__ (line 253) | def __init__(self, c_concat_config, c_crossattn_config): method forward (line 258) | def forward(self, c_concat, c_crossattn): function noise_like (line 264) | def noise_like(shape, device, repeat=False): FILE: text_to_audio/Make_An_Audio/ldm/modules/discriminator/model.py class ActNorm (line 5) | class ActNorm(nn.Module): method __init__ (line 6) | def __init__(self, num_features, logdet=False, affine=True, method initialize (line 17) | def initialize(self, input): method forward (line 38) | def forward(self, input, reverse=False): method reverse (line 66) | def reverse(self, output): function weights_init (line 89) | def weights_init(m): class NLayerDiscriminator (line 98) | class NLayerDiscriminator(nn.Module): method __init__ (line 102) | def __init__(self, input_nc=3, ndf=64, n_layers=3, use_actnorm=False): method forward (line 145) | def forward(self, input): class NLayerDiscriminator1dFeats (line 149) | class NLayerDiscriminator1dFeats(NLayerDiscriminator): method __init__ (line 153) | def __init__(self, input_nc=3, ndf=64, n_layers=3, use_actnorm=False): class NLayerDiscriminator1dSpecs (line 205) | class NLayerDiscriminator1dSpecs(NLayerDiscriminator): method __init__ (line 209) | def __init__(self, input_nc=80, ndf=64, n_layers=3, use_actnorm=False): method forward (line 253) | def forward(self, input): FILE: text_to_audio/Make_An_Audio/ldm/modules/discriminator/multi_window_disc.py class Discriminator2DFactory (line 6) | class Discriminator2DFactory(nn.Module): method __init__ (line 7) | def __init__(self, time_length, freq_length=80, kernel=(3, 3), c_in=1,... method forward (line 46) | def forward(self, x): class MultiWindowDiscriminator (line 66) | class MultiWindowDiscriminator(nn.Module): method __init__ (line 67) | def __init__(self, time_lengths, cond_size=0, freq_length=80, kernel=(... method forward (line 88) | def forward(self, x, x_len, cond=None, start_frames_wins=None): method clip (line 124) | def clip(self, x, cond, x_len, win_length, start_frames=None): class Discriminator (line 151) | class Discriminator(nn.Module): method __init__ (line 152) | def __init__(self, time_lengths=[32, 64, 128], freq_length=80, cond_si... method forward (line 177) | def forward(self, x, cond=None, start_frames_wins=None): FILE: text_to_audio/Make_An_Audio/ldm/modules/distributions/distributions.py class AbstractDistribution (line 5) | class AbstractDistribution: method sample (line 6) | def sample(self): method mode (line 9) | def mode(self): class DiracDistribution (line 13) | class DiracDistribution(AbstractDistribution): method __init__ (line 14) | def __init__(self, value): method sample (line 17) | def sample(self): method mode (line 20) | def mode(self): class DiagonalGaussianDistribution (line 24) | class DiagonalGaussianDistribution(object): method __init__ (line 25) | def __init__(self, parameters, deterministic=False): method sample (line 35) | def sample(self): method kl (line 39) | def kl(self, other=None): method nll (line 53) | def nll(self, sample, dims=[1,2,3]): method mode (line 61) | def mode(self): function normal_kl (line 65) | def normal_kl(mean1, logvar1, mean2, logvar2): FILE: text_to_audio/Make_An_Audio/ldm/modules/ema.py class LitEma (line 5) | class LitEma(nn.Module): method __init__ (line 6) | def __init__(self, model, decay=0.9999, use_num_upates=True): method forward (line 25) | def forward(self,model): method copy_to (line 46) | def copy_to(self, model): method store (line 55) | def store(self, parameters): method restore (line 64) | def restore(self, parameters): FILE: text_to_audio/Make_An_Audio/ldm/modules/encoders/CLAP/CLAPWrapper.py class CLAPWrapper (line 18) | class CLAPWrapper(): method __init__ (line 23) | def __init__(self, model_fp, device): method load_clap (line 34) | def load_clap(self): method default_collate (line 71) | def default_collate(self, batch): method load_audio_into_tensor (line 117) | def load_audio_into_tensor(self, audio_path, audio_duration, resample=... method preprocess_audio (line 145) | def preprocess_audio(self, audio_files, resample): method preprocess_text (line 155) | def preprocess_text(self, text_queries, text_len=100): method get_text_embeddings (line 167) | def get_text_embeddings(self, class_labels): method get_audio_embeddings (line 174) | def get_audio_embeddings(self, audio_files, resample): method _get_text_embeddings (line 181) | def _get_text_embeddings(self, preprocessed_text): method _get_audio_embeddings (line 188) | def _get_audio_embeddings(self, preprocessed_audio): method compute_similarity (line 198) | def compute_similarity(self, audio_embeddings, text_embeddings): method _generic_batch_inference (line 204) | def _generic_batch_inference(self, func, *args): method get_audio_embeddings_per_batch (line 227) | def get_audio_embeddings_per_batch(self, audio_files, batch_size): method get_text_embeddings_per_batch (line 231) | def get_text_embeddings_per_batch(self, class_labels, batch_size): method classify_audio_files_per_batch (line 235) | def classify_audio_files_per_batch(self, audio_files, class_labels, ba... FILE: text_to_audio/Make_An_Audio/ldm/modules/encoders/CLAP/audio.py function get_audio_encoder (line 6) | def get_audio_encoder(name: str): class ConvBlock (line 13) | class ConvBlock(nn.Module): method __init__ (line 14) | def __init__(self, in_channels, out_channels): method forward (line 32) | def forward(self, input, pool_size=(2, 2), pool_type='avg'): class ConvBlock5x5 (line 51) | class ConvBlock5x5(nn.Module): method __init__ (line 52) | def __init__(self, in_channels, out_channels): method forward (line 64) | def forward(self, input, pool_size=(2, 2), pool_type='avg'): class AttBlock (line 82) | class AttBlock(nn.Module): method __init__ (line 83) | def __init__(self, n_in, n_out, activation='linear', temperature=1.): method forward (line 93) | def forward(self, x): method nonlinear_transform (line 100) | def nonlinear_transform(self, x): class Cnn14 (line 107) | class Cnn14(nn.Module): method __init__ (line 108) | def __init__(self, sample_rate, window_size, hop_size, mel_bins, fmin, method forward (line 143) | def forward(self, input, mixup_lambda=None): FILE: text_to_audio/Make_An_Audio/ldm/modules/encoders/CLAP/clap.py class Projection (line 8) | class Projection(nn.Module): method __init__ (line 9) | def __init__(self, d_in: int, d_out: int, p: float=0.5) -> None: method forward (line 16) | def forward(self, x: torch.Tensor) -> torch.Tensor: class AudioEncoder (line 22) | class AudioEncoder(nn.Module): method __init__ (line 23) | def __init__(self, audioenc_name:str, d_in: int, d_out: int, sample_ra... method forward (line 36) | def forward(self, x): class TextEncoder (line 42) | class TextEncoder(nn.Module): method __init__ (line 43) | def __init__(self, d_out: int, text_model: str, transformer_embed_dim:... method forward (line 48) | def forward(self, x): class CLAP (line 54) | class CLAP(nn.Module): method __init__ (line 55) | def __init__(self, method forward (line 85) | def forward(self, audio, text): FILE: text_to_audio/Make_An_Audio/ldm/modules/encoders/CLAP/utils.py function read_config_as_args (line 5) | def read_config_as_args(config_path,args=None,is_config_str=False): FILE: text_to_audio/Make_An_Audio/ldm/modules/encoders/modules.py class AbstractEncoder (line 14) | class AbstractEncoder(nn.Module): method __init__ (line 15) | def __init__(self): method encode (line 18) | def encode(self, *args, **kwargs): class ClassEmbedder (line 22) | class ClassEmbedder(nn.Module): method __init__ (line 23) | def __init__(self, embed_dim, n_classes=1000, key='class'): method forward (line 28) | def forward(self, batch, key=None): class TransformerEmbedder (line 37) | class TransformerEmbedder(AbstractEncoder): method __init__ (line 39) | def __init__(self, n_embed, n_layer, vocab_size, max_seq_len=77, devic... method forward (line 45) | def forward(self, tokens): method encode (line 50) | def encode(self, x): class BERTTokenizer (line 54) | class BERTTokenizer(AbstractEncoder): method __init__ (line 56) | def __init__(self, device="cuda", vq_interface=True, max_length=77): method forward (line 64) | def forward(self, text): method encode (line 71) | def encode(self, text): method decode (line 77) | def decode(self, text): class BERTEmbedder (line 81) | class BERTEmbedder(AbstractEncoder):# 这里不是用的pretrained bert,是用的transform... method __init__ (line 83) | def __init__(self, n_embed, n_layer, vocab_size=30522, max_seq_len=77, method forward (line 94) | def forward(self, text): method encode (line 102) | def encode(self, text): class SpatialRescaler (line 107) | class SpatialRescaler(nn.Module): method __init__ (line 108) | def __init__(self, method forward (line 126) | def forward(self,x): method encode (line 135) | def encode(self, x): function disabled_train (line 138) | def disabled_train(self, mode=True): class FrozenT5Embedder (line 143) | class FrozenT5Embedder(AbstractEncoder): method __init__ (line 145) | def __init__(self, version="google/t5-v1_1-large", device="cuda", max_... method freeze (line 154) | def freeze(self): method forward (line 160) | def forward(self, text): method encode (line 169) | def encode(self, text): class FrozenCLAPEmbedder (line 173) | class FrozenCLAPEmbedder(AbstractEncoder): method __init__ (line 175) | def __init__(self, weights_path, freeze=True, device="cuda", max_lengt... method freeze (line 199) | def freeze(self): method encode (line 205) | def encode(self, text): class FrozenCLAPEmbedderNoLoad (line 214) | class FrozenCLAPEmbedderNoLoad(AbstractEncoder): method __init__ (line 215) | def __init__(self, config, freeze=True, device="cpu", max_length=77): method freeze (line 231) | def freeze(self): method encode (line 237) | def encode(self, text): class NewFrozenCLAPEmbedder (line 247) | class NewFrozenCLAPEmbedder(AbstractEncoder): method __init__ (line 249) | def __init__(self, weights_path, freeze=True, device="cuda", max_lengt... method freeze (line 275) | def freeze(self): method encode (line 280) | def encode(self, text): class FrozenFLANEmbedder (line 287) | class FrozenFLANEmbedder(AbstractEncoder): method __init__ (line 289) | def __init__(self, version="google/flan-t5-large", device="cuda", max_... method freeze (line 298) | def freeze(self): method forward (line 304) | def forward(self, text): method encode (line 313) | def encode(self, text): class FrozenGlobalNormOpenCLIPEmbedder (line 315) | class FrozenGlobalNormOpenCLIPEmbedder(AbstractEncoder): method __init__ (line 319) | def __init__(self, arch="ViT-H-14", version="laion2b_s32b_b79k", devic... method freeze (line 333) | def freeze(self): method forward (line 338) | def forward(self, text): method forward_img (line 344) | def forward_img(self, image): method encode (line 349) | def encode(self, text): FILE: text_to_audio/Make_An_Audio/ldm/modules/encoders/open_clap/bert.py function bert_embeddings (line 6) | def bert_embeddings(text): function Roberta_embeddings (line 17) | def Roberta_embeddings(text): function bart_embeddings (line 28) | def bart_embeddings(text): FILE: text_to_audio/Make_An_Audio/ldm/modules/encoders/open_clap/factory.py function _natural_key (line 20) | def _natural_key(string_): function _rescan_model_configs (line 24) | def _rescan_model_configs(): function load_state_dict (line 51) | def load_state_dict(checkpoint_path: str, map_location="cpu", skip_param... function create_model (line 67) | def create_model( function create_model_and_transforms (line 224) | def create_model_and_transforms( function list_models (line 247) | def list_models(): function add_model_config (line 252) | def add_model_config(path): FILE: text_to_audio/Make_An_Audio/ldm/modules/encoders/open_clap/feature_fusion.py class DAF (line 11) | class DAF(nn.Module): method __init__ (line 16) | def __init__(self): method forward (line 19) | def forward(self, x, residual): class iAFF (line 23) | class iAFF(nn.Module): method __init__ (line 28) | def __init__(self, channels=64, r=4, type='2D'): method forward (line 111) | def forward(self, x, residual): class AFF (line 133) | class AFF(nn.Module): method __init__ (line 138) | def __init__(self, channels=64, r=4, type='2D'): method forward (line 179) | def forward(self, x, residual): FILE: text_to_audio/Make_An_Audio/ldm/modules/encoders/open_clap/htsat.py function _ntuple (line 30) | def _ntuple(n): function drop_path (line 43) | def drop_path(x, drop_prob: float = 0., training: bool = False): class DropPath (line 61) | class DropPath(nn.Module): method __init__ (line 64) | def __init__(self, drop_prob=None): method forward (line 68) | def forward(self, x): class PatchEmbed (line 71) | class PatchEmbed(nn.Module): method __init__ (line 74) | def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=... method forward (line 108) | def forward(self, x, longer_idx = None): class Mlp (line 146) | class Mlp(nn.Module): method __init__ (line 149) | def __init__(self, in_features, hidden_features=None, out_features=Non... method forward (line 158) | def forward(self, x): function _no_grad_trunc_normal_ (line 166) | def _no_grad_trunc_normal_(tensor, mean, std, a, b): function trunc_normal_ (line 202) | def trunc_normal_(tensor, mean=0., std=1., a=-2., b=2.): function variance_scaling_ (line 223) | def variance_scaling_(tensor, scale=1.0, mode='fan_in', distribution='no... function lecun_normal_ (line 246) | def lecun_normal_(tensor): function window_partition (line 249) | def window_partition(x, window_size): function window_reverse (line 263) | def window_reverse(windows, window_size, H, W): class WindowAttention (line 279) | class WindowAttention(nn.Module): method __init__ (line 292) | def __init__(self, dim, window_size, num_heads, qkv_bias=True, qk_scal... method forward (line 326) | def forward(self, x, mask=None): method extra_repr (line 359) | def extra_repr(self): class SwinTransformerBlock (line 364) | class SwinTransformerBlock(nn.Module): method __init__ (line 382) | def __init__(self, dim, input_resolution, num_heads, window_size=7, sh... method forward (line 439) | def forward(self, x): method extra_repr (line 482) | def extra_repr(self): class PatchMerging (line 488) | class PatchMerging(nn.Module): method __init__ (line 496) | def __init__(self, input_resolution, dim, norm_layer=nn.LayerNorm): method forward (line 503) | def forward(self, x): method extra_repr (line 526) | def extra_repr(self): class BasicLayer (line 530) | class BasicLayer(nn.Module): method __init__ (line 549) | def __init__(self, dim, input_resolution, depth, num_heads, window_size, method forward (line 578) | def forward(self, x): method extra_repr (line 594) | def extra_repr(self): class HTSAT_Swin_Transformer (line 599) | class HTSAT_Swin_Transformer(nn.Module): method __init__ (line 624) | def __init__(self, spec_size=256, patch_size=4, patch_stride=(4,4), method _init_weights (line 756) | def _init_weights(self, m): method no_weight_decay (line 766) | def no_weight_decay(self): method no_weight_decay_keywords (line 770) | def no_weight_decay_keywords(self): method forward_features (line 774) | def forward_features(self, x, longer_idx = None): method crop_wav (line 821) | def crop_wav(self, x, crop_size, spe_pos = None): method reshape_wav2img (line 833) | def reshape_wav2img(self, x): method repeat_wat2img (line 851) | def repeat_wat2img(self, x, cur_pos): method forward (line 866) | def forward(self, x: torch.Tensor, mixup_lambda = None, infer_mode = F... function create_htsat_model (line 972) | def create_htsat_model(audio_cfg, enable_fusion=False, fusion_type='None'): FILE: text_to_audio/Make_An_Audio/ldm/modules/encoders/open_clap/linear_probe.py class LinearProbe (line 7) | class LinearProbe(nn.Module): method __init__ (line 8) | def __init__(self, model, mlp, freeze, in_ch, out_ch, act=None): method forward (line 45) | def forward(self, x, mix_lambda=None, device=None): FILE: text_to_audio/Make_An_Audio/ldm/modules/encoders/open_clap/loss.py function gather_features (line 15) | def gather_features( class ClipLoss (line 93) | class ClipLoss(nn.Module): method __init__ (line 95) | def __init__( method forward (line 120) | def forward(self, audio_features, text_features, logit_scale_a, logit_... function lp_gather_features (line 223) | def lp_gather_features( function get_map (line 246) | def get_map(pred, target): function get_acc (line 251) | def get_acc(pred, target): function get_mauc (line 256) | def get_mauc(pred, target): class LPMetrics (line 262) | class LPMetrics(object): method __init__ (line 263) | def __init__(self, metric_names = ['map','acc','mauc']): method get_metric (line 269) | def get_metric(self,name): method evaluate_mertics (line 279) | def evaluate_mertics(self, pred, target): function calc_celoss (line 286) | def calc_celoss(pred, target): class LPLoss (line 291) | class LPLoss(nn.Module): method __init__ (line 293) | def __init__(self, loss_name): method forward (line 304) | def forward(self, pred, target): FILE: text_to_audio/Make_An_Audio/ldm/modules/encoders/open_clap/model.py class MLPLayers (line 27) | class MLPLayers(nn.Module): method __init__ (line 28) | def __init__(self, units=[512, 512, 512], nonlin=nn.ReLU(), dropout=0.1): method forward (line 42) | def forward(self, X): class Bottleneck (line 47) | class Bottleneck(nn.Module): method __init__ (line 50) | def __init__(self, inplanes, planes, stride=1): method forward (line 90) | def forward(self, x: torch.Tensor): class AttentionPool2d (line 106) | class AttentionPool2d(nn.Module): method __init__ (line 107) | def __init__( method forward (line 120) | def forward(self, x): class ModifiedResNet (line 153) | class ModifiedResNet(nn.Module): method __init__ (line 161) | def __init__(self, layers, output_dim, heads, image_size=224, width=64): method _make_layer (line 192) | def _make_layer(self, planes, blocks, stride=1): method init_parameters (line 201) | def init_parameters(self): method lock (line 214) | def lock(self, unlocked_groups=0, freeze_bn_stats=False): method stem (line 223) | def stem(self, x): method forward (line 233) | def forward(self, x): class LayerNorm (line 244) | class LayerNorm(nn.LayerNorm): method forward (line 247) | def forward(self, x: torch.Tensor): class QuickGELU (line 253) | class QuickGELU(nn.Module): method forward (line 255) | def forward(self, x: torch.Tensor): class ResidualAttentionBlock (line 259) | class ResidualAttentionBlock(nn.Module): method __init__ (line 260) | def __init__(self, d_model: int, n_head: int, act_layer: Callable = nn... method attention (line 276) | def attention(self, x: torch.Tensor, attn_mask: Optional[torch.Tensor]... method forward (line 279) | def forward(self, x: torch.Tensor, attn_mask: Optional[torch.Tensor] =... class Transformer (line 285) | class Transformer(nn.Module): method __init__ (line 286) | def __init__( method forward (line 299) | def forward(self, x: torch.Tensor, attn_mask: Optional[torch.Tensor] =... class VisualTransformer (line 305) | class VisualTransformer(nn.Module): method __init__ (line 306) | def __init__( method lock (line 339) | def lock(self, unlocked_groups=0, freeze_bn_stats=False): method forward (line 346) | def forward(self, x: torch.Tensor): class CLAPVisionCfg (line 376) | class CLAPVisionCfg: class CLAPAudioCfp (line 397) | class CLAPAudioCfp: class CLAPTextCfg (line 413) | class CLAPTextCfg: class CLAP (line 422) | class CLAP(nn.Module): method __init__ (line 423) | def __init__( method init_text_branch_parameters (line 552) | def init_text_branch_parameters(self): method build_attention_mask (line 582) | def build_attention_mask(self): method encode_audio (line 590) | def encode_audio(self, audio, device): method encode_text (line 603) | def encode_text(self, text, device): method forward (line 652) | def forward(self, audio, text, device=None): method get_logit_scale (line 697) | def get_logit_scale(self): method get_textual_embedding (line 700) | def get_textual_embedding(self, data): method get_text_embedding (line 718) | def get_text_embedding(self, data): method get_audio_embedding (line 740) | def get_audio_embedding(self, data): method audio_infer (line 767) | def audio_infer(self, audio, hopsize=None, device=None): function convert_weights_to_fp16 (line 822) | def convert_weights_to_fp16(model: nn.Module): function build_model_from_openai_state_dict (line 852) | def build_model_from_openai_state_dict(state_dict: dict, model_cfg, enab... function trace_model (line 897) | def trace_model(model, batch_size=256, device=torch.device("cpu")): FILE: text_to_audio/Make_An_Audio/ldm/modules/encoders/open_clap/openai.py function list_openai_models (line 18) | def list_openai_models() -> List[str]: function load_openai_model (line 23) | def load_openai_model( FILE: text_to_audio/Make_An_Audio/ldm/modules/encoders/open_clap/pann_model.py function init_layer (line 17) | def init_layer(layer): function init_bn (line 26) | def init_bn(bn): class ConvBlock (line 32) | class ConvBlock(nn.Module): method __init__ (line 33) | def __init__(self, in_channels, out_channels): method init_weight (line 52) | def init_weight(self): method forward (line 59) | def forward(self, input, pool_size=(2, 2), pool_type='avg'): class ConvBlock5x5 (line 78) | class ConvBlock5x5(nn.Module): method __init__ (line 79) | def __init__(self, in_channels, out_channels): method init_weight (line 92) | def init_weight(self): method forward (line 97) | def forward(self, input, pool_size=(2, 2), pool_type='avg'): class AttBlock (line 115) | class AttBlock(nn.Module): method __init__ (line 116) | def __init__(self, n_in, n_out, activation='linear', temperature=1.): method init_weights (line 127) | def init_weights(self): method forward (line 132) | def forward(self, x): method nonlinear_transform (line 139) | def nonlinear_transform(self, x): class Cnn14 (line 146) | class Cnn14(nn.Module): method __init__ (line 147) | def __init__(self, sample_rate, window_size, hop_size, mel_bins, fmin, method init_weight (line 218) | def init_weight(self): method forward (line 223) | def forward(self, input, mixup_lambda=None, device=None): class Cnn6 (line 333) | class Cnn6(nn.Module): method __init__ (line 334) | def __init__(self, sample_rate, window_size, hop_size, mel_bins, fmin, method init_weight (line 375) | def init_weight(self): method forward (line 380) | def forward(self, input, mixup_lambda=None, device=None): class Cnn10 (line 428) | class Cnn10(nn.Module): method __init__ (line 429) | def __init__(self, sample_rate, window_size, hop_size, mel_bins, fmin, method init_weight (line 471) | def init_weight(self): method forward (line 476) | def forward(self, input, mixup_lambda=None, device=None): function create_pann_model (line 526) | def create_pann_model(audio_cfg, enable_fusion=False, fusion_type='None'): FILE: text_to_audio/Make_An_Audio/ldm/modules/encoders/open_clap/pretrained.py function list_pretrained (line 78) | def list_pretrained(as_str: bool = False): function list_pretrained_tag_models (line 85) | def list_pretrained_tag_models(tag: str): function list_pretrained_model_tags (line 94) | def list_pretrained_model_tags(model: str): function get_pretrained_url (line 102) | def get_pretrained_url(model: str, tag: str): function download_pretrained (line 111) | def download_pretrained(url: str, root: str = os.path.expanduser("~/.cac... FILE: text_to_audio/Make_An_Audio/ldm/modules/encoders/open_clap/timm_model.py class TimmModel (line 20) | class TimmModel(nn.Module): method __init__ (line 25) | def __init__( method lock (line 71) | def lock(self, unlocked_groups=0, freeze_bn_stats=False): method forward (line 103) | def forward(self, x): FILE: text_to_audio/Make_An_Audio/ldm/modules/encoders/open_clap/tokenizer.py function default_bpe (line 17) | def default_bpe(): function bytes_to_unicode (line 22) | def bytes_to_unicode(): function get_pairs (line 44) | def get_pairs(word): function basic_clean (line 56) | def basic_clean(text): function whitespace_clean (line 62) | def whitespace_clean(text): class SimpleTokenizer (line 68) | class SimpleTokenizer(object): method __init__ (line 69) | def __init__(self, bpe_path: str = default_bpe(), special_tokens=None): method bpe (line 94) | def bpe(self, token): method encode (line 135) | def encode(self, text): method decode (line 143) | def decode(self, tokens): function tokenize (line 152) | def tokenize(texts: Union[str, List[str]], context_length: int = 77) -> ... FILE: text_to_audio/Make_An_Audio/ldm/modules/encoders/open_clap/transform.py function _convert_to_rgb (line 5) | def _convert_to_rgb(image): function image_transform (line 9) | def image_transform( FILE: text_to_audio/Make_An_Audio/ldm/modules/encoders/open_clap/utils.py function freeze_batch_norm_2d (line 42) | def freeze_batch_norm_2d(module, module_match={}, name=""): function exist (line 83) | def exist(dataset_name, dataset_type): function get_tar_path_from_dataset_name (line 93) | def get_tar_path_from_dataset_name( function get_tar_path_from_txts (line 134) | def get_tar_path_from_txts(txt_path, islocal, proportion=1): function get_mix_lambda (line 169) | def get_mix_lambda(mixup_alpha, batch_size): function do_mixup (line 176) | def do_mixup(x, mixup_lambda): function interpolate (line 191) | def interpolate(x, ratio): function pad_framewise_output (line 207) | def pad_framewise_output(framewise_output, frames_num): function process_ipc (line 225) | def process_ipc(index_path, classes_num, filename): function save_to_dict (line 239) | def save_to_dict(s, o_={}): function get_data_from_log (line 245) | def get_data_from_log(txt_path): function save_p (line 283) | def save_p(obj, filename): function load_p (line 301) | def load_p(filename): function save_json (line 309) | def save_json(data, name="data.json"): function load_json (line 316) | def load_json(name): function load_class_label (line 328) | def load_class_label(path): function get_optimizer (line 354) | def get_optimizer(params, lr, betas, eps, momentum, optimizer_name): FILE: text_to_audio/Make_An_Audio/ldm/modules/image_degradation/bsrgan.py function modcrop_np (line 29) | def modcrop_np(img, sf): function analytic_kernel (line 49) | def analytic_kernel(k): function anisotropic_Gaussian (line 65) | def anisotropic_Gaussian(ksize=15, theta=np.pi, l1=6, l2=6): function gm_blur_kernel (line 86) | def gm_blur_kernel(mean, cov, size=15): function shift_pixel (line 99) | def shift_pixel(x, sf, upper_left=True): function blur (line 128) | def blur(x, k): function gen_kernel (line 145) | def gen_kernel(k_size=np.array([15, 15]), scale_factor=np.array([4, 4]),... function fspecial_gaussian (line 187) | def fspecial_gaussian(hsize, sigma): function fspecial_laplacian (line 201) | def fspecial_laplacian(alpha): function fspecial (line 210) | def fspecial(filter_type, *args, **kwargs): function bicubic_degradation (line 228) | def bicubic_degradation(x, sf=3): function srmd_degradation (line 240) | def srmd_degradation(x, k, sf=3): function dpsr_degradation (line 262) | def dpsr_degradation(x, k, sf=3): function classical_degradation (line 284) | def classical_degradation(x, k, sf=3): function add_sharpening (line 299) | def add_sharpening(img, weight=0.5, radius=50, threshold=10): function add_blur (line 325) | def add_blur(img, sf=4): function add_resize (line 339) | def add_resize(img, sf=4): function add_Gaussian_noise (line 369) | def add_Gaussian_noise(img, noise_level1=2, noise_level2=25): function add_speckle_noise (line 386) | def add_speckle_noise(img, noise_level1=2, noise_level2=25): function add_Poisson_noise (line 404) | def add_Poisson_noise(img): function add_JPEG_noise (line 418) | def add_JPEG_noise(img): function random_crop (line 427) | def random_crop(lq, hq, sf=4, lq_patchsize=64): function degradation_bsrgan (line 438) | def degradation_bsrgan(img, sf=4, lq_patchsize=72, isp_model=None): function degradation_bsrgan_variant (line 530) | def degradation_bsrgan_variant(image, sf=4, isp_model=None): function degradation_bsrgan_plus (line 617) | def degradation_bsrgan_plus(img, sf=4, shuffle_prob=0.5, use_sharp=True,... FILE: text_to_audio/Make_An_Audio/ldm/modules/image_degradation/bsrgan_light.py function modcrop_np (line 29) | def modcrop_np(img, sf): function analytic_kernel (line 49) | def analytic_kernel(k): function anisotropic_Gaussian (line 65) | def anisotropic_Gaussian(ksize=15, theta=np.pi, l1=6, l2=6): function gm_blur_kernel (line 86) | def gm_blur_kernel(mean, cov, size=15): function shift_pixel (line 99) | def shift_pixel(x, sf, upper_left=True): function blur (line 128) | def blur(x, k): function gen_kernel (line 145) | def gen_kernel(k_size=np.array([15, 15]), scale_factor=np.array([4, 4]),... function fspecial_gaussian (line 187) | def fspecial_gaussian(hsize, sigma): function fspecial_laplacian (line 201) | def fspecial_laplacian(alpha): function fspecial (line 210) | def fspecial(filter_type, *args, **kwargs): function bicubic_degradation (line 228) | def bicubic_degradation(x, sf=3): function srmd_degradation (line 240) | def srmd_degradation(x, k, sf=3): function dpsr_degradation (line 262) | def dpsr_degradation(x, k, sf=3): function classical_degradation (line 284) | def classical_degradation(x, k, sf=3): function add_sharpening (line 299) | def add_sharpening(img, weight=0.5, radius=50, threshold=10): function add_blur (line 325) | def add_blur(img, sf=4): function add_resize (line 343) | def add_resize(img, sf=4): function add_Gaussian_noise (line 373) | def add_Gaussian_noise(img, noise_level1=2, noise_level2=25): function add_speckle_noise (line 390) | def add_speckle_noise(img, noise_level1=2, noise_level2=25): function add_Poisson_noise (line 408) | def add_Poisson_noise(img): function add_JPEG_noise (line 422) | def add_JPEG_noise(img): function random_crop (line 431) | def random_crop(lq, hq, sf=4, lq_patchsize=64): function degradation_bsrgan (line 442) | def degradation_bsrgan(img, sf=4, lq_patchsize=72, isp_model=None): function degradation_bsrgan_variant (line 534) | def degradation_bsrgan_variant(image, sf=4, isp_model=None): FILE: text_to_audio/Make_An_Audio/ldm/modules/image_degradation/utils_image.py function is_image_file (line 29) | def is_image_file(filename): function get_timestamp (line 33) | def get_timestamp(): function imshow (line 37) | def imshow(x, title=None, cbar=False, figsize=None): function surf (line 47) | def surf(Z, cmap='rainbow', figsize=None): function get_image_paths (line 67) | def get_image_paths(dataroot): function _get_paths_from_images (line 74) | def _get_paths_from_images(path): function patches_from_image (line 93) | def patches_from_image(img, p_size=512, p_overlap=64, p_max=800): function imssave (line 112) | def imssave(imgs, img_path): function split_imageset (line 125) | def split_imageset(original_dataroot, taget_dataroot, n_channels=3, p_si... function mkdir (line 153) | def mkdir(path): function mkdirs (line 158) | def mkdirs(paths): function mkdir_and_rename (line 166) | def mkdir_and_rename(path): function imread_uint (line 185) | def imread_uint(path, n_channels=3): function imsave (line 203) | def imsave(img, img_path): function imwrite (line 209) | def imwrite(img, img_path): function read_img (line 220) | def read_img(path): function uint2single (line 249) | def uint2single(img): function single2uint (line 254) | def single2uint(img): function uint162single (line 259) | def uint162single(img): function single2uint16 (line 264) | def single2uint16(img): function uint2tensor4 (line 275) | def uint2tensor4(img): function uint2tensor3 (line 282) | def uint2tensor3(img): function tensor2uint (line 289) | def tensor2uint(img): function single2tensor3 (line 302) | def single2tensor3(img): function single2tensor4 (line 307) | def single2tensor4(img): function tensor2single (line 312) | def tensor2single(img): function tensor2single3 (line 320) | def tensor2single3(img): function single2tensor5 (line 329) | def single2tensor5(img): function single32tensor5 (line 333) | def single32tensor5(img): function single42tensor4 (line 337) | def single42tensor4(img): function tensor2img (line 342) | def tensor2img(tensor, out_type=np.uint8, min_max=(0, 1)): function augment_img (line 380) | def augment_img(img, mode=0): function augment_img_tensor4 (line 401) | def augment_img_tensor4(img, mode=0): function augment_img_tensor (line 422) | def augment_img_tensor(img, mode=0): function augment_img_np3 (line 441) | def augment_img_np3(img, mode=0): function augment_imgs (line 469) | def augment_imgs(img_list, hflip=True, rot=True): function modcrop (line 494) | def modcrop(img_in, scale): function shave (line 510) | def shave(img_in, border=0): function rgb2ycbcr (line 529) | def rgb2ycbcr(img, only_y=True): function ycbcr2rgb (line 553) | def ycbcr2rgb(img): function bgr2ycbcr (line 573) | def bgr2ycbcr(img, only_y=True): function channel_convert (line 597) | def channel_convert(in_c, tar_type, img_list): function calculate_psnr (line 621) | def calculate_psnr(img1, img2, border=0): function calculate_ssim (line 642) | def calculate_ssim(img1, img2, border=0): function ssim (line 669) | def ssim(img1, img2): function cubic (line 700) | def cubic(x): function calculate_weights_indices (line 708) | def calculate_weights_indices(in_length, out_length, scale, kernel, kern... function imresize (line 766) | def imresize(img, scale, antialiasing=True): function imresize_np (line 839) | def imresize_np(img, scale, antialiasing=True): FILE: text_to_audio/Make_An_Audio/ldm/modules/losses_audio/contperceptual.py class LPAPSWithDiscriminator (line 10) | class LPAPSWithDiscriminator(nn.Module): method __init__ (line 11) | def __init__(self, disc_start, logvar_init=0.0, kl_weight=1.0, pixello... method calculate_adaptive_weight (line 42) | def calculate_adaptive_weight(self, nll_loss, g_loss, last_layer=None): method forward (line 55) | def forward(self, inputs, reconstructions, posteriors, optimizer_idx, FILE: text_to_audio/Make_An_Audio/ldm/modules/losses_audio/contperceptual_dis.py class LPAPSWithDiscriminator (line 10) | class LPAPSWithDiscriminator(nn.Module):# 相比于contperceptual.py添加了MultiWi... method __init__ (line 11) | def __init__(self, disc_start, logvar_init=0.0, kl_weight=1.0, pixello... method calculate_adaptive_weight (line 47) | def calculate_adaptive_weight(self, nll_loss, g_loss, last_layer=None): method forward (line 60) | def forward(self, inputs, reconstructions, posteriors, optimizer_idx, FILE: text_to_audio/Make_An_Audio/ldm/modules/losses_audio/lpaps.py class LPAPS (line 17) | class LPAPS(nn.Module): method __init__ (line 19) | def __init__(self, use_dropout=True): method load_from_pretrained (line 33) | def load_from_pretrained(self, name="vggishish_lpaps"): method from_pretrained (line 39) | def from_pretrained(cls, name="vggishish_lpaps"): method forward (line 47) | def forward(self, input, target): class ScalingLayer (line 62) | class ScalingLayer(nn.Module): method __init__ (line 63) | def __init__(self): method forward (line 76) | def forward(self, inp): class NetLinLayer (line 80) | class NetLinLayer(nn.Module): method __init__ (line 82) | def __init__(self, chn_in, chn_out=1, use_dropout=False): class vggishish16 (line 88) | class vggishish16(torch.nn.Module): method __init__ (line 89) | def __init__(self, requires_grad=False, pretrained=True): method forward (line 112) | def forward(self, X): method vggishish16 (line 127) | def vggishish16(self, pretrained: bool = True) -> VGGishish: function normalize_tensor (line 138) | def normalize_tensor(x, eps=1e-10): function spatial_average (line 142) | def spatial_average(x, keepdim=True): FILE: text_to_audio/Make_An_Audio/ldm/modules/losses_audio/vggishish/dataset.py class VGGSound (line 16) | class VGGSound(torch.utils.data.Dataset): method __init__ (line 18) | def __init__(self, split, specs_dir, transforms=None, splits_path='./d... method __getitem__ (line 47) | def __getitem__(self, idx): method __len__ (line 66) | def __len__(self): method make_split_files (line 69) | def make_split_files(self): FILE: text_to_audio/Make_An_Audio/ldm/modules/losses_audio/vggishish/logger.py class LoggerWithTBoard (line 11) | class LoggerWithTBoard(SummaryWriter): method __init__ (line 13) | def __init__(self, cfg): method log_param_num (line 45) | def log_param_num(self, model): method log_iter_loss (line 51) | def log_iter_loss(self, loss, iter, phase): method log_epoch_loss (line 54) | def log_epoch_loss(self, loss, epoch, phase): method log_epoch_metrics (line 58) | def log_epoch_metrics(self, metrics_dict, epoch, phase): method log_test_metrics (line 64) | def log_test_metrics(self, metrics_dict, hparams_dict, best_epoch): method log_best_model (line 76) | def log_best_model(self, model, loss, epoch, optimizer, metrics_dict): FILE: text_to_audio/Make_An_Audio/ldm/modules/losses_audio/vggishish/loss.py class WeightedCrossEntropy (line 6) | class WeightedCrossEntropy(nn.CrossEntropyLoss): method __init__ (line 8) | def __init__(self, weights, **pytorch_ce_loss_args) -> None: method __call__ (line 12) | def __call__(self, outputs, targets, to_weight=True): FILE: text_to_audio/Make_An_Audio/ldm/modules/losses_audio/vggishish/metrics.py function metrics (line 10) | def metrics(targets, outputs, topk=(1, 5)): FILE: text_to_audio/Make_An_Audio/ldm/modules/losses_audio/vggishish/model.py class VGGishish (line 5) | class VGGishish(nn.Module): method __init__ (line 7) | def __init__(self, conv_layers, use_bn, num_classes): method forward (line 43) | def forward(self, x): method reset_parameters (line 56) | def reset_parameters(self): FILE: text_to_audio/Make_An_Audio/ldm/modules/losses_audio/vggishish/train_melception.py class Melception (line 20) | class Melception(Inception3): method __init__ (line 22) | def __init__(self, num_classes, **kwargs): method forward (line 32) | def forward(self, x): function train_inception_scorer (line 36) | def train_inception_scorer(cfg): FILE: text_to_audio/Make_An_Audio/ldm/modules/losses_audio/vggishish/transforms.py class StandardNormalizeAudio (line 13) | class StandardNormalizeAudio(object): method __init__ (line 17) | def __init__(self, specs_dir, train_ids_path='./data/vggsound_train.tx... method __call__ (line 25) | def __call__(self, item): method calculate_or_load_stats (line 42) | def calculate_or_load_stats(self): class ToTensor (line 67) | class ToTensor(object): method __call__ (line 69) | def __call__(self, item): class Crop (line 75) | class Crop(object): method __init__ (line 77) | def __init__(self, cropped_shape=None, random_crop=False): method __call__ (line 89) | def __call__(self, item): FILE: text_to_audio/Make_An_Audio/ldm/modules/losses_audio/vqperceptual.py class DummyLoss (line 15) | class DummyLoss(nn.Module): method __init__ (line 16) | def __init__(self): class VQLPAPSWithDiscriminator (line 19) | class VQLPAPSWithDiscriminator(nn.Module): method __init__ (line 20) | def __init__(self, disc_start, codebook_weight=1.0, pixelloss_weight=1.0, method calculate_adaptive_weight (line 54) | def calculate_adaptive_weight(self, nll_loss, g_loss, last_layer=None): method forward (line 67) | def forward(self, codebook_loss, inputs, reconstructions, optimizer_idx, FILE: text_to_audio/Make_An_Audio/ldm/modules/x_transformer.py class AbsolutePositionalEmbedding (line 25) | class AbsolutePositionalEmbedding(nn.Module): method __init__ (line 26) | def __init__(self, dim, max_seq_len): method init_ (line 31) | def init_(self): method forward (line 34) | def forward(self, x): class FixedPositionalEmbedding (line 39) | class FixedPositionalEmbedding(nn.Module): method __init__ (line 40) | def __init__(self, dim): method forward (line 45) | def forward(self, x, seq_dim=1, offset=0): function exists (line 54) | def exists(val): function default (line 58) | def default(val, d): function always (line 64) | def always(val): function not_equals (line 70) | def not_equals(val): function equals (line 76) | def equals(val): function max_neg_value (line 82) | def max_neg_value(tensor): function pick_and_pop (line 88) | def pick_and_pop(keys, d): function group_dict_by_key (line 93) | def group_dict_by_key(cond, d): function string_begins_with (line 102) | def string_begins_with(prefix, str): function group_by_key_prefix (line 106) | def group_by_key_prefix(prefix, d): function groupby_prefix_and_trim (line 110) | def groupby_prefix_and_trim(prefix, d): class Scale (line 117) | class Scale(nn.Module): method __init__ (line 118) | def __init__(self, value, fn): method forward (line 123) | def forward(self, x, **kwargs): class Rezero (line 128) | class Rezero(nn.Module): method __init__ (line 129) | def __init__(self, fn): method forward (line 134) | def forward(self, x, **kwargs): class ScaleNorm (line 139) | class ScaleNorm(nn.Module): method __init__ (line 140) | def __init__(self, dim, eps=1e-5): method forward (line 146) | def forward(self, x): class RMSNorm (line 151) | class RMSNorm(nn.Module): method __init__ (line 152) | def __init__(self, dim, eps=1e-8): method forward (line 158) | def forward(self, x): class Residual (line 163) | class Residual(nn.Module): method forward (line 164) | def forward(self, x, residual): class GRUGating (line 168) | class GRUGating(nn.Module): method __init__ (line 169) | def __init__(self, dim): method forward (line 173) | def forward(self, x, residual): class GEGLU (line 184) | class GEGLU(nn.Module): method __init__ (line 185) | def __init__(self, dim_in, dim_out): method forward (line 189) | def forward(self, x): class FeedForward (line 194) | class FeedForward(nn.Module): method __init__ (line 195) | def __init__(self, dim, dim_out=None, mult=4, glu=False, dropout=0.): method forward (line 210) | def forward(self, x): class Attention (line 215) | class Attention(nn.Module): method __init__ (line 216) | def __init__( method forward (line 268) | def forward( class AttentionLayers (line 370) | class AttentionLayers(nn.Module): method __init__ (line 371) | def __init__( method forward (line 481) | def forward( class Encoder (line 541) | class Encoder(AttentionLayers): method __init__ (line 542) | def __init__(self, **kwargs): class TransformerWrapper (line 548) | class TransformerWrapper(nn.Module): method __init__ (line 549) | def __init__( method init_ (line 595) | def init_(self): method forward (line 598) | def forward( FILE: text_to_audio/Make_An_Audio/ldm/util.py function download (line 31) | def download(url, local_path, chunk_size=1024): function md5_hash (line 43) | def md5_hash(path): function log_txt_as_img (line 50) | def log_txt_as_img(wh, xc, size=10): function ismap (line 74) | def ismap(x): function isimage (line 80) | def isimage(x): function exists (line 86) | def exists(x): function default (line 90) | def default(val, d): function mean_flat (line 96) | def mean_flat(tensor): function count_params (line 104) | def count_params(model, verbose=False): function instantiate_from_config (line 111) | def instantiate_from_config(config,reload=False): function get_obj_from_str (line 121) | def get_obj_from_str(string, reload=False): function get_ckpt_path (line 128) | def get_ckpt_path(name, root, check=False): FILE: text_to_audio/Make_An_Audio/vocoder/bigvgan/activations.py class Snake (line 9) | class Snake(nn.Module): method __init__ (line 25) | def __init__(self, in_features, alpha=1.0, alpha_trainable=True, alpha... method forward (line 48) | def forward(self, x): class SnakeBeta (line 62) | class SnakeBeta(nn.Module): method __init__ (line 79) | def __init__(self, in_features, alpha=1.0, alpha_trainable=True, alpha... method forward (line 107) | def forward(self, x): FILE: text_to_audio/Make_An_Audio/vocoder/bigvgan/alias_free_torch/act.py class Activation1d (line 8) | class Activation1d(nn.Module): method __init__ (line 9) | def __init__(self, method forward (line 23) | def forward(self, x): FILE: text_to_audio/Make_An_Audio/vocoder/bigvgan/alias_free_torch/filter.py function sinc (line 15) | def sinc(x: torch.Tensor): function kaiser_sinc_filter1d (line 28) | def kaiser_sinc_filter1d(cutoff, half_width, kernel_size): # return filt... class LowPassFilter1d (line 60) | class LowPassFilter1d(nn.Module): method __init__ (line 61) | def __init__(self, method forward (line 86) | def forward(self, x): FILE: text_to_audio/Make_An_Audio/vocoder/bigvgan/alias_free_torch/resample.py class UpSample1d (line 10) | class UpSample1d(nn.Module): method __init__ (line 11) | def __init__(self, ratio=2, kernel_size=None): method forward (line 25) | def forward(self, x): class DownSample1d (line 36) | class DownSample1d(nn.Module): method __init__ (line 37) | def __init__(self, ratio=2, kernel_size=None): method forward (line 46) | def forward(self, x): FILE: text_to_audio/Make_An_Audio/vocoder/bigvgan/models.py function init_weights (line 21) | def init_weights(m, mean=0.0, std=0.01): function get_padding (line 27) | def get_padding(kernel_size, dilation=1): class AMPBlock1 (line 30) | class AMPBlock1(torch.nn.Module): method __init__ (line 31) | def __init__(self, h, channels, kernel_size=3, dilation=(1, 3, 5), act... method forward (line 72) | def forward(self, x): method remove_weight_norm (line 83) | def remove_weight_norm(self): class AMPBlock2 (line 90) | class AMPBlock2(torch.nn.Module): method __init__ (line 91) | def __init__(self, h, channels, kernel_size=3, dilation=(1, 3), activa... method forward (line 120) | def forward(self, x): method remove_weight_norm (line 128) | def remove_weight_norm(self): class BigVGAN (line 133) | class BigVGAN(torch.nn.Module): method __init__ (line 135) | def __init__(self, h): method forward (line 181) | def forward(self, x): method remove_weight_norm (line 205) | def remove_weight_norm(self): class DiscriminatorP (line 216) | class DiscriminatorP(torch.nn.Module): method __init__ (line 217) | def __init__(self, h, period, kernel_size=5, stride=3, use_spectral_no... method forward (line 231) | def forward(self, x): class MultiPeriodDiscriminator (line 253) | class MultiPeriodDiscriminator(torch.nn.Module): method __init__ (line 254) | def __init__(self, h): method forward (line 261) | def forward(self, y, y_hat): class DiscriminatorR (line 277) | class DiscriminatorR(nn.Module): method __init__ (line 278) | def __init__(self, cfg, resolution): method forward (line 304) | def forward(self, x): method spectrogram (line 319) | def spectrogram(self, x): class MultiResolutionDiscriminator (line 330) | class MultiResolutionDiscriminator(nn.Module): method __init__ (line 331) | def __init__(self, cfg, debug=False): method forward (line 341) | def forward(self, y, y_hat): function feature_loss (line 358) | def feature_loss(fmap_r, fmap_g): function discriminator_loss (line 367) | def discriminator_loss(disc_real_outputs, disc_generated_outputs): function generator_loss (line 381) | def generator_loss(disc_outputs): class VocoderBigVGAN (line 393) | class VocoderBigVGAN(object): method __init__ (line 394) | def __init__(self, ckpt_vocoder,device='cuda'): method vocode (line 406) | def vocode(self, spec): method __call__ (line 413) | def __call__(self, wav): FILE: text_to_audio/Make_An_Audio/vocoder/hifigan/modules.py function get_padding (line 13) | def get_padding(kernel_size, dilation=1): function init_weights (line 16) | def init_weights(m, mean=0.0, std=0.01): class ResBlock1 (line 22) | class ResBlock1(torch.nn.Module): method __init__ (line 23) | def __init__(self, h, channels, kernel_size=3, dilation=(1, 3, 5)): method forward (line 46) | def forward(self, x): method remove_weight_norm (line 55) | def remove_weight_norm(self): class ResBlock2 (line 62) | class ResBlock2(torch.nn.Module): method __init__ (line 63) | def __init__(self, h, channels, kernel_size=3, dilation=(1, 3)): method forward (line 74) | def forward(self, x): method remove_weight_norm (line 81) | def remove_weight_norm(self): class Generator (line 86) | class Generator(torch.nn.Module): method __init__ (line 87) | def __init__(self, h): method forward (line 111) | def forward(self, x): method remove_weight_norm (line 129) | def remove_weight_norm(self): class DiscriminatorP (line 139) | class DiscriminatorP(torch.nn.Module): method __init__ (line 140) | def __init__(self, period, kernel_size=5, stride=3, use_spectral_norm=... method forward (line 153) | def forward(self, x): class MultiPeriodDiscriminator (line 175) | class MultiPeriodDiscriminator(torch.nn.Module): method __init__ (line 176) | def __init__(self): method forward (line 186) | def forward(self, y, y_hat): class DiscriminatorS (line 202) | class DiscriminatorS(torch.nn.Module): method __init__ (line 203) | def __init__(self, use_spectral_norm=False): method forward (line 217) | def forward(self, x): class MultiScaleDiscriminator (line 230) | class MultiScaleDiscriminator(torch.nn.Module): method __init__ (line 231) | def __init__(self): method forward (line 243) | def forward(self, y, y_hat): function feature_loss (line 262) | def feature_loss(fmap_r, fmap_g): function discriminator_loss (line 271) | def discriminator_loss(disc_real_outputs, disc_generated_outputs): function generator_loss (line 285) | def generator_loss(disc_outputs): class VocoderHifigan (line 296) | class VocoderHifigan(object): method __init__ (line 297) | def __init__(self, ckpt_vocoder,device='cuda'): method vocode (line 312) | def vocode(self, spec, global_step=None): class VocoderHifigan_noload (line 319) | class VocoderHifigan_noload(object): method __init__ (line 320) | def __init__(self, vocoder_args,device='cuda'): method vocode (line 327) | def vocode(self, spec, global_step=None): FILE: text_to_audio/Make_An_Audio/wav_evaluation/models/CLAPWrapper.py class CLAPWrapper (line 19) | class CLAPWrapper(): method __init__ (line 24) | def __init__(self, model_fp,config_path, use_cuda=False): method load_clap (line 36) | def load_clap(self): method default_collate (line 73) | def default_collate(self, batch): method resample_and_duration (line 119) | def resample_and_duration(self,wav_sr,audio_duration,resample=False): method load_audio_into_tensor (line 145) | def load_audio_into_tensor(self, audio_path, audio_duration, resample=... method preprocess_audio (line 151) | def preprocess_audio(self, audio_files, resample): method preprocess_text (line 166) | def preprocess_text(self, text_queries): method get_text_embeddings (line 177) | def get_text_embeddings(self, class_labels): method get_audio_embeddings (line 184) | def get_audio_embeddings(self, audio_files, resample): method _get_text_embeddings (line 191) | def _get_text_embeddings(self, preprocessed_text): method _get_audio_embeddings (line 198) | def _get_audio_embeddings(self, preprocessed_audio): method compute_similarity (line 208) | def compute_similarity(self, audio_embeddings, text_embeddings,use_log... method cal_clap_score (line 217) | def cal_clap_score(self,txt,audio_path): method _generic_batch_inference (line 223) | def _generic_batch_inference(self, func, *args): method get_audio_embeddings_per_batch (line 246) | def get_audio_embeddings_per_batch(self, audio_files, batch_size): method get_text_embeddings_per_batch (line 250) | def get_text_embeddings_per_batch(self, class_labels, batch_size): method classify_audio_files_per_batch (line 254) | def classify_audio_files_per_batch(self, audio_files, class_labels, ba... FILE: text_to_audio/Make_An_Audio/wav_evaluation/models/audio.py function get_audio_encoder (line 6) | def get_audio_encoder(name: str): class ConvBlock (line 13) | class ConvBlock(nn.Module): method __init__ (line 14) | def __init__(self, in_channels, out_channels): method forward (line 32) | def forward(self, input, pool_size=(2, 2), pool_type='avg'): class ConvBlock5x5 (line 51) | class ConvBlock5x5(nn.Module): method __init__ (line 52) | def __init__(self, in_channels, out_channels): method forward (line 64) | def forward(self, input, pool_size=(2, 2), pool_type='avg'): class AttBlock (line 82) | class AttBlock(nn.Module): method __init__ (line 83) | def __init__(self, n_in, n_out, activation='linear', temperature=1.): method forward (line 93) | def forward(self, x): method nonlinear_transform (line 100) | def nonlinear_transform(self, x): class Cnn14 (line 107) | class Cnn14(nn.Module): method __init__ (line 108) | def __init__(self, sample_rate, window_size, hop_size, mel_bins, fmin, method forward (line 143) | def forward(self, input, mixup_lambda=None): FILE: text_to_audio/Make_An_Audio/wav_evaluation/models/clap.py class Projection (line 8) | class Projection(nn.Module): method __init__ (line 9) | def __init__(self, d_in: int, d_out: int, p: float=0.5) -> None: method forward (line 16) | def forward(self, x: torch.Tensor) -> torch.Tensor: class AudioEncoder (line 22) | class AudioEncoder(nn.Module): method __init__ (line 23) | def __init__(self, audioenc_name:str, d_in: int, d_out: int, sample_ra... method forward (line 36) | def forward(self, x): class TextEncoder (line 42) | class TextEncoder(nn.Module): method __init__ (line 43) | def __init__(self, d_out: int, text_model: str, transformer_embed_dim:... method forward (line 49) | def forward(self, x): class CLAP (line 55) | class CLAP(nn.Module): method __init__ (line 56) | def __init__(self, method forward (line 86) | def forward(self, audio, text): FILE: text_to_audio/Make_An_Audio/wav_evaluation/models/utils.py function read_config_as_args (line 5) | def read_config_as_args(config_path,args=None,is_config_str=False):