SYMBOL INDEX (2719 symbols across 329 files) FILE: KoSentenceT5/apex/RNN/RNNBackend.py function is_iterable (line 10) | def is_iterable(maybe_iterable): function flatten_list (line 14) | def flatten_list(tens_list): class bidirectionalRNN (line 25) | class bidirectionalRNN(nn.Module): method __init__ (line 29) | def __init__(self, inputRNN, num_layers=1, dropout = 0): method forward (line 37) | def forward(self, input, collect_hidden=False): method reset_parameters (line 52) | def reset_parameters(self): method init_hidden (line 59) | def init_hidden(self, bsz): method detach_hidden (line 66) | def detach_hidden(self): method reset_hidden (line 73) | def reset_hidden(self, bsz): method init_inference (line 80) | def init_inference(self, bsz): class stackedRNN (line 90) | class stackedRNN(nn.Module): method __init__ (line 94) | def __init__(self, inputRNN, num_layers=1, dropout=0): method forward (line 122) | def forward(self, input, collect_hidden=False, reverse=False): method reset_parameters (line 197) | def reset_parameters(self): method init_hidden (line 204) | def init_hidden(self, bsz): method detach_hidden (line 211) | def detach_hidden(self): method reset_hidden (line 218) | def reset_hidden(self, bsz): method init_inference (line 225) | def init_inference(self, bsz): class RNNCell (line 232) | class RNNCell(nn.Module): method __init__ (line 242) | def __init__(self, gate_multiplier, input_size, hidden_size, cell, n_h... method new_like (line 274) | def new_like(self, new_input_size=None): method reset_parameters (line 291) | def reset_parameters(self, gain=1): method init_hidden (line 309) | def init_hidden(self, bsz): method reset_hidden (line 330) | def reset_hidden(self, bsz): method detach_hidden (line 338) | def detach_hidden(self): method forward (line 348) | def forward(self, input): FILE: KoSentenceT5/apex/RNN/cells.py class mLSTMRNNCell (line 12) | class mLSTMRNNCell(RNNCell): method __init__ (line 17) | def __init__(self, input_size, hidden_size, bias = False, output_size ... method forward (line 26) | def forward(self, input): method new_like (line 45) | def new_like(self, new_input_size=None): function mLSTMCell (line 55) | def mLSTMCell(input, hidden, w_ih, w_hh, w_mih, w_mhh, b_ih=None, b_hh=N... FILE: KoSentenceT5/apex/RNN/models.py function toRNNBackend (line 8) | def toRNNBackend(inputRNN, num_layers, bidirectional=False, dropout = 0): function LSTM (line 19) | def LSTM(input_size, hidden_size, num_layers, bias=True, batch_first=Fal... function GRU (line 26) | def GRU(input_size, hidden_size, num_layers, bias=True, batch_first=Fals... function ReLU (line 33) | def ReLU(input_size, hidden_size, num_layers, bias=True, batch_first=Fal... function Tanh (line 40) | def Tanh(input_size, hidden_size, num_layers, bias=True, batch_first=Fal... function mLSTM (line 47) | def mLSTM(input_size, hidden_size, num_layers, bias=True, batch_first=Fa... FILE: KoSentenceT5/apex/amp/_amp_state.py class AmpState (line 18) | class AmpState(object): method __init__ (line 19) | def __init__(self): function warn_or_err (line 29) | def warn_or_err(msg): function maybe_print (line 39) | def maybe_print(msg, rank0=False): function master_params (line 60) | def master_params(optimizer): FILE: KoSentenceT5/apex/amp/_initialize.py function to_type (line 21) | def to_type(dtype, t): function applier (line 39) | def applier(value, fn): function check_models (line 64) | def check_models(models): function check_params_fp32 (line 79) | def check_params_fp32(models): function check_optimizers (line 119) | def check_optimizers(optimizers): class O2StateDictHook (line 133) | class O2StateDictHook(object): method __init__ (line 134) | def __init__(self, fn): method __call__ (line 137) | def __call__(self, module, state_dict, prefix, local_metadata): function _initialize (line 145) | def _initialize(models, optimizers, properties, num_losses=1, cast_model... FILE: KoSentenceT5/apex/amp/_process_optimizer.py class AmpOptimizerState (line 9) | class AmpOptimizerState(object): method __init__ (line 10) | def __init__(self): function _master_params_to_model_params (line 14) | def _master_params_to_model_params(self): function lazy_init_with_master_weights (line 28) | def lazy_init_with_master_weights(self): function post_backward_models_are_masters (line 93) | def post_backward_models_are_masters(scaler, params, stashed_grads, scal... function prepare_backward_with_master_weights (line 142) | def prepare_backward_with_master_weights(self): function post_backward_with_master_weights (line 161) | def post_backward_with_master_weights(self, scaler): function lazy_init_no_master_weights (line 205) | def lazy_init_no_master_weights(self): function prepare_backward_no_master_weights (line 224) | def prepare_backward_no_master_weights(self): function post_backward_no_master_weights (line 240) | def post_backward_no_master_weights(self, scaler): function prepare_backward_with_master_weights_FusedSGD (line 258) | def prepare_backward_with_master_weights_FusedSGD(self): function post_backward_with_master_weights_FusedSGD (line 277) | def post_backward_with_master_weights_FusedSGD(self, scaler): function prepare_backward_no_master_weights_FusedSGD (line 305) | def prepare_backward_no_master_weights_FusedSGD(self): function post_backward_no_master_weights_FusedSGD (line 309) | def post_backward_no_master_weights_FusedSGD(self, scaler): function _amp_lazy_init (line 313) | def _amp_lazy_init(self): function _process_optimizer (line 321) | def _process_optimizer(optimizer, properties): FILE: KoSentenceT5/apex/amp/amp.py function _decorator_helper (line 18) | def _decorator_helper(orig_fn, cast_fn, wrap_fn): function half_function (line 30) | def half_function(fn): function float_function (line 35) | def float_function(fn): function promote_function (line 40) | def promote_function(fn): function register_half_function (line 46) | def register_half_function(module, name): function register_float_function (line 53) | def register_float_function(module, name): function register_promote_function (line 60) | def register_promote_function(module, name): function init (line 68) | def init(enabled=True, loss_scale="dynamic", enable_caching=True, verbos... FILE: KoSentenceT5/apex/amp/compat.py function variable_is_tensor (line 4) | def variable_is_tensor(): function tensor_is_variable (line 8) | def tensor_is_variable(): function tensor_is_float_tensor (line 13) | def tensor_is_float_tensor(): function is_tensor_like (line 19) | def is_tensor_like(x): function is_floating_point (line 24) | def is_floating_point(x): function scalar_python_val (line 35) | def scalar_python_val(x): function filter_attrs (line 45) | def filter_attrs(module, attrs): FILE: KoSentenceT5/apex/amp/frontend.py class Properties (line 7) | class Properties(object): method __init__ (line 13) | def __init__(self): method _update_options_dict (line 33) | def _update_options_dict(self, new_options): method __getattr__ (line 43) | def __getattr__(self, name): method __setattr__ (line 51) | def __setattr__(self, name, value): class O3 (line 102) | class O3: method __call__ (line 111) | def __call__(self, properties): class O2 (line 124) | class O2: method __call__ (line 134) | def __call__(self, properties): class O1 (line 147) | class O1: method __call__ (line 156) | def __call__(self, properties): class O0 (line 169) | class O0: method __call__ (line 175) | def __call__(self, properties): function initialize (line 195) | def initialize( function state_dict (line 361) | def state_dict(destination=None): function load_state_dict (line 373) | def load_state_dict(state_dict): FILE: KoSentenceT5/apex/amp/handle.py function scale_loss (line 17) | def scale_loss(loss, function disable_casts (line 164) | def disable_casts(): class AmpHandle (line 170) | class AmpHandle(object): method __init__ (line 171) | def __init__(self, loss_scale="dynamic", enable_caching=True, verbose=... method is_active (line 179) | def is_active(self): method _disable_casts (line 183) | def _disable_casts(self): method wrap_optimizer (line 188) | def wrap_optimizer(self, optimizer, num_loss=1): method scale_loss (line 193) | def scale_loss(self, loss, optimizer): method _clear_cache (line 226) | def _clear_cache(self): method _save_func (line 230) | def _save_func(self, mod, fn, func): method _deactivate (line 233) | def _deactivate(self): method has_cache (line 239) | def has_cache(self): method cache (line 243) | def cache(self): method remove_cache (line 246) | def remove_cache(self, param): method verbose (line 251) | def verbose(self): class NoOpHandle (line 254) | class NoOpHandle(object): method is_active (line 255) | def is_active(self): method _disable_casts (line 259) | def _disable_casts(self): method wrap_optimizer (line 262) | def wrap_optimizer(self, optimizer, num_loss=1): method scale_loss (line 266) | def scale_loss(self, loss, optimizer): method has_cache (line 270) | def has_cache(self): method verbose (line 274) | def verbose(self): method _clear_cache (line 277) | def _clear_cache(self): method _deactivate (line 280) | def _deactivate(self): FILE: KoSentenceT5/apex/amp/opt.py class OptimWrapper (line 9) | class OptimWrapper(object): method __init__ (line 10) | def __init__(self, optimizer, amp_handle, num_loss): method scale_loss (line 19) | def scale_loss(self, loss): method _cur_loss_scaler (line 55) | def _cur_loss_scaler(self): method step (line 59) | def step(self, closure=None): method __getattr__ (line 80) | def __getattr__(self, attr): method __getstate__ (line 84) | def __getstate__(self): method __setstate__ (line 87) | def __setstate__(self): method __repr__ (line 90) | def __repr__(self): method state_dict (line 93) | def state_dict(self): method load_state_dict (line 96) | def load_state_dict(self, state_dict): method zero_grad (line 99) | def zero_grad(self): method add_param_group (line 102) | def add_param_group(self, param_group): FILE: KoSentenceT5/apex/amp/rnn_compat.py function _gen_VF_wrapper (line 7) | def _gen_VF_wrapper(name): class VariableFunctionsShim (line 17) | class VariableFunctionsShim(object): method __init__ (line 18) | def __init__(self): function has_old_rnns (line 24) | def has_old_rnns(): function whitelist_rnn_cells (line 31) | def whitelist_rnn_cells(handle, verbose): FILE: KoSentenceT5/apex/amp/scaler.py function scale_check_overflow_python (line 6) | def scale_check_overflow_python(model_grad, master_grad, scale, check_ov... function axpby_check_overflow_python (line 19) | def axpby_check_overflow_python(model_grad, stashed_grad, master_grad, a... class LossScaler (line 33) | class LossScaler(object): method __init__ (line 38) | def __init__(self, method loss_scale (line 73) | def loss_scale(self): method unscale_python (line 76) | def unscale_python(self, model_grads, master_grads, scale): method unscale (line 94) | def unscale(self, model_grads, master_grads, unused_scale, models_are_... method unscale_with_stashed_python (line 126) | def unscale_with_stashed_python(self, method unscale_with_stashed (line 152) | def unscale_with_stashed(self, method clear_overflow_state (line 191) | def clear_overflow_state(self): method update_scale (line 197) | def update_scale(self): FILE: KoSentenceT5/apex/amp/utils.py function is_cuda_enabled (line 8) | def is_cuda_enabled(): function get_cuda_version (line 11) | def get_cuda_version(): function is_fp_tensor (line 14) | def is_fp_tensor(x): function is_nested (line 23) | def is_nested(x): function should_cache (line 26) | def should_cache(x): function collect_fp_tensor_types (line 36) | def collect_fp_tensor_types(args, kwargs): function type_string (line 51) | def type_string(x): function maybe_half (line 54) | def maybe_half(x, name='', verbose=False): function maybe_float (line 65) | def maybe_float(x, name='', verbose=False): function casted_args (line 77) | def casted_args(cast_fn, args, kwargs): function cached_cast (line 90) | def cached_cast(cast_fn, x, cache): function verbosify (line 124) | def verbosify(cast_fn, fn_name, verbose): function as_inplace (line 130) | def as_inplace(fns): function has_func (line 134) | def has_func(mod, fn): function get_func (line 140) | def get_func(mod, fn): function set_func (line 146) | def set_func(mod, fn, new_fn): function set_func_save (line 152) | def set_func_save(handle, mod, fn, new_fn): function synthesize_flattened_rnn_weights (line 171) | def synthesize_flattened_rnn_weights(fp32_weights, function new_synthesize_flattened_rnn_weights (line 194) | def new_synthesize_flattened_rnn_weights(fp32_weights, FILE: KoSentenceT5/apex/amp/wrap.py function make_cast_wrapper (line 10) | def make_cast_wrapper(orig_fn, cast_fn, handle, function cached_cast (line 31) | def cached_cast(mod, fn, cast_fn, handle, function make_promote_wrapper (line 44) | def make_promote_wrapper(orig_fn, cast_fn, handle=None): function promote (line 65) | def promote(mod, fn, handle, verbose=False): function sequence_promote (line 71) | def sequence_promote(mod, fn, handle, verbose=False): function promote_match_arg0 (line 92) | def promote_match_arg0(mod, fn, handle, verbose=False): function err_if_any_half (line 114) | def err_if_any_half(mod, fn, handle, custom_err_msg=None): function err_if_arg0_half (line 132) | def err_if_arg0_half(mod, fn, handle, verbose=False): function rnn_cast (line 157) | def rnn_cast(backend, fn, handle, verbose=False): function new_rnn_cast (line 222) | def new_rnn_cast(fn, handle, verbose=False): function disable_casts (line 267) | def disable_casts(mod, fn, handle): FILE: KoSentenceT5/apex/contrib/bottleneck/bottleneck.py function kaiming_uniform_ (line 5) | def kaiming_uniform_(tensor, a=0, mode='fan_in', nonlinearity='leaky_rel... class FrozenBatchNorm2d (line 9) | class FrozenBatchNorm2d(torch.nn.Module): method __init__ (line 13) | def __init__(self, n): method get_scale_bias (line 20) | def get_scale_bias(self, nhwc=False): method forward (line 31) | def forward(self, x): function drelu_dscale1 (line 37) | def drelu_dscale1(grad_o, output, scale1): function drelu_dscale2 (line 44) | def drelu_dscale2(grad_o, output, scale1, scale2): class BottleneckFunction (line 51) | class BottleneckFunction(torch.autograd.Function): method forward (line 53) | def forward(ctx, nhwc, stride_1x1, scale, bias, x, *conv): method backward (line 75) | def backward(ctx, grad_o): function conv3x3 (line 102) | def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): function conv1x1 (line 107) | def conv1x1(in_planes, out_planes, stride=1): class Bottleneck (line 111) | class Bottleneck(torch.nn.Module): method __init__ (line 119) | def __init__(self, in_channels, bottleneck_channels, out_channels, str... method forward (line 174) | def forward(self, x): FILE: KoSentenceT5/apex/contrib/csrc/bottleneck/bottleneck.cpp function checkCudnnError (line 31) | int checkCudnnError(cudnnStatus_t code, const char* expr, const char* fi... function checkError (line 42) | void checkError(cudaError_t code, char const * func, const char *file, c... function generateStrides (line 55) | void generateStrides(const int64_t* dimA, int64_t* strideA, int nbDims, ... function getFwdConvDilatedFilterDim (line 75) | int getFwdConvDilatedFilterDim(int filterDim, int dilation) { function getFwdConvPaddedImageDim (line 79) | int getFwdConvPaddedImageDim(int tensorDim, int pad) { function getFwdConvOutputDim (line 83) | int getFwdConvOutputDim( function common_conv_descriptors (line 111) | common_conv_descriptors function common_convbias_descriptors (line 173) | common_convbias_descriptors function dconv_descriptors (line 294) | dconv_descriptors function getConvFusionString (line 377) | std::string getConvFusionString(int64_t* x_dim_padded, function run_conv_scale_bias_add_activation (line 469) | void function run_conv_scale_bias (line 630) | void function run_dconv_drelu_dscale (line 759) | void function run_dconv (line 886) | void function run_dconv_add (line 992) | void function bottleneck_forward (line 1104) | std::vector bottleneck_forward(bool explicit_nhwc, int strid... function bottleneck_backward (line 1287) | std::vector bottleneck_backward(bool explicit_nhwc, int stri... function PYBIND11_MODULE (line 1609) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: KoSentenceT5/apex/contrib/csrc/fmha/fmha_api.cpp function set_params (line 33) | void set_params(Fused_multihead_attention_fprop_params ¶ms, function mha_fwd (line 86) | std::vector function mha_bwd (line 182) | std::vector function mha_fwd_nl (line 262) | std::vector mha_fwd_nl(const at::Tensor &qkv, // tot... function mha_bwd_nl (line 342) | std::vector mha_bwd_nl(const at::Tensor &dout, // tot... function PYBIND11_MODULE (line 426) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: KoSentenceT5/apex/contrib/csrc/fmha/src/fmha.h type Qkv_params (line 46) | struct Qkv_params { function Qkv_params (line 59) | struct Fused_multihead_attention_fprop_params : public Qkv_params { FILE: KoSentenceT5/apex/contrib/csrc/fmha/src/fmha/gemm.h function namespace (line 34) | namespace fmha { type Fragment_accumulator (line 145) | struct Fragment_accumulator function add (line 152) | void add(const Other_fragment_ &other) { FILE: KoSentenceT5/apex/contrib/csrc/fmha/src/fmha/gmem_tile.h function namespace (line 30) | namespace fmha { function __device__ (line 112) | inline __device__ void store(const uint4 (&data)[LDGS]) { function __device__ (line 123) | inline __device__ void move() { function __device__ (line 201) | inline __device__ void store(const uint4 (&src)[STGS_PER_LOOP], int mi) { function __device__ (line 222) | inline __device__ void move() { function __device__ (line 273) | __device__ Gmem_tile_mma_sd(void *ptr, const Params ¶ms, const int t... function __device__ (line 288) | inline __device__ void store(const Type &data, const int mi, const int n... function __device__ (line 300) | inline __device__ void move() { function Base (line 311) | struct Gmem_tile_mma_s : public Base { function Base (line 404) | struct Gmem_tile_dq : public Base { FILE: KoSentenceT5/apex/contrib/csrc/fmha/src/fmha/mask.h function namespace (line 30) | namespace fmha { FILE: KoSentenceT5/apex/contrib/csrc/fmha/src/fmha/smem_tile.h function namespace (line 33) | namespace fmha { function __device__ (line 396) | inline __device__ Smem_tile_row_a(void *smem, int tidx) : Base(smem, tid... function __device__ (line 462) | inline __device__ void reset_read_offset() { function __device__ (line 494) | inline __device__ Smem_tile_a(void *smem, int tidx) : Base(smem, tidx) { function __device__ (line 581) | inline __device__ Smem_tile_col_b(void *smem, int tidx) : Base(smem, tid... function __device__ (line 653) | inline __device__ void reset_read_offset() { function __device__ (line 685) | inline __device__ Smem_tile_b(void *smem, int tidx) : Base(smem, tidx) { function __device__ (line 748) | inline __device__ Smem_tile_row_b(void *smem, int tidx) : Base(smem, tid... function __device__ (line 892) | inline __device__ Smem_tile_b(void *smem, int tidx) : Base(smem, tidx) { function __device__ (line 912) | inline __device__ Smem_tile_v(void *smem, int tidx) : Base(smem, tidx) { function __device__ (line 1003) | inline __device__ Smem_tile_o(void *smem, int tidx) { function store (line 1057) | void store(const Accumulator (&acc)[M][N], int mi) { function __device__ (line 1129) | inline __device__ Smem_tile_mma(char *smem, int tidx) { function store (line 1147) | void store(const uint4 (®s)[M][N]) { function __device__ (line 1177) | inline __device__ Smem_tile_mma_transposed(char *smem, int tidx) : Base(... function load (line 1189) | void load(Fragment (&frag)[M][N]) { function __device__ (line 1223) | inline __device__ Smem_tile_mma_epilogue(char *smem, int tidx) : Base(sm... function store (line 1238) | void store(const Acc (&acc)[M][N]){ function store (line 1272) | void store(const uint4 (®s)[M][N]) { FILE: KoSentenceT5/apex/contrib/csrc/fmha/src/fmha/softmax.h function namespace (line 30) | namespace fmha { FILE: KoSentenceT5/apex/contrib/csrc/fmha/src/fmha/utils.h function namespace (line 38) | namespace fmha { function __device__ (line 247) | static inline __device__ uint32_t hadd2(uint32_t a, uint32_t b) { function __device__ (line 255) | static inline __device__ uint32_t hmin2(uint32_t a, uint32_t b) { function __device__ (line 263) | static inline __device__ uint32_t hmul2(uint32_t a, uint32_t b) { function __device__ (line 271) | static inline __device__ uint2 hmul4(uint2 a, uint2 b) { function __device__ (line 280) | static inline __device__ uint4 hmul8(uint4 a, uint4 b) { function __device__ (line 291) | static inline __device__ uint4 hmul8(uint32_t a, uint4 b) { function __device__ (line 317) | static inline __device__ uint32_t habs2(uint32_t x) { function __device__ (line 332) | static inline __device__ uint16_t clamp_to_zero(uint16_t x) { function __device__ (line 340) | static inline __device__ uint16_t float_to_half(float f) { function __device__ (line 348) | static inline __device__ uint32_t float2_to_half2(float a, float b) { function __device__ (line 362) | static inline __device__ uint32_t float_to_half2(float a) { function __device__ (line 368) | static inline __device__ uint32_t float2_to_half2(const float2 &f) { function __device__ (line 374) | static inline __device__ uint2 float4_to_half4(float x, float y, float z... function __device__ (line 383) | static inline __device__ uint32_t hfma2(uint32_t a, uint32_t b, uint32_t... function __device__ (line 391) | static inline __device__ uint32_t hfma2_relu(uint32_t a, uint32_t b, uin... function __device__ (line 403) | static inline __device__ uint32_t h0_h0(uint32_t x) { function __device__ (line 412) | static inline __device__ float h0_to_float(uint32_t h2) { function __device__ (line 424) | static inline __device__ uint32_t h1_h1(uint32_t x) { function __device__ (line 433) | static inline __device__ uint16_t hadd(uint16_t a, uint16_t b) { function __device__ (line 441) | static inline __device__ uint32_t hadd(uint32_t a, uint32_t b) { function __device__ (line 447) | static inline __device__ uint2 hadd4(uint2 a, uint2 b) { function __device__ (line 456) | static inline __device__ uint2 hadd(uint2 a, uint2 b) { function __device__ (line 462) | static inline __device__ uint4 hadd8(uint4 a, uint4 b) { function __device__ (line 473) | static inline __device__ uint4 fadd4(uint4 a, uint4 b) { function __device__ (line 484) | static inline __device__ uint4 hadd(uint4 a, uint4 b) { function __device__ (line 490) | static inline __device__ float half_to_float(uint16_t h) { function __device__ (line 498) | static inline __device__ float2 half2_to_float2(uint32_t x) { function __device__ (line 514) | static inline __device__ uint16_t hfma(uint16_t a, uint16_t b, uint16_t ... function __device__ (line 522) | static inline __device__ uint16_t hmul(uint16_t a, uint16_t b) { function __device__ (line 530) | static inline __device__ float sigmoid(float x) { function __device__ (line 685) | inline __device__ Ldg_functor(Data_type (&fetch)[N], const void* (&ptrs)... function __device__ (line 690) | inline __device__ void clear(int ii) { function __device__ (line 695) | inline __device__ void load(int ii, bool p) { function __device__ (line 847) | inline __device__ void stg(void *ptr, uint8_t val) { function __device__ (line 853) | inline __device__ void stg(void *ptr, uint16_t val) { function __device__ (line 859) | inline __device__ void stg(void *ptr, uint32_t val) { function __device__ (line 865) | inline __device__ void stg(void *ptr, uint2 val) { function __device__ (line 871) | inline __device__ void stg(void *ptr, uint4 val) { function __device__ (line 881) | inline __device__ void sts(uint32_t ptr, uint16_t val) { function __device__ (line 887) | inline __device__ void sts(uint32_t ptr, uint32_t val) { function __device__ (line 893) | inline __device__ void sts(uint32_t ptr, uint2 val) { function __device__ (line 903) | inline __device__ void sts(uint32_t ptr, uint4 val) { FILE: KoSentenceT5/apex/contrib/csrc/fmha/src/fmha_dgrad_kernel_1xN_reload.h function namespace (line 34) | namespace fmha { FILE: KoSentenceT5/apex/contrib/csrc/fmha/src/fmha_dgrad_kernel_1xN_reload_nl.h function namespace (line 34) | namespace fmha { FILE: KoSentenceT5/apex/contrib/csrc/fmha/src/fmha_fprop_kernel_1xN.h function namespace (line 34) | namespace fmha { FILE: KoSentenceT5/apex/contrib/csrc/fmha/src/fmha_fprop_kernel_1xN_nl.h function namespace (line 35) | namespace fmha { FILE: KoSentenceT5/apex/contrib/csrc/fmha/src/fmha_fprop_kernel_1xN_reload_v.h function namespace (line 34) | namespace fmha { FILE: KoSentenceT5/apex/contrib/csrc/fmha/src/fmha_kernel.h function namespace (line 39) | namespace fmha { function __device__ (line 90) | inline __device__ Noloop_traits(const int bidc) function move_all (line 96) | void move_all(Tiles & ... tiles) const { function __device__ (line 113) | inline __device__ int offset_loop_count(const int l) { function __device__ (line 157) | inline __device__ int offset_loop_count(const int l) { FILE: KoSentenceT5/apex/contrib/csrc/fmha/src/fmha_utils.h type Data_type (line 53) | enum Data_type { DATA_TYPE_FP16, DATA_TYPE_FP32, DATA_TYPE_INT32, DATA_T... function set_alpha (line 57) | static inline void set_alpha( uint32_t &alpha, float norm, Data_type dty... function get_size_in_bytes (line 75) | static inline size_t get_size_in_bytes( size_t n, Data_type dtype ) { FILE: KoSentenceT5/apex/contrib/csrc/groupbn/batch_norm.h function class (line 41) | class NhwcBatchNorm { function createTensorDescriptor (line 193) | void createTensorDescriptor(cudnnTensorDescriptor_t *descriptor) { function destroyTensorDescriptor (line 199) | void destroyTensorDescriptor(cudnnTensorDescriptor_t descriptor) { type StorageType (line 223) | typedef uint16_t StorageType; function _fwdKernelLauncher (line 258) | void _fwdKernelLauncher(cudaStream_t stream, NhwcBatchNormFwdParams params, function _bwdKernelLauncher (line 338) | void _bwdKernelLauncher(cudaStream_t stream, NhwcBatchNormBwdParams params, function smem_driven_bwd_occupancy (line 469) | static int smem_driven_bwd_occupancy(int device_id, const int max_cta_pe... function std (line 478) | const std::vector NhwcBatchNorm::numWorkspaceBytes() const { function _setFwdParams (line 510) | void NhwcBatchNorm::_setFwdParams(NhwcBatchNormFwdParams *params) const { function _setFwdInferenceParams (line 534) | void NhwcBatchNorm::_setFwdInferenceParams(NhwcBatchNormFwdInferenceParams function _setBwdParams (line 548) | void NhwcBatchNorm::_setBwdParams(NhwcBatchNormBwdParams *params) const { function fwdInference (line 569) | void NhwcBatchNorm::fwdInference(cudaStream_t stream, bool use_relu) { function dim3 (line 612) | dim3 NhwcBatchNorm::calc_fwd_grid(int *loop, const int grid_dim_x) { function dim3 (line 635) | dim3 NhwcBatchNorm::calc_bwd_grid(int *loop, const int grid_dim_x) { function fwd (line 658) | void NhwcBatchNorm::fwd(cudaStream_t stream, bool use_relu, void* my_dat... function dgrad (line 697) | void NhwcBatchNorm::dgrad(cudaStream_t stream, bool use_relu, void* my_d... FILE: KoSentenceT5/apex/contrib/csrc/groupbn/batch_norm_add_relu.h function class (line 41) | class NhwcBatchNormAddRelu { function createTensorDescriptor (line 197) | void createTensorDescriptor(cudnnTensorDescriptor_t *descriptor) { function destroyTensorDescriptor (line 203) | void destroyTensorDescriptor(cudnnTensorDescriptor_t descriptor) { type StorageType (line 228) | typedef uint16_t StorageType; function _fwdKernelLauncher (line 262) | void _fwdKernelLauncher(cudaStream_t stream, NhwcBatchNormFwdParams params, function _bwdKernelLauncher (line 332) | void _bwdKernelLauncher(cudaStream_t stream, NhwcBatchNormBwdParams params, function smem_driven_bwd_occupancy (line 409) | static int smem_driven_bwd_occupancy(int device_id, const int max_cta_pe... function std (line 418) | const std::vector NhwcBatchNormAddRelu::numWorkspaceBytes() const { function _setFwdParams (line 456) | void NhwcBatchNormAddRelu::_setFwdParams(NhwcBatchNormFwdParams *params)... function _setFwdInferenceParams (line 480) | void NhwcBatchNormAddRelu::_setFwdInferenceParams(NhwcBatchNormFwdInfere... function _setBwdParams (line 494) | void NhwcBatchNormAddRelu::_setBwdParams(NhwcBatchNormBwdParams *params)... function fwdInference (line 515) | void NhwcBatchNormAddRelu::fwdInference(cudaStream_t stream) { function dim3 (line 552) | dim3 NhwcBatchNormAddRelu::calc_fwd_grid(int *loop, const int grid_dim_x) { function dim3 (line 575) | dim3 NhwcBatchNormAddRelu::calc_bwd_grid(int *loop, const int grid_dim_x) { function fwd (line 598) | void NhwcBatchNormAddRelu::fwd(cudaStream_t stream, void* my_data, void*... function dgrad (line 640) | void NhwcBatchNormAddRelu::dgrad(cudaStream_t stream, void* my_data, voi... FILE: KoSentenceT5/apex/contrib/csrc/groupbn/cuda_utils.h function namespace (line 5) | namespace at { FILE: KoSentenceT5/apex/contrib/csrc/groupbn/interface.cpp function PYBIND11_MODULE (line 154) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: KoSentenceT5/apex/contrib/csrc/groupbn/nhwc_batch_norm_kernel.h type T (line 43) | typedef T Type; type Type (line 51) | typedef int Type; function DEVICE_FUNCTION (line 247) | DEVICE_FUNCTION void write_to_gmem(float *gmem, int idx, const float (&s... function DEVICE_FUNCTION (line 253) | DEVICE_FUNCTION void write_to_gmem(float *gmem, int idx, const float (&s... function DEVICE_FUNCTION (line 259) | DEVICE_FUNCTION void scaled_write_to_gmem(float *gmem, int idx, const fl... function DEVICE_FUNCTION (line 265) | DEVICE_FUNCTION void write_to_smem(float *smem, int idx, const float (&x... function DEVICE_FUNCTION (line 271) | DEVICE_FUNCTION void write_to_smem(int *smem, int idx, const int (&x)[1]) { function DEVICE_FUNCTION (line 277) | DEVICE_FUNCTION void write_to_smem(float *smem, int idx, const float (&x... function DEVICE_FUNCTION (line 283) | DEVICE_FUNCTION void write_to_smem(int *smem, int idx, const int (&x)[2]) { function Storage (line 351) | Storage relu(Storage in) { function parallel_sums (line 544) | void parallel_sums(float *smem, float (&x)[ELEMENTS_PER_LDG], int nhw) { type ParallelSums (line 637) | struct ParallelSums type ParallelSums (line 650) | struct ParallelSums function div_up (line 661) | static inline int div_up(int m, int n) { function DEVICE_FUNCTION (line 668) | DEVICE_FUNCTION void inter_block_sync(int* gmem_retired_ctas, int expect... type NhwcBatchNormFwdInferenceParams (line 697) | struct NhwcBatchNormFwdInferenceParams { type NhwcBatchNormFwdParams (line 799) | struct NhwcBatchNormFwdParams { type PackedStorage (line 870) | typedef PackedStorage PackedStorage_; type typename (line 872) | typedef typename PackedStorage_::Type PackedStorageType; type NhwcBatchNormBwdParams (line 1388) | struct NhwcBatchNormBwdParams { function nhwc_batch_norm_bwd (line 1528) | void nhwc_batch_norm_bwd(NhwcBatchNormBwdParams params) { function nhwc_batch_norm_bwd_relu (line 1892) | void nhwc_batch_norm_bwd_relu(NhwcBatchNormBwdParams params) { function nhwc_batch_norm_bwd_add_relu (line 2280) | void nhwc_batch_norm_bwd_add_relu(NhwcBatchNormBwdParams params) { FILE: KoSentenceT5/apex/contrib/csrc/layer_norm/ln_api.cpp function ln_fwd (line 15) | std::vector ln_fwd(const at::Tensor &x, // BxSxhidden_size function ln_bwd (line 58) | std::vector ln_bwd(const at::Tensor &dw, // BxSxhidden_size function PYBIND11_MODULE (line 102) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: KoSentenceT5/apex/contrib/csrc/multihead_attn/additive_masked_softmax_dropout.cpp type multihead_attn (line 5) | namespace multihead_attn { type fused_softmax (line 6) | namespace fused_softmax { type additive_mask_softmax_dropout (line 7) | namespace additive_mask_softmax_dropout { function fwd (line 31) | std::vector fwd( function bwd (line 57) | torch::Tensor bwd( function PYBIND11_MODULE (line 87) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: KoSentenceT5/apex/contrib/csrc/multihead_attn/encdec_multihead_attn.cpp type multihead_attn (line 4) | namespace multihead_attn { type encdec (line 5) | namespace encdec { type cublas_gemmex (line 6) | namespace cublas_gemmex { function fwd (line 43) | std::vector fwd( function bwd (line 88) | std::vector bwd( function PYBIND11_MODULE (line 153) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: KoSentenceT5/apex/contrib/csrc/multihead_attn/encdec_multihead_attn_norm_add.cpp type multihead_attn (line 4) | namespace multihead_attn { type encdec_norm_add (line 5) | namespace encdec_norm_add { type cublas_gemmex (line 6) | namespace cublas_gemmex { function fwd (line 52) | std::vector fwd( function bwd (line 105) | std::vector bwd( function PYBIND11_MODULE (line 194) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: KoSentenceT5/apex/contrib/csrc/multihead_attn/layer_norm.h function rsqrt (line 230) | float rsqrt(float v) { function rsqrt (line 233) | double rsqrt(double v) { function float (line 256) | struct SharedMemory function double (line 266) | struct SharedMemory function stream (line 653) | auto stream = at::cuda::getCurrentCUDAStream().stream(); FILE: KoSentenceT5/apex/contrib/csrc/multihead_attn/masked_softmax_dropout.cpp type multihead_attn (line 4) | namespace multihead_attn { type fused_softmax (line 5) | namespace fused_softmax { type mask_softmax_dropout (line 6) | namespace mask_softmax_dropout { function fwd (line 31) | std::vector fwd( function bwd (line 57) | torch::Tensor bwd( function PYBIND11_MODULE (line 89) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: KoSentenceT5/apex/contrib/csrc/multihead_attn/philox.h function class (line 4) | class Philox { function __device__ (line 17) | __device__ inline uint4 operator()() { function __device__ (line 45) | __device__ inline void incr_n(unsigned long long n) { function __device__ (line 58) | __device__ inline void incr() { function mulhilo32 (line 67) | __device__ unsigned int mulhilo32(unsigned int a, unsigned int b, function __device__ (line 72) | __device__ inline uint4 single_round(uint4 ctr, uint2 key) { function __device__ (line 87) | __device__ __inline__ float4 uniform4(uint4 x) { FILE: KoSentenceT5/apex/contrib/csrc/multihead_attn/self_multihead_attn.cpp type multihead_attn (line 4) | namespace multihead_attn { type self (line 5) | namespace self { type cublas_gemmex (line 6) | namespace cublas_gemmex { function fwd (line 39) | std::vector fwd( function bwd (line 75) | std::vector bwd( function PYBIND11_MODULE (line 128) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: KoSentenceT5/apex/contrib/csrc/multihead_attn/self_multihead_attn_bias.cpp type multihead_attn (line 4) | namespace multihead_attn { type self_bias (line 5) | namespace self_bias { type cublas_gemmex (line 6) | namespace cublas_gemmex { function fwd (line 43) | std::vector fwd( function bwd (line 82) | std::vector bwd( function PYBIND11_MODULE (line 135) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: KoSentenceT5/apex/contrib/csrc/multihead_attn/self_multihead_attn_bias_additive_mask.cpp type multihead_attn (line 5) | namespace multihead_attn { type self_bias_additive_mask (line 6) | namespace self_bias_additive_mask { type cublas_gemmex (line 7) | namespace cublas_gemmex { function fwd (line 46) | std::vector fwd( function bwd (line 86) | std::vector bwd( function PYBIND11_MODULE (line 139) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: KoSentenceT5/apex/contrib/csrc/multihead_attn/self_multihead_attn_norm_add.cpp type multihead_attn (line 4) | namespace multihead_attn { type self_norm_add (line 5) | namespace self_norm_add { type cublas_gemmex (line 6) | namespace cublas_gemmex { function fwd (line 47) | std::vector fwd( function bwd (line 93) | std::vector bwd( function PYBIND11_MODULE (line 169) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: KoSentenceT5/apex/contrib/csrc/multihead_attn/softmax.h function acc_t (line 139) | acc_t sum[WARP_BATCH] { 0.0f }; function acc_t (line 363) | acc_t sum[WARP_BATCH] { 0.0f }; function additive_masked_softmax_dropout_warp_forward (line 429) | void additive_masked_softmax_dropout_warp_forward(output_t *dst, uint8_t... function softmax_warp_backward (line 2244) | void softmax_warp_backward(__half *gradInput, const __half *grad, const ... function masked_softmax_warp_backward (line 2455) | void masked_softmax_warp_backward(__half *gradInput, const __half *grad,... FILE: KoSentenceT5/apex/contrib/csrc/multihead_attn/strided_batched_gemm.h function cublasOperation_t (line 21) | cublasOperation_t convertTransToCublasOperation(char trans) { function CublasStridedBatchedGemm (line 31) | void CublasStridedBatchedGemm(THCState *state, char transa, char transb,... type cutlass (line 78) | typedef cutlass::gemm::Gemm Gemm; function gemm_switch_fp32accum (line 149) | void gemm_switch_fp32accum(THCState *state, char transa, char transb, lo... function adjustLdLevel3 (line 278) | void adjustLdLevel3(char transa, char transb, int64_t m, int64_t n, int6... function HgemmStridedBatched (line 312) | void HgemmStridedBatched(THCState *state, char transa, char transb, long... FILE: KoSentenceT5/apex/contrib/csrc/optimizers/fused_adam_cuda.cpp function strided_check_finite (line 20) | void strided_check_finite( function adam (line 29) | void adam(at::Tensor & p, at::Tensor & p_copy, at::Tensor & m, at::Tenso... function reversible_adam (line 43) | void reversible_adam(at::Tensor & p, at::Tensor & p_copy, at::Tensor & m... function maybe_adam_undo (line 57) | void maybe_adam_undo(at::Tensor & overflow_flag, at::Tensor & p, at::Ten... function maybe_cast (line 69) | void maybe_cast(at::Tensor & overflow_flag, at::Tensor & p_in, at::Tenso... function PYBIND11_MODULE (line 78) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: KoSentenceT5/apex/contrib/csrc/optimizers/fused_lamb_cuda.cpp function PYBIND11_MODULE (line 19) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: KoSentenceT5/apex/contrib/csrc/optimizers/multi_tensor_distopt_adam.cpp function PYBIND11_MODULE (line 17) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: KoSentenceT5/apex/contrib/csrc/optimizers/multi_tensor_distopt_lamb.cpp function PYBIND11_MODULE (line 31) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: KoSentenceT5/apex/contrib/csrc/transducer/transducer_joint.cpp function transducer_joint_forward (line 33) | std::vector transducer_joint_forward( function transducer_joint_backward (line 67) | std::vector transducer_joint_backward( function PYBIND11_MODULE (line 95) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: KoSentenceT5/apex/contrib/csrc/transducer/transducer_loss.cpp function transducer_loss_forward (line 35) | std::vector transducer_loss_forward( function transducer_loss_backward (line 65) | torch::Tensor transducer_loss_backward( function PYBIND11_MODULE (line 106) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: KoSentenceT5/apex/contrib/csrc/xentropy/interface.cpp function softmax_xentropy_forward (line 24) | std::vector softmax_xentropy_forward( function softmax_xentropy_backward (line 35) | at::Tensor softmax_xentropy_backward( function PYBIND11_MODULE (line 49) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: KoSentenceT5/apex/contrib/fmha/fmha.py class FMHAFun (line 33) | class FMHAFun(torch.autograd.Function): method forward (line 35) | def forward(ctx, qkv, cu_seqlens, p_dropout, max_s, is_training): method backward (line 48) | def backward(ctx, dout): class FMHA (line 58) | class FMHA(torch.nn.Module): method __init__ (line 60) | def __init__(self, config): method forward (line 70) | def forward(self, qkv, cu_seqlens, max_s, is_training=True): FILE: KoSentenceT5/apex/contrib/groupbn/batch_norm.py class bn_NHWC_impl (line 7) | class bn_NHWC_impl(torch.autograd.Function): method forward (line 9) | def forward(ctx, x, s, b, rm, riv, mini_m, mini_riv, ret_cta, mom, eps... method backward (line 32) | def backward(ctx, grad_y): class bn_addrelu_NHWC_impl (line 53) | class bn_addrelu_NHWC_impl(torch.autograd.Function): method forward (line 55) | def forward(ctx, x, z, s, b, rm, riv, mini_m, mini_riv, grid_dim_y, re... method backward (line 78) | def backward(ctx, grad_y): class BatchNorm2d_NHWC (line 101) | class BatchNorm2d_NHWC(_BatchNorm): method __init__ (line 103) | def __init__(self, num_features, fuse_relu=False, bn_group=1, max_cta_... method forward (line 196) | def forward(self, x, z=None): method __del__ (line 219) | def __del__(self): FILE: KoSentenceT5/apex/contrib/layer_norm/layer_norm.py class FastLayerNormFN (line 6) | class FastLayerNormFN(torch.autograd.Function): method forward (line 8) | def forward(ctx, x, gamma, beta, epsilon): method backward (line 19) | def backward(ctx, dy): class FastLayerNorm (line 31) | class FastLayerNorm(torch.nn.Module): method __init__ (line 32) | def __init__(self, hidden_size, eps=1e-5): method reset_parameters (line 39) | def reset_parameters(self): method forward (line 43) | def forward(self, x): FILE: KoSentenceT5/apex/contrib/multihead_attn/encdec_multihead_attn.py function jit_dropout_add (line 19) | def jit_dropout_add(x, residual, prob, is_training): class EncdecMultiheadAttn (line 26) | class EncdecMultiheadAttn(nn.Module): method __init__ (line 31) | def __init__(self, embed_dim, num_heads, dropout=0., bias=False, inclu... method reset_parameters (line 79) | def reset_parameters(self): method forward (line 98) | def forward(self, query, key, value, key_padding_mask=None, need_weigh... FILE: KoSentenceT5/apex/contrib/multihead_attn/encdec_multihead_attn_func.py class EncdecAttnFunc (line 5) | class EncdecAttnFunc(torch.autograd.Function): method forward (line 7) | def forward(ctx, use_time_mask, is_training, heads, scale, inputs_q, i... method backward (line 135) | def backward(ctx, output_grads): FILE: KoSentenceT5/apex/contrib/multihead_attn/fast_encdec_multihead_attn_func.py class FastEncdecAttnFunc (line 5) | class FastEncdecAttnFunc(torch.autograd.Function): method forward (line 7) | def forward(ctx, use_time_mask, is_training, heads, inputs_q, inputs_k... method backward (line 50) | def backward(ctx, output_grads): FILE: KoSentenceT5/apex/contrib/multihead_attn/fast_encdec_multihead_attn_norm_add_func.py class FastEncdecAttnNormAddFunc (line 12) | class FastEncdecAttnNormAddFunc(torch.autograd.Function): method forward (line 14) | def forward(ctx, use_time_mask, is_training, heads, inputs_q, inputs_k... method backward (line 69) | def backward(ctx, output_grads): FILE: KoSentenceT5/apex/contrib/multihead_attn/fast_self_multihead_attn_func.py class FastSelfAttnFunc (line 6) | class FastSelfAttnFunc(torch.autograd.Function) : method forward (line 8) | def forward(ctx, use_time_mask, is_training, heads, inputs, input_weig... method backward (line 120) | def backward(ctx, output_grads): FILE: KoSentenceT5/apex/contrib/multihead_attn/fast_self_multihead_attn_norm_add_func.py class FastSelfAttnNormAddFunc (line 5) | class FastSelfAttnNormAddFunc(torch.autograd.Function): method forward (line 7) | def forward(ctx, use_time_mask, is_training, heads, inputs, lyr_nrm_ga... method backward (line 56) | def backward(ctx, output_grads): FILE: KoSentenceT5/apex/contrib/multihead_attn/mask_softmax_dropout_func.py class MaskSoftmaxDropout (line 6) | class MaskSoftmaxDropout(torch.autograd.Function) : method forward (line 8) | def forward(ctx, is_training, heads, inputs, pad_mask, mask_additive, ... method backward (line 51) | def backward(ctx, output_grads): FILE: KoSentenceT5/apex/contrib/multihead_attn/self_multihead_attn.py function jit_dropout_add (line 19) | def jit_dropout_add(x, residual, prob, is_training): class SelfMultiheadAttn (line 26) | class SelfMultiheadAttn(nn.Module): method __init__ (line 31) | def __init__(self, embed_dim, num_heads, dropout=0., bias=False, inclu... method reset_parameters (line 97) | def reset_parameters(self): method forward (line 124) | def forward(self, query, key, value, key_padding_mask=None, need_weigh... FILE: KoSentenceT5/apex/contrib/multihead_attn/self_multihead_attn_func.py class SelfAttnFunc (line 4) | class SelfAttnFunc(torch.autograd.Function): method forward (line 6) | def forward(ctx, use_time_mask, is_training, heads, scale, inputs, method backward (line 121) | def backward(ctx, output_grads): FILE: KoSentenceT5/apex/contrib/optimizers/distributed_fused_adam.py class DistributedFusedAdam (line 9) | class DistributedFusedAdam(torch.optim.Optimizer): method __init__ (line 55) | def __init__(self, params, method _first_step_init (line 128) | def _first_step_init(self): method _init_everything (line 373) | def _init_everything(self): method set_last_step (line 378) | def set_last_step(self, last_step): method _get_flush_block (line 381) | def _get_flush_block(self): method _pipeline_block_reductions (line 397) | def _pipeline_block_reductions(self, block_id): method __launch_step_kernel (line 443) | def __launch_step_kernel(self): method _pipeline_step (line 469) | def _pipeline_step(self): method _flatten_grad_mt (line 479) | def _flatten_grad_mt(self, scale): method _do_overlapped_reduction (line 489) | def _do_overlapped_reduction(self, param_i, param_grads_size, param_of... method set_global_scale (line 504) | def set_global_scale(self, global_scale): method global_scale (line 510) | def global_scale(self): method has_overflow (line 514) | def has_overflow(self): method peek_overflow (line 523) | def peek_overflow(self): method strided_check_finite (line 529) | def strided_check_finite(self, output_params, stride=1, start=-1, end=... method L2_grad_norm (line 545) | def L2_grad_norm(self): method complete_reductions (line 552) | def complete_reductions(self): method step (line 577) | def step(self, closure=None): method state_dict (line 598) | def state_dict(self): method load_state_dict (line 615) | def load_state_dict(self, state_dict): FILE: KoSentenceT5/apex/contrib/optimizers/distributed_fused_adam_v2.py class DistributedFusedAdamV2 (line 7) | class DistributedFusedAdamV2(torch.optim.Optimizer): method __init__ (line 43) | def __init__(self, params, method set_last_step (line 351) | def set_last_step(self, last_step): method _get_flush_block (line 354) | def _get_flush_block(self): method _pipeline_block_reductions (line 370) | def _pipeline_block_reductions(self, block_id): method __launch_step_kernel (line 406) | def __launch_step_kernel(self, p, p_copy, m, v, g): method _pipeline_block_step (line 425) | def _pipeline_block_step(self, block_id): method _pipeline_step (line 445) | def _pipeline_step(self): method _flatten_grad_mt (line 460) | def _flatten_grad_mt(self, scale): method _do_overlapped_reduction (line 470) | def _do_overlapped_reduction(self, param_i, param_grads_size, param_of... method set_global_scale (line 487) | def set_global_scale(self, global_scale): method global_scale (line 493) | def global_scale(self): method has_overflow (line 497) | def has_overflow(self): method peek_overflow (line 506) | def peek_overflow(self): method strided_check_finite (line 512) | def strided_check_finite(self, output_params, stride=1, start=-1, end=... method L2_grad_norm (line 528) | def L2_grad_norm(self): method complete_reductions (line 535) | def complete_reductions(self): method revert_step (line 560) | def revert_step(self): method step (line 586) | def step(self, closure=None, skip_overflow_check=False): FILE: KoSentenceT5/apex/contrib/optimizers/distributed_fused_adam_v3.py class DistributedFusedAdamV3 (line 7) | class DistributedFusedAdamV3(torch.optim.Optimizer): method __init__ (line 43) | def __init__(self, params, method has_overflow (line 196) | def has_overflow(self): method set_last_step (line 199) | def set_last_step(self, last_step): method _get_flush_block (line 202) | def _get_flush_block(self): method __launch_step_kernel (line 218) | def __launch_step_kernel(self, p, p_copy, m, v, g): method _flatten_grad_mt (line 237) | def _flatten_grad_mt(self, scale): method _do_overlapped_reduction (line 247) | def _do_overlapped_reduction(self, param_i, param_grads_size, param_of... method set_global_scale (line 268) | def set_global_scale(self, global_scale): method global_scale (line 274) | def global_scale(self): method L2_grad_norm (line 278) | def L2_grad_norm(self): method complete_reductions (line 282) | def complete_reductions(self): method step (line 306) | def step(self, closure=None, skip_overflow_check=False): FILE: KoSentenceT5/apex/contrib/optimizers/distributed_fused_lamb.py class DistributedFusedLAMB (line 9) | class DistributedFusedLAMB(torch.optim.Optimizer): class AtomicCounter (line 70) | class AtomicCounter(object): method __init__ (line 71) | def __init__(self): method add (line 77) | def add(self, idx): method __init__ (line 82) | def __init__(self, params, method _lazy_init_stage1 (line 210) | def _lazy_init_stage1(self): method _lazy_init_stage2 (line 330) | def _lazy_init_stage2(self): method set_is_accumulation_step (line 451) | def set_is_accumulation_step(self, is_accumulation_step): method set_last_step (line 454) | def set_last_step(self, last_step): method _get_flush_block (line 457) | def _get_flush_block(self): method _pipeline_block_reductions (line 473) | def _pipeline_block_reductions(self, block_id): method __compute_contrib_param_norm (line 556) | def __compute_contrib_param_norm(self): method __compute_contrib_update_norm (line 569) | def __compute_contrib_update_norm(self): method _pipeline_step (line 577) | def _pipeline_step(self): method _flatten_grad_mt (line 633) | def _flatten_grad_mt(self, scale): method _do_overlapped_reduction (line 651) | def _do_overlapped_reduction(self, param_i, param): method set_global_scale (line 667) | def set_global_scale(self, global_scale): method global_scale (line 673) | def global_scale(self): method L2_grad_norm (line 677) | def L2_grad_norm(self): method complete_reductions (line 681) | def complete_reductions(self): method step (line 704) | def step(self, closure=None, grad_scaler=None): method state_dict (line 740) | def state_dict(self): method load_state_dict (line 757) | def load_state_dict(self, state_dict): FILE: KoSentenceT5/apex/contrib/optimizers/fp16_optimizer.py class FP16_Optimizer (line 4) | class FP16_Optimizer(object): method __init__ (line 25) | def __init__(self, method zero_grad (line 79) | def zero_grad(self, set_grads_to_None=True): method step (line 94) | def step(self, closure=None): method backward (line 132) | def backward(self, loss): method _update_scale (line 142) | def _update_scale(self, skip): method _get_state (line 161) | def _get_state(self): method _set_state (line 164) | def _set_state(self, value): method _get_param_groups (line 171) | def _get_param_groups(self): method _set_param_groups (line 174) | def _set_param_groups(self, value): method state_dict (line 179) | def state_dict(self): method load_state_dict (line 202) | def load_state_dict(self, state_dict): FILE: KoSentenceT5/apex/contrib/optimizers/fused_adam.py class FusedAdam (line 6) | class FusedAdam(torch.optim.Optimizer): method __init__ (line 38) | def __init__(self, params, method step (line 64) | def step(self, closure=None, grads=None, output_params=None, scale=1.,... FILE: KoSentenceT5/apex/contrib/optimizers/fused_lamb.py class FusedLAMB (line 6) | class FusedLAMB(torch.optim.Optimizer): method __init__ (line 63) | def __init__(self, params, lr=1e-3, bias_correction=True, method zero_grad (line 87) | def zero_grad(self): method step (line 95) | def step(self, closure=None): FILE: KoSentenceT5/apex/contrib/optimizers/fused_sgd.py class FusedSGD (line 7) | class FusedSGD(Optimizer): method __init__ (line 66) | def __init__(self, params, lr=required, momentum=0, dampening=0, method __setstate__ (line 93) | def __setstate__(self, state): method get_momentums (line 98) | def get_momentums(self, params): method step (line 115) | def step(self, closure=None, grads=None, output_params=None, scale=1.,... FILE: KoSentenceT5/apex/contrib/sparsity/asp.py function eligible_modules (line 12) | def eligible_modules(model, whitelist_layer_types, allowed_layer_names, ... class ASP (line 21) | class ASP: method init_model_for_pruning (line 29) | def init_model_for_pruning(cls, model, mask_calculator="m4n2_1d", method init_optimizer_for_pruning (line 127) | def init_optimizer_for_pruning(cls, optimizer): method compute_sparse_masks (line 155) | def compute_sparse_masks(cls): method restore_pruned_weights (line 176) | def restore_pruned_weights(cls): method is_sparsity_enabled (line 191) | def is_sparsity_enabled(cls): method prune_trained_model (line 212) | def prune_trained_model(cls, model, optimizer): FILE: KoSentenceT5/apex/contrib/sparsity/sparse_masklib.py function fill (line 9) | def fill(x): function reshape_1d (line 13) | def reshape_1d(matrix, m): function compute_valid_1d_patterns (line 25) | def compute_valid_1d_patterns(m,n): function mn_1d_best (line 37) | def mn_1d_best(matrix, m, n): function m4n2_1d (line 49) | def m4n2_1d(mat, density): function mn_2d_greedy (line 67) | def mn_2d_greedy(matrix, m, n): function m4n2_2d_greedy (line 98) | def m4n2_2d_greedy(mat, density): function compute_valid_2d_patterns (line 103) | def compute_valid_2d_patterns(m,n): function mn_2d_best (line 122) | def mn_2d_best(matrix, m, n): function m4n2_2d_best (line 140) | def m4n2_2d_best(mat, density): function create_mask (line 145) | def create_mask(tensor, pattern="m4n2_1d", density=0.5): FILE: KoSentenceT5/apex/contrib/sparsity/test/checkpointing_test_part1.py function build_model (line 7) | def build_model(args): function train_step (line 21) | def train_step(args, model, optimizer, input_batch, target_batch, step): function train_loop (line 31) | def train_loop(args, model, optimizer, step, num_steps): function main (line 38) | def main(args): class Args (line 76) | class Args: FILE: KoSentenceT5/apex/contrib/sparsity/test/checkpointing_test_part2.py function build_model (line 7) | def build_model(args): function train_step (line 21) | def train_step(args, model, optimizer, input_batch, target_batch, step): function train_loop (line 31) | def train_loop(args, model, optimizer, step, num_steps): function main (line 38) | def main(step, args, model_state_dict, optimizer_state_dict): class Args (line 61) | class Args: FILE: KoSentenceT5/apex/contrib/sparsity/test/checkpointing_test_reference.py function build_model (line 11) | def build_model(args): function train_step (line 25) | def train_step(args, model, optimizer, input_batch, target_batch, step): function train_loop (line 35) | def train_loop(args, model, optimizer, step, num_steps): function main (line 42) | def main(args): class Args (line 79) | class Args: FILE: KoSentenceT5/apex/contrib/sparsity/test/toy_problem.py function build_model (line 7) | def build_model(args): function train_step (line 21) | def train_step(args, model, optimizer, input_batch, target_batch, step): function train_loop (line 31) | def train_loop(args, model, optimizer, step, num_steps): function main (line 38) | def main(args): class Args (line 75) | class Args: FILE: KoSentenceT5/apex/contrib/test/fmha/test_fmha.py function py_mha (line 37) | def py_mha(qkv, amask, b, s, h, d): class TestFMHA (line 52) | class TestFMHA(unittest.TestCase): method run_test (line 54) | def run_test(self, s, b): method test_128 (line 106) | def test_128(self): method test_256 (line 109) | def test_256(self): method test_384 (line 112) | def test_384(self): method test_512 (line 115) | def test_512(self): FILE: KoSentenceT5/apex/contrib/test/layer_norm/test_fast_layer_norm.py class GPUTimer (line 12) | class GPUTimer: method __init__ (line 13) | def __init__(self, stream): method start (line 17) | def start(self): method stop (line 19) | def stop(self): method sync (line 21) | def sync(self): method millis (line 23) | def millis(self): function size_in_bytes (line 26) | def size_in_bytes(t): function abs_err (line 28) | def abs_err(x, y): class TestFastLayerNorm (line 35) | class TestFastLayerNorm(unittest.TestCase): method setUp (line 37) | def setUp(self, seed=1234): method test_ln_fp32 (line 42) | def test_ln_fp32(self): method test_ln_fp16 (line 44) | def test_ln_fp16(self): method run_test_layer_norm (line 47) | def run_test_layer_norm(self, dtype, atol, rtol=1e-5): method test_performance (line 94) | def test_performance(self): FILE: KoSentenceT5/apex/contrib/test/multihead_attn/test_encdec_multihead_attn.py class EncdecMultiheadAttnTest (line 7) | class EncdecMultiheadAttnTest(unittest.TestCase): method setUp (line 8) | def setUp(self, seed=1234): method test_encdec_multihead_attn (line 49) | def test_encdec_multihead_attn(self) : method test_encdec_multihead_attn_time_mask (line 76) | def test_encdec_multihead_attn_time_mask(self) : method test_encdec_multihead_attn_pad_mask (line 105) | def test_encdec_multihead_attn_pad_mask(self) : FILE: KoSentenceT5/apex/contrib/test/multihead_attn/test_encdec_multihead_attn_norm_add.py class EncdecMultiheadAttnNormAddTest (line 7) | class EncdecMultiheadAttnNormAddTest(unittest.TestCase): method setUp (line 8) | def setUp(self, seed=1234): method test_encdec_multihead_attn_norm_add (line 49) | def test_encdec_multihead_attn_norm_add(self) : FILE: KoSentenceT5/apex/contrib/test/multihead_attn/test_fast_self_multihead_attn_bias.py class SelfMultiheadAttnTest (line 7) | class SelfMultiheadAttnTest(unittest.TestCase): method setUp (line 8) | def setUp(self, seed=1234): method test_self_multihead_attn_additive_mask (line 48) | def test_self_multihead_attn_additive_mask(self) : FILE: KoSentenceT5/apex/contrib/test/multihead_attn/test_mha_fused_softmax.py class FusedSoftmaxTest (line 6) | class FusedSoftmaxTest(unittest.TestCase): method setUp (line 7) | def setUp(self, seed=1234): method test_fused_softmax (line 24) | def test_fused_softmax(self) : FILE: KoSentenceT5/apex/contrib/test/multihead_attn/test_self_multihead_attn.py class SelfMultiheadAttnTest (line 7) | class SelfMultiheadAttnTest(unittest.TestCase): method setUp (line 8) | def setUp(self, seed=1234): method test_self_multihead_attn (line 45) | def test_self_multihead_attn(self) : method test_self_multihead_attn_time_mask (line 71) | def test_self_multihead_attn_time_mask(self) : method test_self_multihead_attn_pad_mask (line 100) | def test_self_multihead_attn_pad_mask(self) : FILE: KoSentenceT5/apex/contrib/test/multihead_attn/test_self_multihead_attn_norm_add.py class SelfMultiheadAttnNormAddTest (line 7) | class SelfMultiheadAttnNormAddTest(unittest.TestCase): method setUp (line 8) | def setUp(self, seed=1234): method test_self_multihead_attn_norm_add (line 45) | def test_self_multihead_attn_norm_add(self) : FILE: KoSentenceT5/apex/contrib/test/test_label_smoothing.py function label_smoothing_raw (line 10) | def label_smoothing_raw(x, target, padding_idx, smoothing): function label_smoothing_opt_1 (line 20) | def label_smoothing_opt_1(x, target, padding_idx, smoothing): class LabelSmoothingTest (line 30) | class LabelSmoothingTest(unittest.TestCase): method setUp (line 31) | def setUp(self, seed=1234): method gen_test_inputs (line 40) | def gen_test_inputs(self, N, T, H, smoothing, padding_idx): method print_max_diff_elem (line 50) | def print_max_diff_elem(self, ref, tst): method test_label_smoothing_function (line 57) | def test_label_smoothing_function(self): method test_label_smoothing_perf (line 91) | def test_label_smoothing_perf(self): FILE: KoSentenceT5/apex/contrib/test/transducer/test_transducer_joint.py class TransducerJointTest (line 6) | class TransducerJointTest(unittest.TestCase): method setUp (line 7) | def setUp(self, seed=1234): method gen_input (line 11) | def gen_input(self, for_vector_kernel): method _pack (line 41) | def _pack(self, x, f_len, g_len): method _unpack (line 53) | def _unpack(self, x, f_len, g_len): method run_transducer_joint (line 67) | def run_transducer_joint(self, for_vector_kernel, pack_output, relu, d... method test_transducer_joint (line 118) | def test_transducer_joint(self): method test_transducer_joint_vec (line 121) | def test_transducer_joint_vec(self): method test_transducer_joint_pack (line 124) | def test_transducer_joint_pack(self): method test_transducer_joint_vec_pack (line 127) | def test_transducer_joint_vec_pack(self): method test_transducer_joint_relu (line 130) | def test_transducer_joint_relu(self): method test_transducer_joint_vec_relu (line 133) | def test_transducer_joint_vec_relu(self): method test_transducer_joint_pack_relu (line 136) | def test_transducer_joint_pack_relu(self): method test_transducer_joint_vec_pack_relu (line 139) | def test_transducer_joint_vec_pack_relu(self): method test_transducer_joint_relu_dropout (line 142) | def test_transducer_joint_relu_dropout(self): method test_transducer_joint_vec_relu_dropout (line 145) | def test_transducer_joint_vec_relu_dropout(self): method test_transducer_joint_pack_relu_dropout (line 148) | def test_transducer_joint_pack_relu_dropout(self): method test_transducer_joint_vec_pack_relu_dropout (line 151) | def test_transducer_joint_vec_pack_relu_dropout(self): FILE: KoSentenceT5/apex/contrib/test/transducer/test_transducer_loss.py class TransducerLossTest (line 6) | class TransducerLossTest(unittest.TestCase): method setUp (line 7) | def setUp(self, seed=1234): method gen_input (line 11) | def gen_input(self, scalar_t, for_vector_kernel): method _pack (line 41) | def _pack(self, x): method _unpack (line 52) | def _unpack(self, x): method run_transducer_loss (line 64) | def run_transducer_loss(self, scalar_t, fuse_softmax_backward, packed_... method test_transducer_loss_fp32 (line 90) | def test_transducer_loss_fp32(self): method test_transducer_loss_fp16 (line 98) | def test_transducer_loss_fp16(self): method test_transducer_loss_fp16_backward_fusion (line 106) | def test_transducer_loss_fp16_backward_fusion(self): method test_transducer_loss_fp16_backward_fusion_packed (line 114) | def test_transducer_loss_fp16_backward_fusion_packed(self): method test_transducer_loss_fp16_backward_fusion_packed_vec (line 122) | def test_transducer_loss_fp16_backward_fusion_packed_vec(self): FILE: KoSentenceT5/apex/contrib/test/transducer/transducer_ref.py function transducer_loss_reference (line 5) | def transducer_loss_reference(x, label, f_len, y_len, blank_idx, loss_gr... function transducer_joint_reference (line 79) | def transducer_joint_reference(f, g, h_grad, f_len, g_len, pack_output, ... FILE: KoSentenceT5/apex/contrib/transducer/transducer.py class TransducerJoint (line 5) | class TransducerJoint(torch.nn.Module): method __init__ (line 27) | def __init__(self, pack_output=False, relu=False, dropout=False, opt=1... method forward (line 43) | def forward(self, f, g, f_len, g_len, batch_offset=None, packed_batch=0): class TransducerLoss (line 68) | class TransducerLoss(torch.nn.Module): method __init__ (line 81) | def __init__(self, fuse_softmax_backward=True, opt=1, packed_input=Fal... method forward (line 89) | def forward(self, x, label, f_len, y_len, blank_idx, batch_offset=None... class TransducerLossFunc (line 127) | class TransducerLossFunc(torch.autograd.Function): method forward (line 129) | def forward(ctx, x, label, f_len, y_len, batch_offset, max_f_len, blan... method backward (line 149) | def backward(ctx, loss_grad): class TransducerJointFunc (line 158) | class TransducerJointFunc(torch.autograd.Function): method forward (line 160) | def forward(ctx, f, g, f_len, g_len, pack_output, relu, dropout, batch... method backward (line 180) | def backward(ctx, loss_grad): FILE: KoSentenceT5/apex/contrib/xentropy/softmax_xentropy.py class SoftmaxCrossEntropyLoss (line 4) | class SoftmaxCrossEntropyLoss(torch.autograd.Function): method forward (line 6) | def forward(ctx, logits, labels, smoothing=0.0, padding_idx=0, half_to... method backward (line 18) | def backward(ctx, grad_loss): FILE: KoSentenceT5/apex/fp16_utils/fp16_optimizer.py class FP16_Optimizer (line 13) | class FP16_Optimizer(object): method __init__ (line 14) | def __init__(self, method maybe_print (line 110) | def maybe_print(self, msg): method __getstate__ (line 114) | def __getstate__(self): method __setstate__ (line 117) | def __setstate__(self, state): method zero_grad (line 120) | def zero_grad(self, set_grads_to_None=False): method _master_params_to_model_params (line 160) | def _master_params_to_model_params(self): method clip_master_grads (line 185) | def clip_master_grads(self, max_norm, norm_type=2): method state_dict (line 209) | def state_dict(self): method load_state_dict (line 230) | def load_state_dict(self, state_dict): method step (line 272) | def step(self, closure=None): # could add clip option. method _step_with_closure (line 334) | def _step_with_closure(self, closure): method backward (line 373) | def backward(self, loss, update_master_grads=True, retain_graph=False): method update_master_grads (line 436) | def update_master_grads(self): method inspect_master_grad_data (line 493) | def inspect_master_grad_data(self): method _get_loss_scale (line 528) | def _get_loss_scale(self): method _set_loss_scale (line 531) | def _set_loss_scale(self, value): method _get_state (line 537) | def _get_state(self): method _set_state (line 540) | def _set_state(self, value): method _get_param_groups (line 547) | def _get_param_groups(self): method _set_param_groups (line 550) | def _set_param_groups(self, value): FILE: KoSentenceT5/apex/fp16_utils/fp16util.py class tofp16 (line 7) | class tofp16(nn.Module): method __init__ (line 15) | def __init__(self): method forward (line 18) | def forward(self, input): function BN_convert_float (line 22) | def BN_convert_float(module): function network_to_half (line 35) | def network_to_half(network): function convert_module (line 44) | def convert_module(module, dtype): function convert_network (line 60) | def convert_network(network, dtype): class FP16Model (line 73) | class FP16Model(nn.Module): method __init__ (line 78) | def __init__(self, network): method forward (line 82) | def forward(self, *inputs): function backwards_debug_hook (line 87) | def backwards_debug_hook(grad): function prep_param_lists (line 90) | def prep_param_lists(model, flat_master=False): function model_grads_to_master_grads (line 136) | def model_grads_to_master_grads(model_params, master_params, flat_master... function master_params_to_model_params (line 158) | def master_params_to_model_params(model_params, master_params, flat_mast... function to_python_float (line 176) | def to_python_float(t): FILE: KoSentenceT5/apex/fp16_utils/loss_scaler.py function to_python_float (line 4) | def to_python_float(t): class LossScaler (line 10) | class LossScaler: method __init__ (line 22) | def __init__(self, scale=1): method has_overflow (line 26) | def has_overflow(self, params): method _has_inf_or_nan (line 30) | def _has_inf_or_nan(x): method update_scale (line 33) | def update_scale(self, overflow): method loss_scale (line 37) | def loss_scale(self): method scale_gradient (line 40) | def scale_gradient(self, module, grad_in, grad_out): method backward (line 43) | def backward(self, loss, retain_graph=False): class DynamicLossScaler (line 47) | class DynamicLossScaler: method __init__ (line 73) | def __init__(self, method has_overflow (line 84) | def has_overflow(self, params): method _has_inf_or_nan (line 92) | def _has_inf_or_nan(x): method update_scale (line 113) | def update_scale(self, overflow): method loss_scale (line 124) | def loss_scale(self): method scale_gradient (line 127) | def scale_gradient(self, module, grad_in, grad_out): method backward (line 130) | def backward(self, loss, retain_graph=False): FILE: KoSentenceT5/apex/mlp/mlp.py class MlpFunction (line 8) | class MlpFunction(torch.autograd.Function): method forward (line 10) | def forward(ctx, bias, activation, *args): method backward (line 19) | def backward(ctx, grad_o): class MLP (line 26) | class MLP(torch.nn.Module): method __init__ (line 34) | def __init__(self, mlp_sizes, bias=True, activation='relu'): method reset_parameters (line 64) | def reset_parameters(self): method forward (line 74) | def forward(self, input): method extra_repr (line 77) | def extra_repr(self): FILE: KoSentenceT5/apex/multi_tensor_apply/multi_tensor_apply.py class MultiTensorApply (line 3) | class MultiTensorApply(object): method __init__ (line 7) | def __init__(self, chunk_size): method check_avail (line 16) | def check_avail(self): method __call__ (line 24) | def __call__(self, op, noop_flag_buffer, tensor_lists, *args): FILE: KoSentenceT5/apex/normalization/fused_layer_norm.py class FusedLayerNormAffineFunction (line 12) | class FusedLayerNormAffineFunction(torch.autograd.Function): method forward (line 15) | def forward(ctx, input, weight, bias, normalized_shape, eps): method backward (line 30) | def backward(ctx, grad_output): class FusedLayerNormFunction (line 39) | class FusedLayerNormFunction(torch.autograd.Function): method forward (line 42) | def forward(ctx, input, normalized_shape, eps): method backward (line 55) | def backward(ctx, grad_output): function fused_layer_norm_affine (line 64) | def fused_layer_norm_affine(input, normalized_shape, weight, bias, eps=1... function fused_layer_norm (line 67) | def fused_layer_norm(input, normalized_shape, eps=1e-6): class FusedLayerNorm (line 70) | class FusedLayerNorm(torch.nn.Module): method __init__ (line 129) | def __init__(self, normalized_shape, eps=1e-5, elementwise_affine=True): method reset_parameters (line 148) | def reset_parameters(self): method forward (line 153) | def forward(self, input): method extra_repr (line 163) | def extra_repr(self): FILE: KoSentenceT5/apex/optimizers/fused_adagrad.py class FusedAdagrad (line 5) | class FusedAdagrad(torch.optim.Optimizer): method __init__ (line 43) | def __init__(self, params, lr=1e-2, eps=1e-10, method zero_grad (line 59) | def zero_grad(self): method step (line 67) | def step(self, closure=None): FILE: KoSentenceT5/apex/optimizers/fused_adam.py class FusedAdam (line 4) | class FusedAdam(torch.optim.Optimizer): method __init__ (line 63) | def __init__(self, params, lr=1e-3, bias_correction=True, method zero_grad (line 82) | def zero_grad(self): method step (line 90) | def step(self, closure=None, grads=None, output_params=None, scale=Non... FILE: KoSentenceT5/apex/optimizers/fused_lamb.py class FusedLAMB (line 4) | class FusedLAMB(torch.optim.Optimizer): method __init__ (line 63) | def __init__(self, params, lr=1e-3, bias_correction=True, method zero_grad (line 88) | def zero_grad(self): method step (line 96) | def step(self, closure=None): FILE: KoSentenceT5/apex/optimizers/fused_novograd.py class FusedNovoGrad (line 4) | class FusedNovoGrad(torch.optim.Optimizer): method __init__ (line 67) | def __init__(self, params, lr=1e-3, bias_correction=True, method zero_grad (line 92) | def zero_grad(self): method load_state_dict (line 100) | def load_state_dict(self, state_dict): method step (line 108) | def step(self, closure=None): FILE: KoSentenceT5/apex/optimizers/fused_sgd.py class FusedSGD (line 6) | class FusedSGD(Optimizer): method __init__ (line 76) | def __init__(self, params, lr=required, momentum=0, dampening=0, method __setstate__ (line 108) | def __setstate__(self, state): method zero_grad (line 113) | def zero_grad(self): method get_momentums (line 121) | def get_momentums(self, params): method step (line 138) | def step(self, closure=None): FILE: KoSentenceT5/apex/parallel/LARC.py class LARC (line 5) | class LARC(object): method __init__ (line 39) | def __init__(self, optimizer, trust_coefficient=0.02, clip=True, eps=1... method __getstate__ (line 45) | def __getstate__(self): method __setstate__ (line 48) | def __setstate__(self, state): method state (line 52) | def state(self): method __repr__ (line 55) | def __repr__(self): method param_groups (line 59) | def param_groups(self): method param_groups (line 63) | def param_groups(self, value): method state_dict (line 66) | def state_dict(self): method load_state_dict (line 69) | def load_state_dict(self, state_dict): method zero_grad (line 72) | def zero_grad(self): method add_param_group (line 75) | def add_param_group(self, param_group): method step (line 78) | def step(self): FILE: KoSentenceT5/apex/parallel/__init__.py function convert_syncbn_model (line 21) | def convert_syncbn_model(module, process_group=None, channel_last=False): function create_syncbn_process_group (line 58) | def create_syncbn_process_group(group_size): FILE: KoSentenceT5/apex/parallel/distributed.py function import_flatten_impl (line 13) | def import_flatten_impl(): function flatten (line 25) | def flatten(bucket): function unflatten (line 30) | def unflatten(coalesced, bucket): function apply_flat_dist_call (line 36) | def apply_flat_dist_call(bucket, call, extra_args=None): function split_half_float_double (line 51) | def split_half_float_double(tensors): function split_by_type (line 60) | def split_by_type(tensors): function flat_dist_call (line 70) | def flat_dist_call(tensors, call, extra_args=None): function extract_tensors (line 78) | def extract_tensors(maybe_tensor, tensor_list): class Reducer (line 89) | class Reducer(object): method __init__ (line 111) | def __init__(self, module_or_grads_list): method reduce (line 121) | def reduce(self): class DistributedDataParallel (line 129) | class DistributedDataParallel(Module): method __init__ (line 162) | def __init__(self, method __setstate__ (line 256) | def __setstate__(self, state): method __getstate__ (line 268) | def __getstate__(self): method enable_allreduce (line 275) | def enable_allreduce(self): method disable_allreduce (line 278) | def disable_allreduce(self): method sync_bucket_structure (line 283) | def sync_bucket_structure(self): method create_hooks (line 319) | def create_hooks(self): method _stream_this_bucket (line 411) | def _stream_this_bucket(self, bucket_idx): method _event_this_bucket (line 418) | def _event_this_bucket(self, bucket_idx): method allreduce_bucket (line 425) | def allreduce_bucket(self, bucket, bucket_idx, force_default_stream): method allreduce_maybe_retain (line 478) | def allreduce_maybe_retain(self, bucket, bucket_idx, force_default_str... method allreduce_fallback (line 491) | def allreduce_fallback(self): method comm_ready_buckets (line 513) | def comm_ready_buckets(self, param): method forward (line 559) | def forward(self, *inputs, **kwargs): FILE: KoSentenceT5/apex/parallel/multiproc.py function docstring_hack (line 5) | def docstring_hack(): FILE: KoSentenceT5/apex/parallel/optimized_sync_batchnorm.py class SyncBatchNorm (line 9) | class SyncBatchNorm(_BatchNorm): method __init__ (line 58) | def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, ... method _specify_process_group (line 64) | def _specify_process_group(self, process_group): method _specify_channel_last (line 67) | def _specify_channel_last(self, channel_last): method forward (line 70) | def forward(self, input, z = None): FILE: KoSentenceT5/apex/parallel/optimized_sync_batchnorm_kernel.py class SyncBatchnormFunction (line 7) | class SyncBatchnormFunction(Function): method forward (line 10) | def forward(ctx, input, z, weight, bias, running_mean, running_varianc... method backward (line 75) | def backward(ctx, grad_output): FILE: KoSentenceT5/apex/parallel/sync_batchnorm.py class SyncBatchNorm (line 9) | class SyncBatchNorm(_BatchNorm): method __init__ (line 51) | def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, ... method _specify_process_group (line 65) | def _specify_process_group(self, process_group): method forward (line 68) | def forward(self, input): FILE: KoSentenceT5/apex/parallel/sync_batchnorm_kernel.py class SyncBatchnormFunction (line 7) | class SyncBatchnormFunction(Function): method forward (line 10) | def forward(ctx, input, weight, bias, running_mean, running_variance, ... method backward (line 33) | def backward(ctx, grad_output): FILE: KoSentenceT5/apex/pyprof/examples/custom_func_module/custom_function.py class Foo (line 9) | class Foo(torch.autograd.Function): method forward (line 11) | def forward(ctx, in1, in2): method backward (line 16) | def backward(ctx, grad): FILE: KoSentenceT5/apex/pyprof/examples/custom_func_module/custom_module.py class Foo (line 8) | class Foo(torch.nn.Module): method __init__ (line 9) | def __init__(self, size): method forward (line 14) | def forward(self, input): FILE: KoSentenceT5/apex/pyprof/examples/imagenet/imagenet.py function parseArgs (line 17) | def parseArgs(): function main (line 89) | def main(): FILE: KoSentenceT5/apex/pyprof/examples/jit/jit_script_function.py function foo (line 11) | def foo(x, y): FILE: KoSentenceT5/apex/pyprof/examples/jit/jit_script_method.py class Foo (line 7) | class Foo(torch.jit.ScriptModule): method __init__ (line 8) | def __init__(self, size): method forward (line 14) | def forward(self, input): FILE: KoSentenceT5/apex/pyprof/examples/jit/jit_trace_function.py function foo (line 7) | def foo(x, y): FILE: KoSentenceT5/apex/pyprof/examples/jit/jit_trace_method.py class Foo (line 7) | class Foo(torch.nn.Module): method __init__ (line 8) | def __init__(self, size): method forward (line 13) | def forward(self, input): FILE: KoSentenceT5/apex/pyprof/examples/lenet.py class LeNet5 (line 12) | class LeNet5(nn.Module): method __init__ (line 13) | def __init__(self): method forward (line 24) | def forward(self, x): method num_flat_features (line 35) | def num_flat_features(self, x): FILE: KoSentenceT5/apex/pyprof/examples/user_annotation/resnet.py function conv3x3 (line 15) | def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): function conv1x1 (line 20) | def conv1x1(in_planes, out_planes, stride=1): class Bottleneck (line 24) | class Bottleneck(nn.Module): method __init__ (line 28) | def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, method forward (line 48) | def forward(self, x): class ResNet (line 102) | class ResNet(nn.Module): method __init__ (line 104) | def __init__(self, block, layers, num_classes=1000, method _make_layer (line 134) | def _make_layer(self, block, planes, blocks, stride=1, dilate=False): method forward (line 158) | def forward(self, x): function resnet50 (line 193) | def resnet50(): FILE: KoSentenceT5/apex/pyprof/nvtx/nvmarker.py function isfunc (line 27) | def isfunc(mod, f): function traceMarker (line 46) | def traceMarker(stack): function modMarker (line 56) | def modMarker(mod, fn_name, args): function add_wrapper (line 67) | def add_wrapper(mod, fn_name): function argMarker (line 110) | def argMarker(mod, op, args, kwargs): function patchClass (line 201) | def patchClass(cls): function init (line 206) | def init(): FILE: KoSentenceT5/apex/pyprof/parse/db.py class DB (line 3) | class DB(object): method __init__ (line 9) | def __init__(self, dbFile): method select (line 21) | def select(self, cmd): method insert (line 36) | def insert(self, cmd, data): method execute (line 46) | def execute(self, cmd): method commit (line 56) | def commit(self): method close (line 59) | def close(self): FILE: KoSentenceT5/apex/pyprof/parse/kernel.py function demangle (line 5) | def demangle(name): function encode_object_id (line 11) | def encode_object_id(pid, tid): function getShortName (line 20) | def getShortName(name): class Kernel (line 33) | class Kernel(object): method __init__ (line 41) | def __init__(self): method setKernelInfo (line 77) | def setKernelInfo(self, info): method setKernelName (line 93) | def setKernelName(self, name): method setRunTimeInfo (line 98) | def setRunTimeInfo(self, info): method setMarkerInfo (line 107) | def setMarkerInfo(self, info): method setDirection (line 111) | def setDirection(self): method setOp (line 123) | def setOp(self): method print (line 180) | def print(self): FILE: KoSentenceT5/apex/pyprof/parse/nvvp.py class NVVP (line 3) | class NVVP(object): method __init__ (line 14) | def __init__(self, db): method getProfileStart (line 18) | def getProfileStart(self): method getString (line 36) | def getString(self, id_): method createMarkerTable (line 45) | def createMarkerTable(self): method getCPUInfo (line 65) | def getCPUInfo(self, corrId): method getKernelInfo (line 91) | def getKernelInfo(self): method getMarkerInfo (line 99) | def getMarkerInfo(self, objId, startTime, endTime): FILE: KoSentenceT5/apex/pyprof/parse/parse.py function parseArgs (line 15) | def parseArgs(): function main (line 25) | def main(): FILE: KoSentenceT5/apex/pyprof/prof/activation.py class Activation (line 5) | class Activation(OperatorLayerBase): method __init__ (line 12) | def __init__(self, d): method params (line 35) | def params(self): method flops (line 39) | def flops(self): method bytes (line 48) | def bytes(self): method tc (line 58) | def tc(self): method op (line 61) | def op(self): method mod (line 64) | def mod(self): FILE: KoSentenceT5/apex/pyprof/prof/base.py class OperatorLayerBase (line 3) | class OperatorLayerBase(ABC): method tc (line 10) | def tc(self): method params (line 18) | def params(self): method flops (line 25) | def flops(self): method bytes (line 32) | def bytes(self): method mod (line 36) | def mod(self): method op (line 43) | def op(self): FILE: KoSentenceT5/apex/pyprof/prof/blas.py class Addmm (line 8) | class Addmm(OperatorLayerBase): method __init__ (line 10) | def __init__(self, d): method tc (line 63) | def tc(self): method bytes (line 69) | def bytes(self): method flops (line 73) | def flops(self): method op (line 76) | def op(self): method mod (line 79) | def mod(self): method params (line 82) | def params(self): class Bmm (line 86) | class Bmm(OperatorLayerBase): method __init__ (line 88) | def __init__(self, d): method tc (line 123) | def tc(self): method params (line 129) | def params(self): method flops (line 134) | def flops(self): method bytes (line 137) | def bytes(self): method op (line 141) | def op(self): method mod (line 144) | def mod(self): class Matmul (line 147) | class Matmul(OperatorLayerBase): method __init__ (line 152) | def __init__(self, d): method params (line 252) | def params(self): method tc (line 255) | def tc(self): method bytes (line 264) | def bytes(self): method flops (line 272) | def flops(self): method op (line 279) | def op(self): method mod (line 282) | def mod(self): class Mm (line 285) | class Mm(OperatorLayerBase): method __init__ (line 287) | def __init__(self, d): method params (line 319) | def params(self): method tc (line 323) | def tc(self): method bytes (line 329) | def bytes(self): method flops (line 333) | def flops(self): method op (line 336) | def op(self): method mod (line 339) | def mod(self): FILE: KoSentenceT5/apex/pyprof/prof/conv.py class Conv (line 5) | class Conv(OperatorLayerBase): method __init__ (line 26) | def __init__(self, d): method params (line 180) | def params(self): method conv_bytes_flops (line 184) | def conv_bytes_flops(self, N, C, H, W, K, P, Q, R, S, g, t): method bytes_flops (line 190) | def bytes_flops(self): method bytes (line 218) | def bytes(self): method flops (line 222) | def flops(self): method tc (line 226) | def tc(self): method op (line 232) | def op(self): method mod (line 235) | def mod(self): FILE: KoSentenceT5/apex/pyprof/prof/convert.py class Convert (line 5) | class Convert(OperatorLayerBase): method __init__ (line 11) | def __init__(self, d): method params (line 41) | def params(self): method op (line 45) | def op(self): method mod (line 48) | def mod(self): method tc (line 51) | def tc(self): method elems (line 54) | def elems(self): method flops (line 57) | def flops(self): method bytes (line 60) | def bytes(self): FILE: KoSentenceT5/apex/pyprof/prof/data.py class Data (line 3) | class Data(object): method __init__ (line 7) | def __init__(self, kernel): method setParams (line 41) | def setParams(self, params): FILE: KoSentenceT5/apex/pyprof/prof/dropout.py class Dropout (line 5) | class Dropout(OperatorLayerBase): method __init__ (line 7) | def __init__(self, d): method params (line 28) | def params(self): method op (line 32) | def op(self): method mod (line 35) | def mod(self): method tc (line 38) | def tc(self): method elems (line 41) | def elems(self): method bytes (line 44) | def bytes(self): method flops (line 48) | def flops(self): FILE: KoSentenceT5/apex/pyprof/prof/embedding.py class Embedding (line 5) | class Embedding(OperatorLayerBase): method __init__ (line 7) | def __init__(self, d): method params (line 33) | def params(self): method op (line 37) | def op(self): method mod (line 40) | def mod(self): method tc (line 43) | def tc(self): method bytes (line 46) | def bytes(self): method flops (line 69) | def flops(self): FILE: KoSentenceT5/apex/pyprof/prof/index_slice_join_mutate.py class Cat (line 6) | class Cat(OperatorLayerBase): method __init__ (line 8) | def __init__(self, d): method params (line 34) | def params(self): method flops (line 38) | def flops(self): method tc (line 41) | def tc(self): method op (line 44) | def op(self): method mod (line 47) | def mod(self): method bytes (line 50) | def bytes(self): class Reshape (line 56) | class Reshape(OperatorLayerBase): method __init__ (line 58) | def __init__(self, d): method params (line 82) | def params(self): method flops (line 86) | def flops(self): method tc (line 89) | def tc(self): method op (line 92) | def op(self): method mod (line 95) | def mod(self): method bytes (line 98) | def bytes(self): class Gather (line 101) | class Gather(OperatorLayerBase): method __init__ (line 103) | def __init__(self, d): method params (line 132) | def params(self): method flops (line 136) | def flops(self): method tc (line 139) | def tc(self): method op (line 142) | def op(self): method mod (line 145) | def mod(self): method bytes (line 148) | def bytes(self): class MaskedScatter (line 151) | class MaskedScatter(OperatorLayerBase): method __init__ (line 153) | def __init__(self, d): method params (line 178) | def params(self): method flops (line 182) | def flops(self): method tc (line 185) | def tc(self): method op (line 188) | def op(self): method mod (line 191) | def mod(self): method bytes (line 194) | def bytes(self): class Nonzero (line 207) | class Nonzero(OperatorLayerBase): method __init__ (line 209) | def __init__(self, d): method params (line 229) | def params(self): method flops (line 233) | def flops(self): method tc (line 236) | def tc(self): method op (line 239) | def op(self): method mod (line 242) | def mod(self): method bytes (line 245) | def bytes(self): class IndexSelect (line 260) | class IndexSelect(OperatorLayerBase): method __init__ (line 262) | def __init__(self, d): method params (line 311) | def params(self): method tc (line 315) | def tc(self): method op (line 318) | def op(self): method mod (line 321) | def mod(self): method flops (line 324) | def flops(self): method bytes (line 327) | def bytes(self): class MaskedSelect (line 343) | class MaskedSelect(OperatorLayerBase): method __init__ (line 345) | def __init__(self, d): method params (line 393) | def params(self): method tc (line 397) | def tc(self): method op (line 400) | def op(self): method mod (line 403) | def mod(self): method bytes (line 406) | def bytes(self): method flops (line 418) | def flops(self): FILE: KoSentenceT5/apex/pyprof/prof/linear.py class Linear (line 5) | class Linear(OperatorLayerBase): method setXWBMNK (line 17) | def setXWBMNK(self, args): method tc (line 63) | def tc(self): method __init__ (line 69) | def __init__(self, d): method params (line 118) | def params(self): method op (line 145) | def op(self): method bytesFlops (line 148) | def bytesFlops(self): method bytes (line 179) | def bytes(self): method flops (line 183) | def flops(self): method mod (line 187) | def mod(self): FILE: KoSentenceT5/apex/pyprof/prof/loss.py class MSELoss (line 7) | class MSELoss(OperatorLayerBase): method __init__ (line 9) | def __init__(self, d): method params (line 51) | def params(self): method elems (line 55) | def elems(self): method bytes (line 71) | def bytes(self): method flops (line 74) | def flops(self): method tc (line 77) | def tc(self): method op (line 80) | def op(self): method mod (line 83) | def mod(self): FILE: KoSentenceT5/apex/pyprof/prof/misc.py class Foo (line 5) | class Foo(OperatorLayerBase): method __init__ (line 9) | def __init__(self, d): method params (line 31) | def params(self): method tc (line 35) | def tc(self): method op (line 38) | def op(self): method mod (line 41) | def mod(self): method flops (line 44) | def flops(self): method bytes (line 47) | def bytes(self): class Copy (line 50) | class Copy(OperatorLayerBase): method __init__ (line 52) | def __init__(self, d): method params (line 75) | def params(self): method tc (line 80) | def tc(self): method op (line 83) | def op(self): method mod (line 86) | def mod(self): method flops (line 89) | def flops(self): method elems (line 92) | def elems(self): method bytes (line 95) | def bytes(self): class Clone (line 98) | class Clone(OperatorLayerBase): method __init__ (line 100) | def __init__(self, d): method params (line 118) | def params(self): method flops (line 122) | def flops(self): method tc (line 125) | def tc(self): method op (line 128) | def op(self): method mod (line 131) | def mod(self): method elems (line 134) | def elems(self): method bytes (line 137) | def bytes(self): class Contiguous (line 140) | class Contiguous(OperatorLayerBase): method __init__ (line 142) | def __init__(self, d): method params (line 160) | def params(self): method flops (line 164) | def flops(self): method bytes (line 167) | def bytes(self): method tc (line 170) | def tc(self): method op (line 173) | def op(self): method mod (line 176) | def mod(self): class Any (line 179) | class Any(OperatorLayerBase): method __init__ (line 181) | def __init__(self, d): method params (line 202) | def params(self): method op (line 206) | def op(self): method mod (line 209) | def mod(self): method tc (line 212) | def tc(self): method flops (line 215) | def flops(self): method bytes (line 218) | def bytes(self): FILE: KoSentenceT5/apex/pyprof/prof/normalization.py class BatchNorm (line 5) | class BatchNorm(OperatorLayerBase): method __init__ (line 7) | def __init__(self, d): method params (line 27) | def params(self): method tc (line 31) | def tc(self): method op (line 34) | def op(self): method mod (line 37) | def mod(self): method elems (line 40) | def elems(self): method flops (line 43) | def flops(self): method bytes (line 47) | def bytes(self): FILE: KoSentenceT5/apex/pyprof/prof/optim.py class Adam (line 7) | class Adam(OperatorLayerBase): method __init__ (line 9) | def __init__(self, d): method params (line 31) | def params(self): method flops (line 35) | def flops(self): method bytes (line 38) | def bytes(self): method tc (line 58) | def tc(self): method op (line 61) | def op(self): method mod (line 64) | def mod(self): FILE: KoSentenceT5/apex/pyprof/prof/output.py class Output (line 3) | class Output(): method __init__ (line 33) | def __init__(self, args): method foo (line 77) | def foo(self, cadena, pformat): method header (line 99) | def header(self): method data (line 107) | def data(self, a): FILE: KoSentenceT5/apex/pyprof/prof/pointwise.py class Pointwise (line 6) | class Pointwise(OperatorLayerBase): method foo (line 26) | def foo(d): method __init__ (line 29) | def __init__(self, d): method params (line 84) | def params(self): method tc (line 88) | def tc(self): method op (line 91) | def op(self): method mod (line 94) | def mod(self): method elems (line 97) | def elems(self): method bytes (line 138) | def bytes(self): method flops (line 141) | def flops(self): FILE: KoSentenceT5/apex/pyprof/prof/pooling.py class MaxPool2d (line 7) | class MaxPool2d(object): method parse (line 9) | def parse(marker): FILE: KoSentenceT5/apex/pyprof/prof/prof.py function findFpropKernel (line 39) | def findFpropKernel(seq): function foo (line 56) | def foo(mod, op, d): function main (line 171) | def main(): FILE: KoSentenceT5/apex/pyprof/prof/randomSample.py class RandPerm (line 5) | class RandPerm(OperatorLayerBase): method __init__ (line 7) | def __init__(self, d): method params (line 25) | def params(self): method tc (line 29) | def tc(self): method op (line 32) | def op(self): method mod (line 35) | def mod(self): method bytes (line 38) | def bytes(self): method flops (line 41) | def flops(self): FILE: KoSentenceT5/apex/pyprof/prof/recurrentCell.py function hasTileSize (line 5) | def hasTileSize(name): function ctaTile (line 11) | def ctaTile(name): class RNNCell (line 21) | class RNNCell(OperatorLayerBase): method __init__ (line 26) | def __init__(self, d): method params (line 73) | def params(self): method tc (line 83) | def tc(self): method op (line 89) | def op(self): method mod (line 92) | def mod(self): method bytes (line 95) | def bytes(self): method flops (line 105) | def flops(self): method bar (line 115) | def bar(self): FILE: KoSentenceT5/apex/pyprof/prof/reduction.py class Mean (line 5) | class Mean(OperatorLayerBase): method __init__ (line 7) | def __init__(self, d): method params (line 32) | def params(self): method tc (line 36) | def tc(self): method op (line 39) | def op(self): method mod (line 42) | def mod(self): method elems (line 45) | def elems(self): method bytes (line 48) | def bytes(self): method flops (line 54) | def flops(self): class Sum (line 60) | class Sum(OperatorLayerBase): method __init__ (line 62) | def __init__(self, d): method params (line 86) | def params(self): method tc (line 90) | def tc(self): method op (line 93) | def op(self): method mod (line 96) | def mod(self): method elems (line 99) | def elems(self): method flops (line 102) | def flops(self): method bytes (line 106) | def bytes(self): class Norm (line 109) | class Norm(OperatorLayerBase): method __init__ (line 111) | def __init__(self, d): method params (line 129) | def params(self): method elems (line 133) | def elems(self): method bytes (line 136) | def bytes(self): method flops (line 139) | def flops(self): method tc (line 143) | def tc(self): method op (line 146) | def op(self): method mod (line 149) | def mod(self): FILE: KoSentenceT5/apex/pyprof/prof/softmax.py class Softmax (line 5) | class Softmax(OperatorLayerBase): method __init__ (line 7) | def __init__(self, d): method op (line 31) | def op(self): method mod (line 34) | def mod(self): method tc (line 37) | def tc(self): method params (line 40) | def params(self): method elems (line 44) | def elems(self): method flops (line 47) | def flops(self): method bytes (line 52) | def bytes(self): class LogSoftmax (line 57) | class LogSoftmax(OperatorLayerBase): method __init__ (line 59) | def __init__(self, d): method op (line 91) | def op(self): method mod (line 94) | def mod(self): method tc (line 97) | def tc(self): method params (line 100) | def params(self): method elems (line 104) | def elems(self): method flops (line 107) | def flops(self): method bytes (line 112) | def bytes(self): FILE: KoSentenceT5/apex/pyprof/prof/usage.py function parseArgs (line 4) | def parseArgs(): FILE: KoSentenceT5/apex/pyprof/prof/utility.py class Utility (line 3) | class Utility(object): method numElems (line 6) | def numElems(shape): method typeToBytes (line 11) | def typeToBytes(t): method typeToString (line 23) | def typeToString(t): method hasNVTX (line 45) | def hasNVTX(marker): method isscalar (line 59) | def isscalar(t): FILE: KoSentenceT5/apex/reparameterization/__init__.py function apply_weight_norm (line 4) | def apply_weight_norm(module, name='', dim=0, hook_child=True): function remove_weight_norm (line 50) | def remove_weight_norm(module, name='', remove_all=False): function apply_reparameterization (line 64) | def apply_reparameterization(module, reparameterization=None, name='', d... function remove_reparameterization (line 96) | def remove_reparameterization(module, reparameterization=Reparameterizat... FILE: KoSentenceT5/apex/reparameterization/reparameterization.py class Reparameterization (line 4) | class Reparameterization(object): method __init__ (line 19) | def __init__(self, name, dim, module, retain_forward=True): method compute_weight (line 28) | def compute_weight(self, module=None, name=None): method reparameterize (line 40) | def reparameterize(self, name, weight, dim): method apply (line 57) | def apply(module, name, dim, reparameterization=None, hook_child=True): method get_module_and_name (line 105) | def get_module_and_name(module, name): method get_params (line 123) | def get_params(self, module): method remove (line 127) | def remove(self, module): method __call__ (line 139) | def __call__(self, module, inputs): method backward_hook (line 147) | def backward_hook(self, module, grad_input, grad_output): FILE: KoSentenceT5/apex/reparameterization/weight_norm.py function _norm (line 8) | def _norm(p, dim): class WeightNorm (line 22) | class WeightNorm(Reparameterization): method compute_weight (line 39) | def compute_weight(self, module=None, name=None): method reparameterize (line 62) | def reparameterize(self, name, weight, dim): FILE: KoSentenceT5/data/dataloader.py class ModelDataLoader (line 10) | class ModelDataLoader(Dataset): method __init__ (line 11) | def __init__(self, file_path, args, metric, tokenizer, type_): method load_data (line 57) | def load_data(self, type): method data2tensor (line 70) | def data2tensor(self, line, type): method __getitem__ (line 133) | def __getitem__(self, index): method __len__ (line 175) | def __len__(self): function get_loader (line 183) | def get_loader(args, metric): FILE: KoSentenceT5/main.py function main (line 5) | def main(args, logger) -> None: FILE: KoSentenceT5/model/loss.py class Loss (line 12) | class Loss(): method __init__ (line 14) | def __init__(self, args): method train_loss_fct (line 19) | def train_loss_fct(self, config, inputs, a, p, n): method evaluation_during_training (line 31) | def evaluation_during_training(self, embeddings1, embeddings2, labels,... FILE: KoSentenceT5/model/setting.py class Arguments (line 8) | class Arguments(): method __init__ (line 10) | def __init__(self): method add_type_of_processing (line 13) | def add_type_of_processing(self): method add_hyper_parameters (line 21) | def add_hyper_parameters(self): method add_data_parameters (line 35) | def add_data_parameters(self): method print_args (line 45) | def print_args(self, args): method add_argument (line 51) | def add_argument(self, *args, **kw_args): method parse (line 54) | def parse(self): class Setting (line 61) | class Setting(): method set_logger (line 63) | def set_logger(self): method set_seed (line 77) | def set_seed(self, args): method run (line 91) | def run(self): FILE: KoSentenceT5/model/simcse/kost5.py class KoSentenceT5 (line 5) | class KoSentenceT5(nn.Module): method __init__ (line 6) | def __init__(self, model): method forward (line 10) | def forward(self, config, inputs, mode): method encode (line 44) | def encode(self, inputs, device): FILE: KoSentenceT5/model/simcse/processor.py class Processor (line 19) | class Processor(): method __init__ (line 21) | def __init__(self, args): method run (line 33) | def run(self, inputs, indicator=None, type=None): method progress (line 52) | def progress(self, loss): method progress_validation (line 56) | def progress_validation(self, score): method return_value (line 60) | def return_value(self): method get_object (line 66) | def get_object(self, tokenizer, model): method get_scheduler (line 81) | def get_scheduler(self, optim, train_loader): method model_setting (line 89) | def model_setting(self): method train (line 125) | def train(self, epoch): method valid (line 155) | def valid(self): method test (line 179) | def test(self): FILE: KoSentenceT5/model/utils.py class Metric (line 10) | class Metric(): method __init__ (line 12) | def __init__(self, args): method get_lr (line 15) | def get_lr(self, optimizer): method count_parameters (line 18) | def count_parameters(self, model): method cal_acc (line 21) | def cal_acc(self, yhat, y): method cal_time (line 28) | def cal_time(self, start_time, end_time): method cal_dev_score (line 35) | def cal_dev_score(self, score, indicator): method update_indicator (line 51) | def update_indicator(self, indicator, score): method draw_graph (line 70) | def draw_graph(self, cp): method performance_check (line 74) | def performance_check(self, cp, config): method print_size_of_model (line 80) | def print_size_of_model(self, model): method move2device (line 85) | def move2device(self, sample, device): method save_model (line 106) | def save_model(self, config, cp, pco): function pytorch_cos_sim (line 131) | def pytorch_cos_sim(a, b): FILE: KoSimCSE/SemanticSearch.py function main (line 6) | def main(): FILE: KoSimCSE/apex/RNN/RNNBackend.py function is_iterable (line 10) | def is_iterable(maybe_iterable): function flatten_list (line 14) | def flatten_list(tens_list): class bidirectionalRNN (line 25) | class bidirectionalRNN(nn.Module): method __init__ (line 29) | def __init__(self, inputRNN, num_layers=1, dropout = 0): method forward (line 37) | def forward(self, input, collect_hidden=False): method reset_parameters (line 52) | def reset_parameters(self): method init_hidden (line 59) | def init_hidden(self, bsz): method detach_hidden (line 66) | def detach_hidden(self): method reset_hidden (line 73) | def reset_hidden(self, bsz): method init_inference (line 80) | def init_inference(self, bsz): class stackedRNN (line 90) | class stackedRNN(nn.Module): method __init__ (line 94) | def __init__(self, inputRNN, num_layers=1, dropout=0): method forward (line 122) | def forward(self, input, collect_hidden=False, reverse=False): method reset_parameters (line 197) | def reset_parameters(self): method init_hidden (line 204) | def init_hidden(self, bsz): method detach_hidden (line 211) | def detach_hidden(self): method reset_hidden (line 218) | def reset_hidden(self, bsz): method init_inference (line 225) | def init_inference(self, bsz): class RNNCell (line 232) | class RNNCell(nn.Module): method __init__ (line 242) | def __init__(self, gate_multiplier, input_size, hidden_size, cell, n_h... method new_like (line 274) | def new_like(self, new_input_size=None): method reset_parameters (line 291) | def reset_parameters(self, gain=1): method init_hidden (line 309) | def init_hidden(self, bsz): method reset_hidden (line 330) | def reset_hidden(self, bsz): method detach_hidden (line 338) | def detach_hidden(self): method forward (line 348) | def forward(self, input): FILE: KoSimCSE/apex/RNN/cells.py class mLSTMRNNCell (line 12) | class mLSTMRNNCell(RNNCell): method __init__ (line 17) | def __init__(self, input_size, hidden_size, bias = False, output_size ... method forward (line 26) | def forward(self, input): method new_like (line 45) | def new_like(self, new_input_size=None): function mLSTMCell (line 55) | def mLSTMCell(input, hidden, w_ih, w_hh, w_mih, w_mhh, b_ih=None, b_hh=N... FILE: KoSimCSE/apex/RNN/models.py function toRNNBackend (line 8) | def toRNNBackend(inputRNN, num_layers, bidirectional=False, dropout = 0): function LSTM (line 19) | def LSTM(input_size, hidden_size, num_layers, bias=True, batch_first=Fal... function GRU (line 26) | def GRU(input_size, hidden_size, num_layers, bias=True, batch_first=Fals... function ReLU (line 33) | def ReLU(input_size, hidden_size, num_layers, bias=True, batch_first=Fal... function Tanh (line 40) | def Tanh(input_size, hidden_size, num_layers, bias=True, batch_first=Fal... function mLSTM (line 47) | def mLSTM(input_size, hidden_size, num_layers, bias=True, batch_first=Fa... FILE: KoSimCSE/apex/amp/_amp_state.py class AmpState (line 18) | class AmpState(object): method __init__ (line 19) | def __init__(self): function warn_or_err (line 29) | def warn_or_err(msg): function maybe_print (line 39) | def maybe_print(msg, rank0=False): function master_params (line 60) | def master_params(optimizer): FILE: KoSimCSE/apex/amp/_initialize.py function to_type (line 21) | def to_type(dtype, t): function applier (line 39) | def applier(value, fn): function check_models (line 64) | def check_models(models): function check_params_fp32 (line 79) | def check_params_fp32(models): function check_optimizers (line 119) | def check_optimizers(optimizers): class O2StateDictHook (line 133) | class O2StateDictHook(object): method __init__ (line 134) | def __init__(self, fn): method __call__ (line 137) | def __call__(self, module, state_dict, prefix, local_metadata): function _initialize (line 145) | def _initialize(models, optimizers, properties, num_losses=1, cast_model... FILE: KoSimCSE/apex/amp/_process_optimizer.py class AmpOptimizerState (line 9) | class AmpOptimizerState(object): method __init__ (line 10) | def __init__(self): function _master_params_to_model_params (line 14) | def _master_params_to_model_params(self): function lazy_init_with_master_weights (line 28) | def lazy_init_with_master_weights(self): function post_backward_models_are_masters (line 93) | def post_backward_models_are_masters(scaler, params, stashed_grads, scal... function prepare_backward_with_master_weights (line 142) | def prepare_backward_with_master_weights(self): function post_backward_with_master_weights (line 161) | def post_backward_with_master_weights(self, scaler): function lazy_init_no_master_weights (line 205) | def lazy_init_no_master_weights(self): function prepare_backward_no_master_weights (line 224) | def prepare_backward_no_master_weights(self): function post_backward_no_master_weights (line 240) | def post_backward_no_master_weights(self, scaler): function prepare_backward_with_master_weights_FusedSGD (line 258) | def prepare_backward_with_master_weights_FusedSGD(self): function post_backward_with_master_weights_FusedSGD (line 277) | def post_backward_with_master_weights_FusedSGD(self, scaler): function prepare_backward_no_master_weights_FusedSGD (line 305) | def prepare_backward_no_master_weights_FusedSGD(self): function post_backward_no_master_weights_FusedSGD (line 309) | def post_backward_no_master_weights_FusedSGD(self, scaler): function _amp_lazy_init (line 313) | def _amp_lazy_init(self): function _process_optimizer (line 321) | def _process_optimizer(optimizer, properties): FILE: KoSimCSE/apex/amp/amp.py function _decorator_helper (line 18) | def _decorator_helper(orig_fn, cast_fn, wrap_fn): function half_function (line 30) | def half_function(fn): function float_function (line 35) | def float_function(fn): function promote_function (line 40) | def promote_function(fn): function register_half_function (line 46) | def register_half_function(module, name): function register_float_function (line 53) | def register_float_function(module, name): function register_promote_function (line 60) | def register_promote_function(module, name): function init (line 68) | def init(enabled=True, loss_scale="dynamic", enable_caching=True, verbos... FILE: KoSimCSE/apex/amp/compat.py function variable_is_tensor (line 4) | def variable_is_tensor(): function tensor_is_variable (line 8) | def tensor_is_variable(): function tensor_is_float_tensor (line 13) | def tensor_is_float_tensor(): function is_tensor_like (line 19) | def is_tensor_like(x): function is_floating_point (line 24) | def is_floating_point(x): function scalar_python_val (line 35) | def scalar_python_val(x): function filter_attrs (line 45) | def filter_attrs(module, attrs): FILE: KoSimCSE/apex/amp/frontend.py class Properties (line 7) | class Properties(object): method __init__ (line 13) | def __init__(self): method _update_options_dict (line 33) | def _update_options_dict(self, new_options): method __getattr__ (line 43) | def __getattr__(self, name): method __setattr__ (line 51) | def __setattr__(self, name, value): class O3 (line 102) | class O3: method __call__ (line 111) | def __call__(self, properties): class O2 (line 124) | class O2: method __call__ (line 134) | def __call__(self, properties): class O1 (line 147) | class O1: method __call__ (line 156) | def __call__(self, properties): class O0 (line 169) | class O0: method __call__ (line 175) | def __call__(self, properties): function initialize (line 195) | def initialize( function state_dict (line 361) | def state_dict(destination=None): function load_state_dict (line 373) | def load_state_dict(state_dict): FILE: KoSimCSE/apex/amp/handle.py function scale_loss (line 17) | def scale_loss(loss, function disable_casts (line 164) | def disable_casts(): class AmpHandle (line 170) | class AmpHandle(object): method __init__ (line 171) | def __init__(self, loss_scale="dynamic", enable_caching=True, verbose=... method is_active (line 179) | def is_active(self): method _disable_casts (line 183) | def _disable_casts(self): method wrap_optimizer (line 188) | def wrap_optimizer(self, optimizer, num_loss=1): method scale_loss (line 193) | def scale_loss(self, loss, optimizer): method _clear_cache (line 226) | def _clear_cache(self): method _save_func (line 230) | def _save_func(self, mod, fn, func): method _deactivate (line 233) | def _deactivate(self): method has_cache (line 239) | def has_cache(self): method cache (line 243) | def cache(self): method remove_cache (line 246) | def remove_cache(self, param): method verbose (line 251) | def verbose(self): class NoOpHandle (line 254) | class NoOpHandle(object): method is_active (line 255) | def is_active(self): method _disable_casts (line 259) | def _disable_casts(self): method wrap_optimizer (line 262) | def wrap_optimizer(self, optimizer, num_loss=1): method scale_loss (line 266) | def scale_loss(self, loss, optimizer): method has_cache (line 270) | def has_cache(self): method verbose (line 274) | def verbose(self): method _clear_cache (line 277) | def _clear_cache(self): method _deactivate (line 280) | def _deactivate(self): FILE: KoSimCSE/apex/amp/opt.py class OptimWrapper (line 9) | class OptimWrapper(object): method __init__ (line 10) | def __init__(self, optimizer, amp_handle, num_loss): method scale_loss (line 19) | def scale_loss(self, loss): method _cur_loss_scaler (line 55) | def _cur_loss_scaler(self): method step (line 59) | def step(self, closure=None): method __getattr__ (line 80) | def __getattr__(self, attr): method __getstate__ (line 84) | def __getstate__(self): method __setstate__ (line 87) | def __setstate__(self): method __repr__ (line 90) | def __repr__(self): method state_dict (line 93) | def state_dict(self): method load_state_dict (line 96) | def load_state_dict(self, state_dict): method zero_grad (line 99) | def zero_grad(self): method add_param_group (line 102) | def add_param_group(self, param_group): FILE: KoSimCSE/apex/amp/rnn_compat.py function _gen_VF_wrapper (line 7) | def _gen_VF_wrapper(name): class VariableFunctionsShim (line 17) | class VariableFunctionsShim(object): method __init__ (line 18) | def __init__(self): function has_old_rnns (line 24) | def has_old_rnns(): function whitelist_rnn_cells (line 31) | def whitelist_rnn_cells(handle, verbose): FILE: KoSimCSE/apex/amp/scaler.py function scale_check_overflow_python (line 6) | def scale_check_overflow_python(model_grad, master_grad, scale, check_ov... function axpby_check_overflow_python (line 19) | def axpby_check_overflow_python(model_grad, stashed_grad, master_grad, a... class LossScaler (line 33) | class LossScaler(object): method __init__ (line 38) | def __init__(self, method loss_scale (line 73) | def loss_scale(self): method unscale_python (line 76) | def unscale_python(self, model_grads, master_grads, scale): method unscale (line 94) | def unscale(self, model_grads, master_grads, unused_scale, models_are_... method unscale_with_stashed_python (line 126) | def unscale_with_stashed_python(self, method unscale_with_stashed (line 152) | def unscale_with_stashed(self, method clear_overflow_state (line 191) | def clear_overflow_state(self): method update_scale (line 197) | def update_scale(self): FILE: KoSimCSE/apex/amp/utils.py function is_cuda_enabled (line 8) | def is_cuda_enabled(): function get_cuda_version (line 11) | def get_cuda_version(): function is_fp_tensor (line 14) | def is_fp_tensor(x): function is_nested (line 23) | def is_nested(x): function should_cache (line 26) | def should_cache(x): function collect_fp_tensor_types (line 36) | def collect_fp_tensor_types(args, kwargs): function type_string (line 51) | def type_string(x): function maybe_half (line 54) | def maybe_half(x, name='', verbose=False): function maybe_float (line 65) | def maybe_float(x, name='', verbose=False): function casted_args (line 77) | def casted_args(cast_fn, args, kwargs): function cached_cast (line 90) | def cached_cast(cast_fn, x, cache): function verbosify (line 124) | def verbosify(cast_fn, fn_name, verbose): function as_inplace (line 130) | def as_inplace(fns): function has_func (line 134) | def has_func(mod, fn): function get_func (line 140) | def get_func(mod, fn): function set_func (line 146) | def set_func(mod, fn, new_fn): function set_func_save (line 152) | def set_func_save(handle, mod, fn, new_fn): function synthesize_flattened_rnn_weights (line 171) | def synthesize_flattened_rnn_weights(fp32_weights, function new_synthesize_flattened_rnn_weights (line 194) | def new_synthesize_flattened_rnn_weights(fp32_weights, FILE: KoSimCSE/apex/amp/wrap.py function make_cast_wrapper (line 10) | def make_cast_wrapper(orig_fn, cast_fn, handle, function cached_cast (line 31) | def cached_cast(mod, fn, cast_fn, handle, function make_promote_wrapper (line 44) | def make_promote_wrapper(orig_fn, cast_fn, handle=None): function promote (line 65) | def promote(mod, fn, handle, verbose=False): function sequence_promote (line 71) | def sequence_promote(mod, fn, handle, verbose=False): function promote_match_arg0 (line 92) | def promote_match_arg0(mod, fn, handle, verbose=False): function err_if_any_half (line 114) | def err_if_any_half(mod, fn, handle, custom_err_msg=None): function err_if_arg0_half (line 132) | def err_if_arg0_half(mod, fn, handle, verbose=False): function rnn_cast (line 157) | def rnn_cast(backend, fn, handle, verbose=False): function new_rnn_cast (line 222) | def new_rnn_cast(fn, handle, verbose=False): function disable_casts (line 267) | def disable_casts(mod, fn, handle): FILE: KoSimCSE/apex/contrib/bottleneck/bottleneck.py function kaiming_uniform_ (line 5) | def kaiming_uniform_(tensor, a=0, mode='fan_in', nonlinearity='leaky_rel... class FrozenBatchNorm2d (line 9) | class FrozenBatchNorm2d(torch.nn.Module): method __init__ (line 13) | def __init__(self, n): method get_scale_bias (line 20) | def get_scale_bias(self, nhwc=False): method forward (line 31) | def forward(self, x): function drelu_dscale1 (line 37) | def drelu_dscale1(grad_o, output, scale1): function drelu_dscale2 (line 44) | def drelu_dscale2(grad_o, output, scale1, scale2): class BottleneckFunction (line 51) | class BottleneckFunction(torch.autograd.Function): method forward (line 53) | def forward(ctx, nhwc, stride_1x1, scale, bias, x, *conv): method backward (line 75) | def backward(ctx, grad_o): function conv3x3 (line 102) | def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): function conv1x1 (line 107) | def conv1x1(in_planes, out_planes, stride=1): class Bottleneck (line 111) | class Bottleneck(torch.nn.Module): method __init__ (line 119) | def __init__(self, in_channels, bottleneck_channels, out_channels, str... method forward (line 174) | def forward(self, x): FILE: KoSimCSE/apex/contrib/csrc/bottleneck/bottleneck.cpp function checkCudnnError (line 31) | int checkCudnnError(cudnnStatus_t code, const char* expr, const char* fi... function checkError (line 42) | void checkError(cudaError_t code, char const * func, const char *file, c... function generateStrides (line 55) | void generateStrides(const int64_t* dimA, int64_t* strideA, int nbDims, ... function getFwdConvDilatedFilterDim (line 75) | int getFwdConvDilatedFilterDim(int filterDim, int dilation) { function getFwdConvPaddedImageDim (line 79) | int getFwdConvPaddedImageDim(int tensorDim, int pad) { function getFwdConvOutputDim (line 83) | int getFwdConvOutputDim( function common_conv_descriptors (line 111) | common_conv_descriptors function common_convbias_descriptors (line 173) | common_convbias_descriptors function dconv_descriptors (line 294) | dconv_descriptors function getConvFusionString (line 377) | std::string getConvFusionString(int64_t* x_dim_padded, function run_conv_scale_bias_add_activation (line 469) | void function run_conv_scale_bias (line 630) | void function run_dconv_drelu_dscale (line 759) | void function run_dconv (line 886) | void function run_dconv_add (line 992) | void function bottleneck_forward (line 1104) | std::vector bottleneck_forward(bool explicit_nhwc, int strid... function bottleneck_backward (line 1287) | std::vector bottleneck_backward(bool explicit_nhwc, int stri... function PYBIND11_MODULE (line 1609) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: KoSimCSE/apex/contrib/csrc/fmha/fmha_api.cpp function set_params (line 33) | void set_params(Fused_multihead_attention_fprop_params ¶ms, function mha_fwd (line 86) | std::vector function mha_bwd (line 182) | std::vector function mha_fwd_nl (line 262) | std::vector mha_fwd_nl(const at::Tensor &qkv, // tot... function mha_bwd_nl (line 342) | std::vector mha_bwd_nl(const at::Tensor &dout, // tot... function PYBIND11_MODULE (line 426) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: KoSimCSE/apex/contrib/csrc/fmha/src/fmha.h type Qkv_params (line 46) | struct Qkv_params { function Qkv_params (line 59) | struct Fused_multihead_attention_fprop_params : public Qkv_params { FILE: KoSimCSE/apex/contrib/csrc/fmha/src/fmha/gemm.h function namespace (line 34) | namespace fmha { type Fragment_accumulator (line 145) | struct Fragment_accumulator function add (line 152) | void add(const Other_fragment_ &other) { FILE: KoSimCSE/apex/contrib/csrc/fmha/src/fmha/gmem_tile.h function namespace (line 30) | namespace fmha { function __device__ (line 112) | inline __device__ void store(const uint4 (&data)[LDGS]) { function __device__ (line 123) | inline __device__ void move() { function __device__ (line 201) | inline __device__ void store(const uint4 (&src)[STGS_PER_LOOP], int mi) { function __device__ (line 222) | inline __device__ void move() { function __device__ (line 273) | __device__ Gmem_tile_mma_sd(void *ptr, const Params ¶ms, const int t... function __device__ (line 288) | inline __device__ void store(const Type &data, const int mi, const int n... function __device__ (line 300) | inline __device__ void move() { function Base (line 311) | struct Gmem_tile_mma_s : public Base { function Base (line 404) | struct Gmem_tile_dq : public Base { FILE: KoSimCSE/apex/contrib/csrc/fmha/src/fmha/mask.h function namespace (line 30) | namespace fmha { FILE: KoSimCSE/apex/contrib/csrc/fmha/src/fmha/smem_tile.h function namespace (line 33) | namespace fmha { function __device__ (line 396) | inline __device__ Smem_tile_row_a(void *smem, int tidx) : Base(smem, tid... function __device__ (line 462) | inline __device__ void reset_read_offset() { function __device__ (line 494) | inline __device__ Smem_tile_a(void *smem, int tidx) : Base(smem, tidx) { function __device__ (line 581) | inline __device__ Smem_tile_col_b(void *smem, int tidx) : Base(smem, tid... function __device__ (line 653) | inline __device__ void reset_read_offset() { function __device__ (line 685) | inline __device__ Smem_tile_b(void *smem, int tidx) : Base(smem, tidx) { function __device__ (line 748) | inline __device__ Smem_tile_row_b(void *smem, int tidx) : Base(smem, tid... function __device__ (line 892) | inline __device__ Smem_tile_b(void *smem, int tidx) : Base(smem, tidx) { function __device__ (line 912) | inline __device__ Smem_tile_v(void *smem, int tidx) : Base(smem, tidx) { function __device__ (line 1003) | inline __device__ Smem_tile_o(void *smem, int tidx) { function store (line 1057) | void store(const Accumulator (&acc)[M][N], int mi) { function __device__ (line 1129) | inline __device__ Smem_tile_mma(char *smem, int tidx) { function store (line 1147) | void store(const uint4 (®s)[M][N]) { function __device__ (line 1177) | inline __device__ Smem_tile_mma_transposed(char *smem, int tidx) : Base(... function load (line 1189) | void load(Fragment (&frag)[M][N]) { function __device__ (line 1223) | inline __device__ Smem_tile_mma_epilogue(char *smem, int tidx) : Base(sm... function store (line 1238) | void store(const Acc (&acc)[M][N]){ function store (line 1272) | void store(const uint4 (®s)[M][N]) { FILE: KoSimCSE/apex/contrib/csrc/fmha/src/fmha/softmax.h function namespace (line 30) | namespace fmha { FILE: KoSimCSE/apex/contrib/csrc/fmha/src/fmha/utils.h function namespace (line 38) | namespace fmha { function __device__ (line 247) | static inline __device__ uint32_t hadd2(uint32_t a, uint32_t b) { function __device__ (line 255) | static inline __device__ uint32_t hmin2(uint32_t a, uint32_t b) { function __device__ (line 263) | static inline __device__ uint32_t hmul2(uint32_t a, uint32_t b) { function __device__ (line 271) | static inline __device__ uint2 hmul4(uint2 a, uint2 b) { function __device__ (line 280) | static inline __device__ uint4 hmul8(uint4 a, uint4 b) { function __device__ (line 291) | static inline __device__ uint4 hmul8(uint32_t a, uint4 b) { function __device__ (line 317) | static inline __device__ uint32_t habs2(uint32_t x) { function __device__ (line 332) | static inline __device__ uint16_t clamp_to_zero(uint16_t x) { function __device__ (line 340) | static inline __device__ uint16_t float_to_half(float f) { function __device__ (line 348) | static inline __device__ uint32_t float2_to_half2(float a, float b) { function __device__ (line 362) | static inline __device__ uint32_t float_to_half2(float a) { function __device__ (line 368) | static inline __device__ uint32_t float2_to_half2(const float2 &f) { function __device__ (line 374) | static inline __device__ uint2 float4_to_half4(float x, float y, float z... function __device__ (line 383) | static inline __device__ uint32_t hfma2(uint32_t a, uint32_t b, uint32_t... function __device__ (line 391) | static inline __device__ uint32_t hfma2_relu(uint32_t a, uint32_t b, uin... function __device__ (line 403) | static inline __device__ uint32_t h0_h0(uint32_t x) { function __device__ (line 412) | static inline __device__ float h0_to_float(uint32_t h2) { function __device__ (line 424) | static inline __device__ uint32_t h1_h1(uint32_t x) { function __device__ (line 433) | static inline __device__ uint16_t hadd(uint16_t a, uint16_t b) { function __device__ (line 441) | static inline __device__ uint32_t hadd(uint32_t a, uint32_t b) { function __device__ (line 447) | static inline __device__ uint2 hadd4(uint2 a, uint2 b) { function __device__ (line 456) | static inline __device__ uint2 hadd(uint2 a, uint2 b) { function __device__ (line 462) | static inline __device__ uint4 hadd8(uint4 a, uint4 b) { function __device__ (line 473) | static inline __device__ uint4 fadd4(uint4 a, uint4 b) { function __device__ (line 484) | static inline __device__ uint4 hadd(uint4 a, uint4 b) { function __device__ (line 490) | static inline __device__ float half_to_float(uint16_t h) { function __device__ (line 498) | static inline __device__ float2 half2_to_float2(uint32_t x) { function __device__ (line 514) | static inline __device__ uint16_t hfma(uint16_t a, uint16_t b, uint16_t ... function __device__ (line 522) | static inline __device__ uint16_t hmul(uint16_t a, uint16_t b) { function __device__ (line 530) | static inline __device__ float sigmoid(float x) { function __device__ (line 685) | inline __device__ Ldg_functor(Data_type (&fetch)[N], const void* (&ptrs)... function __device__ (line 690) | inline __device__ void clear(int ii) { function __device__ (line 695) | inline __device__ void load(int ii, bool p) { function __device__ (line 847) | inline __device__ void stg(void *ptr, uint8_t val) { function __device__ (line 853) | inline __device__ void stg(void *ptr, uint16_t val) { function __device__ (line 859) | inline __device__ void stg(void *ptr, uint32_t val) { function __device__ (line 865) | inline __device__ void stg(void *ptr, uint2 val) { function __device__ (line 871) | inline __device__ void stg(void *ptr, uint4 val) { function __device__ (line 881) | inline __device__ void sts(uint32_t ptr, uint16_t val) { function __device__ (line 887) | inline __device__ void sts(uint32_t ptr, uint32_t val) { function __device__ (line 893) | inline __device__ void sts(uint32_t ptr, uint2 val) { function __device__ (line 903) | inline __device__ void sts(uint32_t ptr, uint4 val) { FILE: KoSimCSE/apex/contrib/csrc/fmha/src/fmha_dgrad_kernel_1xN_reload.h function namespace (line 34) | namespace fmha { FILE: KoSimCSE/apex/contrib/csrc/fmha/src/fmha_dgrad_kernel_1xN_reload_nl.h function namespace (line 34) | namespace fmha { FILE: KoSimCSE/apex/contrib/csrc/fmha/src/fmha_fprop_kernel_1xN.h function namespace (line 34) | namespace fmha { FILE: KoSimCSE/apex/contrib/csrc/fmha/src/fmha_fprop_kernel_1xN_nl.h function namespace (line 35) | namespace fmha { FILE: KoSimCSE/apex/contrib/csrc/fmha/src/fmha_fprop_kernel_1xN_reload_v.h function namespace (line 34) | namespace fmha { FILE: KoSimCSE/apex/contrib/csrc/fmha/src/fmha_kernel.h function namespace (line 39) | namespace fmha { function __device__ (line 90) | inline __device__ Noloop_traits(const int bidc) function move_all (line 96) | void move_all(Tiles & ... tiles) const { function __device__ (line 113) | inline __device__ int offset_loop_count(const int l) { function __device__ (line 157) | inline __device__ int offset_loop_count(const int l) { FILE: KoSimCSE/apex/contrib/csrc/fmha/src/fmha_utils.h type Data_type (line 53) | enum Data_type { DATA_TYPE_FP16, DATA_TYPE_FP32, DATA_TYPE_INT32, DATA_T... function set_alpha (line 57) | static inline void set_alpha( uint32_t &alpha, float norm, Data_type dty... function get_size_in_bytes (line 75) | static inline size_t get_size_in_bytes( size_t n, Data_type dtype ) { FILE: KoSimCSE/apex/contrib/csrc/groupbn/batch_norm.h function class (line 41) | class NhwcBatchNorm { function createTensorDescriptor (line 193) | void createTensorDescriptor(cudnnTensorDescriptor_t *descriptor) { function destroyTensorDescriptor (line 199) | void destroyTensorDescriptor(cudnnTensorDescriptor_t descriptor) { type StorageType (line 223) | typedef uint16_t StorageType; function _fwdKernelLauncher (line 258) | void _fwdKernelLauncher(cudaStream_t stream, NhwcBatchNormFwdParams params, function _bwdKernelLauncher (line 338) | void _bwdKernelLauncher(cudaStream_t stream, NhwcBatchNormBwdParams params, function smem_driven_bwd_occupancy (line 469) | static int smem_driven_bwd_occupancy(int device_id, const int max_cta_pe... function std (line 478) | const std::vector NhwcBatchNorm::numWorkspaceBytes() const { function _setFwdParams (line 510) | void NhwcBatchNorm::_setFwdParams(NhwcBatchNormFwdParams *params) const { function _setFwdInferenceParams (line 534) | void NhwcBatchNorm::_setFwdInferenceParams(NhwcBatchNormFwdInferenceParams function _setBwdParams (line 548) | void NhwcBatchNorm::_setBwdParams(NhwcBatchNormBwdParams *params) const { function fwdInference (line 569) | void NhwcBatchNorm::fwdInference(cudaStream_t stream, bool use_relu) { function dim3 (line 612) | dim3 NhwcBatchNorm::calc_fwd_grid(int *loop, const int grid_dim_x) { function dim3 (line 635) | dim3 NhwcBatchNorm::calc_bwd_grid(int *loop, const int grid_dim_x) { function fwd (line 658) | void NhwcBatchNorm::fwd(cudaStream_t stream, bool use_relu, void* my_dat... function dgrad (line 697) | void NhwcBatchNorm::dgrad(cudaStream_t stream, bool use_relu, void* my_d... FILE: KoSimCSE/apex/contrib/csrc/groupbn/batch_norm_add_relu.h function class (line 41) | class NhwcBatchNormAddRelu { function createTensorDescriptor (line 197) | void createTensorDescriptor(cudnnTensorDescriptor_t *descriptor) { function destroyTensorDescriptor (line 203) | void destroyTensorDescriptor(cudnnTensorDescriptor_t descriptor) { type StorageType (line 228) | typedef uint16_t StorageType; function _fwdKernelLauncher (line 262) | void _fwdKernelLauncher(cudaStream_t stream, NhwcBatchNormFwdParams params, function _bwdKernelLauncher (line 332) | void _bwdKernelLauncher(cudaStream_t stream, NhwcBatchNormBwdParams params, function smem_driven_bwd_occupancy (line 409) | static int smem_driven_bwd_occupancy(int device_id, const int max_cta_pe... function std (line 418) | const std::vector NhwcBatchNormAddRelu::numWorkspaceBytes() const { function _setFwdParams (line 456) | void NhwcBatchNormAddRelu::_setFwdParams(NhwcBatchNormFwdParams *params)... function _setFwdInferenceParams (line 480) | void NhwcBatchNormAddRelu::_setFwdInferenceParams(NhwcBatchNormFwdInfere... function _setBwdParams (line 494) | void NhwcBatchNormAddRelu::_setBwdParams(NhwcBatchNormBwdParams *params)... function fwdInference (line 515) | void NhwcBatchNormAddRelu::fwdInference(cudaStream_t stream) { function dim3 (line 552) | dim3 NhwcBatchNormAddRelu::calc_fwd_grid(int *loop, const int grid_dim_x) { function dim3 (line 575) | dim3 NhwcBatchNormAddRelu::calc_bwd_grid(int *loop, const int grid_dim_x) { function fwd (line 598) | void NhwcBatchNormAddRelu::fwd(cudaStream_t stream, void* my_data, void*... function dgrad (line 640) | void NhwcBatchNormAddRelu::dgrad(cudaStream_t stream, void* my_data, voi... FILE: KoSimCSE/apex/contrib/csrc/groupbn/cuda_utils.h function namespace (line 5) | namespace at { FILE: KoSimCSE/apex/contrib/csrc/groupbn/interface.cpp function PYBIND11_MODULE (line 154) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: KoSimCSE/apex/contrib/csrc/groupbn/nhwc_batch_norm_kernel.h type T (line 43) | typedef T Type; type Type (line 51) | typedef int Type; function DEVICE_FUNCTION (line 247) | DEVICE_FUNCTION void write_to_gmem(float *gmem, int idx, const float (&s... function DEVICE_FUNCTION (line 253) | DEVICE_FUNCTION void write_to_gmem(float *gmem, int idx, const float (&s... function DEVICE_FUNCTION (line 259) | DEVICE_FUNCTION void scaled_write_to_gmem(float *gmem, int idx, const fl... function DEVICE_FUNCTION (line 265) | DEVICE_FUNCTION void write_to_smem(float *smem, int idx, const float (&x... function DEVICE_FUNCTION (line 271) | DEVICE_FUNCTION void write_to_smem(int *smem, int idx, const int (&x)[1]) { function DEVICE_FUNCTION (line 277) | DEVICE_FUNCTION void write_to_smem(float *smem, int idx, const float (&x... function DEVICE_FUNCTION (line 283) | DEVICE_FUNCTION void write_to_smem(int *smem, int idx, const int (&x)[2]) { function Storage (line 351) | Storage relu(Storage in) { function parallel_sums (line 544) | void parallel_sums(float *smem, float (&x)[ELEMENTS_PER_LDG], int nhw) { type ParallelSums (line 637) | struct ParallelSums type ParallelSums (line 650) | struct ParallelSums function div_up (line 661) | static inline int div_up(int m, int n) { function DEVICE_FUNCTION (line 668) | DEVICE_FUNCTION void inter_block_sync(int* gmem_retired_ctas, int expect... type NhwcBatchNormFwdInferenceParams (line 697) | struct NhwcBatchNormFwdInferenceParams { type NhwcBatchNormFwdParams (line 799) | struct NhwcBatchNormFwdParams { type PackedStorage (line 870) | typedef PackedStorage PackedStorage_; type typename (line 872) | typedef typename PackedStorage_::Type PackedStorageType; type NhwcBatchNormBwdParams (line 1388) | struct NhwcBatchNormBwdParams { function nhwc_batch_norm_bwd (line 1528) | void nhwc_batch_norm_bwd(NhwcBatchNormBwdParams params) { function nhwc_batch_norm_bwd_relu (line 1892) | void nhwc_batch_norm_bwd_relu(NhwcBatchNormBwdParams params) { function nhwc_batch_norm_bwd_add_relu (line 2280) | void nhwc_batch_norm_bwd_add_relu(NhwcBatchNormBwdParams params) { FILE: KoSimCSE/apex/contrib/csrc/layer_norm/ln_api.cpp function ln_fwd (line 15) | std::vector ln_fwd(const at::Tensor &x, // BxSxhidden_size function ln_bwd (line 58) | std::vector ln_bwd(const at::Tensor &dw, // BxSxhidden_size function PYBIND11_MODULE (line 102) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: KoSimCSE/apex/contrib/csrc/multihead_attn/additive_masked_softmax_dropout.cpp type multihead_attn (line 5) | namespace multihead_attn { type fused_softmax (line 6) | namespace fused_softmax { type additive_mask_softmax_dropout (line 7) | namespace additive_mask_softmax_dropout { function fwd (line 31) | std::vector fwd( function bwd (line 57) | torch::Tensor bwd( function PYBIND11_MODULE (line 87) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: KoSimCSE/apex/contrib/csrc/multihead_attn/encdec_multihead_attn.cpp type multihead_attn (line 4) | namespace multihead_attn { type encdec (line 5) | namespace encdec { type cublas_gemmex (line 6) | namespace cublas_gemmex { function fwd (line 43) | std::vector fwd( function bwd (line 88) | std::vector bwd( function PYBIND11_MODULE (line 153) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: KoSimCSE/apex/contrib/csrc/multihead_attn/encdec_multihead_attn_norm_add.cpp type multihead_attn (line 4) | namespace multihead_attn { type encdec_norm_add (line 5) | namespace encdec_norm_add { type cublas_gemmex (line 6) | namespace cublas_gemmex { function fwd (line 52) | std::vector fwd( function bwd (line 105) | std::vector bwd( function PYBIND11_MODULE (line 194) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: KoSimCSE/apex/contrib/csrc/multihead_attn/layer_norm.h function rsqrt (line 230) | float rsqrt(float v) { function rsqrt (line 233) | double rsqrt(double v) { function float (line 256) | struct SharedMemory function double (line 266) | struct SharedMemory function stream (line 653) | auto stream = at::cuda::getCurrentCUDAStream().stream(); FILE: KoSimCSE/apex/contrib/csrc/multihead_attn/masked_softmax_dropout.cpp type multihead_attn (line 4) | namespace multihead_attn { type fused_softmax (line 5) | namespace fused_softmax { type mask_softmax_dropout (line 6) | namespace mask_softmax_dropout { function fwd (line 31) | std::vector fwd( function bwd (line 57) | torch::Tensor bwd( function PYBIND11_MODULE (line 89) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: KoSimCSE/apex/contrib/csrc/multihead_attn/philox.h function class (line 4) | class Philox { function __device__ (line 17) | __device__ inline uint4 operator()() { function __device__ (line 45) | __device__ inline void incr_n(unsigned long long n) { function __device__ (line 58) | __device__ inline void incr() { function mulhilo32 (line 67) | __device__ unsigned int mulhilo32(unsigned int a, unsigned int b, function __device__ (line 72) | __device__ inline uint4 single_round(uint4 ctr, uint2 key) { function __device__ (line 87) | __device__ __inline__ float4 uniform4(uint4 x) { FILE: KoSimCSE/apex/contrib/csrc/multihead_attn/self_multihead_attn.cpp type multihead_attn (line 4) | namespace multihead_attn { type self (line 5) | namespace self { type cublas_gemmex (line 6) | namespace cublas_gemmex { function fwd (line 39) | std::vector fwd( function bwd (line 75) | std::vector bwd( function PYBIND11_MODULE (line 128) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: KoSimCSE/apex/contrib/csrc/multihead_attn/self_multihead_attn_bias.cpp type multihead_attn (line 4) | namespace multihead_attn { type self_bias (line 5) | namespace self_bias { type cublas_gemmex (line 6) | namespace cublas_gemmex { function fwd (line 43) | std::vector fwd( function bwd (line 82) | std::vector bwd( function PYBIND11_MODULE (line 135) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: KoSimCSE/apex/contrib/csrc/multihead_attn/self_multihead_attn_bias_additive_mask.cpp type multihead_attn (line 5) | namespace multihead_attn { type self_bias_additive_mask (line 6) | namespace self_bias_additive_mask { type cublas_gemmex (line 7) | namespace cublas_gemmex { function fwd (line 46) | std::vector fwd( function bwd (line 86) | std::vector bwd( function PYBIND11_MODULE (line 139) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: KoSimCSE/apex/contrib/csrc/multihead_attn/self_multihead_attn_norm_add.cpp type multihead_attn (line 4) | namespace multihead_attn { type self_norm_add (line 5) | namespace self_norm_add { type cublas_gemmex (line 6) | namespace cublas_gemmex { function fwd (line 47) | std::vector fwd( function bwd (line 93) | std::vector bwd( function PYBIND11_MODULE (line 169) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: KoSimCSE/apex/contrib/csrc/multihead_attn/softmax.h function acc_t (line 139) | acc_t sum[WARP_BATCH] { 0.0f }; function acc_t (line 363) | acc_t sum[WARP_BATCH] { 0.0f }; function additive_masked_softmax_dropout_warp_forward (line 429) | void additive_masked_softmax_dropout_warp_forward(output_t *dst, uint8_t... function softmax_warp_backward (line 2244) | void softmax_warp_backward(__half *gradInput, const __half *grad, const ... function masked_softmax_warp_backward (line 2455) | void masked_softmax_warp_backward(__half *gradInput, const __half *grad,... FILE: KoSimCSE/apex/contrib/csrc/multihead_attn/strided_batched_gemm.h function cublasOperation_t (line 21) | cublasOperation_t convertTransToCublasOperation(char trans) { function CublasStridedBatchedGemm (line 31) | void CublasStridedBatchedGemm(THCState *state, char transa, char transb,... type cutlass (line 78) | typedef cutlass::gemm::Gemm Gemm; function gemm_switch_fp32accum (line 149) | void gemm_switch_fp32accum(THCState *state, char transa, char transb, lo... function adjustLdLevel3 (line 278) | void adjustLdLevel3(char transa, char transb, int64_t m, int64_t n, int6... function HgemmStridedBatched (line 312) | void HgemmStridedBatched(THCState *state, char transa, char transb, long... FILE: KoSimCSE/apex/contrib/csrc/optimizers/fused_adam_cuda.cpp function strided_check_finite (line 20) | void strided_check_finite( function adam (line 29) | void adam(at::Tensor & p, at::Tensor & p_copy, at::Tensor & m, at::Tenso... function reversible_adam (line 43) | void reversible_adam(at::Tensor & p, at::Tensor & p_copy, at::Tensor & m... function maybe_adam_undo (line 57) | void maybe_adam_undo(at::Tensor & overflow_flag, at::Tensor & p, at::Ten... function maybe_cast (line 69) | void maybe_cast(at::Tensor & overflow_flag, at::Tensor & p_in, at::Tenso... function PYBIND11_MODULE (line 78) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: KoSimCSE/apex/contrib/csrc/optimizers/fused_lamb_cuda.cpp function PYBIND11_MODULE (line 19) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: KoSimCSE/apex/contrib/csrc/optimizers/multi_tensor_distopt_adam.cpp function PYBIND11_MODULE (line 17) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: KoSimCSE/apex/contrib/csrc/optimizers/multi_tensor_distopt_lamb.cpp function PYBIND11_MODULE (line 31) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: KoSimCSE/apex/contrib/csrc/transducer/transducer_joint.cpp function transducer_joint_forward (line 33) | std::vector transducer_joint_forward( function transducer_joint_backward (line 67) | std::vector transducer_joint_backward( function PYBIND11_MODULE (line 95) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: KoSimCSE/apex/contrib/csrc/transducer/transducer_loss.cpp function transducer_loss_forward (line 35) | std::vector transducer_loss_forward( function transducer_loss_backward (line 65) | torch::Tensor transducer_loss_backward( function PYBIND11_MODULE (line 106) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: KoSimCSE/apex/contrib/csrc/xentropy/interface.cpp function softmax_xentropy_forward (line 24) | std::vector softmax_xentropy_forward( function softmax_xentropy_backward (line 35) | at::Tensor softmax_xentropy_backward( function PYBIND11_MODULE (line 49) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { FILE: KoSimCSE/apex/contrib/fmha/fmha.py class FMHAFun (line 33) | class FMHAFun(torch.autograd.Function): method forward (line 35) | def forward(ctx, qkv, cu_seqlens, p_dropout, max_s, is_training): method backward (line 48) | def backward(ctx, dout): class FMHA (line 58) | class FMHA(torch.nn.Module): method __init__ (line 60) | def __init__(self, config): method forward (line 70) | def forward(self, qkv, cu_seqlens, max_s, is_training=True): FILE: KoSimCSE/apex/contrib/groupbn/batch_norm.py class bn_NHWC_impl (line 7) | class bn_NHWC_impl(torch.autograd.Function): method forward (line 9) | def forward(ctx, x, s, b, rm, riv, mini_m, mini_riv, ret_cta, mom, eps... method backward (line 32) | def backward(ctx, grad_y): class bn_addrelu_NHWC_impl (line 53) | class bn_addrelu_NHWC_impl(torch.autograd.Function): method forward (line 55) | def forward(ctx, x, z, s, b, rm, riv, mini_m, mini_riv, grid_dim_y, re... method backward (line 78) | def backward(ctx, grad_y): class BatchNorm2d_NHWC (line 101) | class BatchNorm2d_NHWC(_BatchNorm): method __init__ (line 103) | def __init__(self, num_features, fuse_relu=False, bn_group=1, max_cta_... method forward (line 196) | def forward(self, x, z=None): method __del__ (line 219) | def __del__(self): FILE: KoSimCSE/apex/contrib/layer_norm/layer_norm.py class FastLayerNormFN (line 6) | class FastLayerNormFN(torch.autograd.Function): method forward (line 8) | def forward(ctx, x, gamma, beta, epsilon): method backward (line 19) | def backward(ctx, dy): class FastLayerNorm (line 31) | class FastLayerNorm(torch.nn.Module): method __init__ (line 32) | def __init__(self, hidden_size, eps=1e-5): method reset_parameters (line 39) | def reset_parameters(self): method forward (line 43) | def forward(self, x): FILE: KoSimCSE/apex/contrib/multihead_attn/encdec_multihead_attn.py function jit_dropout_add (line 19) | def jit_dropout_add(x, residual, prob, is_training): class EncdecMultiheadAttn (line 26) | class EncdecMultiheadAttn(nn.Module): method __init__ (line 31) | def __init__(self, embed_dim, num_heads, dropout=0., bias=False, inclu... method reset_parameters (line 79) | def reset_parameters(self): method forward (line 98) | def forward(self, query, key, value, key_padding_mask=None, need_weigh... FILE: KoSimCSE/apex/contrib/multihead_attn/encdec_multihead_attn_func.py class EncdecAttnFunc (line 5) | class EncdecAttnFunc(torch.autograd.Function): method forward (line 7) | def forward(ctx, use_time_mask, is_training, heads, scale, inputs_q, i... method backward (line 135) | def backward(ctx, output_grads): FILE: KoSimCSE/apex/contrib/multihead_attn/fast_encdec_multihead_attn_func.py class FastEncdecAttnFunc (line 5) | class FastEncdecAttnFunc(torch.autograd.Function): method forward (line 7) | def forward(ctx, use_time_mask, is_training, heads, inputs_q, inputs_k... method backward (line 50) | def backward(ctx, output_grads): FILE: KoSimCSE/apex/contrib/multihead_attn/fast_encdec_multihead_attn_norm_add_func.py class FastEncdecAttnNormAddFunc (line 12) | class FastEncdecAttnNormAddFunc(torch.autograd.Function): method forward (line 14) | def forward(ctx, use_time_mask, is_training, heads, inputs_q, inputs_k... method backward (line 69) | def backward(ctx, output_grads): FILE: KoSimCSE/apex/contrib/multihead_attn/fast_self_multihead_attn_func.py class FastSelfAttnFunc (line 6) | class FastSelfAttnFunc(torch.autograd.Function) : method forward (line 8) | def forward(ctx, use_time_mask, is_training, heads, inputs, input_weig... method backward (line 120) | def backward(ctx, output_grads): FILE: KoSimCSE/apex/contrib/multihead_attn/fast_self_multihead_attn_norm_add_func.py class FastSelfAttnNormAddFunc (line 5) | class FastSelfAttnNormAddFunc(torch.autograd.Function): method forward (line 7) | def forward(ctx, use_time_mask, is_training, heads, inputs, lyr_nrm_ga... method backward (line 56) | def backward(ctx, output_grads): FILE: KoSimCSE/apex/contrib/multihead_attn/mask_softmax_dropout_func.py class MaskSoftmaxDropout (line 6) | class MaskSoftmaxDropout(torch.autograd.Function) : method forward (line 8) | def forward(ctx, is_training, heads, inputs, pad_mask, mask_additive, ... method backward (line 51) | def backward(ctx, output_grads): FILE: KoSimCSE/apex/contrib/multihead_attn/self_multihead_attn.py function jit_dropout_add (line 19) | def jit_dropout_add(x, residual, prob, is_training): class SelfMultiheadAttn (line 26) | class SelfMultiheadAttn(nn.Module): method __init__ (line 31) | def __init__(self, embed_dim, num_heads, dropout=0., bias=False, inclu... method reset_parameters (line 97) | def reset_parameters(self): method forward (line 124) | def forward(self, query, key, value, key_padding_mask=None, need_weigh... FILE: KoSimCSE/apex/contrib/multihead_attn/self_multihead_attn_func.py class SelfAttnFunc (line 4) | class SelfAttnFunc(torch.autograd.Function): method forward (line 6) | def forward(ctx, use_time_mask, is_training, heads, scale, inputs, method backward (line 121) | def backward(ctx, output_grads): FILE: KoSimCSE/apex/contrib/optimizers/distributed_fused_adam.py class DistributedFusedAdam (line 9) | class DistributedFusedAdam(torch.optim.Optimizer): method __init__ (line 55) | def __init__(self, params, method _first_step_init (line 128) | def _first_step_init(self): method _init_everything (line 373) | def _init_everything(self): method set_last_step (line 378) | def set_last_step(self, last_step): method _get_flush_block (line 381) | def _get_flush_block(self): method _pipeline_block_reductions (line 397) | def _pipeline_block_reductions(self, block_id): method __launch_step_kernel (line 443) | def __launch_step_kernel(self): method _pipeline_step (line 469) | def _pipeline_step(self): method _flatten_grad_mt (line 479) | def _flatten_grad_mt(self, scale): method _do_overlapped_reduction (line 489) | def _do_overlapped_reduction(self, param_i, param_grads_size, param_of... method set_global_scale (line 504) | def set_global_scale(self, global_scale): method global_scale (line 510) | def global_scale(self): method has_overflow (line 514) | def has_overflow(self): method peek_overflow (line 523) | def peek_overflow(self): method strided_check_finite (line 529) | def strided_check_finite(self, output_params, stride=1, start=-1, end=... method L2_grad_norm (line 545) | def L2_grad_norm(self): method complete_reductions (line 552) | def complete_reductions(self): method step (line 577) | def step(self, closure=None): method state_dict (line 598) | def state_dict(self): method load_state_dict (line 615) | def load_state_dict(self, state_dict): FILE: KoSimCSE/apex/contrib/optimizers/distributed_fused_adam_v2.py class DistributedFusedAdamV2 (line 7) | class DistributedFusedAdamV2(torch.optim.Optimizer): method __init__ (line 43) | def __init__(self, params, method set_last_step (line 351) | def set_last_step(self, last_step): method _get_flush_block (line 354) | def _get_flush_block(self): method _pipeline_block_reductions (line 370) | def _pipeline_block_reductions(self, block_id): method __launch_step_kernel (line 406) | def __launch_step_kernel(self, p, p_copy, m, v, g): method _pipeline_block_step (line 425) | def _pipeline_block_step(self, block_id): method _pipeline_step (line 445) | def _pipeline_step(self): method _flatten_grad_mt (line 460) | def _flatten_grad_mt(self, scale): method _do_overlapped_reduction (line 470) | def _do_overlapped_reduction(self, param_i, param_grads_size, param_of... method set_global_scale (line 487) | def set_global_scale(self, global_scale): method global_scale (line 493) | def global_scale(self): method has_overflow (line 497) | def has_overflow(self): method peek_overflow (line 506) | def peek_overflow(self): method strided_check_finite (line 512) | def strided_check_finite(self, output_params, stride=1, start=-1, end=... method L2_grad_norm (line 528) | def L2_grad_norm(self): method complete_reductions (line 535) | def complete_reductions(self): method revert_step (line 560) | def revert_step(self): method step (line 586) | def step(self, closure=None, skip_overflow_check=False): FILE: KoSimCSE/apex/contrib/optimizers/distributed_fused_adam_v3.py class DistributedFusedAdamV3 (line 7) | class DistributedFusedAdamV3(torch.optim.Optimizer): method __init__ (line 43) | def __init__(self, params, method has_overflow (line 196) | def has_overflow(self): method set_last_step (line 199) | def set_last_step(self, last_step): method _get_flush_block (line 202) | def _get_flush_block(self): method __launch_step_kernel (line 218) | def __launch_step_kernel(self, p, p_copy, m, v, g): method _flatten_grad_mt (line 237) | def _flatten_grad_mt(self, scale): method _do_overlapped_reduction (line 247) | def _do_overlapped_reduction(self, param_i, param_grads_size, param_of... method set_global_scale (line 268) | def set_global_scale(self, global_scale): method global_scale (line 274) | def global_scale(self): method L2_grad_norm (line 278) | def L2_grad_norm(self): method complete_reductions (line 282) | def complete_reductions(self): method step (line 306) | def step(self, closure=None, skip_overflow_check=False): FILE: KoSimCSE/apex/contrib/optimizers/distributed_fused_lamb.py class DistributedFusedLAMB (line 9) | class DistributedFusedLAMB(torch.optim.Optimizer): class AtomicCounter (line 70) | class AtomicCounter(object): method __init__ (line 71) | def __init__(self): method add (line 77) | def add(self, idx): method __init__ (line 82) | def __init__(self, params, method _lazy_init_stage1 (line 210) | def _lazy_init_stage1(self): method _lazy_init_stage2 (line 330) | def _lazy_init_stage2(self): method set_is_accumulation_step (line 451) | def set_is_accumulation_step(self, is_accumulation_step): method set_last_step (line 454) | def set_last_step(self, last_step): method _get_flush_block (line 457) | def _get_flush_block(self): method _pipeline_block_reductions (line 473) | def _pipeline_block_reductions(self, block_id): method __compute_contrib_param_norm (line 556) | def __compute_contrib_param_norm(self): method __compute_contrib_update_norm (line 569) | def __compute_contrib_update_norm(self): method _pipeline_step (line 577) | def _pipeline_step(self): method _flatten_grad_mt (line 633) | def _flatten_grad_mt(self, scale): method _do_overlapped_reduction (line 651) | def _do_overlapped_reduction(self, param_i, param): method set_global_scale (line 667) | def set_global_scale(self, global_scale): method global_scale (line 673) | def global_scale(self): method L2_grad_norm (line 677) | def L2_grad_norm(self): method complete_reductions (line 681) | def complete_reductions(self): method step (line 704) | def step(self, closure=None, grad_scaler=None): method state_dict (line 740) | def state_dict(self): method load_state_dict (line 757) | def load_state_dict(self, state_dict): FILE: KoSimCSE/apex/contrib/optimizers/fp16_optimizer.py class FP16_Optimizer (line 4) | class FP16_Optimizer(object): method __init__ (line 25) | def __init__(self, method zero_grad (line 79) | def zero_grad(self, set_grads_to_None=True): method step (line 94) | def step(self, closure=None): method backward (line 132) | def backward(self, loss): method _update_scale (line 142) | def _update_scale(self, skip): method _get_state (line 161) | def _get_state(self): method _set_state (line 164) | def _set_state(self, value): method _get_param_groups (line 171) | def _get_param_groups(self): method _set_param_groups (line 174) | def _set_param_groups(self, value): method state_dict (line 179) | def state_dict(self): method load_state_dict (line 202) | def load_state_dict(self, state_dict): FILE: KoSimCSE/apex/contrib/optimizers/fused_adam.py class FusedAdam (line 6) | class FusedAdam(torch.optim.Optimizer): method __init__ (line 38) | def __init__(self, params, method step (line 64) | def step(self, closure=None, grads=None, output_params=None, scale=1.,... FILE: KoSimCSE/apex/contrib/optimizers/fused_lamb.py class FusedLAMB (line 6) | class FusedLAMB(torch.optim.Optimizer): method __init__ (line 63) | def __init__(self, params, lr=1e-3, bias_correction=True, method zero_grad (line 87) | def zero_grad(self): method step (line 95) | def step(self, closure=None): FILE: KoSimCSE/apex/contrib/optimizers/fused_sgd.py class FusedSGD (line 7) | class FusedSGD(Optimizer): method __init__ (line 66) | def __init__(self, params, lr=required, momentum=0, dampening=0, method __setstate__ (line 93) | def __setstate__(self, state): method get_momentums (line 98) | def get_momentums(self, params): method step (line 115) | def step(self, closure=None, grads=None, output_params=None, scale=1.,... FILE: KoSimCSE/apex/contrib/sparsity/asp.py function eligible_modules (line 12) | def eligible_modules(model, whitelist_layer_types, allowed_layer_names, ... class ASP (line 21) | class ASP: method init_model_for_pruning (line 29) | def init_model_for_pruning(cls, model, mask_calculator="m4n2_1d", method init_optimizer_for_pruning (line 127) | def init_optimizer_for_pruning(cls, optimizer): method compute_sparse_masks (line 155) | def compute_sparse_masks(cls): method restore_pruned_weights (line 176) | def restore_pruned_weights(cls): method is_sparsity_enabled (line 191) | def is_sparsity_enabled(cls): method prune_trained_model (line 212) | def prune_trained_model(cls, model, optimizer): FILE: KoSimCSE/apex/contrib/sparsity/sparse_masklib.py function fill (line 9) | def fill(x): function reshape_1d (line 13) | def reshape_1d(matrix, m): function compute_valid_1d_patterns (line 25) | def compute_valid_1d_patterns(m,n): function mn_1d_best (line 37) | def mn_1d_best(matrix, m, n): function m4n2_1d (line 49) | def m4n2_1d(mat, density): function mn_2d_greedy (line 67) | def mn_2d_greedy(matrix, m, n): function m4n2_2d_greedy (line 98) | def m4n2_2d_greedy(mat, density): function compute_valid_2d_patterns (line 103) | def compute_valid_2d_patterns(m,n): function mn_2d_best (line 122) | def mn_2d_best(matrix, m, n): function m4n2_2d_best (line 140) | def m4n2_2d_best(mat, density): function create_mask (line 145) | def create_mask(tensor, pattern="m4n2_1d", density=0.5): FILE: KoSimCSE/apex/contrib/sparsity/test/checkpointing_test_part1.py function build_model (line 7) | def build_model(args): function train_step (line 21) | def train_step(args, model, optimizer, input_batch, target_batch, step): function train_loop (line 31) | def train_loop(args, model, optimizer, step, num_steps): function main (line 38) | def main(args): class Args (line 76) | class Args: FILE: KoSimCSE/apex/contrib/sparsity/test/checkpointing_test_part2.py function build_model (line 7) | def build_model(args): function train_step (line 21) | def train_step(args, model, optimizer, input_batch, target_batch, step): function train_loop (line 31) | def train_loop(args, model, optimizer, step, num_steps): function main (line 38) | def main(step, args, model_state_dict, optimizer_state_dict): class Args (line 61) | class Args: FILE: KoSimCSE/apex/contrib/sparsity/test/checkpointing_test_reference.py function build_model (line 11) | def build_model(args): function train_step (line 25) | def train_step(args, model, optimizer, input_batch, target_batch, step): function train_loop (line 35) | def train_loop(args, model, optimizer, step, num_steps): function main (line 42) | def main(args): class Args (line 79) | class Args: FILE: KoSimCSE/apex/contrib/sparsity/test/toy_problem.py function build_model (line 7) | def build_model(args): function train_step (line 21) | def train_step(args, model, optimizer, input_batch, target_batch, step): function train_loop (line 31) | def train_loop(args, model, optimizer, step, num_steps): function main (line 38) | def main(args): class Args (line 75) | class Args: FILE: KoSimCSE/apex/contrib/test/fmha/test_fmha.py function py_mha (line 37) | def py_mha(qkv, amask, b, s, h, d): class TestFMHA (line 52) | class TestFMHA(unittest.TestCase): method run_test (line 54) | def run_test(self, s, b): method test_128 (line 106) | def test_128(self): method test_256 (line 109) | def test_256(self): method test_384 (line 112) | def test_384(self): method test_512 (line 115) | def test_512(self): FILE: KoSimCSE/apex/contrib/test/layer_norm/test_fast_layer_norm.py class GPUTimer (line 12) | class GPUTimer: method __init__ (line 13) | def __init__(self, stream): method start (line 17) | def start(self): method stop (line 19) | def stop(self): method sync (line 21) | def sync(self): method millis (line 23) | def millis(self): function size_in_bytes (line 26) | def size_in_bytes(t): function abs_err (line 28) | def abs_err(x, y): class TestFastLayerNorm (line 35) | class TestFastLayerNorm(unittest.TestCase): method setUp (line 37) | def setUp(self, seed=1234): method test_ln_fp32 (line 42) | def test_ln_fp32(self): method test_ln_fp16 (line 44) | def test_ln_fp16(self): method run_test_layer_norm (line 47) | def run_test_layer_norm(self, dtype, atol, rtol=1e-5): method test_performance (line 94) | def test_performance(self): FILE: KoSimCSE/apex/contrib/test/multihead_attn/test_encdec_multihead_attn.py class EncdecMultiheadAttnTest (line 7) | class EncdecMultiheadAttnTest(unittest.TestCase): method setUp (line 8) | def setUp(self, seed=1234): method test_encdec_multihead_attn (line 49) | def test_encdec_multihead_attn(self) : method test_encdec_multihead_attn_time_mask (line 76) | def test_encdec_multihead_attn_time_mask(self) : method test_encdec_multihead_attn_pad_mask (line 105) | def test_encdec_multihead_attn_pad_mask(self) : FILE: KoSimCSE/apex/contrib/test/multihead_attn/test_encdec_multihead_attn_norm_add.py class EncdecMultiheadAttnNormAddTest (line 7) | class EncdecMultiheadAttnNormAddTest(unittest.TestCase): method setUp (line 8) | def setUp(self, seed=1234): method test_encdec_multihead_attn_norm_add (line 49) | def test_encdec_multihead_attn_norm_add(self) : FILE: KoSimCSE/apex/contrib/test/multihead_attn/test_fast_self_multihead_attn_bias.py class SelfMultiheadAttnTest (line 7) | class SelfMultiheadAttnTest(unittest.TestCase): method setUp (line 8) | def setUp(self, seed=1234): method test_self_multihead_attn_additive_mask (line 48) | def test_self_multihead_attn_additive_mask(self) : FILE: KoSimCSE/apex/contrib/test/multihead_attn/test_mha_fused_softmax.py class FusedSoftmaxTest (line 6) | class FusedSoftmaxTest(unittest.TestCase): method setUp (line 7) | def setUp(self, seed=1234): method test_fused_softmax (line 24) | def test_fused_softmax(self) : FILE: KoSimCSE/apex/contrib/test/multihead_attn/test_self_multihead_attn.py class SelfMultiheadAttnTest (line 7) | class SelfMultiheadAttnTest(unittest.TestCase): method setUp (line 8) | def setUp(self, seed=1234): method test_self_multihead_attn (line 45) | def test_self_multihead_attn(self) : method test_self_multihead_attn_time_mask (line 71) | def test_self_multihead_attn_time_mask(self) : method test_self_multihead_attn_pad_mask (line 100) | def test_self_multihead_attn_pad_mask(self) : FILE: KoSimCSE/apex/contrib/test/multihead_attn/test_self_multihead_attn_norm_add.py class SelfMultiheadAttnNormAddTest (line 7) | class SelfMultiheadAttnNormAddTest(unittest.TestCase): method setUp (line 8) | def setUp(self, seed=1234): method test_self_multihead_attn_norm_add (line 45) | def test_self_multihead_attn_norm_add(self) : FILE: KoSimCSE/apex/contrib/test/test_label_smoothing.py function label_smoothing_raw (line 10) | def label_smoothing_raw(x, target, padding_idx, smoothing): function label_smoothing_opt_1 (line 20) | def label_smoothing_opt_1(x, target, padding_idx, smoothing): class LabelSmoothingTest (line 30) | class LabelSmoothingTest(unittest.TestCase): method setUp (line 31) | def setUp(self, seed=1234): method gen_test_inputs (line 40) | def gen_test_inputs(self, N, T, H, smoothing, padding_idx): method print_max_diff_elem (line 50) | def print_max_diff_elem(self, ref, tst): method test_label_smoothing_function (line 57) | def test_label_smoothing_function(self): method test_label_smoothing_perf (line 91) | def test_label_smoothing_perf(self): FILE: KoSimCSE/apex/contrib/test/transducer/test_transducer_joint.py class TransducerJointTest (line 6) | class TransducerJointTest(unittest.TestCase): method setUp (line 7) | def setUp(self, seed=1234): method gen_input (line 11) | def gen_input(self, for_vector_kernel): method _pack (line 41) | def _pack(self, x, f_len, g_len): method _unpack (line 53) | def _unpack(self, x, f_len, g_len): method run_transducer_joint (line 67) | def run_transducer_joint(self, for_vector_kernel, pack_output, relu, d... method test_transducer_joint (line 118) | def test_transducer_joint(self): method test_transducer_joint_vec (line 121) | def test_transducer_joint_vec(self): method test_transducer_joint_pack (line 124) | def test_transducer_joint_pack(self): method test_transducer_joint_vec_pack (line 127) | def test_transducer_joint_vec_pack(self): method test_transducer_joint_relu (line 130) | def test_transducer_joint_relu(self): method test_transducer_joint_vec_relu (line 133) | def test_transducer_joint_vec_relu(self): method test_transducer_joint_pack_relu (line 136) | def test_transducer_joint_pack_relu(self): method test_transducer_joint_vec_pack_relu (line 139) | def test_transducer_joint_vec_pack_relu(self): method test_transducer_joint_relu_dropout (line 142) | def test_transducer_joint_relu_dropout(self): method test_transducer_joint_vec_relu_dropout (line 145) | def test_transducer_joint_vec_relu_dropout(self): method test_transducer_joint_pack_relu_dropout (line 148) | def test_transducer_joint_pack_relu_dropout(self): method test_transducer_joint_vec_pack_relu_dropout (line 151) | def test_transducer_joint_vec_pack_relu_dropout(self): FILE: KoSimCSE/apex/contrib/test/transducer/test_transducer_loss.py class TransducerLossTest (line 6) | class TransducerLossTest(unittest.TestCase): method setUp (line 7) | def setUp(self, seed=1234): method gen_input (line 11) | def gen_input(self, scalar_t, for_vector_kernel): method _pack (line 41) | def _pack(self, x): method _unpack (line 52) | def _unpack(self, x): method run_transducer_loss (line 64) | def run_transducer_loss(self, scalar_t, fuse_softmax_backward, packed_... method test_transducer_loss_fp32 (line 90) | def test_transducer_loss_fp32(self): method test_transducer_loss_fp16 (line 98) | def test_transducer_loss_fp16(self): method test_transducer_loss_fp16_backward_fusion (line 106) | def test_transducer_loss_fp16_backward_fusion(self): method test_transducer_loss_fp16_backward_fusion_packed (line 114) | def test_transducer_loss_fp16_backward_fusion_packed(self): method test_transducer_loss_fp16_backward_fusion_packed_vec (line 122) | def test_transducer_loss_fp16_backward_fusion_packed_vec(self): FILE: KoSimCSE/apex/contrib/test/transducer/transducer_ref.py function transducer_loss_reference (line 5) | def transducer_loss_reference(x, label, f_len, y_len, blank_idx, loss_gr... function transducer_joint_reference (line 79) | def transducer_joint_reference(f, g, h_grad, f_len, g_len, pack_output, ... FILE: KoSimCSE/apex/contrib/transducer/transducer.py class TransducerJoint (line 5) | class TransducerJoint(torch.nn.Module): method __init__ (line 27) | def __init__(self, pack_output=False, relu=False, dropout=False, opt=1... method forward (line 43) | def forward(self, f, g, f_len, g_len, batch_offset=None, packed_batch=0): class TransducerLoss (line 68) | class TransducerLoss(torch.nn.Module): method __init__ (line 81) | def __init__(self, fuse_softmax_backward=True, opt=1, packed_input=Fal... method forward (line 89) | def forward(self, x, label, f_len, y_len, blank_idx, batch_offset=None... class TransducerLossFunc (line 127) | class TransducerLossFunc(torch.autograd.Function): method forward (line 129) | def forward(ctx, x, label, f_len, y_len, batch_offset, max_f_len, blan... method backward (line 149) | def backward(ctx, loss_grad): class TransducerJointFunc (line 158) | class TransducerJointFunc(torch.autograd.Function): method forward (line 160) | def forward(ctx, f, g, f_len, g_len, pack_output, relu, dropout, batch... method backward (line 180) | def backward(ctx, loss_grad): FILE: KoSimCSE/apex/contrib/xentropy/softmax_xentropy.py class SoftmaxCrossEntropyLoss (line 4) | class SoftmaxCrossEntropyLoss(torch.autograd.Function): method forward (line 6) | def forward(ctx, logits, labels, smoothing=0.0, padding_idx=0, half_to... method backward (line 18) | def backward(ctx, grad_loss): FILE: KoSimCSE/apex/fp16_utils/fp16_optimizer.py class FP16_Optimizer (line 13) | class FP16_Optimizer(object): method __init__ (line 14) | def __init__(self, method maybe_print (line 110) | def maybe_print(self, msg): method __getstate__ (line 114) | def __getstate__(self): method __setstate__ (line 117) | def __setstate__(self, state): method zero_grad (line 120) | def zero_grad(self, set_grads_to_None=False): method _master_params_to_model_params (line 160) | def _master_params_to_model_params(self): method clip_master_grads (line 185) | def clip_master_grads(self, max_norm, norm_type=2): method state_dict (line 209) | def state_dict(self): method load_state_dict (line 230) | def load_state_dict(self, state_dict): method step (line 272) | def step(self, closure=None): # could add clip option. method _step_with_closure (line 334) | def _step_with_closure(self, closure): method backward (line 373) | def backward(self, loss, update_master_grads=True, retain_graph=False): method update_master_grads (line 436) | def update_master_grads(self): method inspect_master_grad_data (line 493) | def inspect_master_grad_data(self): method _get_loss_scale (line 528) | def _get_loss_scale(self): method _set_loss_scale (line 531) | def _set_loss_scale(self, value): method _get_state (line 537) | def _get_state(self): method _set_state (line 540) | def _set_state(self, value): method _get_param_groups (line 547) | def _get_param_groups(self): method _set_param_groups (line 550) | def _set_param_groups(self, value): FILE: KoSimCSE/apex/fp16_utils/fp16util.py class tofp16 (line 7) | class tofp16(nn.Module): method __init__ (line 15) | def __init__(self): method forward (line 18) | def forward(self, input): function BN_convert_float (line 22) | def BN_convert_float(module): function network_to_half (line 35) | def network_to_half(network): function convert_module (line 44) | def convert_module(module, dtype): function convert_network (line 60) | def convert_network(network, dtype): class FP16Model (line 73) | class FP16Model(nn.Module): method __init__ (line 78) | def __init__(self, network): method forward (line 82) | def forward(self, *inputs): function backwards_debug_hook (line 87) | def backwards_debug_hook(grad): function prep_param_lists (line 90) | def prep_param_lists(model, flat_master=False): function model_grads_to_master_grads (line 136) | def model_grads_to_master_grads(model_params, master_params, flat_master... function master_params_to_model_params (line 158) | def master_params_to_model_params(model_params, master_params, flat_mast... function to_python_float (line 176) | def to_python_float(t): FILE: KoSimCSE/apex/fp16_utils/loss_scaler.py function to_python_float (line 4) | def to_python_float(t): class LossScaler (line 10) | class LossScaler: method __init__ (line 22) | def __init__(self, scale=1): method has_overflow (line 26) | def has_overflow(self, params): method _has_inf_or_nan (line 30) | def _has_inf_or_nan(x): method update_scale (line 33) | def update_scale(self, overflow): method loss_scale (line 37) | def loss_scale(self): method scale_gradient (line 40) | def scale_gradient(self, module, grad_in, grad_out): method backward (line 43) | def backward(self, loss, retain_graph=False): class DynamicLossScaler (line 47) | class DynamicLossScaler: method __init__ (line 73) | def __init__(self, method has_overflow (line 84) | def has_overflow(self, params): method _has_inf_or_nan (line 92) | def _has_inf_or_nan(x): method update_scale (line 113) | def update_scale(self, overflow): method loss_scale (line 124) | def loss_scale(self): method scale_gradient (line 127) | def scale_gradient(self, module, grad_in, grad_out): method backward (line 130) | def backward(self, loss, retain_graph=False): FILE: KoSimCSE/apex/mlp/mlp.py class MlpFunction (line 8) | class MlpFunction(torch.autograd.Function): method forward (line 10) | def forward(ctx, bias, activation, *args): method backward (line 19) | def backward(ctx, grad_o): class MLP (line 26) | class MLP(torch.nn.Module): method __init__ (line 34) | def __init__(self, mlp_sizes, bias=True, activation='relu'): method reset_parameters (line 64) | def reset_parameters(self): method forward (line 74) | def forward(self, input): method extra_repr (line 77) | def extra_repr(self): FILE: KoSimCSE/apex/multi_tensor_apply/multi_tensor_apply.py class MultiTensorApply (line 3) | class MultiTensorApply(object): method __init__ (line 7) | def __init__(self, chunk_size): method check_avail (line 16) | def check_avail(self): method __call__ (line 24) | def __call__(self, op, noop_flag_buffer, tensor_lists, *args): FILE: KoSimCSE/apex/normalization/fused_layer_norm.py class FusedLayerNormAffineFunction (line 12) | class FusedLayerNormAffineFunction(torch.autograd.Function): method forward (line 15) | def forward(ctx, input, weight, bias, normalized_shape, eps): method backward (line 30) | def backward(ctx, grad_output): class FusedLayerNormFunction (line 39) | class FusedLayerNormFunction(torch.autograd.Function): method forward (line 42) | def forward(ctx, input, normalized_shape, eps): method backward (line 55) | def backward(ctx, grad_output): function fused_layer_norm_affine (line 64) | def fused_layer_norm_affine(input, normalized_shape, weight, bias, eps=1... function fused_layer_norm (line 67) | def fused_layer_norm(input, normalized_shape, eps=1e-6): class FusedLayerNorm (line 70) | class FusedLayerNorm(torch.nn.Module): method __init__ (line 129) | def __init__(self, normalized_shape, eps=1e-5, elementwise_affine=True): method reset_parameters (line 148) | def reset_parameters(self): method forward (line 153) | def forward(self, input): method extra_repr (line 163) | def extra_repr(self): FILE: KoSimCSE/apex/optimizers/fused_adagrad.py class FusedAdagrad (line 5) | class FusedAdagrad(torch.optim.Optimizer): method __init__ (line 43) | def __init__(self, params, lr=1e-2, eps=1e-10, method zero_grad (line 59) | def zero_grad(self): method step (line 67) | def step(self, closure=None): FILE: KoSimCSE/apex/optimizers/fused_adam.py class FusedAdam (line 4) | class FusedAdam(torch.optim.Optimizer): method __init__ (line 63) | def __init__(self, params, lr=1e-3, bias_correction=True, method zero_grad (line 82) | def zero_grad(self): method step (line 90) | def step(self, closure=None, grads=None, output_params=None, scale=Non... FILE: KoSimCSE/apex/optimizers/fused_lamb.py class FusedLAMB (line 4) | class FusedLAMB(torch.optim.Optimizer): method __init__ (line 63) | def __init__(self, params, lr=1e-3, bias_correction=True, method zero_grad (line 88) | def zero_grad(self): method step (line 96) | def step(self, closure=None): FILE: KoSimCSE/apex/optimizers/fused_novograd.py class FusedNovoGrad (line 4) | class FusedNovoGrad(torch.optim.Optimizer): method __init__ (line 67) | def __init__(self, params, lr=1e-3, bias_correction=True, method zero_grad (line 92) | def zero_grad(self): method load_state_dict (line 100) | def load_state_dict(self, state_dict): method step (line 108) | def step(self, closure=None): FILE: KoSimCSE/apex/optimizers/fused_sgd.py class FusedSGD (line 6) | class FusedSGD(Optimizer): method __init__ (line 76) | def __init__(self, params, lr=required, momentum=0, dampening=0, method __setstate__ (line 108) | def __setstate__(self, state): method zero_grad (line 113) | def zero_grad(self): method get_momentums (line 121) | def get_momentums(self, params): method step (line 138) | def step(self, closure=None): FILE: KoSimCSE/apex/parallel/LARC.py class LARC (line 5) | class LARC(object): method __init__ (line 39) | def __init__(self, optimizer, trust_coefficient=0.02, clip=True, eps=1... method __getstate__ (line 45) | def __getstate__(self): method __setstate__ (line 48) | def __setstate__(self, state): method state (line 52) | def state(self): method __repr__ (line 55) | def __repr__(self): method param_groups (line 59) | def param_groups(self): method param_groups (line 63) | def param_groups(self, value): method state_dict (line 66) | def state_dict(self): method load_state_dict (line 69) | def load_state_dict(self, state_dict): method zero_grad (line 72) | def zero_grad(self): method add_param_group (line 75) | def add_param_group(self, param_group): method step (line 78) | def step(self): FILE: KoSimCSE/apex/parallel/__init__.py function convert_syncbn_model (line 21) | def convert_syncbn_model(module, process_group=None, channel_last=False): function create_syncbn_process_group (line 58) | def create_syncbn_process_group(group_size): FILE: KoSimCSE/apex/parallel/distributed.py function import_flatten_impl (line 13) | def import_flatten_impl(): function flatten (line 25) | def flatten(bucket): function unflatten (line 30) | def unflatten(coalesced, bucket): function apply_flat_dist_call (line 36) | def apply_flat_dist_call(bucket, call, extra_args=None): function split_half_float_double (line 51) | def split_half_float_double(tensors): function split_by_type (line 60) | def split_by_type(tensors): function flat_dist_call (line 70) | def flat_dist_call(tensors, call, extra_args=None): function extract_tensors (line 78) | def extract_tensors(maybe_tensor, tensor_list): class Reducer (line 89) | class Reducer(object): method __init__ (line 111) | def __init__(self, module_or_grads_list): method reduce (line 121) | def reduce(self): class DistributedDataParallel (line 129) | class DistributedDataParallel(Module): method __init__ (line 162) | def __init__(self, method __setstate__ (line 256) | def __setstate__(self, state): method __getstate__ (line 268) | def __getstate__(self): method enable_allreduce (line 275) | def enable_allreduce(self): method disable_allreduce (line 278) | def disable_allreduce(self): method sync_bucket_structure (line 283) | def sync_bucket_structure(self): method create_hooks (line 319) | def create_hooks(self): method _stream_this_bucket (line 411) | def _stream_this_bucket(self, bucket_idx): method _event_this_bucket (line 418) | def _event_this_bucket(self, bucket_idx): method allreduce_bucket (line 425) | def allreduce_bucket(self, bucket, bucket_idx, force_default_stream): method allreduce_maybe_retain (line 478) | def allreduce_maybe_retain(self, bucket, bucket_idx, force_default_str... method allreduce_fallback (line 491) | def allreduce_fallback(self): method comm_ready_buckets (line 513) | def comm_ready_buckets(self, param): method forward (line 559) | def forward(self, *inputs, **kwargs): FILE: KoSimCSE/apex/parallel/multiproc.py function docstring_hack (line 5) | def docstring_hack(): FILE: KoSimCSE/apex/parallel/optimized_sync_batchnorm.py class SyncBatchNorm (line 9) | class SyncBatchNorm(_BatchNorm): method __init__ (line 58) | def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, ... method _specify_process_group (line 64) | def _specify_process_group(self, process_group): method _specify_channel_last (line 67) | def _specify_channel_last(self, channel_last): method forward (line 70) | def forward(self, input, z = None): FILE: KoSimCSE/apex/parallel/optimized_sync_batchnorm_kernel.py class SyncBatchnormFunction (line 7) | class SyncBatchnormFunction(Function): method forward (line 10) | def forward(ctx, input, z, weight, bias, running_mean, running_varianc... method backward (line 75) | def backward(ctx, grad_output): FILE: KoSimCSE/apex/parallel/sync_batchnorm.py class SyncBatchNorm (line 9) | class SyncBatchNorm(_BatchNorm): method __init__ (line 51) | def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, ... method _specify_process_group (line 65) | def _specify_process_group(self, process_group): method forward (line 68) | def forward(self, input): FILE: KoSimCSE/apex/parallel/sync_batchnorm_kernel.py class SyncBatchnormFunction (line 7) | class SyncBatchnormFunction(Function): method forward (line 10) | def forward(ctx, input, weight, bias, running_mean, running_variance, ... method backward (line 33) | def backward(ctx, grad_output): FILE: KoSimCSE/apex/pyprof/examples/custom_func_module/custom_function.py class Foo (line 9) | class Foo(torch.autograd.Function): method forward (line 11) | def forward(ctx, in1, in2): method backward (line 16) | def backward(ctx, grad): FILE: KoSimCSE/apex/pyprof/examples/custom_func_module/custom_module.py class Foo (line 8) | class Foo(torch.nn.Module): method __init__ (line 9) | def __init__(self, size): method forward (line 14) | def forward(self, input): FILE: KoSimCSE/apex/pyprof/examples/imagenet/imagenet.py function parseArgs (line 17) | def parseArgs(): function main (line 89) | def main(): FILE: KoSimCSE/apex/pyprof/examples/jit/jit_script_function.py function foo (line 11) | def foo(x, y): FILE: KoSimCSE/apex/pyprof/examples/jit/jit_script_method.py class Foo (line 7) | class Foo(torch.jit.ScriptModule): method __init__ (line 8) | def __init__(self, size): method forward (line 14) | def forward(self, input): FILE: KoSimCSE/apex/pyprof/examples/jit/jit_trace_function.py function foo (line 7) | def foo(x, y): FILE: KoSimCSE/apex/pyprof/examples/jit/jit_trace_method.py class Foo (line 7) | class Foo(torch.nn.Module): method __init__ (line 8) | def __init__(self, size): method forward (line 13) | def forward(self, input): FILE: KoSimCSE/apex/pyprof/examples/lenet.py class LeNet5 (line 12) | class LeNet5(nn.Module): method __init__ (line 13) | def __init__(self): method forward (line 24) | def forward(self, x): method num_flat_features (line 35) | def num_flat_features(self, x): FILE: KoSimCSE/apex/pyprof/examples/user_annotation/resnet.py function conv3x3 (line 15) | def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): function conv1x1 (line 20) | def conv1x1(in_planes, out_planes, stride=1): class Bottleneck (line 24) | class Bottleneck(nn.Module): method __init__ (line 28) | def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, method forward (line 48) | def forward(self, x): class ResNet (line 102) | class ResNet(nn.Module): method __init__ (line 104) | def __init__(self, block, layers, num_classes=1000, method _make_layer (line 134) | def _make_layer(self, block, planes, blocks, stride=1, dilate=False): method forward (line 158) | def forward(self, x): function resnet50 (line 193) | def resnet50(): FILE: KoSimCSE/apex/pyprof/nvtx/nvmarker.py function isfunc (line 27) | def isfunc(mod, f): function traceMarker (line 46) | def traceMarker(stack): function modMarker (line 56) | def modMarker(mod, fn_name, args): function add_wrapper (line 67) | def add_wrapper(mod, fn_name): function argMarker (line 110) | def argMarker(mod, op, args, kwargs): function patchClass (line 201) | def patchClass(cls): function init (line 206) | def init(): FILE: KoSimCSE/apex/pyprof/parse/db.py class DB (line 3) | class DB(object): method __init__ (line 9) | def __init__(self, dbFile): method select (line 21) | def select(self, cmd): method insert (line 36) | def insert(self, cmd, data): method execute (line 46) | def execute(self, cmd): method commit (line 56) | def commit(self): method close (line 59) | def close(self): FILE: KoSimCSE/apex/pyprof/parse/kernel.py function demangle (line 5) | def demangle(name): function encode_object_id (line 11) | def encode_object_id(pid, tid): function getShortName (line 20) | def getShortName(name): class Kernel (line 33) | class Kernel(object): method __init__ (line 41) | def __init__(self): method setKernelInfo (line 77) | def setKernelInfo(self, info): method setKernelName (line 93) | def setKernelName(self, name): method setRunTimeInfo (line 98) | def setRunTimeInfo(self, info): method setMarkerInfo (line 107) | def setMarkerInfo(self, info): method setDirection (line 111) | def setDirection(self): method setOp (line 123) | def setOp(self): method print (line 180) | def print(self): FILE: KoSimCSE/apex/pyprof/parse/nvvp.py class NVVP (line 3) | class NVVP(object): method __init__ (line 14) | def __init__(self, db): method getProfileStart (line 18) | def getProfileStart(self): method getString (line 36) | def getString(self, id_): method createMarkerTable (line 45) | def createMarkerTable(self): method getCPUInfo (line 65) | def getCPUInfo(self, corrId): method getKernelInfo (line 91) | def getKernelInfo(self): method getMarkerInfo (line 99) | def getMarkerInfo(self, objId, startTime, endTime): FILE: KoSimCSE/apex/pyprof/parse/parse.py function parseArgs (line 15) | def parseArgs(): function main (line 25) | def main(): FILE: KoSimCSE/apex/pyprof/prof/activation.py class Activation (line 5) | class Activation(OperatorLayerBase): method __init__ (line 12) | def __init__(self, d): method params (line 35) | def params(self): method flops (line 39) | def flops(self): method bytes (line 48) | def bytes(self): method tc (line 58) | def tc(self): method op (line 61) | def op(self): method mod (line 64) | def mod(self): FILE: KoSimCSE/apex/pyprof/prof/base.py class OperatorLayerBase (line 3) | class OperatorLayerBase(ABC): method tc (line 10) | def tc(self): method params (line 18) | def params(self): method flops (line 25) | def flops(self): method bytes (line 32) | def bytes(self): method mod (line 36) | def mod(self): method op (line 43) | def op(self): FILE: KoSimCSE/apex/pyprof/prof/blas.py class Addmm (line 8) | class Addmm(OperatorLayerBase): method __init__ (line 10) | def __init__(self, d): method tc (line 63) | def tc(self): method bytes (line 69) | def bytes(self): method flops (line 73) | def flops(self): method op (line 76) | def op(self): method mod (line 79) | def mod(self): method params (line 82) | def params(self): class Bmm (line 86) | class Bmm(OperatorLayerBase): method __init__ (line 88) | def __init__(self, d): method tc (line 123) | def tc(self): method params (line 129) | def params(self): method flops (line 134) | def flops(self): method bytes (line 137) | def bytes(self): method op (line 141) | def op(self): method mod (line 144) | def mod(self): class Matmul (line 147) | class Matmul(OperatorLayerBase): method __init__ (line 152) | def __init__(self, d): method params (line 252) | def params(self): method tc (line 255) | def tc(self): method bytes (line 264) | def bytes(self): method flops (line 272) | def flops(self): method op (line 279) | def op(self): method mod (line 282) | def mod(self): class Mm (line 285) | class Mm(OperatorLayerBase): method __init__ (line 287) | def __init__(self, d): method params (line 319) | def params(self): method tc (line 323) | def tc(self): method bytes (line 329) | def bytes(self): method flops (line 333) | def flops(self): method op (line 336) | def op(self): method mod (line 339) | def mod(self): FILE: KoSimCSE/apex/pyprof/prof/conv.py class Conv (line 5) | class Conv(OperatorLayerBase): method __init__ (line 26) | def __init__(self, d): method params (line 180) | def params(self): method conv_bytes_flops (line 184) | def conv_bytes_flops(self, N, C, H, W, K, P, Q, R, S, g, t): method bytes_flops (line 190) | def bytes_flops(self): method bytes (line 218) | def bytes(self): method flops (line 222) | def flops(self): method tc (line 226) | def tc(self): method op (line 232) | def op(self): method mod (line 235) | def mod(self): FILE: KoSimCSE/apex/pyprof/prof/convert.py class Convert (line 5) | class Convert(OperatorLayerBase): method __init__ (line 11) | def __init__(self, d): method params (line 41) | def params(self): method op (line 45) | def op(self): method mod (line 48) | def mod(self): method tc (line 51) | def tc(self): method elems (line 54) | def elems(self): method flops (line 57) | def flops(self): method bytes (line 60) | def bytes(self): FILE: KoSimCSE/apex/pyprof/prof/data.py class Data (line 3) | class Data(object): method __init__ (line 7) | def __init__(self, kernel): method setParams (line 41) | def setParams(self, params): FILE: KoSimCSE/apex/pyprof/prof/dropout.py class Dropout (line 5) | class Dropout(OperatorLayerBase): method __init__ (line 7) | def __init__(self, d): method params (line 28) | def params(self): method op (line 32) | def op(self): method mod (line 35) | def mod(self): method tc (line 38) | def tc(self): method elems (line 41) | def elems(self): method bytes (line 44) | def bytes(self): method flops (line 48) | def flops(self): FILE: KoSimCSE/apex/pyprof/prof/embedding.py class Embedding (line 5) | class Embedding(OperatorLayerBase): method __init__ (line 7) | def __init__(self, d): method params (line 33) | def params(self): method op (line 37) | def op(self): method mod (line 40) | def mod(self): method tc (line 43) | def tc(self): method bytes (line 46) | def bytes(self): method flops (line 69) | def flops(self): FILE: KoSimCSE/apex/pyprof/prof/index_slice_join_mutate.py class Cat (line 6) | class Cat(OperatorLayerBase): method __init__ (line 8) | def __init__(self, d): method params (line 34) | def params(self): method flops (line 38) | def flops(self): method tc (line 41) | def tc(self): method op (line 44) | def op(self): method mod (line 47) | def mod(self): method bytes (line 50) | def bytes(self): class Reshape (line 56) | class Reshape(OperatorLayerBase): method __init__ (line 58) | def __init__(self, d): method params (line 82) | def params(self): method flops (line 86) | def flops(self): method tc (line 89) | def tc(self): method op (line 92) | def op(self): method mod (line 95) | def mod(self): method bytes (line 98) | def bytes(self): class Gather (line 101) | class Gather(OperatorLayerBase): method __init__ (line 103) | def __init__(self, d): method params (line 132) | def params(self): method flops (line 136) | def flops(self): method tc (line 139) | def tc(self): method op (line 142) | def op(self): method mod (line 145) | def mod(self): method bytes (line 148) | def bytes(self): class MaskedScatter (line 151) | class MaskedScatter(OperatorLayerBase): method __init__ (line 153) | def __init__(self, d): method params (line 178) | def params(self): method flops (line 182) | def flops(self): method tc (line 185) | def tc(self): method op (line 188) | def op(self): method mod (line 191) | def mod(self): method bytes (line 194) | def bytes(self): class Nonzero (line 207) | class Nonzero(OperatorLayerBase): method __init__ (line 209) | def __init__(self, d): method params (line 229) | def params(self): method flops (line 233) | def flops(self): method tc (line 236) | def tc(self): method op (line 239) | def op(self): method mod (line 242) | def mod(self): method bytes (line 245) | def bytes(self): class IndexSelect (line 260) | class IndexSelect(OperatorLayerBase): method __init__ (line 262) | def __init__(self, d): method params (line 311) | def params(self): method tc (line 315) | def tc(self): method op (line 318) | def op(self): method mod (line 321) | def mod(self): method flops (line 324) | def flops(self): method bytes (line 327) | def bytes(self): class MaskedSelect (line 343) | class MaskedSelect(OperatorLayerBase): method __init__ (line 345) | def __init__(self, d): method params (line 393) | def params(self): method tc (line 397) | def tc(self): method op (line 400) | def op(self): method mod (line 403) | def mod(self): method bytes (line 406) | def bytes(self): method flops (line 418) | def flops(self): FILE: KoSimCSE/apex/pyprof/prof/linear.py class Linear (line 5) | class Linear(OperatorLayerBase): method setXWBMNK (line 17) | def setXWBMNK(self, args): method tc (line 63) | def tc(self): method __init__ (line 69) | def __init__(self, d): method params (line 118) | def params(self): method op (line 145) | def op(self): method bytesFlops (line 148) | def bytesFlops(self): method bytes (line 179) | def bytes(self): method flops (line 183) | def flops(self): method mod (line 187) | def mod(self): FILE: KoSimCSE/apex/pyprof/prof/loss.py class MSELoss (line 7) | class MSELoss(OperatorLayerBase): method __init__ (line 9) | def __init__(self, d): method params (line 51) | def params(self): method elems (line 55) | def elems(self): method bytes (line 71) | def bytes(self): method flops (line 74) | def flops(self): method tc (line 77) | def tc(self): method op (line 80) | def op(self): method mod (line 83) | def mod(self): FILE: KoSimCSE/apex/pyprof/prof/misc.py class Foo (line 5) | class Foo(OperatorLayerBase): method __init__ (line 9) | def __init__(self, d): method params (line 31) | def params(self): method tc (line 35) | def tc(self): method op (line 38) | def op(self): method mod (line 41) | def mod(self): method flops (line 44) | def flops(self): method bytes (line 47) | def bytes(self): class Copy (line 50) | class Copy(OperatorLayerBase): method __init__ (line 52) | def __init__(self, d): method params (line 75) | def params(self): method tc (line 80) | def tc(self): method op (line 83) | def op(self): method mod (line 86) | def mod(self): method flops (line 89) | def flops(self): method elems (line 92) | def elems(self): method bytes (line 95) | def bytes(self): class Clone (line 98) | class Clone(OperatorLayerBase): method __init__ (line 100) | def __init__(self, d): method params (line 118) | def params(self): method flops (line 122) | def flops(self): method tc (line 125) | def tc(self): method op (line 128) | def op(self): method mod (line 131) | def mod(self): method elems (line 134) | def elems(self): method bytes (line 137) | def bytes(self): class Contiguous (line 140) | class Contiguous(OperatorLayerBase): method __init__ (line 142) | def __init__(self, d): method params (line 160) | def params(self): method flops (line 164) | def flops(self): method bytes (line 167) | def bytes(self): method tc (line 170) | def tc(self): method op (line 173) | def op(self): method mod (line 176) | def mod(self): class Any (line 179) | class Any(OperatorLayerBase): method __init__ (line 181) | def __init__(self, d): method params (line 202) | def params(self): method op (line 206) | def op(self): method mod (line 209) | def mod(self): method tc (line 212) | def tc(self): method flops (line 215) | def flops(self): method bytes (line 218) | def bytes(self): FILE: KoSimCSE/apex/pyprof/prof/normalization.py class BatchNorm (line 5) | class BatchNorm(OperatorLayerBase): method __init__ (line 7) | def __init__(self, d): method params (line 27) | def params(self): method tc (line 31) | def tc(self): method op (line 34) | def op(self): method mod (line 37) | def mod(self): method elems (line 40) | def elems(self): method flops (line 43) | def flops(self): method bytes (line 47) | def bytes(self): FILE: KoSimCSE/apex/pyprof/prof/optim.py class Adam (line 7) | class Adam(OperatorLayerBase): method __init__ (line 9) | def __init__(self, d): method params (line 31) | def params(self): method flops (line 35) | def flops(self): method bytes (line 38) | def bytes(self): method tc (line 58) | def tc(self): method op (line 61) | def op(self): method mod (line 64) | def mod(self): FILE: KoSimCSE/apex/pyprof/prof/output.py class Output (line 3) | class Output(): method __init__ (line 33) | def __init__(self, args): method foo (line 77) | def foo(self, cadena, pformat): method header (line 99) | def header(self): method data (line 107) | def data(self, a): FILE: KoSimCSE/apex/pyprof/prof/pointwise.py class Pointwise (line 6) | class Pointwise(OperatorLayerBase): method foo (line 26) | def foo(d): method __init__ (line 29) | def __init__(self, d): method params (line 84) | def params(self): method tc (line 88) | def tc(self): method op (line 91) | def op(self): method mod (line 94) | def mod(self): method elems (line 97) | def elems(self): method bytes (line 138) | def bytes(self): method flops (line 141) | def flops(self): FILE: KoSimCSE/apex/pyprof/prof/pooling.py class MaxPool2d (line 7) | class MaxPool2d(object): method parse (line 9) | def parse(marker): FILE: KoSimCSE/apex/pyprof/prof/prof.py function findFpropKernel (line 39) | def findFpropKernel(seq): function foo (line 56) | def foo(mod, op, d): function main (line 171) | def main(): FILE: KoSimCSE/apex/pyprof/prof/randomSample.py class RandPerm (line 5) | class RandPerm(OperatorLayerBase): method __init__ (line 7) | def __init__(self, d): method params (line 25) | def params(self): method tc (line 29) | def tc(self): method op (line 32) | def op(self): method mod (line 35) | def mod(self): method bytes (line 38) | def bytes(self): method flops (line 41) | def flops(self): FILE: KoSimCSE/apex/pyprof/prof/recurrentCell.py function hasTileSize (line 5) | def hasTileSize(name): function ctaTile (line 11) | def ctaTile(name): class RNNCell (line 21) | class RNNCell(OperatorLayerBase): method __init__ (line 26) | def __init__(self, d): method params (line 73) | def params(self): method tc (line 83) | def tc(self): method op (line 89) | def op(self): method mod (line 92) | def mod(self): method bytes (line 95) | def bytes(self): method flops (line 105) | def flops(self): method bar (line 115) | def bar(self): FILE: KoSimCSE/apex/pyprof/prof/reduction.py class Mean (line 5) | class Mean(OperatorLayerBase): method __init__ (line 7) | def __init__(self, d): method params (line 32) | def params(self): method tc (line 36) | def tc(self): method op (line 39) | def op(self): method mod (line 42) | def mod(self): method elems (line 45) | def elems(self): method bytes (line 48) | def bytes(self): method flops (line 54) | def flops(self): class Sum (line 60) | class Sum(OperatorLayerBase): method __init__ (line 62) | def __init__(self, d): method params (line 86) | def params(self): method tc (line 90) | def tc(self): method op (line 93) | def op(self): method mod (line 96) | def mod(self): method elems (line 99) | def elems(self): method flops (line 102) | def flops(self): method bytes (line 106) | def bytes(self): class Norm (line 109) | class Norm(OperatorLayerBase): method __init__ (line 111) | def __init__(self, d): method params (line 129) | def params(self): method elems (line 133) | def elems(self): method bytes (line 136) | def bytes(self): method flops (line 139) | def flops(self): method tc (line 143) | def tc(self): method op (line 146) | def op(self): method mod (line 149) | def mod(self): FILE: KoSimCSE/apex/pyprof/prof/softmax.py class Softmax (line 5) | class Softmax(OperatorLayerBase): method __init__ (line 7) | def __init__(self, d): method op (line 31) | def op(self): method mod (line 34) | def mod(self): method tc (line 37) | def tc(self): method params (line 40) | def params(self): method elems (line 44) | def elems(self): method flops (line 47) | def flops(self): method bytes (line 52) | def bytes(self): class LogSoftmax (line 57) | class LogSoftmax(OperatorLayerBase): method __init__ (line 59) | def __init__(self, d): method op (line 91) | def op(self): method mod (line 94) | def mod(self): method tc (line 97) | def tc(self): method params (line 100) | def params(self): method elems (line 104) | def elems(self): method flops (line 107) | def flops(self): method bytes (line 112) | def bytes(self): FILE: KoSimCSE/apex/pyprof/prof/usage.py function parseArgs (line 4) | def parseArgs(): FILE: KoSimCSE/apex/pyprof/prof/utility.py class Utility (line 3) | class Utility(object): method numElems (line 6) | def numElems(shape): method typeToBytes (line 11) | def typeToBytes(t): method typeToString (line 23) | def typeToString(t): method hasNVTX (line 45) | def hasNVTX(marker): method isscalar (line 59) | def isscalar(t): FILE: KoSimCSE/apex/reparameterization/__init__.py function apply_weight_norm (line 4) | def apply_weight_norm(module, name='', dim=0, hook_child=True): function remove_weight_norm (line 50) | def remove_weight_norm(module, name='', remove_all=False): function apply_reparameterization (line 64) | def apply_reparameterization(module, reparameterization=None, name='', d... function remove_reparameterization (line 96) | def remove_reparameterization(module, reparameterization=Reparameterizat... FILE: KoSimCSE/apex/reparameterization/reparameterization.py class Reparameterization (line 4) | class Reparameterization(object): method __init__ (line 19) | def __init__(self, name, dim, module, retain_forward=True): method compute_weight (line 28) | def compute_weight(self, module=None, name=None): method reparameterize (line 40) | def reparameterize(self, name, weight, dim): method apply (line 57) | def apply(module, name, dim, reparameterization=None, hook_child=True): method get_module_and_name (line 105) | def get_module_and_name(module, name): method get_params (line 123) | def get_params(self, module): method remove (line 127) | def remove(self, module): method __call__ (line 139) | def __call__(self, module, inputs): method backward_hook (line 147) | def backward_hook(self, module, grad_input, grad_output): FILE: KoSimCSE/apex/reparameterization/weight_norm.py function _norm (line 8) | def _norm(p, dim): class WeightNorm (line 22) | class WeightNorm(Reparameterization): method compute_weight (line 39) | def compute_weight(self, module=None, name=None): method reparameterize (line 62) | def reparameterize(self, name, weight, dim): FILE: KoSimCSE/data/dataloader.py class ModelDataLoader (line 10) | class ModelDataLoader(Dataset): method __init__ (line 11) | def __init__(self, file_path, args, metric, tokenizer, type_): method load_data (line 43) | def load_data(self, type): method data2tensor (line 56) | def data2tensor(self, line, type): method __getitem__ (line 104) | def __getitem__(self, index): method __len__ (line 146) | def __len__(self): function get_loader (line 154) | def get_loader(args, metric): function convert_to_tensor (line 190) | def convert_to_tensor(corpus, tokenizer, device): function example_model_setting (line 208) | def example_model_setting(model_ckpt, model_name): FILE: KoSimCSE/main.py function main (line 5) | def main(args, logger) -> None: FILE: KoSimCSE/model/loss.py class Loss (line 12) | class Loss(): method __init__ (line 14) | def __init__(self, args): method train_loss_fct (line 19) | def train_loss_fct(self, config, inputs, a, p, n): method evaluation_during_training (line 31) | def evaluation_during_training(self, embeddings1, embeddings2, labels,... FILE: KoSimCSE/model/setting.py class Arguments (line 8) | class Arguments(): method __init__ (line 10) | def __init__(self): method add_type_of_processing (line 13) | def add_type_of_processing(self): method add_hyper_parameters (line 20) | def add_hyper_parameters(self): method add_data_parameters (line 34) | def add_data_parameters(self): method print_args (line 44) | def print_args(self, args): method add_argument (line 50) | def add_argument(self, *args, **kw_args): method parse (line 53) | def parse(self): class Setting (line 60) | class Setting(): method set_logger (line 62) | def set_logger(self): method set_seed (line 76) | def set_seed(self, args): method run (line 90) | def run(self): FILE: KoSimCSE/model/simcse/bert.py class BERT (line 5) | class BERT(nn.Module): method __init__ (line 6) | def __init__(self, bert): method forward (line 10) | def forward(self, config, inputs, mode): method encode (line 45) | def encode(self, inputs, device): FILE: KoSimCSE/model/simcse/processor.py class Processor (line 18) | class Processor(): method __init__ (line 20) | def __init__(self, args): method run (line 32) | def run(self, inputs, indicator=None, type=None): method progress (line 50) | def progress(self, loss): method progress_validation (line 54) | def progress_validation(self, score): method return_value (line 58) | def return_value(self): method get_object (line 64) | def get_object(self, tokenizer, model): method get_scheduler (line 79) | def get_scheduler(self, optim, train_loader): method model_setting (line 87) | def model_setting(self): method train (line 116) | def train(self, epoch): method valid (line 144) | def valid(self): method test (line 168) | def test(self): FILE: KoSimCSE/model/utils.py class Metric (line 10) | class Metric(): method __init__ (line 12) | def __init__(self, args): method get_lr (line 15) | def get_lr(self, optimizer): method count_parameters (line 18) | def count_parameters(self, model): method cal_acc (line 21) | def cal_acc(self, yhat, y): method cal_time (line 28) | def cal_time(self, start_time, end_time): method cal_dev_score (line 35) | def cal_dev_score(self, score, indicator): method update_indicator (line 51) | def update_indicator(self, indicator, score): method draw_graph (line 70) | def draw_graph(self, cp): method performance_check (line 74) | def performance_check(self, cp, config): method print_size_of_model (line 80) | def print_size_of_model(self, model): method move2device (line 85) | def move2device(self, sample, device): method save_model (line 106) | def save_model(self, config, cp, pco): function pytorch_cos_sim (line 123) | def pytorch_cos_sim(a, b):