SYMBOL INDEX (2719 symbols across 329 files)

FILE: KoSentenceT5/apex/RNN/RNNBackend.py
  function is_iterable (line 10) | def is_iterable(maybe_iterable):
  function flatten_list (line 14) | def flatten_list(tens_list):
  class bidirectionalRNN (line 25) | class bidirectionalRNN(nn.Module):
    method __init__ (line 29) | def __init__(self, inputRNN, num_layers=1, dropout = 0):
    method forward (line 37) | def forward(self, input, collect_hidden=False):
    method reset_parameters (line 52) | def reset_parameters(self):
    method init_hidden (line 59) | def init_hidden(self, bsz):
    method detach_hidden (line 66) | def detach_hidden(self):
    method reset_hidden (line 73) | def reset_hidden(self, bsz):
    method init_inference (line 80) | def init_inference(self, bsz):
  class stackedRNN (line 90) | class stackedRNN(nn.Module):
    method __init__ (line 94) | def __init__(self, inputRNN, num_layers=1, dropout=0):
    method forward (line 122) | def forward(self, input, collect_hidden=False, reverse=False):
    method reset_parameters (line 197) | def reset_parameters(self):
    method init_hidden (line 204) | def init_hidden(self, bsz):
    method detach_hidden (line 211) | def detach_hidden(self):
    method reset_hidden (line 218) | def reset_hidden(self, bsz):
    method init_inference (line 225) | def init_inference(self, bsz):
  class RNNCell (line 232) | class RNNCell(nn.Module):
    method __init__ (line 242) | def __init__(self, gate_multiplier, input_size, hidden_size, cell, n_h...
    method new_like (line 274) | def new_like(self, new_input_size=None):
    method reset_parameters (line 291) | def reset_parameters(self, gain=1):
    method init_hidden (line 309) | def init_hidden(self, bsz):
    method reset_hidden (line 330) | def reset_hidden(self, bsz):
    method detach_hidden (line 338) | def detach_hidden(self):
    method forward (line 348) | def forward(self, input):

FILE: KoSentenceT5/apex/RNN/cells.py
  class mLSTMRNNCell (line 12) | class mLSTMRNNCell(RNNCell):
    method __init__ (line 17) | def __init__(self, input_size, hidden_size, bias = False, output_size ...
    method forward (line 26) | def forward(self, input):
    method new_like (line 45) | def new_like(self, new_input_size=None):
  function mLSTMCell (line 55) | def mLSTMCell(input, hidden, w_ih, w_hh, w_mih, w_mhh, b_ih=None, b_hh=N...

FILE: KoSentenceT5/apex/RNN/models.py
  function toRNNBackend (line 8) | def toRNNBackend(inputRNN, num_layers, bidirectional=False, dropout = 0):
  function LSTM (line 19) | def LSTM(input_size, hidden_size, num_layers, bias=True, batch_first=Fal...
  function GRU (line 26) | def GRU(input_size, hidden_size, num_layers, bias=True, batch_first=Fals...
  function ReLU (line 33) | def ReLU(input_size, hidden_size, num_layers, bias=True, batch_first=Fal...
  function Tanh (line 40) | def Tanh(input_size, hidden_size, num_layers, bias=True, batch_first=Fal...
  function mLSTM (line 47) | def mLSTM(input_size, hidden_size, num_layers, bias=True, batch_first=Fa...

FILE: KoSentenceT5/apex/amp/_amp_state.py
  class AmpState (line 18) | class AmpState(object):
    method __init__ (line 19) | def __init__(self):
  function warn_or_err (line 29) | def warn_or_err(msg):
  function maybe_print (line 39) | def maybe_print(msg, rank0=False):
  function master_params (line 60) | def master_params(optimizer):

FILE: KoSentenceT5/apex/amp/_initialize.py
  function to_type (line 21) | def to_type(dtype, t):
  function applier (line 39) | def applier(value, fn):
  function check_models (line 64) | def check_models(models):
  function check_params_fp32 (line 79) | def check_params_fp32(models):
  function check_optimizers (line 119) | def check_optimizers(optimizers):
  class O2StateDictHook (line 133) | class O2StateDictHook(object):
    method __init__ (line 134) | def __init__(self, fn):
    method __call__ (line 137) | def __call__(self, module, state_dict, prefix, local_metadata):
  function _initialize (line 145) | def _initialize(models, optimizers, properties, num_losses=1, cast_model...

FILE: KoSentenceT5/apex/amp/_process_optimizer.py
  class AmpOptimizerState (line 9) | class AmpOptimizerState(object):
    method __init__ (line 10) | def __init__(self):
  function _master_params_to_model_params (line 14) | def _master_params_to_model_params(self):
  function lazy_init_with_master_weights (line 28) | def lazy_init_with_master_weights(self):
  function post_backward_models_are_masters (line 93) | def post_backward_models_are_masters(scaler, params, stashed_grads, scal...
  function prepare_backward_with_master_weights (line 142) | def prepare_backward_with_master_weights(self):
  function post_backward_with_master_weights (line 161) | def post_backward_with_master_weights(self, scaler):
  function lazy_init_no_master_weights (line 205) | def lazy_init_no_master_weights(self):
  function prepare_backward_no_master_weights (line 224) | def prepare_backward_no_master_weights(self):
  function post_backward_no_master_weights (line 240) | def post_backward_no_master_weights(self, scaler):
  function prepare_backward_with_master_weights_FusedSGD (line 258) | def prepare_backward_with_master_weights_FusedSGD(self):
  function post_backward_with_master_weights_FusedSGD (line 277) | def post_backward_with_master_weights_FusedSGD(self, scaler):
  function prepare_backward_no_master_weights_FusedSGD (line 305) | def prepare_backward_no_master_weights_FusedSGD(self):
  function post_backward_no_master_weights_FusedSGD (line 309) | def post_backward_no_master_weights_FusedSGD(self, scaler):
  function _amp_lazy_init (line 313) | def _amp_lazy_init(self):
  function _process_optimizer (line 321) | def _process_optimizer(optimizer, properties):

FILE: KoSentenceT5/apex/amp/amp.py
  function _decorator_helper (line 18) | def _decorator_helper(orig_fn, cast_fn, wrap_fn):
  function half_function (line 30) | def half_function(fn):
  function float_function (line 35) | def float_function(fn):
  function promote_function (line 40) | def promote_function(fn):
  function register_half_function (line 46) | def register_half_function(module, name):
  function register_float_function (line 53) | def register_float_function(module, name):
  function register_promote_function (line 60) | def register_promote_function(module, name):
  function init (line 68) | def init(enabled=True, loss_scale="dynamic", enable_caching=True, verbos...

FILE: KoSentenceT5/apex/amp/compat.py
  function variable_is_tensor (line 4) | def variable_is_tensor():
  function tensor_is_variable (line 8) | def tensor_is_variable():
  function tensor_is_float_tensor (line 13) | def tensor_is_float_tensor():
  function is_tensor_like (line 19) | def is_tensor_like(x):
  function is_floating_point (line 24) | def is_floating_point(x):
  function scalar_python_val (line 35) | def scalar_python_val(x):
  function filter_attrs (line 45) | def filter_attrs(module, attrs):

FILE: KoSentenceT5/apex/amp/frontend.py
  class Properties (line 7) | class Properties(object):
    method __init__ (line 13) | def __init__(self):
    method _update_options_dict (line 33) | def _update_options_dict(self, new_options):
    method __getattr__ (line 43) | def __getattr__(self, name):
    method __setattr__ (line 51) | def __setattr__(self, name, value):
  class O3 (line 102) | class O3:
    method __call__ (line 111) | def __call__(self, properties):
  class O2 (line 124) | class O2:
    method __call__ (line 134) | def __call__(self, properties):
  class O1 (line 147) | class O1:
    method __call__ (line 156) | def __call__(self, properties):
  class O0 (line 169) | class O0:
    method __call__ (line 175) | def __call__(self, properties):
  function initialize (line 195) | def initialize(
  function state_dict (line 361) | def state_dict(destination=None):
  function load_state_dict (line 373) | def load_state_dict(state_dict):

FILE: KoSentenceT5/apex/amp/handle.py
  function scale_loss (line 17) | def scale_loss(loss,
  function disable_casts (line 164) | def disable_casts():
  class AmpHandle (line 170) | class AmpHandle(object):
    method __init__ (line 171) | def __init__(self, loss_scale="dynamic", enable_caching=True, verbose=...
    method is_active (line 179) | def is_active(self):
    method _disable_casts (line 183) | def _disable_casts(self):
    method wrap_optimizer (line 188) | def wrap_optimizer(self, optimizer, num_loss=1):
    method scale_loss (line 193) | def scale_loss(self, loss, optimizer):
    method _clear_cache (line 226) | def _clear_cache(self):
    method _save_func (line 230) | def _save_func(self, mod, fn, func):
    method _deactivate (line 233) | def _deactivate(self):
    method has_cache (line 239) | def has_cache(self):
    method cache (line 243) | def cache(self):
    method remove_cache (line 246) | def remove_cache(self, param):
    method verbose (line 251) | def verbose(self):
  class NoOpHandle (line 254) | class NoOpHandle(object):
    method is_active (line 255) | def is_active(self):
    method _disable_casts (line 259) | def _disable_casts(self):
    method wrap_optimizer (line 262) | def wrap_optimizer(self, optimizer, num_loss=1):
    method scale_loss (line 266) | def scale_loss(self, loss, optimizer):
    method has_cache (line 270) | def has_cache(self):
    method verbose (line 274) | def verbose(self):
    method _clear_cache (line 277) | def _clear_cache(self):
    method _deactivate (line 280) | def _deactivate(self):

FILE: KoSentenceT5/apex/amp/opt.py
  class OptimWrapper (line 9) | class OptimWrapper(object):
    method __init__ (line 10) | def __init__(self, optimizer, amp_handle, num_loss):
    method scale_loss (line 19) | def scale_loss(self, loss):
    method _cur_loss_scaler (line 55) | def _cur_loss_scaler(self):
    method step (line 59) | def step(self, closure=None):
    method __getattr__ (line 80) | def __getattr__(self, attr):
    method __getstate__ (line 84) | def __getstate__(self):
    method __setstate__ (line 87) | def __setstate__(self):
    method __repr__ (line 90) | def __repr__(self):
    method state_dict (line 93) | def state_dict(self):
    method load_state_dict (line 96) | def load_state_dict(self, state_dict):
    method zero_grad (line 99) | def zero_grad(self):
    method add_param_group (line 102) | def add_param_group(self, param_group):

FILE: KoSentenceT5/apex/amp/rnn_compat.py
  function _gen_VF_wrapper (line 7) | def _gen_VF_wrapper(name):
  class VariableFunctionsShim (line 17) | class VariableFunctionsShim(object):
    method __init__ (line 18) | def __init__(self):
  function has_old_rnns (line 24) | def has_old_rnns():
  function whitelist_rnn_cells (line 31) | def whitelist_rnn_cells(handle, verbose):

FILE: KoSentenceT5/apex/amp/scaler.py
  function scale_check_overflow_python (line 6) | def scale_check_overflow_python(model_grad, master_grad, scale, check_ov...
  function axpby_check_overflow_python (line 19) | def axpby_check_overflow_python(model_grad, stashed_grad, master_grad, a...
  class LossScaler (line 33) | class LossScaler(object):
    method __init__ (line 38) | def __init__(self,
    method loss_scale (line 73) | def loss_scale(self):
    method unscale_python (line 76) | def unscale_python(self, model_grads, master_grads, scale):
    method unscale (line 94) | def unscale(self, model_grads, master_grads, unused_scale, models_are_...
    method unscale_with_stashed_python (line 126) | def unscale_with_stashed_python(self,
    method unscale_with_stashed (line 152) | def unscale_with_stashed(self,
    method clear_overflow_state (line 191) | def clear_overflow_state(self):
    method update_scale (line 197) | def update_scale(self):

FILE: KoSentenceT5/apex/amp/utils.py
  function is_cuda_enabled (line 8) | def is_cuda_enabled():
  function get_cuda_version (line 11) | def get_cuda_version():
  function is_fp_tensor (line 14) | def is_fp_tensor(x):
  function is_nested (line 23) | def is_nested(x):
  function should_cache (line 26) | def should_cache(x):
  function collect_fp_tensor_types (line 36) | def collect_fp_tensor_types(args, kwargs):
  function type_string (line 51) | def type_string(x):
  function maybe_half (line 54) | def maybe_half(x, name='', verbose=False):
  function maybe_float (line 65) | def maybe_float(x, name='', verbose=False):
  function casted_args (line 77) | def casted_args(cast_fn, args, kwargs):
  function cached_cast (line 90) | def cached_cast(cast_fn, x, cache):
  function verbosify (line 124) | def verbosify(cast_fn, fn_name, verbose):
  function as_inplace (line 130) | def as_inplace(fns):
  function has_func (line 134) | def has_func(mod, fn):
  function get_func (line 140) | def get_func(mod, fn):
  function set_func (line 146) | def set_func(mod, fn, new_fn):
  function set_func_save (line 152) | def set_func_save(handle, mod, fn, new_fn):
  function synthesize_flattened_rnn_weights (line 171) | def synthesize_flattened_rnn_weights(fp32_weights,
  function new_synthesize_flattened_rnn_weights (line 194) | def new_synthesize_flattened_rnn_weights(fp32_weights,

FILE: KoSentenceT5/apex/amp/wrap.py
  function make_cast_wrapper (line 10) | def make_cast_wrapper(orig_fn, cast_fn, handle,
  function cached_cast (line 31) | def cached_cast(mod, fn, cast_fn, handle,
  function make_promote_wrapper (line 44) | def make_promote_wrapper(orig_fn, cast_fn, handle=None):
  function promote (line 65) | def promote(mod, fn, handle, verbose=False):
  function sequence_promote (line 71) | def sequence_promote(mod, fn, handle, verbose=False):
  function promote_match_arg0 (line 92) | def promote_match_arg0(mod, fn, handle, verbose=False):
  function err_if_any_half (line 114) | def err_if_any_half(mod, fn, handle, custom_err_msg=None):
  function err_if_arg0_half (line 132) | def err_if_arg0_half(mod, fn, handle, verbose=False):
  function rnn_cast (line 157) | def rnn_cast(backend, fn, handle, verbose=False):
  function new_rnn_cast (line 222) | def new_rnn_cast(fn, handle, verbose=False):
  function disable_casts (line 267) | def disable_casts(mod, fn, handle):

FILE: KoSentenceT5/apex/contrib/bottleneck/bottleneck.py
  function kaiming_uniform_ (line 5) | def kaiming_uniform_(tensor, a=0, mode='fan_in', nonlinearity='leaky_rel...
  class FrozenBatchNorm2d (line 9) | class FrozenBatchNorm2d(torch.nn.Module):
    method __init__ (line 13) | def __init__(self, n):
    method get_scale_bias (line 20) | def get_scale_bias(self, nhwc=False):
    method forward (line 31) | def forward(self, x):
  function drelu_dscale1 (line 37) | def drelu_dscale1(grad_o, output, scale1):
  function drelu_dscale2 (line 44) | def drelu_dscale2(grad_o, output, scale1, scale2):
  class BottleneckFunction (line 51) | class BottleneckFunction(torch.autograd.Function):
    method forward (line 53) | def forward(ctx, nhwc, stride_1x1, scale, bias, x, *conv):
    method backward (line 75) | def backward(ctx, grad_o):
  function conv3x3 (line 102) | def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
  function conv1x1 (line 107) | def conv1x1(in_planes, out_planes, stride=1):
  class Bottleneck (line 111) | class Bottleneck(torch.nn.Module):
    method __init__ (line 119) | def __init__(self, in_channels, bottleneck_channels, out_channels, str...
    method forward (line 174) | def forward(self, x):

FILE: KoSentenceT5/apex/contrib/csrc/bottleneck/bottleneck.cpp
  function checkCudnnError (line 31) | int checkCudnnError(cudnnStatus_t code, const char* expr, const char* fi...
  function checkError (line 42) | void checkError(cudaError_t code, char const * func, const char *file, c...
  function generateStrides (line 55) | void generateStrides(const int64_t* dimA, int64_t* strideA, int nbDims, ...
  function getFwdConvDilatedFilterDim (line 75) | int getFwdConvDilatedFilterDim(int filterDim, int dilation) {
  function getFwdConvPaddedImageDim (line 79) | int getFwdConvPaddedImageDim(int tensorDim, int pad) {
  function getFwdConvOutputDim (line 83) | int getFwdConvOutputDim(
  function common_conv_descriptors (line 111) | common_conv_descriptors
  function common_convbias_descriptors (line 173) | common_convbias_descriptors
  function dconv_descriptors (line 294) | dconv_descriptors
  function getConvFusionString (line 377) | std::string getConvFusionString(int64_t* x_dim_padded,
  function run_conv_scale_bias_add_activation (line 469) | void
  function run_conv_scale_bias (line 630) | void
  function run_dconv_drelu_dscale (line 759) | void
  function run_dconv (line 886) | void
  function run_dconv_add (line 992) | void
  function bottleneck_forward (line 1104) | std::vector<at::Tensor> bottleneck_forward(bool explicit_nhwc, int strid...
  function bottleneck_backward (line 1287) | std::vector<at::Tensor> bottleneck_backward(bool explicit_nhwc, int stri...
  function PYBIND11_MODULE (line 1609) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: KoSentenceT5/apex/contrib/csrc/fmha/fmha_api.cpp
  function set_params (line 33) | void set_params(Fused_multihead_attention_fprop_params &params,
  function mha_fwd (line 86) | std::vector<at::Tensor>
  function mha_bwd (line 182) | std::vector<at::Tensor>
  function mha_fwd_nl (line 262) | std::vector<at::Tensor> mha_fwd_nl(const at::Tensor &qkv,         // tot...
  function mha_bwd_nl (line 342) | std::vector<at::Tensor> mha_bwd_nl(const at::Tensor &dout,        // tot...
  function PYBIND11_MODULE (line 426) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: KoSentenceT5/apex/contrib/csrc/fmha/src/fmha.h
  type Qkv_params (line 46) | struct Qkv_params {
  function Qkv_params (line 59) | struct Fused_multihead_attention_fprop_params : public Qkv_params {

FILE: KoSentenceT5/apex/contrib/csrc/fmha/src/fmha/gemm.h
  function namespace (line 34) | namespace fmha {
  type Fragment_accumulator (line 145) | struct Fragment_accumulator
  function add (line 152) | void add(const Other_fragment_ &other) {

FILE: KoSentenceT5/apex/contrib/csrc/fmha/src/fmha/gmem_tile.h
  function namespace (line 30) | namespace fmha {
  function __device__ (line 112) | inline __device__ void store(const uint4 (&data)[LDGS]) {
  function __device__ (line 123) | inline __device__ void move() {
  function __device__ (line 201) | inline __device__ void store(const uint4 (&src)[STGS_PER_LOOP], int mi) {
  function __device__ (line 222) | inline __device__ void move() {
  function __device__ (line 273) | __device__ Gmem_tile_mma_sd(void *ptr, const Params &params, const int t...
  function __device__ (line 288) | inline __device__ void store(const Type &data, const int mi, const int n...
  function __device__ (line 300) | inline __device__ void move() {
  function Base (line 311) | struct Gmem_tile_mma_s : public Base {
  function Base (line 404) | struct Gmem_tile_dq : public Base {

FILE: KoSentenceT5/apex/contrib/csrc/fmha/src/fmha/mask.h
  function namespace (line 30) | namespace fmha {

FILE: KoSentenceT5/apex/contrib/csrc/fmha/src/fmha/smem_tile.h
  function namespace (line 33) | namespace fmha {
  function __device__ (line 396) | inline __device__ Smem_tile_row_a(void *smem, int tidx) : Base(smem, tid...
  function __device__ (line 462) | inline __device__ void reset_read_offset() {
  function __device__ (line 494) | inline __device__ Smem_tile_a(void *smem, int tidx) : Base(smem, tidx) {
  function __device__ (line 581) | inline __device__ Smem_tile_col_b(void *smem, int tidx) : Base(smem, tid...
  function __device__ (line 653) | inline __device__ void reset_read_offset() {
  function __device__ (line 685) | inline __device__ Smem_tile_b(void *smem, int tidx) : Base(smem, tidx) {
  function __device__ (line 748) | inline __device__ Smem_tile_row_b(void *smem, int tidx) : Base(smem, tid...
  function __device__ (line 892) | inline __device__ Smem_tile_b(void *smem, int tidx) : Base(smem, tidx) {
  function __device__ (line 912) | inline __device__ Smem_tile_v(void *smem, int tidx) : Base(smem, tidx) {
  function __device__ (line 1003) | inline __device__ Smem_tile_o(void *smem, int tidx) {
  function store (line 1057) | void store(const Accumulator (&acc)[M][N], int mi) {
  function __device__ (line 1129) | inline __device__ Smem_tile_mma(char *smem, int tidx) {
  function store (line 1147) | void store(const uint4 (&regs)[M][N]) {
  function __device__ (line 1177) | inline __device__ Smem_tile_mma_transposed(char *smem, int tidx) : Base(...
  function load (line 1189) | void load(Fragment (&frag)[M][N]) {
  function __device__ (line 1223) | inline __device__ Smem_tile_mma_epilogue(char *smem, int tidx) : Base(sm...
  function store (line 1238) | void store(const Acc (&acc)[M][N]){
  function store (line 1272) | void store(const uint4 (&regs)[M][N]) {

FILE: KoSentenceT5/apex/contrib/csrc/fmha/src/fmha/softmax.h
  function namespace (line 30) | namespace fmha {

FILE: KoSentenceT5/apex/contrib/csrc/fmha/src/fmha/utils.h
  function namespace (line 38) | namespace fmha {
  function __device__ (line 247) | static inline __device__ uint32_t hadd2(uint32_t a, uint32_t b) {
  function __device__ (line 255) | static inline __device__ uint32_t hmin2(uint32_t a, uint32_t b) {
  function __device__ (line 263) | static inline __device__ uint32_t hmul2(uint32_t a, uint32_t b) {
  function __device__ (line 271) | static inline __device__ uint2 hmul4(uint2 a, uint2 b) {
  function __device__ (line 280) | static inline __device__ uint4 hmul8(uint4 a, uint4 b) {
  function __device__ (line 291) | static inline __device__ uint4 hmul8(uint32_t a, uint4 b) {
  function __device__ (line 317) | static inline __device__ uint32_t habs2(uint32_t x) {
  function __device__ (line 332) | static inline __device__ uint16_t clamp_to_zero(uint16_t x) {
  function __device__ (line 340) | static inline __device__ uint16_t float_to_half(float f) {
  function __device__ (line 348) | static inline __device__ uint32_t float2_to_half2(float a, float b) {
  function __device__ (line 362) | static inline __device__ uint32_t float_to_half2(float a) {
  function __device__ (line 368) | static inline __device__ uint32_t float2_to_half2(const float2 &f) {
  function __device__ (line 374) | static inline __device__ uint2 float4_to_half4(float x, float y, float z...
  function __device__ (line 383) | static inline __device__ uint32_t hfma2(uint32_t a, uint32_t b, uint32_t...
  function __device__ (line 391) | static inline __device__ uint32_t hfma2_relu(uint32_t a, uint32_t b, uin...
  function __device__ (line 403) | static inline __device__ uint32_t h0_h0(uint32_t x) {
  function __device__ (line 412) | static inline __device__ float h0_to_float(uint32_t h2) {
  function __device__ (line 424) | static inline __device__ uint32_t h1_h1(uint32_t x) {
  function __device__ (line 433) | static inline __device__ uint16_t hadd(uint16_t a, uint16_t b) {
  function __device__ (line 441) | static inline __device__ uint32_t hadd(uint32_t a, uint32_t b) {
  function __device__ (line 447) | static inline __device__ uint2 hadd4(uint2 a, uint2 b) {
  function __device__ (line 456) | static inline __device__ uint2 hadd(uint2 a, uint2 b) {
  function __device__ (line 462) | static inline __device__ uint4 hadd8(uint4 a, uint4 b) {
  function __device__ (line 473) | static inline __device__ uint4 fadd4(uint4 a, uint4 b) {
  function __device__ (line 484) | static inline __device__ uint4 hadd(uint4 a, uint4 b) {
  function __device__ (line 490) | static inline __device__ float half_to_float(uint16_t h) {
  function __device__ (line 498) | static inline __device__ float2 half2_to_float2(uint32_t x) {
  function __device__ (line 514) | static inline __device__ uint16_t hfma(uint16_t a, uint16_t b, uint16_t ...
  function __device__ (line 522) | static inline __device__ uint16_t hmul(uint16_t a, uint16_t b) {
  function __device__ (line 530) | static inline __device__ float sigmoid(float x) {
  function __device__ (line 685) | inline __device__ Ldg_functor(Data_type (&fetch)[N], const void* (&ptrs)...
  function __device__ (line 690) | inline __device__ void clear(int ii) {
  function __device__ (line 695) | inline __device__ void load(int ii, bool p) {
  function __device__ (line 847) | inline __device__ void stg(void *ptr, uint8_t val) {
  function __device__ (line 853) | inline __device__ void stg(void *ptr, uint16_t val) {
  function __device__ (line 859) | inline __device__ void stg(void *ptr, uint32_t val) {
  function __device__ (line 865) | inline __device__ void stg(void *ptr, uint2 val) {
  function __device__ (line 871) | inline __device__ void stg(void *ptr, uint4 val) {
  function __device__ (line 881) | inline __device__ void sts(uint32_t ptr, uint16_t val) {
  function __device__ (line 887) | inline __device__ void sts(uint32_t ptr, uint32_t val) {
  function __device__ (line 893) | inline __device__ void sts(uint32_t ptr, uint2 val) {
  function __device__ (line 903) | inline __device__ void sts(uint32_t ptr, uint4 val) {

FILE: KoSentenceT5/apex/contrib/csrc/fmha/src/fmha_dgrad_kernel_1xN_reload.h
  function namespace (line 34) | namespace fmha {

FILE: KoSentenceT5/apex/contrib/csrc/fmha/src/fmha_dgrad_kernel_1xN_reload_nl.h
  function namespace (line 34) | namespace fmha {

FILE: KoSentenceT5/apex/contrib/csrc/fmha/src/fmha_fprop_kernel_1xN.h
  function namespace (line 34) | namespace fmha {

FILE: KoSentenceT5/apex/contrib/csrc/fmha/src/fmha_fprop_kernel_1xN_nl.h
  function namespace (line 35) | namespace fmha {

FILE: KoSentenceT5/apex/contrib/csrc/fmha/src/fmha_fprop_kernel_1xN_reload_v.h
  function namespace (line 34) | namespace fmha {

FILE: KoSentenceT5/apex/contrib/csrc/fmha/src/fmha_kernel.h
  function namespace (line 39) | namespace fmha {
  function __device__ (line 90) | inline __device__ Noloop_traits(const int bidc)
  function move_all (line 96) | void move_all(Tiles & ... tiles) const {
  function __device__ (line 113) | inline __device__ int offset_loop_count(const int l) {
  function __device__ (line 157) | inline __device__ int offset_loop_count(const int l) {

FILE: KoSentenceT5/apex/contrib/csrc/fmha/src/fmha_utils.h
  type Data_type (line 53) | enum Data_type { DATA_TYPE_FP16, DATA_TYPE_FP32, DATA_TYPE_INT32, DATA_T...
  function set_alpha (line 57) | static inline void set_alpha( uint32_t &alpha, float norm, Data_type dty...
  function get_size_in_bytes (line 75) | static inline size_t get_size_in_bytes( size_t n, Data_type dtype ) {

FILE: KoSentenceT5/apex/contrib/csrc/groupbn/batch_norm.h
  function class (line 41) | class NhwcBatchNorm {
  function createTensorDescriptor (line 193) | void createTensorDescriptor(cudnnTensorDescriptor_t *descriptor) {
  function destroyTensorDescriptor (line 199) | void destroyTensorDescriptor(cudnnTensorDescriptor_t descriptor) {
  type StorageType (line 223) | typedef uint16_t StorageType;
  function _fwdKernelLauncher (line 258) | void _fwdKernelLauncher(cudaStream_t stream, NhwcBatchNormFwdParams params,
  function _bwdKernelLauncher (line 338) | void _bwdKernelLauncher(cudaStream_t stream, NhwcBatchNormBwdParams params,
  function smem_driven_bwd_occupancy (line 469) | static int smem_driven_bwd_occupancy(int device_id, const int max_cta_pe...
  function std (line 478) | const std::vector<size_t> NhwcBatchNorm::numWorkspaceBytes() const {
  function _setFwdParams (line 510) | void NhwcBatchNorm::_setFwdParams(NhwcBatchNormFwdParams *params) const {
  function _setFwdInferenceParams (line 534) | void NhwcBatchNorm::_setFwdInferenceParams(NhwcBatchNormFwdInferenceParams
  function _setBwdParams (line 548) | void NhwcBatchNorm::_setBwdParams(NhwcBatchNormBwdParams *params) const {
  function fwdInference (line 569) | void NhwcBatchNorm::fwdInference(cudaStream_t stream, bool use_relu) {
  function dim3 (line 612) | dim3 NhwcBatchNorm::calc_fwd_grid(int *loop, const int grid_dim_x) {
  function dim3 (line 635) | dim3 NhwcBatchNorm::calc_bwd_grid(int *loop, const int grid_dim_x) {
  function fwd (line 658) | void NhwcBatchNorm::fwd(cudaStream_t stream, bool use_relu, void* my_dat...
  function dgrad (line 697) | void NhwcBatchNorm::dgrad(cudaStream_t stream, bool use_relu, void* my_d...

FILE: KoSentenceT5/apex/contrib/csrc/groupbn/batch_norm_add_relu.h
  function class (line 41) | class NhwcBatchNormAddRelu {
  function createTensorDescriptor (line 197) | void createTensorDescriptor(cudnnTensorDescriptor_t *descriptor) {
  function destroyTensorDescriptor (line 203) | void destroyTensorDescriptor(cudnnTensorDescriptor_t descriptor) {
  type StorageType (line 228) | typedef uint16_t StorageType;
  function _fwdKernelLauncher (line 262) | void _fwdKernelLauncher(cudaStream_t stream, NhwcBatchNormFwdParams params,
  function _bwdKernelLauncher (line 332) | void _bwdKernelLauncher(cudaStream_t stream, NhwcBatchNormBwdParams params,
  function smem_driven_bwd_occupancy (line 409) | static int smem_driven_bwd_occupancy(int device_id, const int max_cta_pe...
  function std (line 418) | const std::vector<size_t> NhwcBatchNormAddRelu::numWorkspaceBytes() const {
  function _setFwdParams (line 456) | void NhwcBatchNormAddRelu::_setFwdParams(NhwcBatchNormFwdParams *params)...
  function _setFwdInferenceParams (line 480) | void NhwcBatchNormAddRelu::_setFwdInferenceParams(NhwcBatchNormFwdInfere...
  function _setBwdParams (line 494) | void NhwcBatchNormAddRelu::_setBwdParams(NhwcBatchNormBwdParams *params)...
  function fwdInference (line 515) | void NhwcBatchNormAddRelu::fwdInference(cudaStream_t stream) {
  function dim3 (line 552) | dim3 NhwcBatchNormAddRelu::calc_fwd_grid(int *loop, const int grid_dim_x) {
  function dim3 (line 575) | dim3 NhwcBatchNormAddRelu::calc_bwd_grid(int *loop, const int grid_dim_x) {
  function fwd (line 598) | void NhwcBatchNormAddRelu::fwd(cudaStream_t stream, void* my_data, void*...
  function dgrad (line 640) | void NhwcBatchNormAddRelu::dgrad(cudaStream_t stream, void* my_data, voi...

FILE: KoSentenceT5/apex/contrib/csrc/groupbn/cuda_utils.h
  function namespace (line 5) | namespace at {

FILE: KoSentenceT5/apex/contrib/csrc/groupbn/interface.cpp
  function PYBIND11_MODULE (line 154) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: KoSentenceT5/apex/contrib/csrc/groupbn/nhwc_batch_norm_kernel.h
  type T (line 43) | typedef T Type;
  type Type (line 51) | typedef int Type;
  function DEVICE_FUNCTION (line 247) | DEVICE_FUNCTION void write_to_gmem(float *gmem, int idx, const float (&s...
  function DEVICE_FUNCTION (line 253) | DEVICE_FUNCTION void write_to_gmem(float *gmem, int idx, const float (&s...
  function DEVICE_FUNCTION (line 259) | DEVICE_FUNCTION void scaled_write_to_gmem(float *gmem, int idx, const fl...
  function DEVICE_FUNCTION (line 265) | DEVICE_FUNCTION void write_to_smem(float *smem, int idx, const float (&x...
  function DEVICE_FUNCTION (line 271) | DEVICE_FUNCTION void write_to_smem(int *smem, int idx, const int (&x)[1]) {
  function DEVICE_FUNCTION (line 277) | DEVICE_FUNCTION void write_to_smem(float *smem, int idx, const float (&x...
  function DEVICE_FUNCTION (line 283) | DEVICE_FUNCTION void write_to_smem(int *smem, int idx, const int (&x)[2]) {
  function Storage (line 351) | Storage relu(Storage in) {
  function parallel_sums (line 544) | void parallel_sums(float *smem, float (&x)[ELEMENTS_PER_LDG], int nhw) {
  type ParallelSums (line 637) | struct ParallelSums
  type ParallelSums (line 650) | struct ParallelSums
  function div_up (line 661) | static inline int div_up(int m, int n) {
  function DEVICE_FUNCTION (line 668) | DEVICE_FUNCTION void inter_block_sync(int* gmem_retired_ctas, int expect...
  type NhwcBatchNormFwdInferenceParams (line 697) | struct NhwcBatchNormFwdInferenceParams {
  type NhwcBatchNormFwdParams (line 799) | struct NhwcBatchNormFwdParams {
  type PackedStorage (line 870) | typedef PackedStorage<Storage, ELEMENTS_PER_LDG> PackedStorage_;
  type typename (line 872) | typedef typename PackedStorage_::Type PackedStorageType;
  type NhwcBatchNormBwdParams (line 1388) | struct NhwcBatchNormBwdParams {
  function nhwc_batch_norm_bwd (line 1528) | void nhwc_batch_norm_bwd(NhwcBatchNormBwdParams params) {
  function nhwc_batch_norm_bwd_relu (line 1892) | void nhwc_batch_norm_bwd_relu(NhwcBatchNormBwdParams params) {
  function nhwc_batch_norm_bwd_add_relu (line 2280) | void nhwc_batch_norm_bwd_add_relu(NhwcBatchNormBwdParams params) {

FILE: KoSentenceT5/apex/contrib/csrc/layer_norm/ln_api.cpp
  function ln_fwd (line 15) | std::vector<at::Tensor> ln_fwd(const at::Tensor &x,      // BxSxhidden_size
  function ln_bwd (line 58) | std::vector<at::Tensor> ln_bwd(const at::Tensor &dw,     // BxSxhidden_size
  function PYBIND11_MODULE (line 102) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: KoSentenceT5/apex/contrib/csrc/multihead_attn/additive_masked_softmax_dropout.cpp
  type multihead_attn (line 5) | namespace multihead_attn {
    type fused_softmax (line 6) | namespace fused_softmax {
      type additive_mask_softmax_dropout (line 7) | namespace additive_mask_softmax_dropout {
        function fwd (line 31) | std::vector<torch::Tensor> fwd(
        function bwd (line 57) | torch::Tensor bwd(
  function PYBIND11_MODULE (line 87) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: KoSentenceT5/apex/contrib/csrc/multihead_attn/encdec_multihead_attn.cpp
  type multihead_attn (line 4) | namespace multihead_attn {
    type encdec (line 5) | namespace encdec {
      type cublas_gemmex (line 6) | namespace cublas_gemmex {
        function fwd (line 43) | std::vector<torch::Tensor> fwd(
        function bwd (line 88) | std::vector<torch::Tensor> bwd(
  function PYBIND11_MODULE (line 153) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: KoSentenceT5/apex/contrib/csrc/multihead_attn/encdec_multihead_attn_norm_add.cpp
  type multihead_attn (line 4) | namespace multihead_attn {
    type encdec_norm_add (line 5) | namespace encdec_norm_add {
      type cublas_gemmex (line 6) | namespace cublas_gemmex {
        function fwd (line 52) | std::vector<torch::Tensor> fwd(
        function bwd (line 105) | std::vector<torch::Tensor> bwd(
  function PYBIND11_MODULE (line 194) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: KoSentenceT5/apex/contrib/csrc/multihead_attn/layer_norm.h
  function rsqrt (line 230) | float rsqrt(float v) {
  function rsqrt (line 233) | double rsqrt(double v) {
  function float (line 256) | struct SharedMemory <float>
  function double (line 266) | struct SharedMemory <double>
  function stream (line 653) | auto stream = at::cuda::getCurrentCUDAStream().stream();

FILE: KoSentenceT5/apex/contrib/csrc/multihead_attn/masked_softmax_dropout.cpp
  type multihead_attn (line 4) | namespace multihead_attn {
    type fused_softmax (line 5) | namespace fused_softmax {
      type mask_softmax_dropout (line 6) | namespace mask_softmax_dropout {
        function fwd (line 31) | std::vector<torch::Tensor> fwd(
        function bwd (line 57) | torch::Tensor bwd(
  function PYBIND11_MODULE (line 89) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: KoSentenceT5/apex/contrib/csrc/multihead_attn/philox.h
  function class (line 4) | class Philox {
  function __device__ (line 17) | __device__ inline uint4 operator()() {
  function __device__ (line 45) | __device__ inline void incr_n(unsigned long long n) {
  function __device__ (line 58) | __device__ inline void incr() {
  function mulhilo32 (line 67) | __device__ unsigned int mulhilo32(unsigned int a, unsigned int b,
  function __device__ (line 72) | __device__ inline uint4 single_round(uint4 ctr, uint2 key) {
  function __device__ (line 87) | __device__  __inline__ float4 uniform4(uint4 x) {

FILE: KoSentenceT5/apex/contrib/csrc/multihead_attn/self_multihead_attn.cpp
  type multihead_attn (line 4) | namespace multihead_attn {
    type self (line 5) | namespace self {
      type cublas_gemmex (line 6) | namespace cublas_gemmex {
        function fwd (line 39) | std::vector<torch::Tensor> fwd(
        function bwd (line 75) | std::vector<torch::Tensor> bwd(
  function PYBIND11_MODULE (line 128) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: KoSentenceT5/apex/contrib/csrc/multihead_attn/self_multihead_attn_bias.cpp
  type multihead_attn (line 4) | namespace multihead_attn {
    type self_bias (line 5) | namespace self_bias {
      type cublas_gemmex (line 6) | namespace cublas_gemmex {
        function fwd (line 43) | std::vector<torch::Tensor> fwd(
        function bwd (line 82) | std::vector<torch::Tensor> bwd(
  function PYBIND11_MODULE (line 135) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: KoSentenceT5/apex/contrib/csrc/multihead_attn/self_multihead_attn_bias_additive_mask.cpp
  type multihead_attn (line 5) | namespace multihead_attn {
    type self_bias_additive_mask (line 6) | namespace self_bias_additive_mask {
      type cublas_gemmex (line 7) | namespace cublas_gemmex {
        function fwd (line 46) | std::vector<torch::Tensor> fwd(
        function bwd (line 86) | std::vector<torch::Tensor> bwd(
  function PYBIND11_MODULE (line 139) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: KoSentenceT5/apex/contrib/csrc/multihead_attn/self_multihead_attn_norm_add.cpp
  type multihead_attn (line 4) | namespace multihead_attn {
    type self_norm_add (line 5) | namespace self_norm_add {
      type cublas_gemmex (line 6) | namespace cublas_gemmex {
        function fwd (line 47) | std::vector<torch::Tensor> fwd(
        function bwd (line 93) | std::vector<torch::Tensor> bwd(
  function PYBIND11_MODULE (line 169) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: KoSentenceT5/apex/contrib/csrc/multihead_attn/softmax.h
  function acc_t (line 139) | acc_t sum[WARP_BATCH] { 0.0f };
  function acc_t (line 363) | acc_t sum[WARP_BATCH] { 0.0f };
  function additive_masked_softmax_dropout_warp_forward (line 429) | void additive_masked_softmax_dropout_warp_forward(output_t *dst, uint8_t...
  function softmax_warp_backward (line 2244) | void softmax_warp_backward(__half *gradInput, const __half *grad, const ...
  function masked_softmax_warp_backward (line 2455) | void masked_softmax_warp_backward(__half *gradInput, const __half *grad,...

FILE: KoSentenceT5/apex/contrib/csrc/multihead_attn/strided_batched_gemm.h
  function cublasOperation_t (line 21) | cublasOperation_t convertTransToCublasOperation(char trans) {
  function CublasStridedBatchedGemm (line 31) | void CublasStridedBatchedGemm(THCState *state, char transa, char transb,...
  type cutlass (line 78) | typedef cutlass::gemm::Gemm<WmmaGemmTraits> Gemm;
  function gemm_switch_fp32accum (line 149) | void gemm_switch_fp32accum(THCState *state, char transa, char transb, lo...
  function adjustLdLevel3 (line 278) | void adjustLdLevel3(char transa, char transb, int64_t m, int64_t n, int6...
  function HgemmStridedBatched (line 312) | void HgemmStridedBatched(THCState *state, char transa, char transb, long...

FILE: KoSentenceT5/apex/contrib/csrc/optimizers/fused_adam_cuda.cpp
  function strided_check_finite (line 20) | void strided_check_finite(
  function adam (line 29) | void adam(at::Tensor & p, at::Tensor & p_copy, at::Tensor & m, at::Tenso...
  function reversible_adam (line 43) | void reversible_adam(at::Tensor & p, at::Tensor & p_copy, at::Tensor & m...
  function maybe_adam_undo (line 57) | void maybe_adam_undo(at::Tensor & overflow_flag, at::Tensor & p, at::Ten...
  function maybe_cast (line 69) | void maybe_cast(at::Tensor & overflow_flag, at::Tensor & p_in, at::Tenso...
  function PYBIND11_MODULE (line 78) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: KoSentenceT5/apex/contrib/csrc/optimizers/fused_lamb_cuda.cpp
  function PYBIND11_MODULE (line 19) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: KoSentenceT5/apex/contrib/csrc/optimizers/multi_tensor_distopt_adam.cpp
  function PYBIND11_MODULE (line 17) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: KoSentenceT5/apex/contrib/csrc/optimizers/multi_tensor_distopt_lamb.cpp
  function PYBIND11_MODULE (line 31) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: KoSentenceT5/apex/contrib/csrc/transducer/transducer_joint.cpp
  function transducer_joint_forward (line 33) | std::vector<torch::Tensor> transducer_joint_forward(
  function transducer_joint_backward (line 67) | std::vector<torch::Tensor> transducer_joint_backward(
  function PYBIND11_MODULE (line 95) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: KoSentenceT5/apex/contrib/csrc/transducer/transducer_loss.cpp
  function transducer_loss_forward (line 35) | std::vector<torch::Tensor> transducer_loss_forward(
  function transducer_loss_backward (line 65) | torch::Tensor transducer_loss_backward(
  function PYBIND11_MODULE (line 106) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: KoSentenceT5/apex/contrib/csrc/xentropy/interface.cpp
  function softmax_xentropy_forward (line 24) | std::vector<at::Tensor> softmax_xentropy_forward(
  function softmax_xentropy_backward (line 35) | at::Tensor softmax_xentropy_backward(
  function PYBIND11_MODULE (line 49) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: KoSentenceT5/apex/contrib/fmha/fmha.py
  class FMHAFun (line 33) | class FMHAFun(torch.autograd.Function):
    method forward (line 35) | def forward(ctx, qkv, cu_seqlens, p_dropout, max_s, is_training):
    method backward (line 48) | def backward(ctx, dout):
  class FMHA (line 58) | class FMHA(torch.nn.Module):
    method __init__ (line 60) | def __init__(self, config):
    method forward (line 70) | def forward(self, qkv, cu_seqlens, max_s, is_training=True):

FILE: KoSentenceT5/apex/contrib/groupbn/batch_norm.py
  class bn_NHWC_impl (line 7) | class bn_NHWC_impl(torch.autograd.Function):
    method forward (line 9) | def forward(ctx, x, s, b, rm, riv, mini_m, mini_riv, ret_cta, mom, eps...
    method backward (line 32) | def backward(ctx, grad_y):
  class bn_addrelu_NHWC_impl (line 53) | class bn_addrelu_NHWC_impl(torch.autograd.Function):
    method forward (line 55) | def forward(ctx, x, z, s, b, rm, riv, mini_m, mini_riv, grid_dim_y, re...
    method backward (line 78) | def backward(ctx, grad_y):
  class BatchNorm2d_NHWC (line 101) | class BatchNorm2d_NHWC(_BatchNorm):
    method __init__ (line 103) | def __init__(self, num_features, fuse_relu=False, bn_group=1, max_cta_...
    method forward (line 196) | def forward(self, x, z=None):
    method __del__ (line 219) | def __del__(self):

FILE: KoSentenceT5/apex/contrib/layer_norm/layer_norm.py
  class FastLayerNormFN (line 6) | class FastLayerNormFN(torch.autograd.Function):
    method forward (line 8) | def forward(ctx, x, gamma, beta, epsilon):
    method backward (line 19) | def backward(ctx, dy):
  class FastLayerNorm (line 31) | class FastLayerNorm(torch.nn.Module):
    method __init__ (line 32) | def __init__(self, hidden_size, eps=1e-5):
    method reset_parameters (line 39) | def reset_parameters(self):
    method forward (line 43) | def forward(self, x):

FILE: KoSentenceT5/apex/contrib/multihead_attn/encdec_multihead_attn.py
  function jit_dropout_add (line 19) | def jit_dropout_add(x, residual, prob, is_training):
  class EncdecMultiheadAttn (line 26) | class EncdecMultiheadAttn(nn.Module):
    method __init__ (line 31) | def __init__(self, embed_dim, num_heads, dropout=0., bias=False, inclu...
    method reset_parameters (line 79) | def reset_parameters(self):
    method forward (line 98) | def forward(self, query, key, value, key_padding_mask=None, need_weigh...

FILE: KoSentenceT5/apex/contrib/multihead_attn/encdec_multihead_attn_func.py
  class EncdecAttnFunc (line 5) | class EncdecAttnFunc(torch.autograd.Function):
    method forward (line 7) | def forward(ctx, use_time_mask, is_training, heads, scale, inputs_q, i...
    method backward (line 135) | def backward(ctx, output_grads):

FILE: KoSentenceT5/apex/contrib/multihead_attn/fast_encdec_multihead_attn_func.py
  class FastEncdecAttnFunc (line 5) | class FastEncdecAttnFunc(torch.autograd.Function):
    method forward (line 7) | def forward(ctx, use_time_mask, is_training, heads, inputs_q, inputs_k...
    method backward (line 50) | def backward(ctx, output_grads):

FILE: KoSentenceT5/apex/contrib/multihead_attn/fast_encdec_multihead_attn_norm_add_func.py
  class FastEncdecAttnNormAddFunc (line 12) | class FastEncdecAttnNormAddFunc(torch.autograd.Function):
    method forward (line 14) | def forward(ctx, use_time_mask, is_training, heads, inputs_q, inputs_k...
    method backward (line 69) | def backward(ctx, output_grads):

FILE: KoSentenceT5/apex/contrib/multihead_attn/fast_self_multihead_attn_func.py
  class FastSelfAttnFunc (line 6) | class FastSelfAttnFunc(torch.autograd.Function) :
    method forward (line 8) | def forward(ctx, use_time_mask, is_training, heads, inputs, input_weig...
    method backward (line 120) | def backward(ctx, output_grads):

FILE: KoSentenceT5/apex/contrib/multihead_attn/fast_self_multihead_attn_norm_add_func.py
  class FastSelfAttnNormAddFunc (line 5) | class FastSelfAttnNormAddFunc(torch.autograd.Function):
    method forward (line 7) | def forward(ctx, use_time_mask, is_training, heads, inputs, lyr_nrm_ga...
    method backward (line 56) | def backward(ctx, output_grads):

FILE: KoSentenceT5/apex/contrib/multihead_attn/mask_softmax_dropout_func.py
  class MaskSoftmaxDropout (line 6) | class MaskSoftmaxDropout(torch.autograd.Function) :
    method forward (line 8) | def forward(ctx, is_training, heads, inputs, pad_mask, mask_additive, ...
    method backward (line 51) | def backward(ctx, output_grads):

FILE: KoSentenceT5/apex/contrib/multihead_attn/self_multihead_attn.py
  function jit_dropout_add (line 19) | def jit_dropout_add(x, residual, prob, is_training):
  class SelfMultiheadAttn (line 26) | class SelfMultiheadAttn(nn.Module):
    method __init__ (line 31) | def __init__(self, embed_dim, num_heads, dropout=0., bias=False, inclu...
    method reset_parameters (line 97) | def reset_parameters(self):
    method forward (line 124) | def forward(self, query, key, value, key_padding_mask=None, need_weigh...

FILE: KoSentenceT5/apex/contrib/multihead_attn/self_multihead_attn_func.py
  class SelfAttnFunc (line 4) | class SelfAttnFunc(torch.autograd.Function):
    method forward (line 6) | def forward(ctx, use_time_mask, is_training, heads, scale, inputs,
    method backward (line 121) | def backward(ctx, output_grads):

FILE: KoSentenceT5/apex/contrib/optimizers/distributed_fused_adam.py
  class DistributedFusedAdam (line 9) | class DistributedFusedAdam(torch.optim.Optimizer):
    method __init__ (line 55) | def __init__(self, params,
    method _first_step_init (line 128) | def _first_step_init(self):
    method _init_everything (line 373) | def _init_everything(self):
    method set_last_step (line 378) | def set_last_step(self, last_step):
    method _get_flush_block (line 381) | def _get_flush_block(self):
    method _pipeline_block_reductions (line 397) | def _pipeline_block_reductions(self, block_id):
    method __launch_step_kernel (line 443) | def __launch_step_kernel(self):
    method _pipeline_step (line 469) | def _pipeline_step(self):
    method _flatten_grad_mt (line 479) | def _flatten_grad_mt(self, scale):
    method _do_overlapped_reduction (line 489) | def _do_overlapped_reduction(self, param_i, param_grads_size, param_of...
    method set_global_scale (line 504) | def set_global_scale(self, global_scale):
    method global_scale (line 510) | def global_scale(self):
    method has_overflow (line 514) | def has_overflow(self):
    method peek_overflow (line 523) | def peek_overflow(self):
    method strided_check_finite (line 529) | def strided_check_finite(self, output_params, stride=1, start=-1, end=...
    method L2_grad_norm (line 545) | def L2_grad_norm(self):
    method complete_reductions (line 552) | def complete_reductions(self):
    method step (line 577) | def step(self, closure=None):
    method state_dict (line 598) | def state_dict(self):
    method load_state_dict (line 615) | def load_state_dict(self, state_dict):

FILE: KoSentenceT5/apex/contrib/optimizers/distributed_fused_adam_v2.py
  class DistributedFusedAdamV2 (line 7) | class DistributedFusedAdamV2(torch.optim.Optimizer):
    method __init__ (line 43) | def __init__(self, params,
    method set_last_step (line 351) | def set_last_step(self, last_step):
    method _get_flush_block (line 354) | def _get_flush_block(self):
    method _pipeline_block_reductions (line 370) | def _pipeline_block_reductions(self, block_id):
    method __launch_step_kernel (line 406) | def __launch_step_kernel(self, p, p_copy, m, v, g):
    method _pipeline_block_step (line 425) | def _pipeline_block_step(self, block_id):
    method _pipeline_step (line 445) | def _pipeline_step(self):
    method _flatten_grad_mt (line 460) | def _flatten_grad_mt(self, scale):
    method _do_overlapped_reduction (line 470) | def _do_overlapped_reduction(self, param_i, param_grads_size, param_of...
    method set_global_scale (line 487) | def set_global_scale(self, global_scale):
    method global_scale (line 493) | def global_scale(self):
    method has_overflow (line 497) | def has_overflow(self):
    method peek_overflow (line 506) | def peek_overflow(self):
    method strided_check_finite (line 512) | def strided_check_finite(self, output_params, stride=1, start=-1, end=...
    method L2_grad_norm (line 528) | def L2_grad_norm(self):
    method complete_reductions (line 535) | def complete_reductions(self):
    method revert_step (line 560) | def revert_step(self):
    method step (line 586) | def step(self, closure=None, skip_overflow_check=False):

FILE: KoSentenceT5/apex/contrib/optimizers/distributed_fused_adam_v3.py
  class DistributedFusedAdamV3 (line 7) | class DistributedFusedAdamV3(torch.optim.Optimizer):
    method __init__ (line 43) | def __init__(self, params,
    method has_overflow (line 196) | def has_overflow(self):
    method set_last_step (line 199) | def set_last_step(self, last_step):
    method _get_flush_block (line 202) | def _get_flush_block(self):
    method __launch_step_kernel (line 218) | def __launch_step_kernel(self, p, p_copy, m, v, g):
    method _flatten_grad_mt (line 237) | def _flatten_grad_mt(self, scale):
    method _do_overlapped_reduction (line 247) | def _do_overlapped_reduction(self, param_i, param_grads_size, param_of...
    method set_global_scale (line 268) | def set_global_scale(self, global_scale):
    method global_scale (line 274) | def global_scale(self):
    method L2_grad_norm (line 278) | def L2_grad_norm(self):
    method complete_reductions (line 282) | def complete_reductions(self):
    method step (line 306) | def step(self, closure=None, skip_overflow_check=False):

FILE: KoSentenceT5/apex/contrib/optimizers/distributed_fused_lamb.py
  class DistributedFusedLAMB (line 9) | class DistributedFusedLAMB(torch.optim.Optimizer):
    class AtomicCounter (line 70) | class AtomicCounter(object):
      method __init__ (line 71) | def __init__(self):
      method add (line 77) | def add(self, idx):
    method __init__ (line 82) | def __init__(self, params,
    method _lazy_init_stage1 (line 210) | def _lazy_init_stage1(self):
    method _lazy_init_stage2 (line 330) | def _lazy_init_stage2(self):
    method set_is_accumulation_step (line 451) | def set_is_accumulation_step(self, is_accumulation_step):
    method set_last_step (line 454) | def set_last_step(self, last_step):
    method _get_flush_block (line 457) | def _get_flush_block(self):
    method _pipeline_block_reductions (line 473) | def _pipeline_block_reductions(self, block_id):
    method __compute_contrib_param_norm (line 556) | def __compute_contrib_param_norm(self):
    method __compute_contrib_update_norm (line 569) | def __compute_contrib_update_norm(self):
    method _pipeline_step (line 577) | def _pipeline_step(self):
    method _flatten_grad_mt (line 633) | def _flatten_grad_mt(self, scale):
    method _do_overlapped_reduction (line 651) | def _do_overlapped_reduction(self, param_i, param):
    method set_global_scale (line 667) | def set_global_scale(self, global_scale):
    method global_scale (line 673) | def global_scale(self):
    method L2_grad_norm (line 677) | def L2_grad_norm(self):
    method complete_reductions (line 681) | def complete_reductions(self):
    method step (line 704) | def step(self, closure=None, grad_scaler=None):
    method state_dict (line 740) | def state_dict(self):
    method load_state_dict (line 757) | def load_state_dict(self, state_dict):

FILE: KoSentenceT5/apex/contrib/optimizers/fp16_optimizer.py
  class FP16_Optimizer (line 4) | class FP16_Optimizer(object):
    method __init__ (line 25) | def __init__(self,
    method zero_grad (line 79) | def zero_grad(self, set_grads_to_None=True):
    method step (line 94) | def step(self, closure=None):
    method backward (line 132) | def backward(self, loss):
    method _update_scale (line 142) | def _update_scale(self, skip):
    method _get_state (line 161) | def _get_state(self):
    method _set_state (line 164) | def _set_state(self, value):
    method _get_param_groups (line 171) | def _get_param_groups(self):
    method _set_param_groups (line 174) | def _set_param_groups(self, value):
    method state_dict (line 179) | def state_dict(self):
    method load_state_dict (line 202) | def load_state_dict(self, state_dict):

FILE: KoSentenceT5/apex/contrib/optimizers/fused_adam.py
  class FusedAdam (line 6) | class FusedAdam(torch.optim.Optimizer):
    method __init__ (line 38) | def __init__(self, params,
    method step (line 64) | def step(self, closure=None, grads=None, output_params=None, scale=1.,...

FILE: KoSentenceT5/apex/contrib/optimizers/fused_lamb.py
  class FusedLAMB (line 6) | class FusedLAMB(torch.optim.Optimizer):
    method __init__ (line 63) | def __init__(self, params, lr=1e-3, bias_correction=True,
    method zero_grad (line 87) | def zero_grad(self):
    method step (line 95) | def step(self, closure=None):

FILE: KoSentenceT5/apex/contrib/optimizers/fused_sgd.py
  class FusedSGD (line 7) | class FusedSGD(Optimizer):
    method __init__ (line 66) | def __init__(self, params, lr=required, momentum=0, dampening=0,
    method __setstate__ (line 93) | def __setstate__(self, state):
    method get_momentums (line 98) | def get_momentums(self, params):
    method step (line 115) | def step(self, closure=None, grads=None, output_params=None, scale=1.,...

FILE: KoSentenceT5/apex/contrib/sparsity/asp.py
  function eligible_modules (line 12) | def eligible_modules(model, whitelist_layer_types, allowed_layer_names, ...
  class ASP (line 21) | class ASP:
    method init_model_for_pruning (line 29) | def init_model_for_pruning(cls, model, mask_calculator="m4n2_1d",
    method init_optimizer_for_pruning (line 127) | def init_optimizer_for_pruning(cls, optimizer):
    method compute_sparse_masks (line 155) | def compute_sparse_masks(cls):
    method restore_pruned_weights (line 176) | def restore_pruned_weights(cls):
    method is_sparsity_enabled (line 191) | def is_sparsity_enabled(cls):
    method prune_trained_model (line 212) | def prune_trained_model(cls, model, optimizer):

FILE: KoSentenceT5/apex/contrib/sparsity/sparse_masklib.py
  function fill (line 9) | def fill(x):
  function reshape_1d (line 13) | def reshape_1d(matrix, m):
  function compute_valid_1d_patterns (line 25) | def compute_valid_1d_patterns(m,n):
  function mn_1d_best (line 37) | def mn_1d_best(matrix, m, n):
  function m4n2_1d (line 49) | def m4n2_1d(mat, density):
  function mn_2d_greedy (line 67) | def mn_2d_greedy(matrix, m, n):
  function m4n2_2d_greedy (line 98) | def m4n2_2d_greedy(mat, density):
  function compute_valid_2d_patterns (line 103) | def compute_valid_2d_patterns(m,n):
  function mn_2d_best (line 122) | def mn_2d_best(matrix, m, n):
  function m4n2_2d_best (line 140) | def m4n2_2d_best(mat, density):
  function create_mask (line 145) | def create_mask(tensor, pattern="m4n2_1d", density=0.5):

FILE: KoSentenceT5/apex/contrib/sparsity/test/checkpointing_test_part1.py
  function build_model (line 7) | def build_model(args):
  function train_step (line 21) | def train_step(args, model, optimizer, input_batch, target_batch, step):
  function train_loop (line 31) | def train_loop(args, model, optimizer, step, num_steps):
  function main (line 38) | def main(args):
  class Args (line 76) | class Args:

FILE: KoSentenceT5/apex/contrib/sparsity/test/checkpointing_test_part2.py
  function build_model (line 7) | def build_model(args):
  function train_step (line 21) | def train_step(args, model, optimizer, input_batch, target_batch, step):
  function train_loop (line 31) | def train_loop(args, model, optimizer, step, num_steps):
  function main (line 38) | def main(step, args, model_state_dict, optimizer_state_dict):
  class Args (line 61) | class Args:

FILE: KoSentenceT5/apex/contrib/sparsity/test/checkpointing_test_reference.py
  function build_model (line 11) | def build_model(args):
  function train_step (line 25) | def train_step(args, model, optimizer, input_batch, target_batch, step):
  function train_loop (line 35) | def train_loop(args, model, optimizer, step, num_steps):
  function main (line 42) | def main(args):
  class Args (line 79) | class Args:

FILE: KoSentenceT5/apex/contrib/sparsity/test/toy_problem.py
  function build_model (line 7) | def build_model(args):
  function train_step (line 21) | def train_step(args, model, optimizer, input_batch, target_batch, step):
  function train_loop (line 31) | def train_loop(args, model, optimizer, step, num_steps):
  function main (line 38) | def main(args):
  class Args (line 75) | class Args:

FILE: KoSentenceT5/apex/contrib/test/fmha/test_fmha.py
  function py_mha (line 37) | def py_mha(qkv, amask, b, s, h, d):
  class TestFMHA (line 52) | class TestFMHA(unittest.TestCase):
    method run_test (line 54) | def run_test(self, s, b):
    method test_128 (line 106) | def test_128(self):
    method test_256 (line 109) | def test_256(self):
    method test_384 (line 112) | def test_384(self):
    method test_512 (line 115) | def test_512(self):

FILE: KoSentenceT5/apex/contrib/test/layer_norm/test_fast_layer_norm.py
  class GPUTimer (line 12) | class GPUTimer:
    method __init__ (line 13) | def __init__(self, stream):
    method start (line 17) | def start(self):
    method stop (line 19) | def stop(self):
    method sync (line 21) | def sync(self):
    method millis (line 23) | def millis(self):
  function size_in_bytes (line 26) | def size_in_bytes(t):
  function abs_err (line 28) | def abs_err(x, y):
  class TestFastLayerNorm (line 35) | class TestFastLayerNorm(unittest.TestCase):
    method setUp (line 37) | def setUp(self, seed=1234):
    method test_ln_fp32 (line 42) | def test_ln_fp32(self):
    method test_ln_fp16 (line 44) | def test_ln_fp16(self):
    method run_test_layer_norm (line 47) | def run_test_layer_norm(self, dtype, atol, rtol=1e-5):
    method test_performance (line 94) | def test_performance(self):

FILE: KoSentenceT5/apex/contrib/test/multihead_attn/test_encdec_multihead_attn.py
  class EncdecMultiheadAttnTest (line 7) | class EncdecMultiheadAttnTest(unittest.TestCase):
    method setUp (line 8) | def setUp(self, seed=1234):
    method test_encdec_multihead_attn (line 49) | def test_encdec_multihead_attn(self) :
    method test_encdec_multihead_attn_time_mask (line 76) | def test_encdec_multihead_attn_time_mask(self) :
    method test_encdec_multihead_attn_pad_mask (line 105) | def test_encdec_multihead_attn_pad_mask(self) :

FILE: KoSentenceT5/apex/contrib/test/multihead_attn/test_encdec_multihead_attn_norm_add.py
  class EncdecMultiheadAttnNormAddTest (line 7) | class EncdecMultiheadAttnNormAddTest(unittest.TestCase):
    method setUp (line 8) | def setUp(self, seed=1234):
    method test_encdec_multihead_attn_norm_add (line 49) | def test_encdec_multihead_attn_norm_add(self) :

FILE: KoSentenceT5/apex/contrib/test/multihead_attn/test_fast_self_multihead_attn_bias.py
  class SelfMultiheadAttnTest (line 7) | class SelfMultiheadAttnTest(unittest.TestCase):
    method setUp (line 8) | def setUp(self, seed=1234):
    method test_self_multihead_attn_additive_mask (line 48) | def test_self_multihead_attn_additive_mask(self) :

FILE: KoSentenceT5/apex/contrib/test/multihead_attn/test_mha_fused_softmax.py
  class FusedSoftmaxTest (line 6) | class FusedSoftmaxTest(unittest.TestCase):
    method setUp (line 7) | def setUp(self, seed=1234):
    method test_fused_softmax (line 24) | def test_fused_softmax(self) :

FILE: KoSentenceT5/apex/contrib/test/multihead_attn/test_self_multihead_attn.py
  class SelfMultiheadAttnTest (line 7) | class SelfMultiheadAttnTest(unittest.TestCase):
    method setUp (line 8) | def setUp(self, seed=1234):
    method test_self_multihead_attn (line 45) | def test_self_multihead_attn(self) :
    method test_self_multihead_attn_time_mask (line 71) | def test_self_multihead_attn_time_mask(self) :
    method test_self_multihead_attn_pad_mask (line 100) | def test_self_multihead_attn_pad_mask(self) :

FILE: KoSentenceT5/apex/contrib/test/multihead_attn/test_self_multihead_attn_norm_add.py
  class SelfMultiheadAttnNormAddTest (line 7) | class SelfMultiheadAttnNormAddTest(unittest.TestCase):
    method setUp (line 8) | def setUp(self, seed=1234):
    method test_self_multihead_attn_norm_add (line 45) | def test_self_multihead_attn_norm_add(self) :

FILE: KoSentenceT5/apex/contrib/test/test_label_smoothing.py
  function label_smoothing_raw (line 10) | def label_smoothing_raw(x, target, padding_idx, smoothing):
  function label_smoothing_opt_1 (line 20) | def label_smoothing_opt_1(x, target, padding_idx, smoothing):
  class LabelSmoothingTest (line 30) | class LabelSmoothingTest(unittest.TestCase):
    method setUp (line 31) | def setUp(self, seed=1234):
    method gen_test_inputs (line 40) | def gen_test_inputs(self, N, T, H, smoothing, padding_idx):
    method print_max_diff_elem (line 50) | def print_max_diff_elem(self, ref, tst):
    method test_label_smoothing_function (line 57) | def test_label_smoothing_function(self):
    method test_label_smoothing_perf (line 91) | def test_label_smoothing_perf(self):

FILE: KoSentenceT5/apex/contrib/test/transducer/test_transducer_joint.py
  class TransducerJointTest (line 6) | class TransducerJointTest(unittest.TestCase):
    method setUp (line 7) | def setUp(self, seed=1234):
    method gen_input (line 11) | def gen_input(self, for_vector_kernel):
    method _pack (line 41) | def _pack(self, x, f_len, g_len):
    method _unpack (line 53) | def _unpack(self, x, f_len, g_len):
    method run_transducer_joint (line 67) | def run_transducer_joint(self, for_vector_kernel, pack_output, relu, d...
    method test_transducer_joint (line 118) | def test_transducer_joint(self):
    method test_transducer_joint_vec (line 121) | def test_transducer_joint_vec(self):
    method test_transducer_joint_pack (line 124) | def test_transducer_joint_pack(self):
    method test_transducer_joint_vec_pack (line 127) | def test_transducer_joint_vec_pack(self):
    method test_transducer_joint_relu (line 130) | def test_transducer_joint_relu(self):
    method test_transducer_joint_vec_relu (line 133) | def test_transducer_joint_vec_relu(self):
    method test_transducer_joint_pack_relu (line 136) | def test_transducer_joint_pack_relu(self):
    method test_transducer_joint_vec_pack_relu (line 139) | def test_transducer_joint_vec_pack_relu(self):
    method test_transducer_joint_relu_dropout (line 142) | def test_transducer_joint_relu_dropout(self):
    method test_transducer_joint_vec_relu_dropout (line 145) | def test_transducer_joint_vec_relu_dropout(self):
    method test_transducer_joint_pack_relu_dropout (line 148) | def test_transducer_joint_pack_relu_dropout(self):
    method test_transducer_joint_vec_pack_relu_dropout (line 151) | def test_transducer_joint_vec_pack_relu_dropout(self):

FILE: KoSentenceT5/apex/contrib/test/transducer/test_transducer_loss.py
  class TransducerLossTest (line 6) | class TransducerLossTest(unittest.TestCase):
    method setUp (line 7) | def setUp(self, seed=1234):
    method gen_input (line 11) | def gen_input(self, scalar_t, for_vector_kernel):
    method _pack (line 41) | def _pack(self, x):
    method _unpack (line 52) | def _unpack(self, x):
    method run_transducer_loss (line 64) | def run_transducer_loss(self, scalar_t, fuse_softmax_backward, packed_...
    method test_transducer_loss_fp32 (line 90) | def test_transducer_loss_fp32(self):
    method test_transducer_loss_fp16 (line 98) | def test_transducer_loss_fp16(self):
    method test_transducer_loss_fp16_backward_fusion (line 106) | def test_transducer_loss_fp16_backward_fusion(self):
    method test_transducer_loss_fp16_backward_fusion_packed (line 114) | def test_transducer_loss_fp16_backward_fusion_packed(self):
    method test_transducer_loss_fp16_backward_fusion_packed_vec (line 122) | def test_transducer_loss_fp16_backward_fusion_packed_vec(self):

FILE: KoSentenceT5/apex/contrib/test/transducer/transducer_ref.py
  function transducer_loss_reference (line 5) | def transducer_loss_reference(x, label, f_len, y_len, blank_idx, loss_gr...
  function transducer_joint_reference (line 79) | def transducer_joint_reference(f, g, h_grad, f_len, g_len, pack_output, ...

FILE: KoSentenceT5/apex/contrib/transducer/transducer.py
  class TransducerJoint (line 5) | class TransducerJoint(torch.nn.Module):
    method __init__ (line 27) | def __init__(self, pack_output=False, relu=False, dropout=False, opt=1...
    method forward (line 43) | def forward(self, f, g, f_len, g_len, batch_offset=None, packed_batch=0):
  class TransducerLoss (line 68) | class TransducerLoss(torch.nn.Module):
    method __init__ (line 81) | def __init__(self, fuse_softmax_backward=True, opt=1, packed_input=Fal...
    method forward (line 89) | def forward(self, x, label, f_len, y_len, blank_idx, batch_offset=None...
  class TransducerLossFunc (line 127) | class TransducerLossFunc(torch.autograd.Function):
    method forward (line 129) | def forward(ctx, x, label, f_len, y_len, batch_offset, max_f_len, blan...
    method backward (line 149) | def backward(ctx, loss_grad):
  class TransducerJointFunc (line 158) | class TransducerJointFunc(torch.autograd.Function):
    method forward (line 160) | def forward(ctx, f, g, f_len, g_len, pack_output, relu, dropout, batch...
    method backward (line 180) | def backward(ctx, loss_grad):

FILE: KoSentenceT5/apex/contrib/xentropy/softmax_xentropy.py
  class SoftmaxCrossEntropyLoss (line 4) | class SoftmaxCrossEntropyLoss(torch.autograd.Function):
    method forward (line 6) | def forward(ctx, logits, labels, smoothing=0.0, padding_idx=0, half_to...
    method backward (line 18) | def backward(ctx, grad_loss):

FILE: KoSentenceT5/apex/fp16_utils/fp16_optimizer.py
  class FP16_Optimizer (line 13) | class FP16_Optimizer(object):
    method __init__ (line 14) | def __init__(self,
    method maybe_print (line 110) | def maybe_print(self, msg):
    method __getstate__ (line 114) | def __getstate__(self):
    method __setstate__ (line 117) | def __setstate__(self, state):
    method zero_grad (line 120) | def zero_grad(self, set_grads_to_None=False):
    method _master_params_to_model_params (line 160) | def _master_params_to_model_params(self):
    method clip_master_grads (line 185) | def clip_master_grads(self, max_norm, norm_type=2):
    method state_dict (line 209) | def state_dict(self):
    method load_state_dict (line 230) | def load_state_dict(self, state_dict):
    method step (line 272) | def step(self, closure=None): # could add clip option.
    method _step_with_closure (line 334) | def _step_with_closure(self, closure):
    method backward (line 373) | def backward(self, loss, update_master_grads=True, retain_graph=False):
    method update_master_grads (line 436) | def update_master_grads(self):
    method inspect_master_grad_data (line 493) | def inspect_master_grad_data(self):
    method _get_loss_scale (line 528) | def _get_loss_scale(self):
    method _set_loss_scale (line 531) | def _set_loss_scale(self, value):
    method _get_state (line 537) | def _get_state(self):
    method _set_state (line 540) | def _set_state(self, value):
    method _get_param_groups (line 547) | def _get_param_groups(self):
    method _set_param_groups (line 550) | def _set_param_groups(self, value):

FILE: KoSentenceT5/apex/fp16_utils/fp16util.py
  class tofp16 (line 7) | class tofp16(nn.Module):
    method __init__ (line 15) | def __init__(self):
    method forward (line 18) | def forward(self, input):
  function BN_convert_float (line 22) | def BN_convert_float(module):
  function network_to_half (line 35) | def network_to_half(network):
  function convert_module (line 44) | def convert_module(module, dtype):
  function convert_network (line 60) | def convert_network(network, dtype):
  class FP16Model (line 73) | class FP16Model(nn.Module):
    method __init__ (line 78) | def __init__(self, network):
    method forward (line 82) | def forward(self, *inputs):
  function backwards_debug_hook (line 87) | def backwards_debug_hook(grad):
  function prep_param_lists (line 90) | def prep_param_lists(model, flat_master=False):
  function model_grads_to_master_grads (line 136) | def model_grads_to_master_grads(model_params, master_params, flat_master...
  function master_params_to_model_params (line 158) | def master_params_to_model_params(model_params, master_params, flat_mast...
  function to_python_float (line 176) | def to_python_float(t):

FILE: KoSentenceT5/apex/fp16_utils/loss_scaler.py
  function to_python_float (line 4) | def to_python_float(t):
  class LossScaler (line 10) | class LossScaler:
    method __init__ (line 22) | def __init__(self, scale=1):
    method has_overflow (line 26) | def has_overflow(self, params):
    method _has_inf_or_nan (line 30) | def _has_inf_or_nan(x):
    method update_scale (line 33) | def update_scale(self, overflow):
    method loss_scale (line 37) | def loss_scale(self):
    method scale_gradient (line 40) | def scale_gradient(self, module, grad_in, grad_out):
    method backward (line 43) | def backward(self, loss, retain_graph=False):
  class DynamicLossScaler (line 47) | class DynamicLossScaler:
    method __init__ (line 73) | def __init__(self,
    method has_overflow (line 84) | def has_overflow(self, params):
    method _has_inf_or_nan (line 92) | def _has_inf_or_nan(x):
    method update_scale (line 113) | def update_scale(self, overflow):
    method loss_scale (line 124) | def loss_scale(self):
    method scale_gradient (line 127) | def scale_gradient(self, module, grad_in, grad_out):
    method backward (line 130) | def backward(self, loss, retain_graph=False):

FILE: KoSentenceT5/apex/mlp/mlp.py
  class MlpFunction (line 8) | class MlpFunction(torch.autograd.Function):
    method forward (line 10) | def forward(ctx, bias, activation, *args):
    method backward (line 19) | def backward(ctx, grad_o):
  class MLP (line 26) | class MLP(torch.nn.Module):
    method __init__ (line 34) | def __init__(self, mlp_sizes, bias=True, activation='relu'):
    method reset_parameters (line 64) | def reset_parameters(self):
    method forward (line 74) | def forward(self, input):
    method extra_repr (line 77) | def extra_repr(self):

FILE: KoSentenceT5/apex/multi_tensor_apply/multi_tensor_apply.py
  class MultiTensorApply (line 3) | class MultiTensorApply(object):
    method __init__ (line 7) | def __init__(self, chunk_size):
    method check_avail (line 16) | def check_avail(self):
    method __call__ (line 24) | def __call__(self, op, noop_flag_buffer, tensor_lists, *args):

FILE: KoSentenceT5/apex/normalization/fused_layer_norm.py
  class FusedLayerNormAffineFunction (line 12) | class FusedLayerNormAffineFunction(torch.autograd.Function):
    method forward (line 15) | def forward(ctx, input, weight, bias, normalized_shape, eps):
    method backward (line 30) | def backward(ctx, grad_output):
  class FusedLayerNormFunction (line 39) | class FusedLayerNormFunction(torch.autograd.Function):
    method forward (line 42) | def forward(ctx, input, normalized_shape, eps):
    method backward (line 55) | def backward(ctx, grad_output):
  function fused_layer_norm_affine (line 64) | def fused_layer_norm_affine(input, normalized_shape, weight, bias, eps=1...
  function fused_layer_norm (line 67) | def fused_layer_norm(input, normalized_shape, eps=1e-6):
  class FusedLayerNorm (line 70) | class FusedLayerNorm(torch.nn.Module):
    method __init__ (line 129) | def __init__(self, normalized_shape, eps=1e-5, elementwise_affine=True):
    method reset_parameters (line 148) | def reset_parameters(self):
    method forward (line 153) | def forward(self, input):
    method extra_repr (line 163) | def extra_repr(self):

FILE: KoSentenceT5/apex/optimizers/fused_adagrad.py
  class FusedAdagrad (line 5) | class FusedAdagrad(torch.optim.Optimizer):
    method __init__ (line 43) | def __init__(self, params, lr=1e-2, eps=1e-10,
    method zero_grad (line 59) | def zero_grad(self):
    method step (line 67) | def step(self, closure=None):

FILE: KoSentenceT5/apex/optimizers/fused_adam.py
  class FusedAdam (line 4) | class FusedAdam(torch.optim.Optimizer):
    method __init__ (line 63) | def __init__(self, params, lr=1e-3, bias_correction=True,
    method zero_grad (line 82) | def zero_grad(self):
    method step (line 90) | def step(self, closure=None, grads=None, output_params=None, scale=Non...

FILE: KoSentenceT5/apex/optimizers/fused_lamb.py
  class FusedLAMB (line 4) | class FusedLAMB(torch.optim.Optimizer):
    method __init__ (line 63) | def __init__(self, params, lr=1e-3, bias_correction=True,
    method zero_grad (line 88) | def zero_grad(self):
    method step (line 96) | def step(self, closure=None):

FILE: KoSentenceT5/apex/optimizers/fused_novograd.py
  class FusedNovoGrad (line 4) | class FusedNovoGrad(torch.optim.Optimizer):
    method __init__ (line 67) | def __init__(self, params, lr=1e-3, bias_correction=True,
    method zero_grad (line 92) | def zero_grad(self):
    method load_state_dict (line 100) | def load_state_dict(self, state_dict):
    method step (line 108) | def step(self, closure=None):

FILE: KoSentenceT5/apex/optimizers/fused_sgd.py
  class FusedSGD (line 6) | class FusedSGD(Optimizer):
    method __init__ (line 76) | def __init__(self, params, lr=required, momentum=0, dampening=0,
    method __setstate__ (line 108) | def __setstate__(self, state):
    method zero_grad (line 113) | def zero_grad(self):
    method get_momentums (line 121) | def get_momentums(self, params):
    method step (line 138) | def step(self, closure=None):

FILE: KoSentenceT5/apex/parallel/LARC.py
  class LARC (line 5) | class LARC(object):
    method __init__ (line 39) | def __init__(self, optimizer, trust_coefficient=0.02, clip=True, eps=1...
    method __getstate__ (line 45) | def __getstate__(self):
    method __setstate__ (line 48) | def __setstate__(self, state):
    method state (line 52) | def state(self):
    method __repr__ (line 55) | def __repr__(self):
    method param_groups (line 59) | def param_groups(self):
    method param_groups (line 63) | def param_groups(self, value):
    method state_dict (line 66) | def state_dict(self):
    method load_state_dict (line 69) | def load_state_dict(self, state_dict):
    method zero_grad (line 72) | def zero_grad(self):
    method add_param_group (line 75) | def add_param_group(self, param_group):
    method step (line 78) | def step(self):

FILE: KoSentenceT5/apex/parallel/__init__.py
  function convert_syncbn_model (line 21) | def convert_syncbn_model(module, process_group=None, channel_last=False):
  function create_syncbn_process_group (line 58) | def create_syncbn_process_group(group_size):

FILE: KoSentenceT5/apex/parallel/distributed.py
  function import_flatten_impl (line 13) | def import_flatten_impl():
  function flatten (line 25) | def flatten(bucket):
  function unflatten (line 30) | def unflatten(coalesced, bucket):
  function apply_flat_dist_call (line 36) | def apply_flat_dist_call(bucket, call, extra_args=None):
  function split_half_float_double (line 51) | def split_half_float_double(tensors):
  function split_by_type (line 60) | def split_by_type(tensors):
  function flat_dist_call (line 70) | def flat_dist_call(tensors, call, extra_args=None):
  function extract_tensors (line 78) | def extract_tensors(maybe_tensor, tensor_list):
  class Reducer (line 89) | class Reducer(object):
    method __init__ (line 111) | def __init__(self, module_or_grads_list):
    method reduce (line 121) | def reduce(self):
  class DistributedDataParallel (line 129) | class DistributedDataParallel(Module):
    method __init__ (line 162) | def __init__(self,
    method __setstate__ (line 256) | def __setstate__(self, state):
    method __getstate__ (line 268) | def __getstate__(self):
    method enable_allreduce (line 275) | def enable_allreduce(self):
    method disable_allreduce (line 278) | def disable_allreduce(self):
    method sync_bucket_structure (line 283) | def sync_bucket_structure(self):
    method create_hooks (line 319) | def create_hooks(self):
    method _stream_this_bucket (line 411) | def _stream_this_bucket(self, bucket_idx):
    method _event_this_bucket (line 418) | def _event_this_bucket(self, bucket_idx):
    method allreduce_bucket (line 425) | def allreduce_bucket(self, bucket, bucket_idx, force_default_stream):
    method allreduce_maybe_retain (line 478) | def allreduce_maybe_retain(self, bucket, bucket_idx, force_default_str...
    method allreduce_fallback (line 491) | def allreduce_fallback(self):
    method comm_ready_buckets (line 513) | def comm_ready_buckets(self, param):
    method forward (line 559) | def forward(self, *inputs, **kwargs):

FILE: KoSentenceT5/apex/parallel/multiproc.py
  function docstring_hack (line 5) | def docstring_hack():

FILE: KoSentenceT5/apex/parallel/optimized_sync_batchnorm.py
  class SyncBatchNorm (line 9) | class SyncBatchNorm(_BatchNorm):
    method __init__ (line 58) | def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, ...
    method _specify_process_group (line 64) | def _specify_process_group(self, process_group):
    method _specify_channel_last (line 67) | def _specify_channel_last(self, channel_last):
    method forward (line 70) | def forward(self, input, z = None):

FILE: KoSentenceT5/apex/parallel/optimized_sync_batchnorm_kernel.py
  class SyncBatchnormFunction (line 7) | class SyncBatchnormFunction(Function):
    method forward (line 10) | def forward(ctx, input, z, weight, bias, running_mean, running_varianc...
    method backward (line 75) | def backward(ctx, grad_output):

FILE: KoSentenceT5/apex/parallel/sync_batchnorm.py
  class SyncBatchNorm (line 9) | class SyncBatchNorm(_BatchNorm):
    method __init__ (line 51) | def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, ...
    method _specify_process_group (line 65) | def _specify_process_group(self, process_group):
    method forward (line 68) | def forward(self, input):

FILE: KoSentenceT5/apex/parallel/sync_batchnorm_kernel.py
  class SyncBatchnormFunction (line 7) | class SyncBatchnormFunction(Function):
    method forward (line 10) | def forward(ctx, input, weight, bias, running_mean, running_variance, ...
    method backward (line 33) | def backward(ctx, grad_output):

FILE: KoSentenceT5/apex/pyprof/examples/custom_func_module/custom_function.py
  class Foo (line 9) | class Foo(torch.autograd.Function):
    method forward (line 11) | def forward(ctx, in1, in2):
    method backward (line 16) | def backward(ctx, grad):

FILE: KoSentenceT5/apex/pyprof/examples/custom_func_module/custom_module.py
  class Foo (line 8) | class Foo(torch.nn.Module):
    method __init__ (line 9) | def __init__(self, size):
    method forward (line 14) | def forward(self, input):

FILE: KoSentenceT5/apex/pyprof/examples/imagenet/imagenet.py
  function parseArgs (line 17) | def parseArgs():
  function main (line 89) | def main():

FILE: KoSentenceT5/apex/pyprof/examples/jit/jit_script_function.py
  function foo (line 11) | def foo(x, y):

FILE: KoSentenceT5/apex/pyprof/examples/jit/jit_script_method.py
  class Foo (line 7) | class Foo(torch.jit.ScriptModule):
    method __init__ (line 8) | def __init__(self, size):
    method forward (line 14) | def forward(self, input):

FILE: KoSentenceT5/apex/pyprof/examples/jit/jit_trace_function.py
  function foo (line 7) | def foo(x, y):

FILE: KoSentenceT5/apex/pyprof/examples/jit/jit_trace_method.py
  class Foo (line 7) | class Foo(torch.nn.Module):
    method __init__ (line 8) | def __init__(self, size):
    method forward (line 13) | def forward(self, input):

FILE: KoSentenceT5/apex/pyprof/examples/lenet.py
  class LeNet5 (line 12) | class LeNet5(nn.Module):
    method __init__ (line 13) | def __init__(self):
    method forward (line 24) | def forward(self, x):
    method num_flat_features (line 35) | def num_flat_features(self, x):

FILE: KoSentenceT5/apex/pyprof/examples/user_annotation/resnet.py
  function conv3x3 (line 15) | def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
  function conv1x1 (line 20) | def conv1x1(in_planes, out_planes, stride=1):
  class Bottleneck (line 24) | class Bottleneck(nn.Module):
    method __init__ (line 28) | def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
    method forward (line 48) | def forward(self, x):
  class ResNet (line 102) | class ResNet(nn.Module):
    method __init__ (line 104) | def __init__(self, block, layers, num_classes=1000,
    method _make_layer (line 134) | def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
    method forward (line 158) | def forward(self, x):
  function resnet50 (line 193) | def resnet50():

FILE: KoSentenceT5/apex/pyprof/nvtx/nvmarker.py
  function isfunc (line 27) | def isfunc(mod, f):
  function traceMarker (line 46) | def traceMarker(stack):
  function modMarker (line 56) | def modMarker(mod, fn_name, args):
  function add_wrapper (line 67) | def add_wrapper(mod, fn_name):
  function argMarker (line 110) | def argMarker(mod, op, args, kwargs):
  function patchClass (line 201) | def patchClass(cls):
  function init (line 206) | def init():

FILE: KoSentenceT5/apex/pyprof/parse/db.py
  class DB (line 3) | class DB(object):
    method __init__ (line 9) | def __init__(self, dbFile):
    method select (line 21) | def select(self, cmd):
    method insert (line 36) | def insert(self, cmd, data):
    method execute (line 46) | def execute(self, cmd):
    method commit (line 56) | def commit(self):
    method close (line 59) | def close(self):

FILE: KoSentenceT5/apex/pyprof/parse/kernel.py
  function demangle (line 5) | def demangle(name):
  function encode_object_id (line 11) | def encode_object_id(pid, tid):
  function getShortName (line 20) | def getShortName(name):
  class Kernel (line 33) | class Kernel(object):
    method __init__ (line 41) | def __init__(self):
    method setKernelInfo (line 77) | def setKernelInfo(self, info):
    method setKernelName (line 93) | def setKernelName(self, name):
    method setRunTimeInfo (line 98) | def setRunTimeInfo(self, info):
    method setMarkerInfo (line 107) | def setMarkerInfo(self, info):
    method setDirection (line 111) | def setDirection(self):
    method setOp (line 123) | def setOp(self):
    method print (line 180) | def print(self):

FILE: KoSentenceT5/apex/pyprof/parse/nvvp.py
  class NVVP (line 3) | class NVVP(object):
    method __init__ (line 14) | def __init__(self, db):
    method getProfileStart (line 18) | def getProfileStart(self):
    method getString (line 36) | def getString(self, id_):
    method createMarkerTable (line 45) | def createMarkerTable(self):
    method getCPUInfo (line 65) | def getCPUInfo(self, corrId):
    method getKernelInfo (line 91) | def getKernelInfo(self):
    method getMarkerInfo (line 99) | def getMarkerInfo(self, objId, startTime, endTime):

FILE: KoSentenceT5/apex/pyprof/parse/parse.py
  function parseArgs (line 15) | def parseArgs():
  function main (line 25) | def main():

FILE: KoSentenceT5/apex/pyprof/prof/activation.py
  class Activation (line 5) | class Activation(OperatorLayerBase):
    method __init__ (line 12) | def __init__(self, d):
    method params (line 35) | def params(self):
    method flops (line 39) | def flops(self):
    method bytes (line 48) | def bytes(self):
    method tc (line 58) | def tc(self):
    method op (line 61) | def op(self):
    method mod (line 64) | def mod(self):

FILE: KoSentenceT5/apex/pyprof/prof/base.py
  class OperatorLayerBase (line 3) | class OperatorLayerBase(ABC):
    method tc (line 10) | def tc(self):
    method params (line 18) | def params(self):
    method flops (line 25) | def flops(self):
    method bytes (line 32) | def bytes(self):
    method mod (line 36) | def mod(self):
    method op (line 43) | def op(self):

FILE: KoSentenceT5/apex/pyprof/prof/blas.py
  class Addmm (line 8) | class Addmm(OperatorLayerBase):
    method __init__ (line 10) | def __init__(self, d):
    method tc (line 63) | def tc(self):
    method bytes (line 69) | def bytes(self):
    method flops (line 73) | def flops(self):
    method op (line 76) | def op(self):
    method mod (line 79) | def mod(self):
    method params (line 82) | def params(self):
  class Bmm (line 86) | class Bmm(OperatorLayerBase):
    method __init__ (line 88) | def __init__(self, d):
    method tc (line 123) | def tc(self):
    method params (line 129) | def params(self):
    method flops (line 134) | def flops(self):
    method bytes (line 137) | def bytes(self):
    method op (line 141) | def op(self):
    method mod (line 144) | def mod(self):
  class Matmul (line 147) | class Matmul(OperatorLayerBase):
    method __init__ (line 152) | def __init__(self, d):
    method params (line 252) | def params(self):
    method tc (line 255) | def tc(self):
    method bytes (line 264) | def bytes(self):
    method flops (line 272) | def flops(self):
    method op (line 279) | def op(self):
    method mod (line 282) | def mod(self):
  class Mm (line 285) | class Mm(OperatorLayerBase):
    method __init__ (line 287) | def __init__(self, d):
    method params (line 319) | def params(self):
    method tc (line 323) | def tc(self):
    method bytes (line 329) | def bytes(self):
    method flops (line 333) | def flops(self):
    method op (line 336) | def op(self):
    method mod (line 339) | def mod(self):

FILE: KoSentenceT5/apex/pyprof/prof/conv.py
  class Conv (line 5) | class Conv(OperatorLayerBase):
    method __init__ (line 26) | def __init__(self, d):
    method params (line 180) | def params(self):
    method conv_bytes_flops (line 184) | def conv_bytes_flops(self, N, C, H, W, K, P, Q, R, S, g, t):
    method bytes_flops (line 190) | def bytes_flops(self):
    method bytes (line 218) | def bytes(self):
    method flops (line 222) | def flops(self):
    method tc (line 226) | def tc(self):
    method op (line 232) | def op(self):
    method mod (line 235) | def mod(self):

FILE: KoSentenceT5/apex/pyprof/prof/convert.py
  class Convert (line 5) | class Convert(OperatorLayerBase):
    method __init__ (line 11) | def __init__(self, d):
    method params (line 41) | def params(self):
    method op (line 45) | def op(self):
    method mod (line 48) | def mod(self):
    method tc (line 51) | def tc(self):
    method elems (line 54) | def elems(self):
    method flops (line 57) | def flops(self):
    method bytes (line 60) | def bytes(self):

FILE: KoSentenceT5/apex/pyprof/prof/data.py
  class Data (line 3) | class Data(object):
    method __init__ (line 7) | def __init__(self, kernel):
    method setParams (line 41) | def setParams(self, params):

FILE: KoSentenceT5/apex/pyprof/prof/dropout.py
  class Dropout (line 5) | class Dropout(OperatorLayerBase):
    method __init__ (line 7) | def __init__(self, d):
    method params (line 28) | def params(self):
    method op (line 32) | def op(self):
    method mod (line 35) | def mod(self):
    method tc (line 38) | def tc(self):
    method elems (line 41) | def elems(self):
    method bytes (line 44) | def bytes(self):
    method flops (line 48) | def flops(self):

FILE: KoSentenceT5/apex/pyprof/prof/embedding.py
  class Embedding (line 5) | class Embedding(OperatorLayerBase):
    method __init__ (line 7) | def __init__(self, d):
    method params (line 33) | def params(self):
    method op (line 37) | def op(self):
    method mod (line 40) | def mod(self):
    method tc (line 43) | def tc(self):
    method bytes (line 46) | def bytes(self):
    method flops (line 69) | def flops(self):

FILE: KoSentenceT5/apex/pyprof/prof/index_slice_join_mutate.py
  class Cat (line 6) | class Cat(OperatorLayerBase):
    method __init__ (line 8) | def __init__(self, d):
    method params (line 34) | def params(self):
    method flops (line 38) | def flops(self):
    method tc (line 41) | def tc(self):
    method op (line 44) | def op(self):
    method mod (line 47) | def mod(self):
    method bytes (line 50) | def bytes(self):
  class Reshape (line 56) | class Reshape(OperatorLayerBase):
    method __init__ (line 58) | def __init__(self, d):
    method params (line 82) | def params(self):
    method flops (line 86) | def flops(self):
    method tc (line 89) | def tc(self):
    method op (line 92) | def op(self):
    method mod (line 95) | def mod(self):
    method bytes (line 98) | def bytes(self):
  class Gather (line 101) | class Gather(OperatorLayerBase):
    method __init__ (line 103) | def __init__(self, d):
    method params (line 132) | def params(self):
    method flops (line 136) | def flops(self):
    method tc (line 139) | def tc(self):
    method op (line 142) | def op(self):
    method mod (line 145) | def mod(self):
    method bytes (line 148) | def bytes(self):
  class MaskedScatter (line 151) | class MaskedScatter(OperatorLayerBase):
    method __init__ (line 153) | def __init__(self, d):
    method params (line 178) | def params(self):
    method flops (line 182) | def flops(self):
    method tc (line 185) | def tc(self):
    method op (line 188) | def op(self):
    method mod (line 191) | def mod(self):
    method bytes (line 194) | def bytes(self):
  class Nonzero (line 207) | class Nonzero(OperatorLayerBase):
    method __init__ (line 209) | def __init__(self, d):
    method params (line 229) | def params(self):
    method flops (line 233) | def flops(self):
    method tc (line 236) | def tc(self):
    method op (line 239) | def op(self):
    method mod (line 242) | def mod(self):
    method bytes (line 245) | def bytes(self):
  class IndexSelect (line 260) | class IndexSelect(OperatorLayerBase):
    method __init__ (line 262) | def __init__(self, d):
    method params (line 311) | def params(self):
    method tc (line 315) | def tc(self):
    method op (line 318) | def op(self):
    method mod (line 321) | def mod(self):
    method flops (line 324) | def flops(self):
    method bytes (line 327) | def bytes(self):
  class MaskedSelect (line 343) | class MaskedSelect(OperatorLayerBase):
    method __init__ (line 345) | def __init__(self, d):
    method params (line 393) | def params(self):
    method tc (line 397) | def tc(self):
    method op (line 400) | def op(self):
    method mod (line 403) | def mod(self):
    method bytes (line 406) | def bytes(self):
    method flops (line 418) | def flops(self):

FILE: KoSentenceT5/apex/pyprof/prof/linear.py
  class Linear (line 5) | class Linear(OperatorLayerBase):
    method setXWBMNK (line 17) | def setXWBMNK(self, args):
    method tc (line 63) | def tc(self):
    method __init__ (line 69) | def __init__(self, d):
    method params (line 118) | def params(self):
    method op (line 145) | def op(self):
    method bytesFlops (line 148) | def bytesFlops(self):
    method bytes (line 179) | def bytes(self):
    method flops (line 183) | def flops(self):
    method mod (line 187) | def mod(self):

FILE: KoSentenceT5/apex/pyprof/prof/loss.py
  class MSELoss (line 7) | class MSELoss(OperatorLayerBase):
    method __init__ (line 9) | def __init__(self, d):
    method params (line 51) | def params(self):
    method elems (line 55) | def elems(self):
    method bytes (line 71) | def bytes(self):
    method flops (line 74) | def flops(self):
    method tc (line 77) | def tc(self):
    method op (line 80) | def op(self):
    method mod (line 83) | def mod(self):

FILE: KoSentenceT5/apex/pyprof/prof/misc.py
  class Foo (line 5) | class Foo(OperatorLayerBase):
    method __init__ (line 9) | def __init__(self, d):
    method params (line 31) | def params(self):
    method tc (line 35) | def tc(self):
    method op (line 38) | def op(self):
    method mod (line 41) | def mod(self):
    method flops (line 44) | def flops(self):
    method bytes (line 47) | def bytes(self):
  class Copy (line 50) | class Copy(OperatorLayerBase):
    method __init__ (line 52) | def __init__(self, d):
    method params (line 75) | def params(self):
    method tc (line 80) | def tc(self):
    method op (line 83) | def op(self):
    method mod (line 86) | def mod(self):
    method flops (line 89) | def flops(self):
    method elems (line 92) | def elems(self):
    method bytes (line 95) | def bytes(self):
  class Clone (line 98) | class Clone(OperatorLayerBase):
    method __init__ (line 100) | def __init__(self, d):
    method params (line 118) | def params(self):
    method flops (line 122) | def flops(self):
    method tc (line 125) | def tc(self):
    method op (line 128) | def op(self):
    method mod (line 131) | def mod(self):
    method elems (line 134) | def elems(self):
    method bytes (line 137) | def bytes(self):
  class Contiguous (line 140) | class Contiguous(OperatorLayerBase):
    method __init__ (line 142) | def __init__(self, d):
    method params (line 160) | def params(self):
    method flops (line 164) | def flops(self):
    method bytes (line 167) | def bytes(self):
    method tc (line 170) | def tc(self):
    method op (line 173) | def op(self):
    method mod (line 176) | def mod(self):
  class Any (line 179) | class Any(OperatorLayerBase):
    method __init__ (line 181) | def __init__(self, d):
    method params (line 202) | def params(self):
    method op (line 206) | def op(self):
    method mod (line 209) | def mod(self):
    method tc (line 212) | def tc(self):
    method flops (line 215) | def flops(self):
    method bytes (line 218) | def bytes(self):

FILE: KoSentenceT5/apex/pyprof/prof/normalization.py
  class BatchNorm (line 5) | class BatchNorm(OperatorLayerBase):
    method __init__ (line 7) | def __init__(self, d):
    method params (line 27) | def params(self):
    method tc (line 31) | def tc(self):
    method op (line 34) | def op(self):
    method mod (line 37) | def mod(self):
    method elems (line 40) | def elems(self):
    method flops (line 43) | def flops(self):
    method bytes (line 47) | def bytes(self):

FILE: KoSentenceT5/apex/pyprof/prof/optim.py
  class Adam (line 7) | class Adam(OperatorLayerBase):
    method __init__ (line 9) | def __init__(self, d):
    method params (line 31) | def params(self):
    method flops (line 35) | def flops(self):
    method bytes (line 38) | def bytes(self):
    method tc (line 58) | def tc(self):
    method op (line 61) | def op(self):
    method mod (line 64) | def mod(self):

FILE: KoSentenceT5/apex/pyprof/prof/output.py
  class Output (line 3) | class Output():
    method __init__ (line 33) | def __init__(self, args):
    method foo (line 77) | def foo(self, cadena, pformat):
    method header (line 99) | def header(self):
    method data (line 107) | def data(self, a):

FILE: KoSentenceT5/apex/pyprof/prof/pointwise.py
  class Pointwise (line 6) | class Pointwise(OperatorLayerBase):
    method foo (line 26) | def foo(d):
    method __init__ (line 29) | def __init__(self, d):
    method params (line 84) | def params(self):
    method tc (line 88) | def tc(self):
    method op (line 91) | def op(self):
    method mod (line 94) | def mod(self):
    method elems (line 97) | def elems(self):
    method bytes (line 138) | def bytes(self):
    method flops (line 141) | def flops(self):

FILE: KoSentenceT5/apex/pyprof/prof/pooling.py
  class MaxPool2d (line 7) | class MaxPool2d(object):
    method parse (line 9) | def parse(marker):

FILE: KoSentenceT5/apex/pyprof/prof/prof.py
  function findFpropKernel (line 39) | def findFpropKernel(seq):
  function foo (line 56) | def foo(mod, op, d):
  function main (line 171) | def main():

FILE: KoSentenceT5/apex/pyprof/prof/randomSample.py
  class RandPerm (line 5) | class RandPerm(OperatorLayerBase):
    method __init__ (line 7) | def __init__(self, d):
    method params (line 25) | def params(self):
    method tc (line 29) | def tc(self):
    method op (line 32) | def op(self):
    method mod (line 35) | def mod(self):
    method bytes (line 38) | def bytes(self):
    method flops (line 41) | def flops(self):

FILE: KoSentenceT5/apex/pyprof/prof/recurrentCell.py
  function hasTileSize (line 5) | def hasTileSize(name):
  function ctaTile (line 11) | def ctaTile(name):
  class RNNCell (line 21) | class RNNCell(OperatorLayerBase):
    method __init__ (line 26) | def __init__(self, d):
    method params (line 73) | def params(self):
    method tc (line 83) | def tc(self):
    method op (line 89) | def op(self):
    method mod (line 92) | def mod(self):
    method bytes (line 95) | def bytes(self):
    method flops (line 105) | def flops(self):
    method bar (line 115) | def bar(self):

FILE: KoSentenceT5/apex/pyprof/prof/reduction.py
  class Mean (line 5) | class Mean(OperatorLayerBase):
    method __init__ (line 7) | def __init__(self, d):
    method params (line 32) | def params(self):
    method tc (line 36) | def tc(self):
    method op (line 39) | def op(self):
    method mod (line 42) | def mod(self):
    method elems (line 45) | def elems(self):
    method bytes (line 48) | def bytes(self):
    method flops (line 54) | def flops(self):
  class Sum (line 60) | class Sum(OperatorLayerBase):
    method __init__ (line 62) | def __init__(self, d):
    method params (line 86) | def params(self):
    method tc (line 90) | def tc(self):
    method op (line 93) | def op(self):
    method mod (line 96) | def mod(self):
    method elems (line 99) | def elems(self):
    method flops (line 102) | def flops(self):
    method bytes (line 106) | def bytes(self):
  class Norm (line 109) | class Norm(OperatorLayerBase):
    method __init__ (line 111) | def __init__(self, d):
    method params (line 129) | def params(self):
    method elems (line 133) | def elems(self):
    method bytes (line 136) | def bytes(self):
    method flops (line 139) | def flops(self):
    method tc (line 143) | def tc(self):
    method op (line 146) | def op(self):
    method mod (line 149) | def mod(self):

FILE: KoSentenceT5/apex/pyprof/prof/softmax.py
  class Softmax (line 5) | class Softmax(OperatorLayerBase):
    method __init__ (line 7) | def __init__(self, d):
    method op (line 31) | def op(self):
    method mod (line 34) | def mod(self):
    method tc (line 37) | def tc(self):
    method params (line 40) | def params(self):
    method elems (line 44) | def elems(self):
    method flops (line 47) | def flops(self):
    method bytes (line 52) | def bytes(self):
  class LogSoftmax (line 57) | class LogSoftmax(OperatorLayerBase):
    method __init__ (line 59) | def __init__(self, d):
    method op (line 91) | def op(self):
    method mod (line 94) | def mod(self):
    method tc (line 97) | def tc(self):
    method params (line 100) | def params(self):
    method elems (line 104) | def elems(self):
    method flops (line 107) | def flops(self):
    method bytes (line 112) | def bytes(self):

FILE: KoSentenceT5/apex/pyprof/prof/usage.py
  function parseArgs (line 4) | def parseArgs():

FILE: KoSentenceT5/apex/pyprof/prof/utility.py
  class Utility (line 3) | class Utility(object):
    method numElems (line 6) | def numElems(shape):
    method typeToBytes (line 11) | def typeToBytes(t):
    method typeToString (line 23) | def typeToString(t):
    method hasNVTX (line 45) | def hasNVTX(marker):
    method isscalar (line 59) | def isscalar(t):

FILE: KoSentenceT5/apex/reparameterization/__init__.py
  function apply_weight_norm (line 4) | def apply_weight_norm(module, name='', dim=0, hook_child=True):
  function remove_weight_norm (line 50) | def remove_weight_norm(module, name='', remove_all=False):
  function apply_reparameterization (line 64) | def apply_reparameterization(module, reparameterization=None, name='', d...
  function remove_reparameterization (line 96) | def remove_reparameterization(module, reparameterization=Reparameterizat...

FILE: KoSentenceT5/apex/reparameterization/reparameterization.py
  class Reparameterization (line 4) | class Reparameterization(object):
    method __init__ (line 19) | def __init__(self, name, dim, module, retain_forward=True):
    method compute_weight (line 28) | def compute_weight(self, module=None, name=None):
    method reparameterize (line 40) | def reparameterize(self, name, weight, dim):
    method apply (line 57) | def apply(module, name, dim, reparameterization=None, hook_child=True):
    method get_module_and_name (line 105) | def get_module_and_name(module, name):
    method get_params (line 123) | def get_params(self, module):
    method remove (line 127) | def remove(self, module):
    method __call__ (line 139) | def __call__(self, module, inputs):
    method backward_hook (line 147) | def backward_hook(self, module, grad_input, grad_output):

FILE: KoSentenceT5/apex/reparameterization/weight_norm.py
  function _norm (line 8) | def _norm(p, dim):
  class WeightNorm (line 22) | class WeightNorm(Reparameterization):
    method compute_weight (line 39) | def compute_weight(self, module=None, name=None):
    method reparameterize (line 62) | def reparameterize(self, name, weight, dim):

FILE: KoSentenceT5/data/dataloader.py
  class ModelDataLoader (line 10) | class ModelDataLoader(Dataset):
    method __init__ (line 11) | def __init__(self, file_path, args, metric, tokenizer, type_):
    method load_data (line 57) | def load_data(self, type):
    method data2tensor (line 70) | def data2tensor(self, line, type):
    method __getitem__ (line 133) | def __getitem__(self, index):
    method __len__ (line 175) | def __len__(self):
  function get_loader (line 183) | def get_loader(args, metric):

FILE: KoSentenceT5/main.py
  function main (line 5) | def main(args, logger) -> None:

FILE: KoSentenceT5/model/loss.py
  class Loss (line 12) | class Loss():
    method __init__ (line 14) | def __init__(self, args):
    method train_loss_fct (line 19) | def train_loss_fct(self, config, inputs, a, p, n):
    method evaluation_during_training (line 31) | def evaluation_during_training(self, embeddings1, embeddings2, labels,...

FILE: KoSentenceT5/model/setting.py
  class Arguments (line 8) | class Arguments():
    method __init__ (line 10) | def __init__(self):
    method add_type_of_processing (line 13) | def add_type_of_processing(self):
    method add_hyper_parameters (line 21) | def add_hyper_parameters(self):
    method add_data_parameters (line 35) | def add_data_parameters(self):
    method print_args (line 45) | def print_args(self, args):
    method add_argument (line 51) | def add_argument(self, *args, **kw_args):
    method parse (line 54) | def parse(self):
  class Setting (line 61) | class Setting():
    method set_logger (line 63) | def set_logger(self):
    method set_seed (line 77) | def set_seed(self, args):
    method run (line 91) | def run(self):

FILE: KoSentenceT5/model/simcse/kost5.py
  class KoSentenceT5 (line 5) | class KoSentenceT5(nn.Module):
    method __init__ (line 6) | def __init__(self, model):
    method forward (line 10) | def forward(self, config, inputs, mode):
    method encode (line 44) | def encode(self, inputs, device):

FILE: KoSentenceT5/model/simcse/processor.py
  class Processor (line 19) | class Processor():
    method __init__ (line 21) | def __init__(self, args):
    method run (line 33) | def run(self, inputs, indicator=None, type=None):
    method progress (line 52) | def progress(self, loss):
    method progress_validation (line 56) | def progress_validation(self, score):
    method return_value (line 60) | def return_value(self):
    method get_object (line 66) | def get_object(self, tokenizer, model):
    method get_scheduler (line 81) | def get_scheduler(self, optim, train_loader):
    method model_setting (line 89) | def model_setting(self):
    method train (line 125) | def train(self, epoch):
    method valid (line 155) | def valid(self):
    method test (line 179) | def test(self):

FILE: KoSentenceT5/model/utils.py
  class Metric (line 10) | class Metric():
    method __init__ (line 12) | def __init__(self, args):
    method get_lr (line 15) | def get_lr(self, optimizer):
    method count_parameters (line 18) | def count_parameters(self, model):
    method cal_acc (line 21) | def cal_acc(self, yhat, y):
    method cal_time (line 28) | def cal_time(self, start_time, end_time):
    method cal_dev_score (line 35) | def cal_dev_score(self, score, indicator):
    method update_indicator (line 51) | def update_indicator(self, indicator, score):
    method draw_graph (line 70) | def draw_graph(self, cp):
    method performance_check (line 74) | def performance_check(self, cp, config):
    method print_size_of_model (line 80) | def print_size_of_model(self, model):
    method move2device (line 85) | def move2device(self, sample, device):
    method save_model (line 106) | def save_model(self, config, cp, pco):
  function pytorch_cos_sim (line 131) | def pytorch_cos_sim(a, b):

FILE: KoSimCSE/SemanticSearch.py
  function main (line 6) | def main():

FILE: KoSimCSE/apex/RNN/RNNBackend.py
  function is_iterable (line 10) | def is_iterable(maybe_iterable):
  function flatten_list (line 14) | def flatten_list(tens_list):
  class bidirectionalRNN (line 25) | class bidirectionalRNN(nn.Module):
    method __init__ (line 29) | def __init__(self, inputRNN, num_layers=1, dropout = 0):
    method forward (line 37) | def forward(self, input, collect_hidden=False):
    method reset_parameters (line 52) | def reset_parameters(self):
    method init_hidden (line 59) | def init_hidden(self, bsz):
    method detach_hidden (line 66) | def detach_hidden(self):
    method reset_hidden (line 73) | def reset_hidden(self, bsz):
    method init_inference (line 80) | def init_inference(self, bsz):
  class stackedRNN (line 90) | class stackedRNN(nn.Module):
    method __init__ (line 94) | def __init__(self, inputRNN, num_layers=1, dropout=0):
    method forward (line 122) | def forward(self, input, collect_hidden=False, reverse=False):
    method reset_parameters (line 197) | def reset_parameters(self):
    method init_hidden (line 204) | def init_hidden(self, bsz):
    method detach_hidden (line 211) | def detach_hidden(self):
    method reset_hidden (line 218) | def reset_hidden(self, bsz):
    method init_inference (line 225) | def init_inference(self, bsz):
  class RNNCell (line 232) | class RNNCell(nn.Module):
    method __init__ (line 242) | def __init__(self, gate_multiplier, input_size, hidden_size, cell, n_h...
    method new_like (line 274) | def new_like(self, new_input_size=None):
    method reset_parameters (line 291) | def reset_parameters(self, gain=1):
    method init_hidden (line 309) | def init_hidden(self, bsz):
    method reset_hidden (line 330) | def reset_hidden(self, bsz):
    method detach_hidden (line 338) | def detach_hidden(self):
    method forward (line 348) | def forward(self, input):

FILE: KoSimCSE/apex/RNN/cells.py
  class mLSTMRNNCell (line 12) | class mLSTMRNNCell(RNNCell):
    method __init__ (line 17) | def __init__(self, input_size, hidden_size, bias = False, output_size ...
    method forward (line 26) | def forward(self, input):
    method new_like (line 45) | def new_like(self, new_input_size=None):
  function mLSTMCell (line 55) | def mLSTMCell(input, hidden, w_ih, w_hh, w_mih, w_mhh, b_ih=None, b_hh=N...

FILE: KoSimCSE/apex/RNN/models.py
  function toRNNBackend (line 8) | def toRNNBackend(inputRNN, num_layers, bidirectional=False, dropout = 0):
  function LSTM (line 19) | def LSTM(input_size, hidden_size, num_layers, bias=True, batch_first=Fal...
  function GRU (line 26) | def GRU(input_size, hidden_size, num_layers, bias=True, batch_first=Fals...
  function ReLU (line 33) | def ReLU(input_size, hidden_size, num_layers, bias=True, batch_first=Fal...
  function Tanh (line 40) | def Tanh(input_size, hidden_size, num_layers, bias=True, batch_first=Fal...
  function mLSTM (line 47) | def mLSTM(input_size, hidden_size, num_layers, bias=True, batch_first=Fa...

FILE: KoSimCSE/apex/amp/_amp_state.py
  class AmpState (line 18) | class AmpState(object):
    method __init__ (line 19) | def __init__(self):
  function warn_or_err (line 29) | def warn_or_err(msg):
  function maybe_print (line 39) | def maybe_print(msg, rank0=False):
  function master_params (line 60) | def master_params(optimizer):

FILE: KoSimCSE/apex/amp/_initialize.py
  function to_type (line 21) | def to_type(dtype, t):
  function applier (line 39) | def applier(value, fn):
  function check_models (line 64) | def check_models(models):
  function check_params_fp32 (line 79) | def check_params_fp32(models):
  function check_optimizers (line 119) | def check_optimizers(optimizers):
  class O2StateDictHook (line 133) | class O2StateDictHook(object):
    method __init__ (line 134) | def __init__(self, fn):
    method __call__ (line 137) | def __call__(self, module, state_dict, prefix, local_metadata):
  function _initialize (line 145) | def _initialize(models, optimizers, properties, num_losses=1, cast_model...

FILE: KoSimCSE/apex/amp/_process_optimizer.py
  class AmpOptimizerState (line 9) | class AmpOptimizerState(object):
    method __init__ (line 10) | def __init__(self):
  function _master_params_to_model_params (line 14) | def _master_params_to_model_params(self):
  function lazy_init_with_master_weights (line 28) | def lazy_init_with_master_weights(self):
  function post_backward_models_are_masters (line 93) | def post_backward_models_are_masters(scaler, params, stashed_grads, scal...
  function prepare_backward_with_master_weights (line 142) | def prepare_backward_with_master_weights(self):
  function post_backward_with_master_weights (line 161) | def post_backward_with_master_weights(self, scaler):
  function lazy_init_no_master_weights (line 205) | def lazy_init_no_master_weights(self):
  function prepare_backward_no_master_weights (line 224) | def prepare_backward_no_master_weights(self):
  function post_backward_no_master_weights (line 240) | def post_backward_no_master_weights(self, scaler):
  function prepare_backward_with_master_weights_FusedSGD (line 258) | def prepare_backward_with_master_weights_FusedSGD(self):
  function post_backward_with_master_weights_FusedSGD (line 277) | def post_backward_with_master_weights_FusedSGD(self, scaler):
  function prepare_backward_no_master_weights_FusedSGD (line 305) | def prepare_backward_no_master_weights_FusedSGD(self):
  function post_backward_no_master_weights_FusedSGD (line 309) | def post_backward_no_master_weights_FusedSGD(self, scaler):
  function _amp_lazy_init (line 313) | def _amp_lazy_init(self):
  function _process_optimizer (line 321) | def _process_optimizer(optimizer, properties):

FILE: KoSimCSE/apex/amp/amp.py
  function _decorator_helper (line 18) | def _decorator_helper(orig_fn, cast_fn, wrap_fn):
  function half_function (line 30) | def half_function(fn):
  function float_function (line 35) | def float_function(fn):
  function promote_function (line 40) | def promote_function(fn):
  function register_half_function (line 46) | def register_half_function(module, name):
  function register_float_function (line 53) | def register_float_function(module, name):
  function register_promote_function (line 60) | def register_promote_function(module, name):
  function init (line 68) | def init(enabled=True, loss_scale="dynamic", enable_caching=True, verbos...

FILE: KoSimCSE/apex/amp/compat.py
  function variable_is_tensor (line 4) | def variable_is_tensor():
  function tensor_is_variable (line 8) | def tensor_is_variable():
  function tensor_is_float_tensor (line 13) | def tensor_is_float_tensor():
  function is_tensor_like (line 19) | def is_tensor_like(x):
  function is_floating_point (line 24) | def is_floating_point(x):
  function scalar_python_val (line 35) | def scalar_python_val(x):
  function filter_attrs (line 45) | def filter_attrs(module, attrs):

FILE: KoSimCSE/apex/amp/frontend.py
  class Properties (line 7) | class Properties(object):
    method __init__ (line 13) | def __init__(self):
    method _update_options_dict (line 33) | def _update_options_dict(self, new_options):
    method __getattr__ (line 43) | def __getattr__(self, name):
    method __setattr__ (line 51) | def __setattr__(self, name, value):
  class O3 (line 102) | class O3:
    method __call__ (line 111) | def __call__(self, properties):
  class O2 (line 124) | class O2:
    method __call__ (line 134) | def __call__(self, properties):
  class O1 (line 147) | class O1:
    method __call__ (line 156) | def __call__(self, properties):
  class O0 (line 169) | class O0:
    method __call__ (line 175) | def __call__(self, properties):
  function initialize (line 195) | def initialize(
  function state_dict (line 361) | def state_dict(destination=None):
  function load_state_dict (line 373) | def load_state_dict(state_dict):

FILE: KoSimCSE/apex/amp/handle.py
  function scale_loss (line 17) | def scale_loss(loss,
  function disable_casts (line 164) | def disable_casts():
  class AmpHandle (line 170) | class AmpHandle(object):
    method __init__ (line 171) | def __init__(self, loss_scale="dynamic", enable_caching=True, verbose=...
    method is_active (line 179) | def is_active(self):
    method _disable_casts (line 183) | def _disable_casts(self):
    method wrap_optimizer (line 188) | def wrap_optimizer(self, optimizer, num_loss=1):
    method scale_loss (line 193) | def scale_loss(self, loss, optimizer):
    method _clear_cache (line 226) | def _clear_cache(self):
    method _save_func (line 230) | def _save_func(self, mod, fn, func):
    method _deactivate (line 233) | def _deactivate(self):
    method has_cache (line 239) | def has_cache(self):
    method cache (line 243) | def cache(self):
    method remove_cache (line 246) | def remove_cache(self, param):
    method verbose (line 251) | def verbose(self):
  class NoOpHandle (line 254) | class NoOpHandle(object):
    method is_active (line 255) | def is_active(self):
    method _disable_casts (line 259) | def _disable_casts(self):
    method wrap_optimizer (line 262) | def wrap_optimizer(self, optimizer, num_loss=1):
    method scale_loss (line 266) | def scale_loss(self, loss, optimizer):
    method has_cache (line 270) | def has_cache(self):
    method verbose (line 274) | def verbose(self):
    method _clear_cache (line 277) | def _clear_cache(self):
    method _deactivate (line 280) | def _deactivate(self):

FILE: KoSimCSE/apex/amp/opt.py
  class OptimWrapper (line 9) | class OptimWrapper(object):
    method __init__ (line 10) | def __init__(self, optimizer, amp_handle, num_loss):
    method scale_loss (line 19) | def scale_loss(self, loss):
    method _cur_loss_scaler (line 55) | def _cur_loss_scaler(self):
    method step (line 59) | def step(self, closure=None):
    method __getattr__ (line 80) | def __getattr__(self, attr):
    method __getstate__ (line 84) | def __getstate__(self):
    method __setstate__ (line 87) | def __setstate__(self):
    method __repr__ (line 90) | def __repr__(self):
    method state_dict (line 93) | def state_dict(self):
    method load_state_dict (line 96) | def load_state_dict(self, state_dict):
    method zero_grad (line 99) | def zero_grad(self):
    method add_param_group (line 102) | def add_param_group(self, param_group):

FILE: KoSimCSE/apex/amp/rnn_compat.py
  function _gen_VF_wrapper (line 7) | def _gen_VF_wrapper(name):
  class VariableFunctionsShim (line 17) | class VariableFunctionsShim(object):
    method __init__ (line 18) | def __init__(self):
  function has_old_rnns (line 24) | def has_old_rnns():
  function whitelist_rnn_cells (line 31) | def whitelist_rnn_cells(handle, verbose):

FILE: KoSimCSE/apex/amp/scaler.py
  function scale_check_overflow_python (line 6) | def scale_check_overflow_python(model_grad, master_grad, scale, check_ov...
  function axpby_check_overflow_python (line 19) | def axpby_check_overflow_python(model_grad, stashed_grad, master_grad, a...
  class LossScaler (line 33) | class LossScaler(object):
    method __init__ (line 38) | def __init__(self,
    method loss_scale (line 73) | def loss_scale(self):
    method unscale_python (line 76) | def unscale_python(self, model_grads, master_grads, scale):
    method unscale (line 94) | def unscale(self, model_grads, master_grads, unused_scale, models_are_...
    method unscale_with_stashed_python (line 126) | def unscale_with_stashed_python(self,
    method unscale_with_stashed (line 152) | def unscale_with_stashed(self,
    method clear_overflow_state (line 191) | def clear_overflow_state(self):
    method update_scale (line 197) | def update_scale(self):

FILE: KoSimCSE/apex/amp/utils.py
  function is_cuda_enabled (line 8) | def is_cuda_enabled():
  function get_cuda_version (line 11) | def get_cuda_version():
  function is_fp_tensor (line 14) | def is_fp_tensor(x):
  function is_nested (line 23) | def is_nested(x):
  function should_cache (line 26) | def should_cache(x):
  function collect_fp_tensor_types (line 36) | def collect_fp_tensor_types(args, kwargs):
  function type_string (line 51) | def type_string(x):
  function maybe_half (line 54) | def maybe_half(x, name='', verbose=False):
  function maybe_float (line 65) | def maybe_float(x, name='', verbose=False):
  function casted_args (line 77) | def casted_args(cast_fn, args, kwargs):
  function cached_cast (line 90) | def cached_cast(cast_fn, x, cache):
  function verbosify (line 124) | def verbosify(cast_fn, fn_name, verbose):
  function as_inplace (line 130) | def as_inplace(fns):
  function has_func (line 134) | def has_func(mod, fn):
  function get_func (line 140) | def get_func(mod, fn):
  function set_func (line 146) | def set_func(mod, fn, new_fn):
  function set_func_save (line 152) | def set_func_save(handle, mod, fn, new_fn):
  function synthesize_flattened_rnn_weights (line 171) | def synthesize_flattened_rnn_weights(fp32_weights,
  function new_synthesize_flattened_rnn_weights (line 194) | def new_synthesize_flattened_rnn_weights(fp32_weights,

FILE: KoSimCSE/apex/amp/wrap.py
  function make_cast_wrapper (line 10) | def make_cast_wrapper(orig_fn, cast_fn, handle,
  function cached_cast (line 31) | def cached_cast(mod, fn, cast_fn, handle,
  function make_promote_wrapper (line 44) | def make_promote_wrapper(orig_fn, cast_fn, handle=None):
  function promote (line 65) | def promote(mod, fn, handle, verbose=False):
  function sequence_promote (line 71) | def sequence_promote(mod, fn, handle, verbose=False):
  function promote_match_arg0 (line 92) | def promote_match_arg0(mod, fn, handle, verbose=False):
  function err_if_any_half (line 114) | def err_if_any_half(mod, fn, handle, custom_err_msg=None):
  function err_if_arg0_half (line 132) | def err_if_arg0_half(mod, fn, handle, verbose=False):
  function rnn_cast (line 157) | def rnn_cast(backend, fn, handle, verbose=False):
  function new_rnn_cast (line 222) | def new_rnn_cast(fn, handle, verbose=False):
  function disable_casts (line 267) | def disable_casts(mod, fn, handle):

FILE: KoSimCSE/apex/contrib/bottleneck/bottleneck.py
  function kaiming_uniform_ (line 5) | def kaiming_uniform_(tensor, a=0, mode='fan_in', nonlinearity='leaky_rel...
  class FrozenBatchNorm2d (line 9) | class FrozenBatchNorm2d(torch.nn.Module):
    method __init__ (line 13) | def __init__(self, n):
    method get_scale_bias (line 20) | def get_scale_bias(self, nhwc=False):
    method forward (line 31) | def forward(self, x):
  function drelu_dscale1 (line 37) | def drelu_dscale1(grad_o, output, scale1):
  function drelu_dscale2 (line 44) | def drelu_dscale2(grad_o, output, scale1, scale2):
  class BottleneckFunction (line 51) | class BottleneckFunction(torch.autograd.Function):
    method forward (line 53) | def forward(ctx, nhwc, stride_1x1, scale, bias, x, *conv):
    method backward (line 75) | def backward(ctx, grad_o):
  function conv3x3 (line 102) | def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
  function conv1x1 (line 107) | def conv1x1(in_planes, out_planes, stride=1):
  class Bottleneck (line 111) | class Bottleneck(torch.nn.Module):
    method __init__ (line 119) | def __init__(self, in_channels, bottleneck_channels, out_channels, str...
    method forward (line 174) | def forward(self, x):

FILE: KoSimCSE/apex/contrib/csrc/bottleneck/bottleneck.cpp
  function checkCudnnError (line 31) | int checkCudnnError(cudnnStatus_t code, const char* expr, const char* fi...
  function checkError (line 42) | void checkError(cudaError_t code, char const * func, const char *file, c...
  function generateStrides (line 55) | void generateStrides(const int64_t* dimA, int64_t* strideA, int nbDims, ...
  function getFwdConvDilatedFilterDim (line 75) | int getFwdConvDilatedFilterDim(int filterDim, int dilation) {
  function getFwdConvPaddedImageDim (line 79) | int getFwdConvPaddedImageDim(int tensorDim, int pad) {
  function getFwdConvOutputDim (line 83) | int getFwdConvOutputDim(
  function common_conv_descriptors (line 111) | common_conv_descriptors
  function common_convbias_descriptors (line 173) | common_convbias_descriptors
  function dconv_descriptors (line 294) | dconv_descriptors
  function getConvFusionString (line 377) | std::string getConvFusionString(int64_t* x_dim_padded,
  function run_conv_scale_bias_add_activation (line 469) | void
  function run_conv_scale_bias (line 630) | void
  function run_dconv_drelu_dscale (line 759) | void
  function run_dconv (line 886) | void
  function run_dconv_add (line 992) | void
  function bottleneck_forward (line 1104) | std::vector<at::Tensor> bottleneck_forward(bool explicit_nhwc, int strid...
  function bottleneck_backward (line 1287) | std::vector<at::Tensor> bottleneck_backward(bool explicit_nhwc, int stri...
  function PYBIND11_MODULE (line 1609) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: KoSimCSE/apex/contrib/csrc/fmha/fmha_api.cpp
  function set_params (line 33) | void set_params(Fused_multihead_attention_fprop_params &params,
  function mha_fwd (line 86) | std::vector<at::Tensor>
  function mha_bwd (line 182) | std::vector<at::Tensor>
  function mha_fwd_nl (line 262) | std::vector<at::Tensor> mha_fwd_nl(const at::Tensor &qkv,         // tot...
  function mha_bwd_nl (line 342) | std::vector<at::Tensor> mha_bwd_nl(const at::Tensor &dout,        // tot...
  function PYBIND11_MODULE (line 426) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: KoSimCSE/apex/contrib/csrc/fmha/src/fmha.h
  type Qkv_params (line 46) | struct Qkv_params {
  function Qkv_params (line 59) | struct Fused_multihead_attention_fprop_params : public Qkv_params {

FILE: KoSimCSE/apex/contrib/csrc/fmha/src/fmha/gemm.h
  function namespace (line 34) | namespace fmha {
  type Fragment_accumulator (line 145) | struct Fragment_accumulator
  function add (line 152) | void add(const Other_fragment_ &other) {

FILE: KoSimCSE/apex/contrib/csrc/fmha/src/fmha/gmem_tile.h
  function namespace (line 30) | namespace fmha {
  function __device__ (line 112) | inline __device__ void store(const uint4 (&data)[LDGS]) {
  function __device__ (line 123) | inline __device__ void move() {
  function __device__ (line 201) | inline __device__ void store(const uint4 (&src)[STGS_PER_LOOP], int mi) {
  function __device__ (line 222) | inline __device__ void move() {
  function __device__ (line 273) | __device__ Gmem_tile_mma_sd(void *ptr, const Params &params, const int t...
  function __device__ (line 288) | inline __device__ void store(const Type &data, const int mi, const int n...
  function __device__ (line 300) | inline __device__ void move() {
  function Base (line 311) | struct Gmem_tile_mma_s : public Base {
  function Base (line 404) | struct Gmem_tile_dq : public Base {

FILE: KoSimCSE/apex/contrib/csrc/fmha/src/fmha/mask.h
  function namespace (line 30) | namespace fmha {

FILE: KoSimCSE/apex/contrib/csrc/fmha/src/fmha/smem_tile.h
  function namespace (line 33) | namespace fmha {
  function __device__ (line 396) | inline __device__ Smem_tile_row_a(void *smem, int tidx) : Base(smem, tid...
  function __device__ (line 462) | inline __device__ void reset_read_offset() {
  function __device__ (line 494) | inline __device__ Smem_tile_a(void *smem, int tidx) : Base(smem, tidx) {
  function __device__ (line 581) | inline __device__ Smem_tile_col_b(void *smem, int tidx) : Base(smem, tid...
  function __device__ (line 653) | inline __device__ void reset_read_offset() {
  function __device__ (line 685) | inline __device__ Smem_tile_b(void *smem, int tidx) : Base(smem, tidx) {
  function __device__ (line 748) | inline __device__ Smem_tile_row_b(void *smem, int tidx) : Base(smem, tid...
  function __device__ (line 892) | inline __device__ Smem_tile_b(void *smem, int tidx) : Base(smem, tidx) {
  function __device__ (line 912) | inline __device__ Smem_tile_v(void *smem, int tidx) : Base(smem, tidx) {
  function __device__ (line 1003) | inline __device__ Smem_tile_o(void *smem, int tidx) {
  function store (line 1057) | void store(const Accumulator (&acc)[M][N], int mi) {
  function __device__ (line 1129) | inline __device__ Smem_tile_mma(char *smem, int tidx) {
  function store (line 1147) | void store(const uint4 (&regs)[M][N]) {
  function __device__ (line 1177) | inline __device__ Smem_tile_mma_transposed(char *smem, int tidx) : Base(...
  function load (line 1189) | void load(Fragment (&frag)[M][N]) {
  function __device__ (line 1223) | inline __device__ Smem_tile_mma_epilogue(char *smem, int tidx) : Base(sm...
  function store (line 1238) | void store(const Acc (&acc)[M][N]){
  function store (line 1272) | void store(const uint4 (&regs)[M][N]) {

FILE: KoSimCSE/apex/contrib/csrc/fmha/src/fmha/softmax.h
  function namespace (line 30) | namespace fmha {

FILE: KoSimCSE/apex/contrib/csrc/fmha/src/fmha/utils.h
  function namespace (line 38) | namespace fmha {
  function __device__ (line 247) | static inline __device__ uint32_t hadd2(uint32_t a, uint32_t b) {
  function __device__ (line 255) | static inline __device__ uint32_t hmin2(uint32_t a, uint32_t b) {
  function __device__ (line 263) | static inline __device__ uint32_t hmul2(uint32_t a, uint32_t b) {
  function __device__ (line 271) | static inline __device__ uint2 hmul4(uint2 a, uint2 b) {
  function __device__ (line 280) | static inline __device__ uint4 hmul8(uint4 a, uint4 b) {
  function __device__ (line 291) | static inline __device__ uint4 hmul8(uint32_t a, uint4 b) {
  function __device__ (line 317) | static inline __device__ uint32_t habs2(uint32_t x) {
  function __device__ (line 332) | static inline __device__ uint16_t clamp_to_zero(uint16_t x) {
  function __device__ (line 340) | static inline __device__ uint16_t float_to_half(float f) {
  function __device__ (line 348) | static inline __device__ uint32_t float2_to_half2(float a, float b) {
  function __device__ (line 362) | static inline __device__ uint32_t float_to_half2(float a) {
  function __device__ (line 368) | static inline __device__ uint32_t float2_to_half2(const float2 &f) {
  function __device__ (line 374) | static inline __device__ uint2 float4_to_half4(float x, float y, float z...
  function __device__ (line 383) | static inline __device__ uint32_t hfma2(uint32_t a, uint32_t b, uint32_t...
  function __device__ (line 391) | static inline __device__ uint32_t hfma2_relu(uint32_t a, uint32_t b, uin...
  function __device__ (line 403) | static inline __device__ uint32_t h0_h0(uint32_t x) {
  function __device__ (line 412) | static inline __device__ float h0_to_float(uint32_t h2) {
  function __device__ (line 424) | static inline __device__ uint32_t h1_h1(uint32_t x) {
  function __device__ (line 433) | static inline __device__ uint16_t hadd(uint16_t a, uint16_t b) {
  function __device__ (line 441) | static inline __device__ uint32_t hadd(uint32_t a, uint32_t b) {
  function __device__ (line 447) | static inline __device__ uint2 hadd4(uint2 a, uint2 b) {
  function __device__ (line 456) | static inline __device__ uint2 hadd(uint2 a, uint2 b) {
  function __device__ (line 462) | static inline __device__ uint4 hadd8(uint4 a, uint4 b) {
  function __device__ (line 473) | static inline __device__ uint4 fadd4(uint4 a, uint4 b) {
  function __device__ (line 484) | static inline __device__ uint4 hadd(uint4 a, uint4 b) {
  function __device__ (line 490) | static inline __device__ float half_to_float(uint16_t h) {
  function __device__ (line 498) | static inline __device__ float2 half2_to_float2(uint32_t x) {
  function __device__ (line 514) | static inline __device__ uint16_t hfma(uint16_t a, uint16_t b, uint16_t ...
  function __device__ (line 522) | static inline __device__ uint16_t hmul(uint16_t a, uint16_t b) {
  function __device__ (line 530) | static inline __device__ float sigmoid(float x) {
  function __device__ (line 685) | inline __device__ Ldg_functor(Data_type (&fetch)[N], const void* (&ptrs)...
  function __device__ (line 690) | inline __device__ void clear(int ii) {
  function __device__ (line 695) | inline __device__ void load(int ii, bool p) {
  function __device__ (line 847) | inline __device__ void stg(void *ptr, uint8_t val) {
  function __device__ (line 853) | inline __device__ void stg(void *ptr, uint16_t val) {
  function __device__ (line 859) | inline __device__ void stg(void *ptr, uint32_t val) {
  function __device__ (line 865) | inline __device__ void stg(void *ptr, uint2 val) {
  function __device__ (line 871) | inline __device__ void stg(void *ptr, uint4 val) {
  function __device__ (line 881) | inline __device__ void sts(uint32_t ptr, uint16_t val) {
  function __device__ (line 887) | inline __device__ void sts(uint32_t ptr, uint32_t val) {
  function __device__ (line 893) | inline __device__ void sts(uint32_t ptr, uint2 val) {
  function __device__ (line 903) | inline __device__ void sts(uint32_t ptr, uint4 val) {

FILE: KoSimCSE/apex/contrib/csrc/fmha/src/fmha_dgrad_kernel_1xN_reload.h
  function namespace (line 34) | namespace fmha {

FILE: KoSimCSE/apex/contrib/csrc/fmha/src/fmha_dgrad_kernel_1xN_reload_nl.h
  function namespace (line 34) | namespace fmha {

FILE: KoSimCSE/apex/contrib/csrc/fmha/src/fmha_fprop_kernel_1xN.h
  function namespace (line 34) | namespace fmha {

FILE: KoSimCSE/apex/contrib/csrc/fmha/src/fmha_fprop_kernel_1xN_nl.h
  function namespace (line 35) | namespace fmha {

FILE: KoSimCSE/apex/contrib/csrc/fmha/src/fmha_fprop_kernel_1xN_reload_v.h
  function namespace (line 34) | namespace fmha {

FILE: KoSimCSE/apex/contrib/csrc/fmha/src/fmha_kernel.h
  function namespace (line 39) | namespace fmha {
  function __device__ (line 90) | inline __device__ Noloop_traits(const int bidc)
  function move_all (line 96) | void move_all(Tiles & ... tiles) const {
  function __device__ (line 113) | inline __device__ int offset_loop_count(const int l) {
  function __device__ (line 157) | inline __device__ int offset_loop_count(const int l) {

FILE: KoSimCSE/apex/contrib/csrc/fmha/src/fmha_utils.h
  type Data_type (line 53) | enum Data_type { DATA_TYPE_FP16, DATA_TYPE_FP32, DATA_TYPE_INT32, DATA_T...
  function set_alpha (line 57) | static inline void set_alpha( uint32_t &alpha, float norm, Data_type dty...
  function get_size_in_bytes (line 75) | static inline size_t get_size_in_bytes( size_t n, Data_type dtype ) {

FILE: KoSimCSE/apex/contrib/csrc/groupbn/batch_norm.h
  function class (line 41) | class NhwcBatchNorm {
  function createTensorDescriptor (line 193) | void createTensorDescriptor(cudnnTensorDescriptor_t *descriptor) {
  function destroyTensorDescriptor (line 199) | void destroyTensorDescriptor(cudnnTensorDescriptor_t descriptor) {
  type StorageType (line 223) | typedef uint16_t StorageType;
  function _fwdKernelLauncher (line 258) | void _fwdKernelLauncher(cudaStream_t stream, NhwcBatchNormFwdParams params,
  function _bwdKernelLauncher (line 338) | void _bwdKernelLauncher(cudaStream_t stream, NhwcBatchNormBwdParams params,
  function smem_driven_bwd_occupancy (line 469) | static int smem_driven_bwd_occupancy(int device_id, const int max_cta_pe...
  function std (line 478) | const std::vector<size_t> NhwcBatchNorm::numWorkspaceBytes() const {
  function _setFwdParams (line 510) | void NhwcBatchNorm::_setFwdParams(NhwcBatchNormFwdParams *params) const {
  function _setFwdInferenceParams (line 534) | void NhwcBatchNorm::_setFwdInferenceParams(NhwcBatchNormFwdInferenceParams
  function _setBwdParams (line 548) | void NhwcBatchNorm::_setBwdParams(NhwcBatchNormBwdParams *params) const {
  function fwdInference (line 569) | void NhwcBatchNorm::fwdInference(cudaStream_t stream, bool use_relu) {
  function dim3 (line 612) | dim3 NhwcBatchNorm::calc_fwd_grid(int *loop, const int grid_dim_x) {
  function dim3 (line 635) | dim3 NhwcBatchNorm::calc_bwd_grid(int *loop, const int grid_dim_x) {
  function fwd (line 658) | void NhwcBatchNorm::fwd(cudaStream_t stream, bool use_relu, void* my_dat...
  function dgrad (line 697) | void NhwcBatchNorm::dgrad(cudaStream_t stream, bool use_relu, void* my_d...

FILE: KoSimCSE/apex/contrib/csrc/groupbn/batch_norm_add_relu.h
  function class (line 41) | class NhwcBatchNormAddRelu {
  function createTensorDescriptor (line 197) | void createTensorDescriptor(cudnnTensorDescriptor_t *descriptor) {
  function destroyTensorDescriptor (line 203) | void destroyTensorDescriptor(cudnnTensorDescriptor_t descriptor) {
  type StorageType (line 228) | typedef uint16_t StorageType;
  function _fwdKernelLauncher (line 262) | void _fwdKernelLauncher(cudaStream_t stream, NhwcBatchNormFwdParams params,
  function _bwdKernelLauncher (line 332) | void _bwdKernelLauncher(cudaStream_t stream, NhwcBatchNormBwdParams params,
  function smem_driven_bwd_occupancy (line 409) | static int smem_driven_bwd_occupancy(int device_id, const int max_cta_pe...
  function std (line 418) | const std::vector<size_t> NhwcBatchNormAddRelu::numWorkspaceBytes() const {
  function _setFwdParams (line 456) | void NhwcBatchNormAddRelu::_setFwdParams(NhwcBatchNormFwdParams *params)...
  function _setFwdInferenceParams (line 480) | void NhwcBatchNormAddRelu::_setFwdInferenceParams(NhwcBatchNormFwdInfere...
  function _setBwdParams (line 494) | void NhwcBatchNormAddRelu::_setBwdParams(NhwcBatchNormBwdParams *params)...
  function fwdInference (line 515) | void NhwcBatchNormAddRelu::fwdInference(cudaStream_t stream) {
  function dim3 (line 552) | dim3 NhwcBatchNormAddRelu::calc_fwd_grid(int *loop, const int grid_dim_x) {
  function dim3 (line 575) | dim3 NhwcBatchNormAddRelu::calc_bwd_grid(int *loop, const int grid_dim_x) {
  function fwd (line 598) | void NhwcBatchNormAddRelu::fwd(cudaStream_t stream, void* my_data, void*...
  function dgrad (line 640) | void NhwcBatchNormAddRelu::dgrad(cudaStream_t stream, void* my_data, voi...

FILE: KoSimCSE/apex/contrib/csrc/groupbn/cuda_utils.h
  function namespace (line 5) | namespace at {

FILE: KoSimCSE/apex/contrib/csrc/groupbn/interface.cpp
  function PYBIND11_MODULE (line 154) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: KoSimCSE/apex/contrib/csrc/groupbn/nhwc_batch_norm_kernel.h
  type T (line 43) | typedef T Type;
  type Type (line 51) | typedef int Type;
  function DEVICE_FUNCTION (line 247) | DEVICE_FUNCTION void write_to_gmem(float *gmem, int idx, const float (&s...
  function DEVICE_FUNCTION (line 253) | DEVICE_FUNCTION void write_to_gmem(float *gmem, int idx, const float (&s...
  function DEVICE_FUNCTION (line 259) | DEVICE_FUNCTION void scaled_write_to_gmem(float *gmem, int idx, const fl...
  function DEVICE_FUNCTION (line 265) | DEVICE_FUNCTION void write_to_smem(float *smem, int idx, const float (&x...
  function DEVICE_FUNCTION (line 271) | DEVICE_FUNCTION void write_to_smem(int *smem, int idx, const int (&x)[1]) {
  function DEVICE_FUNCTION (line 277) | DEVICE_FUNCTION void write_to_smem(float *smem, int idx, const float (&x...
  function DEVICE_FUNCTION (line 283) | DEVICE_FUNCTION void write_to_smem(int *smem, int idx, const int (&x)[2]) {
  function Storage (line 351) | Storage relu(Storage in) {
  function parallel_sums (line 544) | void parallel_sums(float *smem, float (&x)[ELEMENTS_PER_LDG], int nhw) {
  type ParallelSums (line 637) | struct ParallelSums
  type ParallelSums (line 650) | struct ParallelSums
  function div_up (line 661) | static inline int div_up(int m, int n) {
  function DEVICE_FUNCTION (line 668) | DEVICE_FUNCTION void inter_block_sync(int* gmem_retired_ctas, int expect...
  type NhwcBatchNormFwdInferenceParams (line 697) | struct NhwcBatchNormFwdInferenceParams {
  type NhwcBatchNormFwdParams (line 799) | struct NhwcBatchNormFwdParams {
  type PackedStorage (line 870) | typedef PackedStorage<Storage, ELEMENTS_PER_LDG> PackedStorage_;
  type typename (line 872) | typedef typename PackedStorage_::Type PackedStorageType;
  type NhwcBatchNormBwdParams (line 1388) | struct NhwcBatchNormBwdParams {
  function nhwc_batch_norm_bwd (line 1528) | void nhwc_batch_norm_bwd(NhwcBatchNormBwdParams params) {
  function nhwc_batch_norm_bwd_relu (line 1892) | void nhwc_batch_norm_bwd_relu(NhwcBatchNormBwdParams params) {
  function nhwc_batch_norm_bwd_add_relu (line 2280) | void nhwc_batch_norm_bwd_add_relu(NhwcBatchNormBwdParams params) {

FILE: KoSimCSE/apex/contrib/csrc/layer_norm/ln_api.cpp
  function ln_fwd (line 15) | std::vector<at::Tensor> ln_fwd(const at::Tensor &x,      // BxSxhidden_size
  function ln_bwd (line 58) | std::vector<at::Tensor> ln_bwd(const at::Tensor &dw,     // BxSxhidden_size
  function PYBIND11_MODULE (line 102) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: KoSimCSE/apex/contrib/csrc/multihead_attn/additive_masked_softmax_dropout.cpp
  type multihead_attn (line 5) | namespace multihead_attn {
    type fused_softmax (line 6) | namespace fused_softmax {
      type additive_mask_softmax_dropout (line 7) | namespace additive_mask_softmax_dropout {
        function fwd (line 31) | std::vector<torch::Tensor> fwd(
        function bwd (line 57) | torch::Tensor bwd(
  function PYBIND11_MODULE (line 87) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: KoSimCSE/apex/contrib/csrc/multihead_attn/encdec_multihead_attn.cpp
  type multihead_attn (line 4) | namespace multihead_attn {
    type encdec (line 5) | namespace encdec {
      type cublas_gemmex (line 6) | namespace cublas_gemmex {
        function fwd (line 43) | std::vector<torch::Tensor> fwd(
        function bwd (line 88) | std::vector<torch::Tensor> bwd(
  function PYBIND11_MODULE (line 153) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: KoSimCSE/apex/contrib/csrc/multihead_attn/encdec_multihead_attn_norm_add.cpp
  type multihead_attn (line 4) | namespace multihead_attn {
    type encdec_norm_add (line 5) | namespace encdec_norm_add {
      type cublas_gemmex (line 6) | namespace cublas_gemmex {
        function fwd (line 52) | std::vector<torch::Tensor> fwd(
        function bwd (line 105) | std::vector<torch::Tensor> bwd(
  function PYBIND11_MODULE (line 194) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: KoSimCSE/apex/contrib/csrc/multihead_attn/layer_norm.h
  function rsqrt (line 230) | float rsqrt(float v) {
  function rsqrt (line 233) | double rsqrt(double v) {
  function float (line 256) | struct SharedMemory <float>
  function double (line 266) | struct SharedMemory <double>
  function stream (line 653) | auto stream = at::cuda::getCurrentCUDAStream().stream();

FILE: KoSimCSE/apex/contrib/csrc/multihead_attn/masked_softmax_dropout.cpp
  type multihead_attn (line 4) | namespace multihead_attn {
    type fused_softmax (line 5) | namespace fused_softmax {
      type mask_softmax_dropout (line 6) | namespace mask_softmax_dropout {
        function fwd (line 31) | std::vector<torch::Tensor> fwd(
        function bwd (line 57) | torch::Tensor bwd(
  function PYBIND11_MODULE (line 89) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: KoSimCSE/apex/contrib/csrc/multihead_attn/philox.h
  function class (line 4) | class Philox {
  function __device__ (line 17) | __device__ inline uint4 operator()() {
  function __device__ (line 45) | __device__ inline void incr_n(unsigned long long n) {
  function __device__ (line 58) | __device__ inline void incr() {
  function mulhilo32 (line 67) | __device__ unsigned int mulhilo32(unsigned int a, unsigned int b,
  function __device__ (line 72) | __device__ inline uint4 single_round(uint4 ctr, uint2 key) {
  function __device__ (line 87) | __device__  __inline__ float4 uniform4(uint4 x) {

FILE: KoSimCSE/apex/contrib/csrc/multihead_attn/self_multihead_attn.cpp
  type multihead_attn (line 4) | namespace multihead_attn {
    type self (line 5) | namespace self {
      type cublas_gemmex (line 6) | namespace cublas_gemmex {
        function fwd (line 39) | std::vector<torch::Tensor> fwd(
        function bwd (line 75) | std::vector<torch::Tensor> bwd(
  function PYBIND11_MODULE (line 128) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: KoSimCSE/apex/contrib/csrc/multihead_attn/self_multihead_attn_bias.cpp
  type multihead_attn (line 4) | namespace multihead_attn {
    type self_bias (line 5) | namespace self_bias {
      type cublas_gemmex (line 6) | namespace cublas_gemmex {
        function fwd (line 43) | std::vector<torch::Tensor> fwd(
        function bwd (line 82) | std::vector<torch::Tensor> bwd(
  function PYBIND11_MODULE (line 135) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: KoSimCSE/apex/contrib/csrc/multihead_attn/self_multihead_attn_bias_additive_mask.cpp
  type multihead_attn (line 5) | namespace multihead_attn {
    type self_bias_additive_mask (line 6) | namespace self_bias_additive_mask {
      type cublas_gemmex (line 7) | namespace cublas_gemmex {
        function fwd (line 46) | std::vector<torch::Tensor> fwd(
        function bwd (line 86) | std::vector<torch::Tensor> bwd(
  function PYBIND11_MODULE (line 139) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: KoSimCSE/apex/contrib/csrc/multihead_attn/self_multihead_attn_norm_add.cpp
  type multihead_attn (line 4) | namespace multihead_attn {
    type self_norm_add (line 5) | namespace self_norm_add {
      type cublas_gemmex (line 6) | namespace cublas_gemmex {
        function fwd (line 47) | std::vector<torch::Tensor> fwd(
        function bwd (line 93) | std::vector<torch::Tensor> bwd(
  function PYBIND11_MODULE (line 169) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: KoSimCSE/apex/contrib/csrc/multihead_attn/softmax.h
  function acc_t (line 139) | acc_t sum[WARP_BATCH] { 0.0f };
  function acc_t (line 363) | acc_t sum[WARP_BATCH] { 0.0f };
  function additive_masked_softmax_dropout_warp_forward (line 429) | void additive_masked_softmax_dropout_warp_forward(output_t *dst, uint8_t...
  function softmax_warp_backward (line 2244) | void softmax_warp_backward(__half *gradInput, const __half *grad, const ...
  function masked_softmax_warp_backward (line 2455) | void masked_softmax_warp_backward(__half *gradInput, const __half *grad,...

FILE: KoSimCSE/apex/contrib/csrc/multihead_attn/strided_batched_gemm.h
  function cublasOperation_t (line 21) | cublasOperation_t convertTransToCublasOperation(char trans) {
  function CublasStridedBatchedGemm (line 31) | void CublasStridedBatchedGemm(THCState *state, char transa, char transb,...
  type cutlass (line 78) | typedef cutlass::gemm::Gemm<WmmaGemmTraits> Gemm;
  function gemm_switch_fp32accum (line 149) | void gemm_switch_fp32accum(THCState *state, char transa, char transb, lo...
  function adjustLdLevel3 (line 278) | void adjustLdLevel3(char transa, char transb, int64_t m, int64_t n, int6...
  function HgemmStridedBatched (line 312) | void HgemmStridedBatched(THCState *state, char transa, char transb, long...

FILE: KoSimCSE/apex/contrib/csrc/optimizers/fused_adam_cuda.cpp
  function strided_check_finite (line 20) | void strided_check_finite(
  function adam (line 29) | void adam(at::Tensor & p, at::Tensor & p_copy, at::Tensor & m, at::Tenso...
  function reversible_adam (line 43) | void reversible_adam(at::Tensor & p, at::Tensor & p_copy, at::Tensor & m...
  function maybe_adam_undo (line 57) | void maybe_adam_undo(at::Tensor & overflow_flag, at::Tensor & p, at::Ten...
  function maybe_cast (line 69) | void maybe_cast(at::Tensor & overflow_flag, at::Tensor & p_in, at::Tenso...
  function PYBIND11_MODULE (line 78) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: KoSimCSE/apex/contrib/csrc/optimizers/fused_lamb_cuda.cpp
  function PYBIND11_MODULE (line 19) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: KoSimCSE/apex/contrib/csrc/optimizers/multi_tensor_distopt_adam.cpp
  function PYBIND11_MODULE (line 17) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: KoSimCSE/apex/contrib/csrc/optimizers/multi_tensor_distopt_lamb.cpp
  function PYBIND11_MODULE (line 31) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: KoSimCSE/apex/contrib/csrc/transducer/transducer_joint.cpp
  function transducer_joint_forward (line 33) | std::vector<torch::Tensor> transducer_joint_forward(
  function transducer_joint_backward (line 67) | std::vector<torch::Tensor> transducer_joint_backward(
  function PYBIND11_MODULE (line 95) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: KoSimCSE/apex/contrib/csrc/transducer/transducer_loss.cpp
  function transducer_loss_forward (line 35) | std::vector<torch::Tensor> transducer_loss_forward(
  function transducer_loss_backward (line 65) | torch::Tensor transducer_loss_backward(
  function PYBIND11_MODULE (line 106) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: KoSimCSE/apex/contrib/csrc/xentropy/interface.cpp
  function softmax_xentropy_forward (line 24) | std::vector<at::Tensor> softmax_xentropy_forward(
  function softmax_xentropy_backward (line 35) | at::Tensor softmax_xentropy_backward(
  function PYBIND11_MODULE (line 49) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {

FILE: KoSimCSE/apex/contrib/fmha/fmha.py
  class FMHAFun (line 33) | class FMHAFun(torch.autograd.Function):
    method forward (line 35) | def forward(ctx, qkv, cu_seqlens, p_dropout, max_s, is_training):
    method backward (line 48) | def backward(ctx, dout):
  class FMHA (line 58) | class FMHA(torch.nn.Module):
    method __init__ (line 60) | def __init__(self, config):
    method forward (line 70) | def forward(self, qkv, cu_seqlens, max_s, is_training=True):

FILE: KoSimCSE/apex/contrib/groupbn/batch_norm.py
  class bn_NHWC_impl (line 7) | class bn_NHWC_impl(torch.autograd.Function):
    method forward (line 9) | def forward(ctx, x, s, b, rm, riv, mini_m, mini_riv, ret_cta, mom, eps...
    method backward (line 32) | def backward(ctx, grad_y):
  class bn_addrelu_NHWC_impl (line 53) | class bn_addrelu_NHWC_impl(torch.autograd.Function):
    method forward (line 55) | def forward(ctx, x, z, s, b, rm, riv, mini_m, mini_riv, grid_dim_y, re...
    method backward (line 78) | def backward(ctx, grad_y):
  class BatchNorm2d_NHWC (line 101) | class BatchNorm2d_NHWC(_BatchNorm):
    method __init__ (line 103) | def __init__(self, num_features, fuse_relu=False, bn_group=1, max_cta_...
    method forward (line 196) | def forward(self, x, z=None):
    method __del__ (line 219) | def __del__(self):

FILE: KoSimCSE/apex/contrib/layer_norm/layer_norm.py
  class FastLayerNormFN (line 6) | class FastLayerNormFN(torch.autograd.Function):
    method forward (line 8) | def forward(ctx, x, gamma, beta, epsilon):
    method backward (line 19) | def backward(ctx, dy):
  class FastLayerNorm (line 31) | class FastLayerNorm(torch.nn.Module):
    method __init__ (line 32) | def __init__(self, hidden_size, eps=1e-5):
    method reset_parameters (line 39) | def reset_parameters(self):
    method forward (line 43) | def forward(self, x):

FILE: KoSimCSE/apex/contrib/multihead_attn/encdec_multihead_attn.py
  function jit_dropout_add (line 19) | def jit_dropout_add(x, residual, prob, is_training):
  class EncdecMultiheadAttn (line 26) | class EncdecMultiheadAttn(nn.Module):
    method __init__ (line 31) | def __init__(self, embed_dim, num_heads, dropout=0., bias=False, inclu...
    method reset_parameters (line 79) | def reset_parameters(self):
    method forward (line 98) | def forward(self, query, key, value, key_padding_mask=None, need_weigh...

FILE: KoSimCSE/apex/contrib/multihead_attn/encdec_multihead_attn_func.py
  class EncdecAttnFunc (line 5) | class EncdecAttnFunc(torch.autograd.Function):
    method forward (line 7) | def forward(ctx, use_time_mask, is_training, heads, scale, inputs_q, i...
    method backward (line 135) | def backward(ctx, output_grads):

FILE: KoSimCSE/apex/contrib/multihead_attn/fast_encdec_multihead_attn_func.py
  class FastEncdecAttnFunc (line 5) | class FastEncdecAttnFunc(torch.autograd.Function):
    method forward (line 7) | def forward(ctx, use_time_mask, is_training, heads, inputs_q, inputs_k...
    method backward (line 50) | def backward(ctx, output_grads):

FILE: KoSimCSE/apex/contrib/multihead_attn/fast_encdec_multihead_attn_norm_add_func.py
  class FastEncdecAttnNormAddFunc (line 12) | class FastEncdecAttnNormAddFunc(torch.autograd.Function):
    method forward (line 14) | def forward(ctx, use_time_mask, is_training, heads, inputs_q, inputs_k...
    method backward (line 69) | def backward(ctx, output_grads):

FILE: KoSimCSE/apex/contrib/multihead_attn/fast_self_multihead_attn_func.py
  class FastSelfAttnFunc (line 6) | class FastSelfAttnFunc(torch.autograd.Function) :
    method forward (line 8) | def forward(ctx, use_time_mask, is_training, heads, inputs, input_weig...
    method backward (line 120) | def backward(ctx, output_grads):

FILE: KoSimCSE/apex/contrib/multihead_attn/fast_self_multihead_attn_norm_add_func.py
  class FastSelfAttnNormAddFunc (line 5) | class FastSelfAttnNormAddFunc(torch.autograd.Function):
    method forward (line 7) | def forward(ctx, use_time_mask, is_training, heads, inputs, lyr_nrm_ga...
    method backward (line 56) | def backward(ctx, output_grads):

FILE: KoSimCSE/apex/contrib/multihead_attn/mask_softmax_dropout_func.py
  class MaskSoftmaxDropout (line 6) | class MaskSoftmaxDropout(torch.autograd.Function) :
    method forward (line 8) | def forward(ctx, is_training, heads, inputs, pad_mask, mask_additive, ...
    method backward (line 51) | def backward(ctx, output_grads):

FILE: KoSimCSE/apex/contrib/multihead_attn/self_multihead_attn.py
  function jit_dropout_add (line 19) | def jit_dropout_add(x, residual, prob, is_training):
  class SelfMultiheadAttn (line 26) | class SelfMultiheadAttn(nn.Module):
    method __init__ (line 31) | def __init__(self, embed_dim, num_heads, dropout=0., bias=False, inclu...
    method reset_parameters (line 97) | def reset_parameters(self):
    method forward (line 124) | def forward(self, query, key, value, key_padding_mask=None, need_weigh...

FILE: KoSimCSE/apex/contrib/multihead_attn/self_multihead_attn_func.py
  class SelfAttnFunc (line 4) | class SelfAttnFunc(torch.autograd.Function):
    method forward (line 6) | def forward(ctx, use_time_mask, is_training, heads, scale, inputs,
    method backward (line 121) | def backward(ctx, output_grads):

FILE: KoSimCSE/apex/contrib/optimizers/distributed_fused_adam.py
  class DistributedFusedAdam (line 9) | class DistributedFusedAdam(torch.optim.Optimizer):
    method __init__ (line 55) | def __init__(self, params,
    method _first_step_init (line 128) | def _first_step_init(self):
    method _init_everything (line 373) | def _init_everything(self):
    method set_last_step (line 378) | def set_last_step(self, last_step):
    method _get_flush_block (line 381) | def _get_flush_block(self):
    method _pipeline_block_reductions (line 397) | def _pipeline_block_reductions(self, block_id):
    method __launch_step_kernel (line 443) | def __launch_step_kernel(self):
    method _pipeline_step (line 469) | def _pipeline_step(self):
    method _flatten_grad_mt (line 479) | def _flatten_grad_mt(self, scale):
    method _do_overlapped_reduction (line 489) | def _do_overlapped_reduction(self, param_i, param_grads_size, param_of...
    method set_global_scale (line 504) | def set_global_scale(self, global_scale):
    method global_scale (line 510) | def global_scale(self):
    method has_overflow (line 514) | def has_overflow(self):
    method peek_overflow (line 523) | def peek_overflow(self):
    method strided_check_finite (line 529) | def strided_check_finite(self, output_params, stride=1, start=-1, end=...
    method L2_grad_norm (line 545) | def L2_grad_norm(self):
    method complete_reductions (line 552) | def complete_reductions(self):
    method step (line 577) | def step(self, closure=None):
    method state_dict (line 598) | def state_dict(self):
    method load_state_dict (line 615) | def load_state_dict(self, state_dict):

FILE: KoSimCSE/apex/contrib/optimizers/distributed_fused_adam_v2.py
  class DistributedFusedAdamV2 (line 7) | class DistributedFusedAdamV2(torch.optim.Optimizer):
    method __init__ (line 43) | def __init__(self, params,
    method set_last_step (line 351) | def set_last_step(self, last_step):
    method _get_flush_block (line 354) | def _get_flush_block(self):
    method _pipeline_block_reductions (line 370) | def _pipeline_block_reductions(self, block_id):
    method __launch_step_kernel (line 406) | def __launch_step_kernel(self, p, p_copy, m, v, g):
    method _pipeline_block_step (line 425) | def _pipeline_block_step(self, block_id):
    method _pipeline_step (line 445) | def _pipeline_step(self):
    method _flatten_grad_mt (line 460) | def _flatten_grad_mt(self, scale):
    method _do_overlapped_reduction (line 470) | def _do_overlapped_reduction(self, param_i, param_grads_size, param_of...
    method set_global_scale (line 487) | def set_global_scale(self, global_scale):
    method global_scale (line 493) | def global_scale(self):
    method has_overflow (line 497) | def has_overflow(self):
    method peek_overflow (line 506) | def peek_overflow(self):
    method strided_check_finite (line 512) | def strided_check_finite(self, output_params, stride=1, start=-1, end=...
    method L2_grad_norm (line 528) | def L2_grad_norm(self):
    method complete_reductions (line 535) | def complete_reductions(self):
    method revert_step (line 560) | def revert_step(self):
    method step (line 586) | def step(self, closure=None, skip_overflow_check=False):

FILE: KoSimCSE/apex/contrib/optimizers/distributed_fused_adam_v3.py
  class DistributedFusedAdamV3 (line 7) | class DistributedFusedAdamV3(torch.optim.Optimizer):
    method __init__ (line 43) | def __init__(self, params,
    method has_overflow (line 196) | def has_overflow(self):
    method set_last_step (line 199) | def set_last_step(self, last_step):
    method _get_flush_block (line 202) | def _get_flush_block(self):
    method __launch_step_kernel (line 218) | def __launch_step_kernel(self, p, p_copy, m, v, g):
    method _flatten_grad_mt (line 237) | def _flatten_grad_mt(self, scale):
    method _do_overlapped_reduction (line 247) | def _do_overlapped_reduction(self, param_i, param_grads_size, param_of...
    method set_global_scale (line 268) | def set_global_scale(self, global_scale):
    method global_scale (line 274) | def global_scale(self):
    method L2_grad_norm (line 278) | def L2_grad_norm(self):
    method complete_reductions (line 282) | def complete_reductions(self):
    method step (line 306) | def step(self, closure=None, skip_overflow_check=False):

FILE: KoSimCSE/apex/contrib/optimizers/distributed_fused_lamb.py
  class DistributedFusedLAMB (line 9) | class DistributedFusedLAMB(torch.optim.Optimizer):
    class AtomicCounter (line 70) | class AtomicCounter(object):
      method __init__ (line 71) | def __init__(self):
      method add (line 77) | def add(self, idx):
    method __init__ (line 82) | def __init__(self, params,
    method _lazy_init_stage1 (line 210) | def _lazy_init_stage1(self):
    method _lazy_init_stage2 (line 330) | def _lazy_init_stage2(self):
    method set_is_accumulation_step (line 451) | def set_is_accumulation_step(self, is_accumulation_step):
    method set_last_step (line 454) | def set_last_step(self, last_step):
    method _get_flush_block (line 457) | def _get_flush_block(self):
    method _pipeline_block_reductions (line 473) | def _pipeline_block_reductions(self, block_id):
    method __compute_contrib_param_norm (line 556) | def __compute_contrib_param_norm(self):
    method __compute_contrib_update_norm (line 569) | def __compute_contrib_update_norm(self):
    method _pipeline_step (line 577) | def _pipeline_step(self):
    method _flatten_grad_mt (line 633) | def _flatten_grad_mt(self, scale):
    method _do_overlapped_reduction (line 651) | def _do_overlapped_reduction(self, param_i, param):
    method set_global_scale (line 667) | def set_global_scale(self, global_scale):
    method global_scale (line 673) | def global_scale(self):
    method L2_grad_norm (line 677) | def L2_grad_norm(self):
    method complete_reductions (line 681) | def complete_reductions(self):
    method step (line 704) | def step(self, closure=None, grad_scaler=None):
    method state_dict (line 740) | def state_dict(self):
    method load_state_dict (line 757) | def load_state_dict(self, state_dict):

FILE: KoSimCSE/apex/contrib/optimizers/fp16_optimizer.py
  class FP16_Optimizer (line 4) | class FP16_Optimizer(object):
    method __init__ (line 25) | def __init__(self,
    method zero_grad (line 79) | def zero_grad(self, set_grads_to_None=True):
    method step (line 94) | def step(self, closure=None):
    method backward (line 132) | def backward(self, loss):
    method _update_scale (line 142) | def _update_scale(self, skip):
    method _get_state (line 161) | def _get_state(self):
    method _set_state (line 164) | def _set_state(self, value):
    method _get_param_groups (line 171) | def _get_param_groups(self):
    method _set_param_groups (line 174) | def _set_param_groups(self, value):
    method state_dict (line 179) | def state_dict(self):
    method load_state_dict (line 202) | def load_state_dict(self, state_dict):

FILE: KoSimCSE/apex/contrib/optimizers/fused_adam.py
  class FusedAdam (line 6) | class FusedAdam(torch.optim.Optimizer):
    method __init__ (line 38) | def __init__(self, params,
    method step (line 64) | def step(self, closure=None, grads=None, output_params=None, scale=1.,...

FILE: KoSimCSE/apex/contrib/optimizers/fused_lamb.py
  class FusedLAMB (line 6) | class FusedLAMB(torch.optim.Optimizer):
    method __init__ (line 63) | def __init__(self, params, lr=1e-3, bias_correction=True,
    method zero_grad (line 87) | def zero_grad(self):
    method step (line 95) | def step(self, closure=None):

FILE: KoSimCSE/apex/contrib/optimizers/fused_sgd.py
  class FusedSGD (line 7) | class FusedSGD(Optimizer):
    method __init__ (line 66) | def __init__(self, params, lr=required, momentum=0, dampening=0,
    method __setstate__ (line 93) | def __setstate__(self, state):
    method get_momentums (line 98) | def get_momentums(self, params):
    method step (line 115) | def step(self, closure=None, grads=None, output_params=None, scale=1.,...

FILE: KoSimCSE/apex/contrib/sparsity/asp.py
  function eligible_modules (line 12) | def eligible_modules(model, whitelist_layer_types, allowed_layer_names, ...
  class ASP (line 21) | class ASP:
    method init_model_for_pruning (line 29) | def init_model_for_pruning(cls, model, mask_calculator="m4n2_1d",
    method init_optimizer_for_pruning (line 127) | def init_optimizer_for_pruning(cls, optimizer):
    method compute_sparse_masks (line 155) | def compute_sparse_masks(cls):
    method restore_pruned_weights (line 176) | def restore_pruned_weights(cls):
    method is_sparsity_enabled (line 191) | def is_sparsity_enabled(cls):
    method prune_trained_model (line 212) | def prune_trained_model(cls, model, optimizer):

FILE: KoSimCSE/apex/contrib/sparsity/sparse_masklib.py
  function fill (line 9) | def fill(x):
  function reshape_1d (line 13) | def reshape_1d(matrix, m):
  function compute_valid_1d_patterns (line 25) | def compute_valid_1d_patterns(m,n):
  function mn_1d_best (line 37) | def mn_1d_best(matrix, m, n):
  function m4n2_1d (line 49) | def m4n2_1d(mat, density):
  function mn_2d_greedy (line 67) | def mn_2d_greedy(matrix, m, n):
  function m4n2_2d_greedy (line 98) | def m4n2_2d_greedy(mat, density):
  function compute_valid_2d_patterns (line 103) | def compute_valid_2d_patterns(m,n):
  function mn_2d_best (line 122) | def mn_2d_best(matrix, m, n):
  function m4n2_2d_best (line 140) | def m4n2_2d_best(mat, density):
  function create_mask (line 145) | def create_mask(tensor, pattern="m4n2_1d", density=0.5):

FILE: KoSimCSE/apex/contrib/sparsity/test/checkpointing_test_part1.py
  function build_model (line 7) | def build_model(args):
  function train_step (line 21) | def train_step(args, model, optimizer, input_batch, target_batch, step):
  function train_loop (line 31) | def train_loop(args, model, optimizer, step, num_steps):
  function main (line 38) | def main(args):
  class Args (line 76) | class Args:

FILE: KoSimCSE/apex/contrib/sparsity/test/checkpointing_test_part2.py
  function build_model (line 7) | def build_model(args):
  function train_step (line 21) | def train_step(args, model, optimizer, input_batch, target_batch, step):
  function train_loop (line 31) | def train_loop(args, model, optimizer, step, num_steps):
  function main (line 38) | def main(step, args, model_state_dict, optimizer_state_dict):
  class Args (line 61) | class Args:

FILE: KoSimCSE/apex/contrib/sparsity/test/checkpointing_test_reference.py
  function build_model (line 11) | def build_model(args):
  function train_step (line 25) | def train_step(args, model, optimizer, input_batch, target_batch, step):
  function train_loop (line 35) | def train_loop(args, model, optimizer, step, num_steps):
  function main (line 42) | def main(args):
  class Args (line 79) | class Args:

FILE: KoSimCSE/apex/contrib/sparsity/test/toy_problem.py
  function build_model (line 7) | def build_model(args):
  function train_step (line 21) | def train_step(args, model, optimizer, input_batch, target_batch, step):
  function train_loop (line 31) | def train_loop(args, model, optimizer, step, num_steps):
  function main (line 38) | def main(args):
  class Args (line 75) | class Args:

FILE: KoSimCSE/apex/contrib/test/fmha/test_fmha.py
  function py_mha (line 37) | def py_mha(qkv, amask, b, s, h, d):
  class TestFMHA (line 52) | class TestFMHA(unittest.TestCase):
    method run_test (line 54) | def run_test(self, s, b):
    method test_128 (line 106) | def test_128(self):
    method test_256 (line 109) | def test_256(self):
    method test_384 (line 112) | def test_384(self):
    method test_512 (line 115) | def test_512(self):

FILE: KoSimCSE/apex/contrib/test/layer_norm/test_fast_layer_norm.py
  class GPUTimer (line 12) | class GPUTimer:
    method __init__ (line 13) | def __init__(self, stream):
    method start (line 17) | def start(self):
    method stop (line 19) | def stop(self):
    method sync (line 21) | def sync(self):
    method millis (line 23) | def millis(self):
  function size_in_bytes (line 26) | def size_in_bytes(t):
  function abs_err (line 28) | def abs_err(x, y):
  class TestFastLayerNorm (line 35) | class TestFastLayerNorm(unittest.TestCase):
    method setUp (line 37) | def setUp(self, seed=1234):
    method test_ln_fp32 (line 42) | def test_ln_fp32(self):
    method test_ln_fp16 (line 44) | def test_ln_fp16(self):
    method run_test_layer_norm (line 47) | def run_test_layer_norm(self, dtype, atol, rtol=1e-5):
    method test_performance (line 94) | def test_performance(self):

FILE: KoSimCSE/apex/contrib/test/multihead_attn/test_encdec_multihead_attn.py
  class EncdecMultiheadAttnTest (line 7) | class EncdecMultiheadAttnTest(unittest.TestCase):
    method setUp (line 8) | def setUp(self, seed=1234):
    method test_encdec_multihead_attn (line 49) | def test_encdec_multihead_attn(self) :
    method test_encdec_multihead_attn_time_mask (line 76) | def test_encdec_multihead_attn_time_mask(self) :
    method test_encdec_multihead_attn_pad_mask (line 105) | def test_encdec_multihead_attn_pad_mask(self) :

FILE: KoSimCSE/apex/contrib/test/multihead_attn/test_encdec_multihead_attn_norm_add.py
  class EncdecMultiheadAttnNormAddTest (line 7) | class EncdecMultiheadAttnNormAddTest(unittest.TestCase):
    method setUp (line 8) | def setUp(self, seed=1234):
    method test_encdec_multihead_attn_norm_add (line 49) | def test_encdec_multihead_attn_norm_add(self) :

FILE: KoSimCSE/apex/contrib/test/multihead_attn/test_fast_self_multihead_attn_bias.py
  class SelfMultiheadAttnTest (line 7) | class SelfMultiheadAttnTest(unittest.TestCase):
    method setUp (line 8) | def setUp(self, seed=1234):
    method test_self_multihead_attn_additive_mask (line 48) | def test_self_multihead_attn_additive_mask(self) :

FILE: KoSimCSE/apex/contrib/test/multihead_attn/test_mha_fused_softmax.py
  class FusedSoftmaxTest (line 6) | class FusedSoftmaxTest(unittest.TestCase):
    method setUp (line 7) | def setUp(self, seed=1234):
    method test_fused_softmax (line 24) | def test_fused_softmax(self) :

FILE: KoSimCSE/apex/contrib/test/multihead_attn/test_self_multihead_attn.py
  class SelfMultiheadAttnTest (line 7) | class SelfMultiheadAttnTest(unittest.TestCase):
    method setUp (line 8) | def setUp(self, seed=1234):
    method test_self_multihead_attn (line 45) | def test_self_multihead_attn(self) :
    method test_self_multihead_attn_time_mask (line 71) | def test_self_multihead_attn_time_mask(self) :
    method test_self_multihead_attn_pad_mask (line 100) | def test_self_multihead_attn_pad_mask(self) :

FILE: KoSimCSE/apex/contrib/test/multihead_attn/test_self_multihead_attn_norm_add.py
  class SelfMultiheadAttnNormAddTest (line 7) | class SelfMultiheadAttnNormAddTest(unittest.TestCase):
    method setUp (line 8) | def setUp(self, seed=1234):
    method test_self_multihead_attn_norm_add (line 45) | def test_self_multihead_attn_norm_add(self) :

FILE: KoSimCSE/apex/contrib/test/test_label_smoothing.py
  function label_smoothing_raw (line 10) | def label_smoothing_raw(x, target, padding_idx, smoothing):
  function label_smoothing_opt_1 (line 20) | def label_smoothing_opt_1(x, target, padding_idx, smoothing):
  class LabelSmoothingTest (line 30) | class LabelSmoothingTest(unittest.TestCase):
    method setUp (line 31) | def setUp(self, seed=1234):
    method gen_test_inputs (line 40) | def gen_test_inputs(self, N, T, H, smoothing, padding_idx):
    method print_max_diff_elem (line 50) | def print_max_diff_elem(self, ref, tst):
    method test_label_smoothing_function (line 57) | def test_label_smoothing_function(self):
    method test_label_smoothing_perf (line 91) | def test_label_smoothing_perf(self):

FILE: KoSimCSE/apex/contrib/test/transducer/test_transducer_joint.py
  class TransducerJointTest (line 6) | class TransducerJointTest(unittest.TestCase):
    method setUp (line 7) | def setUp(self, seed=1234):
    method gen_input (line 11) | def gen_input(self, for_vector_kernel):
    method _pack (line 41) | def _pack(self, x, f_len, g_len):
    method _unpack (line 53) | def _unpack(self, x, f_len, g_len):
    method run_transducer_joint (line 67) | def run_transducer_joint(self, for_vector_kernel, pack_output, relu, d...
    method test_transducer_joint (line 118) | def test_transducer_joint(self):
    method test_transducer_joint_vec (line 121) | def test_transducer_joint_vec(self):
    method test_transducer_joint_pack (line 124) | def test_transducer_joint_pack(self):
    method test_transducer_joint_vec_pack (line 127) | def test_transducer_joint_vec_pack(self):
    method test_transducer_joint_relu (line 130) | def test_transducer_joint_relu(self):
    method test_transducer_joint_vec_relu (line 133) | def test_transducer_joint_vec_relu(self):
    method test_transducer_joint_pack_relu (line 136) | def test_transducer_joint_pack_relu(self):
    method test_transducer_joint_vec_pack_relu (line 139) | def test_transducer_joint_vec_pack_relu(self):
    method test_transducer_joint_relu_dropout (line 142) | def test_transducer_joint_relu_dropout(self):
    method test_transducer_joint_vec_relu_dropout (line 145) | def test_transducer_joint_vec_relu_dropout(self):
    method test_transducer_joint_pack_relu_dropout (line 148) | def test_transducer_joint_pack_relu_dropout(self):
    method test_transducer_joint_vec_pack_relu_dropout (line 151) | def test_transducer_joint_vec_pack_relu_dropout(self):

FILE: KoSimCSE/apex/contrib/test/transducer/test_transducer_loss.py
  class TransducerLossTest (line 6) | class TransducerLossTest(unittest.TestCase):
    method setUp (line 7) | def setUp(self, seed=1234):
    method gen_input (line 11) | def gen_input(self, scalar_t, for_vector_kernel):
    method _pack (line 41) | def _pack(self, x):
    method _unpack (line 52) | def _unpack(self, x):
    method run_transducer_loss (line 64) | def run_transducer_loss(self, scalar_t, fuse_softmax_backward, packed_...
    method test_transducer_loss_fp32 (line 90) | def test_transducer_loss_fp32(self):
    method test_transducer_loss_fp16 (line 98) | def test_transducer_loss_fp16(self):
    method test_transducer_loss_fp16_backward_fusion (line 106) | def test_transducer_loss_fp16_backward_fusion(self):
    method test_transducer_loss_fp16_backward_fusion_packed (line 114) | def test_transducer_loss_fp16_backward_fusion_packed(self):
    method test_transducer_loss_fp16_backward_fusion_packed_vec (line 122) | def test_transducer_loss_fp16_backward_fusion_packed_vec(self):

FILE: KoSimCSE/apex/contrib/test/transducer/transducer_ref.py
  function transducer_loss_reference (line 5) | def transducer_loss_reference(x, label, f_len, y_len, blank_idx, loss_gr...
  function transducer_joint_reference (line 79) | def transducer_joint_reference(f, g, h_grad, f_len, g_len, pack_output, ...

FILE: KoSimCSE/apex/contrib/transducer/transducer.py
  class TransducerJoint (line 5) | class TransducerJoint(torch.nn.Module):
    method __init__ (line 27) | def __init__(self, pack_output=False, relu=False, dropout=False, opt=1...
    method forward (line 43) | def forward(self, f, g, f_len, g_len, batch_offset=None, packed_batch=0):
  class TransducerLoss (line 68) | class TransducerLoss(torch.nn.Module):
    method __init__ (line 81) | def __init__(self, fuse_softmax_backward=True, opt=1, packed_input=Fal...
    method forward (line 89) | def forward(self, x, label, f_len, y_len, blank_idx, batch_offset=None...
  class TransducerLossFunc (line 127) | class TransducerLossFunc(torch.autograd.Function):
    method forward (line 129) | def forward(ctx, x, label, f_len, y_len, batch_offset, max_f_len, blan...
    method backward (line 149) | def backward(ctx, loss_grad):
  class TransducerJointFunc (line 158) | class TransducerJointFunc(torch.autograd.Function):
    method forward (line 160) | def forward(ctx, f, g, f_len, g_len, pack_output, relu, dropout, batch...
    method backward (line 180) | def backward(ctx, loss_grad):

FILE: KoSimCSE/apex/contrib/xentropy/softmax_xentropy.py
  class SoftmaxCrossEntropyLoss (line 4) | class SoftmaxCrossEntropyLoss(torch.autograd.Function):
    method forward (line 6) | def forward(ctx, logits, labels, smoothing=0.0, padding_idx=0, half_to...
    method backward (line 18) | def backward(ctx, grad_loss):

FILE: KoSimCSE/apex/fp16_utils/fp16_optimizer.py
  class FP16_Optimizer (line 13) | class FP16_Optimizer(object):
    method __init__ (line 14) | def __init__(self,
    method maybe_print (line 110) | def maybe_print(self, msg):
    method __getstate__ (line 114) | def __getstate__(self):
    method __setstate__ (line 117) | def __setstate__(self, state):
    method zero_grad (line 120) | def zero_grad(self, set_grads_to_None=False):
    method _master_params_to_model_params (line 160) | def _master_params_to_model_params(self):
    method clip_master_grads (line 185) | def clip_master_grads(self, max_norm, norm_type=2):
    method state_dict (line 209) | def state_dict(self):
    method load_state_dict (line 230) | def load_state_dict(self, state_dict):
    method step (line 272) | def step(self, closure=None): # could add clip option.
    method _step_with_closure (line 334) | def _step_with_closure(self, closure):
    method backward (line 373) | def backward(self, loss, update_master_grads=True, retain_graph=False):
    method update_master_grads (line 436) | def update_master_grads(self):
    method inspect_master_grad_data (line 493) | def inspect_master_grad_data(self):
    method _get_loss_scale (line 528) | def _get_loss_scale(self):
    method _set_loss_scale (line 531) | def _set_loss_scale(self, value):
    method _get_state (line 537) | def _get_state(self):
    method _set_state (line 540) | def _set_state(self, value):
    method _get_param_groups (line 547) | def _get_param_groups(self):
    method _set_param_groups (line 550) | def _set_param_groups(self, value):

FILE: KoSimCSE/apex/fp16_utils/fp16util.py
  class tofp16 (line 7) | class tofp16(nn.Module):
    method __init__ (line 15) | def __init__(self):
    method forward (line 18) | def forward(self, input):
  function BN_convert_float (line 22) | def BN_convert_float(module):
  function network_to_half (line 35) | def network_to_half(network):
  function convert_module (line 44) | def convert_module(module, dtype):
  function convert_network (line 60) | def convert_network(network, dtype):
  class FP16Model (line 73) | class FP16Model(nn.Module):
    method __init__ (line 78) | def __init__(self, network):
    method forward (line 82) | def forward(self, *inputs):
  function backwards_debug_hook (line 87) | def backwards_debug_hook(grad):
  function prep_param_lists (line 90) | def prep_param_lists(model, flat_master=False):
  function model_grads_to_master_grads (line 136) | def model_grads_to_master_grads(model_params, master_params, flat_master...
  function master_params_to_model_params (line 158) | def master_params_to_model_params(model_params, master_params, flat_mast...
  function to_python_float (line 176) | def to_python_float(t):

FILE: KoSimCSE/apex/fp16_utils/loss_scaler.py
  function to_python_float (line 4) | def to_python_float(t):
  class LossScaler (line 10) | class LossScaler:
    method __init__ (line 22) | def __init__(self, scale=1):
    method has_overflow (line 26) | def has_overflow(self, params):
    method _has_inf_or_nan (line 30) | def _has_inf_or_nan(x):
    method update_scale (line 33) | def update_scale(self, overflow):
    method loss_scale (line 37) | def loss_scale(self):
    method scale_gradient (line 40) | def scale_gradient(self, module, grad_in, grad_out):
    method backward (line 43) | def backward(self, loss, retain_graph=False):
  class DynamicLossScaler (line 47) | class DynamicLossScaler:
    method __init__ (line 73) | def __init__(self,
    method has_overflow (line 84) | def has_overflow(self, params):
    method _has_inf_or_nan (line 92) | def _has_inf_or_nan(x):
    method update_scale (line 113) | def update_scale(self, overflow):
    method loss_scale (line 124) | def loss_scale(self):
    method scale_gradient (line 127) | def scale_gradient(self, module, grad_in, grad_out):
    method backward (line 130) | def backward(self, loss, retain_graph=False):

FILE: KoSimCSE/apex/mlp/mlp.py
  class MlpFunction (line 8) | class MlpFunction(torch.autograd.Function):
    method forward (line 10) | def forward(ctx, bias, activation, *args):
    method backward (line 19) | def backward(ctx, grad_o):
  class MLP (line 26) | class MLP(torch.nn.Module):
    method __init__ (line 34) | def __init__(self, mlp_sizes, bias=True, activation='relu'):
    method reset_parameters (line 64) | def reset_parameters(self):
    method forward (line 74) | def forward(self, input):
    method extra_repr (line 77) | def extra_repr(self):

FILE: KoSimCSE/apex/multi_tensor_apply/multi_tensor_apply.py
  class MultiTensorApply (line 3) | class MultiTensorApply(object):
    method __init__ (line 7) | def __init__(self, chunk_size):
    method check_avail (line 16) | def check_avail(self):
    method __call__ (line 24) | def __call__(self, op, noop_flag_buffer, tensor_lists, *args):

FILE: KoSimCSE/apex/normalization/fused_layer_norm.py
  class FusedLayerNormAffineFunction (line 12) | class FusedLayerNormAffineFunction(torch.autograd.Function):
    method forward (line 15) | def forward(ctx, input, weight, bias, normalized_shape, eps):
    method backward (line 30) | def backward(ctx, grad_output):
  class FusedLayerNormFunction (line 39) | class FusedLayerNormFunction(torch.autograd.Function):
    method forward (line 42) | def forward(ctx, input, normalized_shape, eps):
    method backward (line 55) | def backward(ctx, grad_output):
  function fused_layer_norm_affine (line 64) | def fused_layer_norm_affine(input, normalized_shape, weight, bias, eps=1...
  function fused_layer_norm (line 67) | def fused_layer_norm(input, normalized_shape, eps=1e-6):
  class FusedLayerNorm (line 70) | class FusedLayerNorm(torch.nn.Module):
    method __init__ (line 129) | def __init__(self, normalized_shape, eps=1e-5, elementwise_affine=True):
    method reset_parameters (line 148) | def reset_parameters(self):
    method forward (line 153) | def forward(self, input):
    method extra_repr (line 163) | def extra_repr(self):

FILE: KoSimCSE/apex/optimizers/fused_adagrad.py
  class FusedAdagrad (line 5) | class FusedAdagrad(torch.optim.Optimizer):
    method __init__ (line 43) | def __init__(self, params, lr=1e-2, eps=1e-10,
    method zero_grad (line 59) | def zero_grad(self):
    method step (line 67) | def step(self, closure=None):

FILE: KoSimCSE/apex/optimizers/fused_adam.py
  class FusedAdam (line 4) | class FusedAdam(torch.optim.Optimizer):
    method __init__ (line 63) | def __init__(self, params, lr=1e-3, bias_correction=True,
    method zero_grad (line 82) | def zero_grad(self):
    method step (line 90) | def step(self, closure=None, grads=None, output_params=None, scale=Non...

FILE: KoSimCSE/apex/optimizers/fused_lamb.py
  class FusedLAMB (line 4) | class FusedLAMB(torch.optim.Optimizer):
    method __init__ (line 63) | def __init__(self, params, lr=1e-3, bias_correction=True,
    method zero_grad (line 88) | def zero_grad(self):
    method step (line 96) | def step(self, closure=None):

FILE: KoSimCSE/apex/optimizers/fused_novograd.py
  class FusedNovoGrad (line 4) | class FusedNovoGrad(torch.optim.Optimizer):
    method __init__ (line 67) | def __init__(self, params, lr=1e-3, bias_correction=True,
    method zero_grad (line 92) | def zero_grad(self):
    method load_state_dict (line 100) | def load_state_dict(self, state_dict):
    method step (line 108) | def step(self, closure=None):

FILE: KoSimCSE/apex/optimizers/fused_sgd.py
  class FusedSGD (line 6) | class FusedSGD(Optimizer):
    method __init__ (line 76) | def __init__(self, params, lr=required, momentum=0, dampening=0,
    method __setstate__ (line 108) | def __setstate__(self, state):
    method zero_grad (line 113) | def zero_grad(self):
    method get_momentums (line 121) | def get_momentums(self, params):
    method step (line 138) | def step(self, closure=None):

FILE: KoSimCSE/apex/parallel/LARC.py
  class LARC (line 5) | class LARC(object):
    method __init__ (line 39) | def __init__(self, optimizer, trust_coefficient=0.02, clip=True, eps=1...
    method __getstate__ (line 45) | def __getstate__(self):
    method __setstate__ (line 48) | def __setstate__(self, state):
    method state (line 52) | def state(self):
    method __repr__ (line 55) | def __repr__(self):
    method param_groups (line 59) | def param_groups(self):
    method param_groups (line 63) | def param_groups(self, value):
    method state_dict (line 66) | def state_dict(self):
    method load_state_dict (line 69) | def load_state_dict(self, state_dict):
    method zero_grad (line 72) | def zero_grad(self):
    method add_param_group (line 75) | def add_param_group(self, param_group):
    method step (line 78) | def step(self):

FILE: KoSimCSE/apex/parallel/__init__.py
  function convert_syncbn_model (line 21) | def convert_syncbn_model(module, process_group=None, channel_last=False):
  function create_syncbn_process_group (line 58) | def create_syncbn_process_group(group_size):

FILE: KoSimCSE/apex/parallel/distributed.py
  function import_flatten_impl (line 13) | def import_flatten_impl():
  function flatten (line 25) | def flatten(bucket):
  function unflatten (line 30) | def unflatten(coalesced, bucket):
  function apply_flat_dist_call (line 36) | def apply_flat_dist_call(bucket, call, extra_args=None):
  function split_half_float_double (line 51) | def split_half_float_double(tensors):
  function split_by_type (line 60) | def split_by_type(tensors):
  function flat_dist_call (line 70) | def flat_dist_call(tensors, call, extra_args=None):
  function extract_tensors (line 78) | def extract_tensors(maybe_tensor, tensor_list):
  class Reducer (line 89) | class Reducer(object):
    method __init__ (line 111) | def __init__(self, module_or_grads_list):
    method reduce (line 121) | def reduce(self):
  class DistributedDataParallel (line 129) | class DistributedDataParallel(Module):
    method __init__ (line 162) | def __init__(self,
    method __setstate__ (line 256) | def __setstate__(self, state):
    method __getstate__ (line 268) | def __getstate__(self):
    method enable_allreduce (line 275) | def enable_allreduce(self):
    method disable_allreduce (line 278) | def disable_allreduce(self):
    method sync_bucket_structure (line 283) | def sync_bucket_structure(self):
    method create_hooks (line 319) | def create_hooks(self):
    method _stream_this_bucket (line 411) | def _stream_this_bucket(self, bucket_idx):
    method _event_this_bucket (line 418) | def _event_this_bucket(self, bucket_idx):
    method allreduce_bucket (line 425) | def allreduce_bucket(self, bucket, bucket_idx, force_default_stream):
    method allreduce_maybe_retain (line 478) | def allreduce_maybe_retain(self, bucket, bucket_idx, force_default_str...
    method allreduce_fallback (line 491) | def allreduce_fallback(self):
    method comm_ready_buckets (line 513) | def comm_ready_buckets(self, param):
    method forward (line 559) | def forward(self, *inputs, **kwargs):

FILE: KoSimCSE/apex/parallel/multiproc.py
  function docstring_hack (line 5) | def docstring_hack():

FILE: KoSimCSE/apex/parallel/optimized_sync_batchnorm.py
  class SyncBatchNorm (line 9) | class SyncBatchNorm(_BatchNorm):
    method __init__ (line 58) | def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, ...
    method _specify_process_group (line 64) | def _specify_process_group(self, process_group):
    method _specify_channel_last (line 67) | def _specify_channel_last(self, channel_last):
    method forward (line 70) | def forward(self, input, z = None):

FILE: KoSimCSE/apex/parallel/optimized_sync_batchnorm_kernel.py
  class SyncBatchnormFunction (line 7) | class SyncBatchnormFunction(Function):
    method forward (line 10) | def forward(ctx, input, z, weight, bias, running_mean, running_varianc...
    method backward (line 75) | def backward(ctx, grad_output):

FILE: KoSimCSE/apex/parallel/sync_batchnorm.py
  class SyncBatchNorm (line 9) | class SyncBatchNorm(_BatchNorm):
    method __init__ (line 51) | def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, ...
    method _specify_process_group (line 65) | def _specify_process_group(self, process_group):
    method forward (line 68) | def forward(self, input):

FILE: KoSimCSE/apex/parallel/sync_batchnorm_kernel.py
  class SyncBatchnormFunction (line 7) | class SyncBatchnormFunction(Function):
    method forward (line 10) | def forward(ctx, input, weight, bias, running_mean, running_variance, ...
    method backward (line 33) | def backward(ctx, grad_output):

FILE: KoSimCSE/apex/pyprof/examples/custom_func_module/custom_function.py
  class Foo (line 9) | class Foo(torch.autograd.Function):
    method forward (line 11) | def forward(ctx, in1, in2):
    method backward (line 16) | def backward(ctx, grad):

FILE: KoSimCSE/apex/pyprof/examples/custom_func_module/custom_module.py
  class Foo (line 8) | class Foo(torch.nn.Module):
    method __init__ (line 9) | def __init__(self, size):
    method forward (line 14) | def forward(self, input):

FILE: KoSimCSE/apex/pyprof/examples/imagenet/imagenet.py
  function parseArgs (line 17) | def parseArgs():
  function main (line 89) | def main():

FILE: KoSimCSE/apex/pyprof/examples/jit/jit_script_function.py
  function foo (line 11) | def foo(x, y):

FILE: KoSimCSE/apex/pyprof/examples/jit/jit_script_method.py
  class Foo (line 7) | class Foo(torch.jit.ScriptModule):
    method __init__ (line 8) | def __init__(self, size):
    method forward (line 14) | def forward(self, input):

FILE: KoSimCSE/apex/pyprof/examples/jit/jit_trace_function.py
  function foo (line 7) | def foo(x, y):

FILE: KoSimCSE/apex/pyprof/examples/jit/jit_trace_method.py
  class Foo (line 7) | class Foo(torch.nn.Module):
    method __init__ (line 8) | def __init__(self, size):
    method forward (line 13) | def forward(self, input):

FILE: KoSimCSE/apex/pyprof/examples/lenet.py
  class LeNet5 (line 12) | class LeNet5(nn.Module):
    method __init__ (line 13) | def __init__(self):
    method forward (line 24) | def forward(self, x):
    method num_flat_features (line 35) | def num_flat_features(self, x):

FILE: KoSimCSE/apex/pyprof/examples/user_annotation/resnet.py
  function conv3x3 (line 15) | def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
  function conv1x1 (line 20) | def conv1x1(in_planes, out_planes, stride=1):
  class Bottleneck (line 24) | class Bottleneck(nn.Module):
    method __init__ (line 28) | def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
    method forward (line 48) | def forward(self, x):
  class ResNet (line 102) | class ResNet(nn.Module):
    method __init__ (line 104) | def __init__(self, block, layers, num_classes=1000,
    method _make_layer (line 134) | def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
    method forward (line 158) | def forward(self, x):
  function resnet50 (line 193) | def resnet50():

FILE: KoSimCSE/apex/pyprof/nvtx/nvmarker.py
  function isfunc (line 27) | def isfunc(mod, f):
  function traceMarker (line 46) | def traceMarker(stack):
  function modMarker (line 56) | def modMarker(mod, fn_name, args):
  function add_wrapper (line 67) | def add_wrapper(mod, fn_name):
  function argMarker (line 110) | def argMarker(mod, op, args, kwargs):
  function patchClass (line 201) | def patchClass(cls):
  function init (line 206) | def init():

FILE: KoSimCSE/apex/pyprof/parse/db.py
  class DB (line 3) | class DB(object):
    method __init__ (line 9) | def __init__(self, dbFile):
    method select (line 21) | def select(self, cmd):
    method insert (line 36) | def insert(self, cmd, data):
    method execute (line 46) | def execute(self, cmd):
    method commit (line 56) | def commit(self):
    method close (line 59) | def close(self):

FILE: KoSimCSE/apex/pyprof/parse/kernel.py
  function demangle (line 5) | def demangle(name):
  function encode_object_id (line 11) | def encode_object_id(pid, tid):
  function getShortName (line 20) | def getShortName(name):
  class Kernel (line 33) | class Kernel(object):
    method __init__ (line 41) | def __init__(self):
    method setKernelInfo (line 77) | def setKernelInfo(self, info):
    method setKernelName (line 93) | def setKernelName(self, name):
    method setRunTimeInfo (line 98) | def setRunTimeInfo(self, info):
    method setMarkerInfo (line 107) | def setMarkerInfo(self, info):
    method setDirection (line 111) | def setDirection(self):
    method setOp (line 123) | def setOp(self):
    method print (line 180) | def print(self):

FILE: KoSimCSE/apex/pyprof/parse/nvvp.py
  class NVVP (line 3) | class NVVP(object):
    method __init__ (line 14) | def __init__(self, db):
    method getProfileStart (line 18) | def getProfileStart(self):
    method getString (line 36) | def getString(self, id_):
    method createMarkerTable (line 45) | def createMarkerTable(self):
    method getCPUInfo (line 65) | def getCPUInfo(self, corrId):
    method getKernelInfo (line 91) | def getKernelInfo(self):
    method getMarkerInfo (line 99) | def getMarkerInfo(self, objId, startTime, endTime):

FILE: KoSimCSE/apex/pyprof/parse/parse.py
  function parseArgs (line 15) | def parseArgs():
  function main (line 25) | def main():

FILE: KoSimCSE/apex/pyprof/prof/activation.py
  class Activation (line 5) | class Activation(OperatorLayerBase):
    method __init__ (line 12) | def __init__(self, d):
    method params (line 35) | def params(self):
    method flops (line 39) | def flops(self):
    method bytes (line 48) | def bytes(self):
    method tc (line 58) | def tc(self):
    method op (line 61) | def op(self):
    method mod (line 64) | def mod(self):

FILE: KoSimCSE/apex/pyprof/prof/base.py
  class OperatorLayerBase (line 3) | class OperatorLayerBase(ABC):
    method tc (line 10) | def tc(self):
    method params (line 18) | def params(self):
    method flops (line 25) | def flops(self):
    method bytes (line 32) | def bytes(self):
    method mod (line 36) | def mod(self):
    method op (line 43) | def op(self):

FILE: KoSimCSE/apex/pyprof/prof/blas.py
  class Addmm (line 8) | class Addmm(OperatorLayerBase):
    method __init__ (line 10) | def __init__(self, d):
    method tc (line 63) | def tc(self):
    method bytes (line 69) | def bytes(self):
    method flops (line 73) | def flops(self):
    method op (line 76) | def op(self):
    method mod (line 79) | def mod(self):
    method params (line 82) | def params(self):
  class Bmm (line 86) | class Bmm(OperatorLayerBase):
    method __init__ (line 88) | def __init__(self, d):
    method tc (line 123) | def tc(self):
    method params (line 129) | def params(self):
    method flops (line 134) | def flops(self):
    method bytes (line 137) | def bytes(self):
    method op (line 141) | def op(self):
    method mod (line 144) | def mod(self):
  class Matmul (line 147) | class Matmul(OperatorLayerBase):
    method __init__ (line 152) | def __init__(self, d):
    method params (line 252) | def params(self):
    method tc (line 255) | def tc(self):
    method bytes (line 264) | def bytes(self):
    method flops (line 272) | def flops(self):
    method op (line 279) | def op(self):
    method mod (line 282) | def mod(self):
  class Mm (line 285) | class Mm(OperatorLayerBase):
    method __init__ (line 287) | def __init__(self, d):
    method params (line 319) | def params(self):
    method tc (line 323) | def tc(self):
    method bytes (line 329) | def bytes(self):
    method flops (line 333) | def flops(self):
    method op (line 336) | def op(self):
    method mod (line 339) | def mod(self):

FILE: KoSimCSE/apex/pyprof/prof/conv.py
  class Conv (line 5) | class Conv(OperatorLayerBase):
    method __init__ (line 26) | def __init__(self, d):
    method params (line 180) | def params(self):
    method conv_bytes_flops (line 184) | def conv_bytes_flops(self, N, C, H, W, K, P, Q, R, S, g, t):
    method bytes_flops (line 190) | def bytes_flops(self):
    method bytes (line 218) | def bytes(self):
    method flops (line 222) | def flops(self):
    method tc (line 226) | def tc(self):
    method op (line 232) | def op(self):
    method mod (line 235) | def mod(self):

FILE: KoSimCSE/apex/pyprof/prof/convert.py
  class Convert (line 5) | class Convert(OperatorLayerBase):
    method __init__ (line 11) | def __init__(self, d):
    method params (line 41) | def params(self):
    method op (line 45) | def op(self):
    method mod (line 48) | def mod(self):
    method tc (line 51) | def tc(self):
    method elems (line 54) | def elems(self):
    method flops (line 57) | def flops(self):
    method bytes (line 60) | def bytes(self):

FILE: KoSimCSE/apex/pyprof/prof/data.py
  class Data (line 3) | class Data(object):
    method __init__ (line 7) | def __init__(self, kernel):
    method setParams (line 41) | def setParams(self, params):

FILE: KoSimCSE/apex/pyprof/prof/dropout.py
  class Dropout (line 5) | class Dropout(OperatorLayerBase):
    method __init__ (line 7) | def __init__(self, d):
    method params (line 28) | def params(self):
    method op (line 32) | def op(self):
    method mod (line 35) | def mod(self):
    method tc (line 38) | def tc(self):
    method elems (line 41) | def elems(self):
    method bytes (line 44) | def bytes(self):
    method flops (line 48) | def flops(self):

FILE: KoSimCSE/apex/pyprof/prof/embedding.py
  class Embedding (line 5) | class Embedding(OperatorLayerBase):
    method __init__ (line 7) | def __init__(self, d):
    method params (line 33) | def params(self):
    method op (line 37) | def op(self):
    method mod (line 40) | def mod(self):
    method tc (line 43) | def tc(self):
    method bytes (line 46) | def bytes(self):
    method flops (line 69) | def flops(self):

FILE: KoSimCSE/apex/pyprof/prof/index_slice_join_mutate.py
  class Cat (line 6) | class Cat(OperatorLayerBase):
    method __init__ (line 8) | def __init__(self, d):
    method params (line 34) | def params(self):
    method flops (line 38) | def flops(self):
    method tc (line 41) | def tc(self):
    method op (line 44) | def op(self):
    method mod (line 47) | def mod(self):
    method bytes (line 50) | def bytes(self):
  class Reshape (line 56) | class Reshape(OperatorLayerBase):
    method __init__ (line 58) | def __init__(self, d):
    method params (line 82) | def params(self):
    method flops (line 86) | def flops(self):
    method tc (line 89) | def tc(self):
    method op (line 92) | def op(self):
    method mod (line 95) | def mod(self):
    method bytes (line 98) | def bytes(self):
  class Gather (line 101) | class Gather(OperatorLayerBase):
    method __init__ (line 103) | def __init__(self, d):
    method params (line 132) | def params(self):
    method flops (line 136) | def flops(self):
    method tc (line 139) | def tc(self):
    method op (line 142) | def op(self):
    method mod (line 145) | def mod(self):
    method bytes (line 148) | def bytes(self):
  class MaskedScatter (line 151) | class MaskedScatter(OperatorLayerBase):
    method __init__ (line 153) | def __init__(self, d):
    method params (line 178) | def params(self):
    method flops (line 182) | def flops(self):
    method tc (line 185) | def tc(self):
    method op (line 188) | def op(self):
    method mod (line 191) | def mod(self):
    method bytes (line 194) | def bytes(self):
  class Nonzero (line 207) | class Nonzero(OperatorLayerBase):
    method __init__ (line 209) | def __init__(self, d):
    method params (line 229) | def params(self):
    method flops (line 233) | def flops(self):
    method tc (line 236) | def tc(self):
    method op (line 239) | def op(self):
    method mod (line 242) | def mod(self):
    method bytes (line 245) | def bytes(self):
  class IndexSelect (line 260) | class IndexSelect(OperatorLayerBase):
    method __init__ (line 262) | def __init__(self, d):
    method params (line 311) | def params(self):
    method tc (line 315) | def tc(self):
    method op (line 318) | def op(self):
    method mod (line 321) | def mod(self):
    method flops (line 324) | def flops(self):
    method bytes (line 327) | def bytes(self):
  class MaskedSelect (line 343) | class MaskedSelect(OperatorLayerBase):
    method __init__ (line 345) | def __init__(self, d):
    method params (line 393) | def params(self):
    method tc (line 397) | def tc(self):
    method op (line 400) | def op(self):
    method mod (line 403) | def mod(self):
    method bytes (line 406) | def bytes(self):
    method flops (line 418) | def flops(self):

FILE: KoSimCSE/apex/pyprof/prof/linear.py
  class Linear (line 5) | class Linear(OperatorLayerBase):
    method setXWBMNK (line 17) | def setXWBMNK(self, args):
    method tc (line 63) | def tc(self):
    method __init__ (line 69) | def __init__(self, d):
    method params (line 118) | def params(self):
    method op (line 145) | def op(self):
    method bytesFlops (line 148) | def bytesFlops(self):
    method bytes (line 179) | def bytes(self):
    method flops (line 183) | def flops(self):
    method mod (line 187) | def mod(self):

FILE: KoSimCSE/apex/pyprof/prof/loss.py
  class MSELoss (line 7) | class MSELoss(OperatorLayerBase):
    method __init__ (line 9) | def __init__(self, d):
    method params (line 51) | def params(self):
    method elems (line 55) | def elems(self):
    method bytes (line 71) | def bytes(self):
    method flops (line 74) | def flops(self):
    method tc (line 77) | def tc(self):
    method op (line 80) | def op(self):
    method mod (line 83) | def mod(self):

FILE: KoSimCSE/apex/pyprof/prof/misc.py
  class Foo (line 5) | class Foo(OperatorLayerBase):
    method __init__ (line 9) | def __init__(self, d):
    method params (line 31) | def params(self):
    method tc (line 35) | def tc(self):
    method op (line 38) | def op(self):
    method mod (line 41) | def mod(self):
    method flops (line 44) | def flops(self):
    method bytes (line 47) | def bytes(self):
  class Copy (line 50) | class Copy(OperatorLayerBase):
    method __init__ (line 52) | def __init__(self, d):
    method params (line 75) | def params(self):
    method tc (line 80) | def tc(self):
    method op (line 83) | def op(self):
    method mod (line 86) | def mod(self):
    method flops (line 89) | def flops(self):
    method elems (line 92) | def elems(self):
    method bytes (line 95) | def bytes(self):
  class Clone (line 98) | class Clone(OperatorLayerBase):
    method __init__ (line 100) | def __init__(self, d):
    method params (line 118) | def params(self):
    method flops (line 122) | def flops(self):
    method tc (line 125) | def tc(self):
    method op (line 128) | def op(self):
    method mod (line 131) | def mod(self):
    method elems (line 134) | def elems(self):
    method bytes (line 137) | def bytes(self):
  class Contiguous (line 140) | class Contiguous(OperatorLayerBase):
    method __init__ (line 142) | def __init__(self, d):
    method params (line 160) | def params(self):
    method flops (line 164) | def flops(self):
    method bytes (line 167) | def bytes(self):
    method tc (line 170) | def tc(self):
    method op (line 173) | def op(self):
    method mod (line 176) | def mod(self):
  class Any (line 179) | class Any(OperatorLayerBase):
    method __init__ (line 181) | def __init__(self, d):
    method params (line 202) | def params(self):
    method op (line 206) | def op(self):
    method mod (line 209) | def mod(self):
    method tc (line 212) | def tc(self):
    method flops (line 215) | def flops(self):
    method bytes (line 218) | def bytes(self):

FILE: KoSimCSE/apex/pyprof/prof/normalization.py
  class BatchNorm (line 5) | class BatchNorm(OperatorLayerBase):
    method __init__ (line 7) | def __init__(self, d):
    method params (line 27) | def params(self):
    method tc (line 31) | def tc(self):
    method op (line 34) | def op(self):
    method mod (line 37) | def mod(self):
    method elems (line 40) | def elems(self):
    method flops (line 43) | def flops(self):
    method bytes (line 47) | def bytes(self):

FILE: KoSimCSE/apex/pyprof/prof/optim.py
  class Adam (line 7) | class Adam(OperatorLayerBase):
    method __init__ (line 9) | def __init__(self, d):
    method params (line 31) | def params(self):
    method flops (line 35) | def flops(self):
    method bytes (line 38) | def bytes(self):
    method tc (line 58) | def tc(self):
    method op (line 61) | def op(self):
    method mod (line 64) | def mod(self):

FILE: KoSimCSE/apex/pyprof/prof/output.py
  class Output (line 3) | class Output():
    method __init__ (line 33) | def __init__(self, args):
    method foo (line 77) | def foo(self, cadena, pformat):
    method header (line 99) | def header(self):
    method data (line 107) | def data(self, a):

FILE: KoSimCSE/apex/pyprof/prof/pointwise.py
  class Pointwise (line 6) | class Pointwise(OperatorLayerBase):
    method foo (line 26) | def foo(d):
    method __init__ (line 29) | def __init__(self, d):
    method params (line 84) | def params(self):
    method tc (line 88) | def tc(self):
    method op (line 91) | def op(self):
    method mod (line 94) | def mod(self):
    method elems (line 97) | def elems(self):
    method bytes (line 138) | def bytes(self):
    method flops (line 141) | def flops(self):

FILE: KoSimCSE/apex/pyprof/prof/pooling.py
  class MaxPool2d (line 7) | class MaxPool2d(object):
    method parse (line 9) | def parse(marker):

FILE: KoSimCSE/apex/pyprof/prof/prof.py
  function findFpropKernel (line 39) | def findFpropKernel(seq):
  function foo (line 56) | def foo(mod, op, d):
  function main (line 171) | def main():

FILE: KoSimCSE/apex/pyprof/prof/randomSample.py
  class RandPerm (line 5) | class RandPerm(OperatorLayerBase):
    method __init__ (line 7) | def __init__(self, d):
    method params (line 25) | def params(self):
    method tc (line 29) | def tc(self):
    method op (line 32) | def op(self):
    method mod (line 35) | def mod(self):
    method bytes (line 38) | def bytes(self):
    method flops (line 41) | def flops(self):

FILE: KoSimCSE/apex/pyprof/prof/recurrentCell.py
  function hasTileSize (line 5) | def hasTileSize(name):
  function ctaTile (line 11) | def ctaTile(name):
  class RNNCell (line 21) | class RNNCell(OperatorLayerBase):
    method __init__ (line 26) | def __init__(self, d):
    method params (line 73) | def params(self):
    method tc (line 83) | def tc(self):
    method op (line 89) | def op(self):
    method mod (line 92) | def mod(self):
    method bytes (line 95) | def bytes(self):
    method flops (line 105) | def flops(self):
    method bar (line 115) | def bar(self):

FILE: KoSimCSE/apex/pyprof/prof/reduction.py
  class Mean (line 5) | class Mean(OperatorLayerBase):
    method __init__ (line 7) | def __init__(self, d):
    method params (line 32) | def params(self):
    method tc (line 36) | def tc(self):
    method op (line 39) | def op(self):
    method mod (line 42) | def mod(self):
    method elems (line 45) | def elems(self):
    method bytes (line 48) | def bytes(self):
    method flops (line 54) | def flops(self):
  class Sum (line 60) | class Sum(OperatorLayerBase):
    method __init__ (line 62) | def __init__(self, d):
    method params (line 86) | def params(self):
    method tc (line 90) | def tc(self):
    method op (line 93) | def op(self):
    method mod (line 96) | def mod(self):
    method elems (line 99) | def elems(self):
    method flops (line 102) | def flops(self):
    method bytes (line 106) | def bytes(self):
  class Norm (line 109) | class Norm(OperatorLayerBase):
    method __init__ (line 111) | def __init__(self, d):
    method params (line 129) | def params(self):
    method elems (line 133) | def elems(self):
    method bytes (line 136) | def bytes(self):
    method flops (line 139) | def flops(self):
    method tc (line 143) | def tc(self):
    method op (line 146) | def op(self):
    method mod (line 149) | def mod(self):

FILE: KoSimCSE/apex/pyprof/prof/softmax.py
  class Softmax (line 5) | class Softmax(OperatorLayerBase):
    method __init__ (line 7) | def __init__(self, d):
    method op (line 31) | def op(self):
    method mod (line 34) | def mod(self):
    method tc (line 37) | def tc(self):
    method params (line 40) | def params(self):
    method elems (line 44) | def elems(self):
    method flops (line 47) | def flops(self):
    method bytes (line 52) | def bytes(self):
  class LogSoftmax (line 57) | class LogSoftmax(OperatorLayerBase):
    method __init__ (line 59) | def __init__(self, d):
    method op (line 91) | def op(self):
    method mod (line 94) | def mod(self):
    method tc (line 97) | def tc(self):
    method params (line 100) | def params(self):
    method elems (line 104) | def elems(self):
    method flops (line 107) | def flops(self):
    method bytes (line 112) | def bytes(self):

FILE: KoSimCSE/apex/pyprof/prof/usage.py
  function parseArgs (line 4) | def parseArgs():

FILE: KoSimCSE/apex/pyprof/prof/utility.py
  class Utility (line 3) | class Utility(object):
    method numElems (line 6) | def numElems(shape):
    method typeToBytes (line 11) | def typeToBytes(t):
    method typeToString (line 23) | def typeToString(t):
    method hasNVTX (line 45) | def hasNVTX(marker):
    method isscalar (line 59) | def isscalar(t):

FILE: KoSimCSE/apex/reparameterization/__init__.py
  function apply_weight_norm (line 4) | def apply_weight_norm(module, name='', dim=0, hook_child=True):
  function remove_weight_norm (line 50) | def remove_weight_norm(module, name='', remove_all=False):
  function apply_reparameterization (line 64) | def apply_reparameterization(module, reparameterization=None, name='', d...
  function remove_reparameterization (line 96) | def remove_reparameterization(module, reparameterization=Reparameterizat...

FILE: KoSimCSE/apex/reparameterization/reparameterization.py
  class Reparameterization (line 4) | class Reparameterization(object):
    method __init__ (line 19) | def __init__(self, name, dim, module, retain_forward=True):
    method compute_weight (line 28) | def compute_weight(self, module=None, name=None):
    method reparameterize (line 40) | def reparameterize(self, name, weight, dim):
    method apply (line 57) | def apply(module, name, dim, reparameterization=None, hook_child=True):
    method get_module_and_name (line 105) | def get_module_and_name(module, name):
    method get_params (line 123) | def get_params(self, module):
    method remove (line 127) | def remove(self, module):
    method __call__ (line 139) | def __call__(self, module, inputs):
    method backward_hook (line 147) | def backward_hook(self, module, grad_input, grad_output):

FILE: KoSimCSE/apex/reparameterization/weight_norm.py
  function _norm (line 8) | def _norm(p, dim):
  class WeightNorm (line 22) | class WeightNorm(Reparameterization):
    method compute_weight (line 39) | def compute_weight(self, module=None, name=None):
    method reparameterize (line 62) | def reparameterize(self, name, weight, dim):

FILE: KoSimCSE/data/dataloader.py
  class ModelDataLoader (line 10) | class ModelDataLoader(Dataset):
    method __init__ (line 11) | def __init__(self, file_path, args, metric, tokenizer, type_):
    method load_data (line 43) | def load_data(self, type):
    method data2tensor (line 56) | def data2tensor(self, line, type):
    method __getitem__ (line 104) | def __getitem__(self, index):
    method __len__ (line 146) | def __len__(self):
  function get_loader (line 154) | def get_loader(args, metric):
  function convert_to_tensor (line 190) | def convert_to_tensor(corpus, tokenizer, device):
  function example_model_setting (line 208) | def example_model_setting(model_ckpt, model_name):

FILE: KoSimCSE/main.py
  function main (line 5) | def main(args, logger) -> None:

FILE: KoSimCSE/model/loss.py
  class Loss (line 12) | class Loss():
    method __init__ (line 14) | def __init__(self, args):
    method train_loss_fct (line 19) | def train_loss_fct(self, config, inputs, a, p, n):
    method evaluation_during_training (line 31) | def evaluation_during_training(self, embeddings1, embeddings2, labels,...

FILE: KoSimCSE/model/setting.py
  class Arguments (line 8) | class Arguments():
    method __init__ (line 10) | def __init__(self):
    method add_type_of_processing (line 13) | def add_type_of_processing(self):
    method add_hyper_parameters (line 20) | def add_hyper_parameters(self):
    method add_data_parameters (line 34) | def add_data_parameters(self):
    method print_args (line 44) | def print_args(self, args):
    method add_argument (line 50) | def add_argument(self, *args, **kw_args):
    method parse (line 53) | def parse(self):
  class Setting (line 60) | class Setting():
    method set_logger (line 62) | def set_logger(self):
    method set_seed (line 76) | def set_seed(self, args):
    method run (line 90) | def run(self):

FILE: KoSimCSE/model/simcse/bert.py
  class BERT (line 5) | class BERT(nn.Module):
    method __init__ (line 6) | def __init__(self, bert):
    method forward (line 10) | def forward(self, config, inputs, mode):
    method encode (line 45) | def encode(self, inputs, device):

FILE: KoSimCSE/model/simcse/processor.py
  class Processor (line 18) | class Processor():
    method __init__ (line 20) | def __init__(self, args):
    method run (line 32) | def run(self, inputs, indicator=None, type=None):
    method progress (line 50) | def progress(self, loss):
    method progress_validation (line 54) | def progress_validation(self, score):
    method return_value (line 58) | def return_value(self):
    method get_object (line 64) | def get_object(self, tokenizer, model):
    method get_scheduler (line 79) | def get_scheduler(self, optim, train_loader):
    method model_setting (line 87) | def model_setting(self):
    method train (line 116) | def train(self, epoch):
    method valid (line 144) | def valid(self):
    method test (line 168) | def test(self):

FILE: KoSimCSE/model/utils.py
  class Metric (line 10) | class Metric():
    method __init__ (line 12) | def __init__(self, args):
    method get_lr (line 15) | def get_lr(self, optimizer):
    method count_parameters (line 18) | def count_parameters(self, model):
    method cal_acc (line 21) | def cal_acc(self, yhat, y):
    method cal_time (line 28) | def cal_time(self, start_time, end_time):
    method cal_dev_score (line 35) | def cal_dev_score(self, score, indicator):
    method update_indicator (line 51) | def update_indicator(self, indicator, score):
    method draw_graph (line 70) | def draw_graph(self, cp):
    method performance_check (line 74) | def performance_check(self, cp, config):
    method print_size_of_model (line 80) | def print_size_of_model(self, model):
    method move2device (line 85) | def move2device(self, sample, device):
    method save_model (line 106) | def save_model(self, config, cp, pco):
  function pytorch_cos_sim (line 123) | def pytorch_cos_sim(a, b):