SYMBOL INDEX (1025 symbols across 75 files) FILE: RWKV-v1/src/model.py function RWKV_Init (line 16) | def RWKV_Init(module, config): # fancy initialization of all lin & emb l... class RWKV_TimeMix (line 56) | class RWKV_TimeMix(nn.Module): method __init__ (line 57) | def __init__(self, config, layer_id): method forward (line 96) | def forward(self, x): class RWKV_ChannelMix (line 129) | class RWKV_ChannelMix(nn.Module): method __init__ (line 130) | def __init__(self, config, layer_id): method forward (line 144) | def forward(self, x): class RWKV_TinyAttn (line 158) | class RWKV_TinyAttn(nn.Module): # extra tiny attention method __init__ (line 159) | def __init__(self, config): method forward (line 168) | def forward(self, x, mask): class RotaryEmbedding (line 192) | class RotaryEmbedding(torch.nn.Module): method __init__ (line 193) | def __init__(self, dim, base=10000): method forward (line 201) | def forward(self, x, seq_len=None): function rotate_half (line 211) | def rotate_half(x): function apply_rotary_pos_emb (line 216) | def apply_rotary_pos_emb(q, k, cos, sin): class MHA_rotary (line 220) | class MHA_rotary(nn.Module): method __init__ (line 221) | def __init__(self, config, layer_id, time_shift = False): method forward (line 243) | def forward(self, x): class GeGLU (line 270) | class GeGLU(torch.nn.Module): method __init__ (line 271) | def __init__(self, config, layer_id, time_shift = False): method forward (line 283) | def forward(self, x): class MHA_pro (line 297) | class MHA_pro(nn.Module): method __init__ (line 298) | def __init__(self, config, layer_id): method forward (line 324) | def forward(self, x): class RMSNorm (line 361) | class RMSNorm(nn.Module): method __init__ (line 362) | def __init__(self, d): method forward (line 367) | def forward(self, x): class FixedNorm (line 372) | class FixedNorm(nn.Module): method __init__ (line 373) | def __init__(self, d): method forward (line 377) | def forward(self, x): class GPTConfig (line 384) | class GPTConfig: method __init__ (line 385) | def __init__(self, vocab_size, ctx_len, **kwargs): class Block (line 391) | class Block(nn.Module): method __init__ (line 392) | def __init__(self, config, layer_id): method forward (line 417) | def forward(self, x): class GPT (line 424) | class GPT(nn.Module): method __init__ (line 425) | def __init__(self, config): method get_ctx_len (line 452) | def get_ctx_len(self): method _init_weights (line 455) | def _init_weights(self, module): method configure_optimizers (line 461) | def configure_optimizers(self, train_config): method forward (line 494) | def forward(self, idx, targets=None): FILE: RWKV-v1/src/trainer.py class TrainerConfig (line 14) | class TrainerConfig: method __init__ (line 29) | def __init__(self, **kwargs): class Trainer (line 33) | class Trainer: method __init__ (line 35) | def __init__(self, model, train_dataset, test_dataset, config): method get_run_name (line 54) | def get_run_name(self): method train (line 60) | def train(self): FILE: RWKV-v1/src/utils.py function top_k_logits (line 7) | def top_k_logits(logits, k): function top_p_probs (line 13) | def top_p_probs(probs, p): function sample_logits (line 27) | def sample_logits(logits, pos, temperature=1.0, top_k=None, top_p=None, ... function set_seed (line 46) | def set_seed(seed): FILE: RWKV-v1/train.py class Dataset (line 79) | class Dataset(Dataset): method __init__ (line 80) | def __init__(self, data, model_level, ctx_len): method __len__ (line 110) | def __len__(self): method __getitem__ (line 113) | def __getitem__(self, idx): FILE: RWKV-v2-RNN/cuda/timex_op.cpp function forward (line 6) | void forward(torch::Tensor &w, const torch::Tensor &k, torch::Tensor &x,... function backward (line 9) | void backward(torch::Tensor &w, const torch::Tensor &k, const torch::Ten... function PYBIND11_MODULE (line 13) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { function TORCH_LIBRARY (line 18) | TORCH_LIBRARY(timex, m) { FILE: RWKV-v2-RNN/src/model.py class TimeX (line 26) | class TimeX(torch.autograd.Function): method forward (line 28) | def forward(ctx, w, k, B, C, T, eps): method backward (line 42) | def backward(ctx, gwk): function RWKV_Init (line 63) | def RWKV_Init(module, config): # fancy initialization of all lin & emb ... class RWKV_TimeMix (line 109) | class RWKV_TimeMix(nn.Module): method __init__ (line 110) | def __init__(self, config, layer_id): method forward (line 162) | def forward(self, x): class RWKV_ChannelMix (line 189) | class RWKV_ChannelMix(nn.Module): method __init__ (line 190) | def __init__(self, config, layer_id): method forward (line 210) | def forward(self, x): class GPTConfig (line 225) | class GPTConfig: method __init__ (line 226) | def __init__(self, vocab_size, ctx_len, **kwargs): class Block (line 233) | class Block(nn.Module): method __init__ (line 234) | def __init__(self, config, layer_id): method forward (line 249) | def forward(self, x): class GPT (line 260) | class GPT(nn.Module): method __init__ (line 261) | def __init__(self, config): method get_ctx_len (line 288) | def get_ctx_len(self): method _init_weights (line 291) | def _init_weights(self, module): method configure_optimizers (line 299) | def configure_optimizers(self, train_config): method forward (line 327) | def forward(self, idx, targets=None): FILE: RWKV-v2-RNN/src/model_run.py class RWKV_RNN (line 13) | class RWKV_RNN(): method __init__ (line 14) | def __init__(self, MODEL_NAME, RUN_DEVICE, model_type, n_layer, n_embd... method clear (line 55) | def clear(self): method save (line 61) | def save(self, target): method load (line 67) | def load(self, target): method LN (line 73) | def LN(self, xx, w): method FF (line 76) | def FF(self, xx, w, name): method SA (line 88) | def SA(self, xx, w, name): method run (line 111) | def run(self, ctx): FILE: RWKV-v2-RNN/src/trainer.py class TrainerConfig (line 30) | class TrainerConfig: method __init__ (line 44) | def __init__(self, **kwargs): class Trainer (line 49) | class Trainer: method __init__ (line 51) | def __init__(self, model, train_dataset, test_dataset, config): method get_run_name (line 70) | def get_run_name(self): method train (line 78) | def train(self): FILE: RWKV-v2-RNN/src/utils.py class Dataset (line 16) | class Dataset(Dataset): method __init__ (line 17) | def __init__(self, data, ctx_len, epoch_length_fixed): method __len__ (line 42) | def __len__(self): method __getitem__ (line 45) | def __getitem__(self, idx): class TOKENIZER (line 57) | class TOKENIZER(): method __init__ (line 58) | def __init__(self, WORD_NAME, UNKNOWN_CHAR='\ue083'): method refine_context (line 69) | def refine_context(self, context): method sample_logits (line 80) | def sample_logits(self, out, x, ctx_len, temperature=1.0, top_p_usual=... function to_float (line 114) | def to_float(x): function set_seed (line 118) | def set_seed(seed): FILE: RWKV-v3/cuda/timex_op.cpp function forward (line 6) | void forward(torch::Tensor &w, const torch::Tensor &k, torch::Tensor &x,... function backward (line 9) | void backward(torch::Tensor &w, const torch::Tensor &k, const torch::Ten... function PYBIND11_MODULE (line 13) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { function TORCH_LIBRARY (line 18) | TORCH_LIBRARY(timex, m) { FILE: RWKV-v3/src/model.py class TimeX (line 31) | class TimeX(torch.autograd.Function): method forward (line 33) | def forward(ctx, w, k, B, C, T, eps): method backward (line 47) | def backward(ctx, gwk): function RWKV_Init (line 62) | def RWKV_Init(module, config): # fancy initialization of all lin & emb ... class RWKV_TimeMix (line 108) | class RWKV_TimeMix(nn.Module): method __init__ (line 109) | def __init__(self, config, layer_id): method forward (line 156) | def forward(self, x): class RWKV_ChannelMix (line 190) | class RWKV_ChannelMix(nn.Module): method __init__ (line 191) | def __init__(self, config, layer_id): method forward (line 215) | def forward(self, x): class GPTConfig (line 232) | class GPTConfig: method __init__ (line 233) | def __init__(self, vocab_size, ctx_len, **kwargs): class Block (line 240) | class Block(nn.Module): method __init__ (line 241) | def __init__(self, config, layer_id): method forward (line 259) | def forward(self, x): class GPT (line 270) | class GPT(nn.Module): method __init__ (line 271) | def __init__(self, config): method get_ctx_len (line 299) | def get_ctx_len(self): method _init_weights (line 302) | def _init_weights(self, module): method configure_optimizers (line 310) | def configure_optimizers(self, train_config): method forward (line 338) | def forward(self, idx, targets=None): FILE: RWKV-v3/src/model_run.py class RWKV_ChannelMix (line 23) | class RWKV_ChannelMix(nn.Module): method __init__ (line 24) | def __init__(self, layer_id): method forward (line 37) | def forward(self, x): class RWKV_TimeMix (line 49) | class RWKV_TimeMix(nn.Module): method __init__ (line 50) | def __init__(self, layer_id): method forward (line 68) | def forward(self, x): class Block (line 97) | class Block(nn.Module): method __init__ (line 98) | def __init__(self, layer_id): method forward (line 114) | def forward(self, x): class RWKV_GPT (line 124) | class RWKV_GPT(nn.Module): method __init__ (line 125) | def __init__(self, MODEL_NAME, RUN_DEVICE, model_type, vocab_size, n_l... method forward (line 158) | def forward(self, idx): class RWKV_RNN (line 181) | class RWKV_RNN(): method __init__ (line 182) | def __init__(self, MODEL_NAME, RUN_DEVICE, model_type, n_layer, n_embd... method clear (line 223) | def clear(self): method save (line 229) | def save(self, target): method load (line 235) | def load(self, target): method LN (line 241) | def LN(self, xx, w): method FF (line 244) | def FF(self, xx, w, name): method SA (line 257) | def SA(self, xx, w, name): method run (line 283) | def run(self, ctx): FILE: RWKV-v3/src/trainer.py class TrainerConfig (line 30) | class TrainerConfig: method __init__ (line 44) | def __init__(self, **kwargs): class Trainer (line 49) | class Trainer: method __init__ (line 51) | def __init__(self, model, train_dataset, test_dataset, config): method get_run_name (line 70) | def get_run_name(self): method train (line 78) | def train(self): FILE: RWKV-v3/src/utils.py class Dataset (line 16) | class Dataset(Dataset): method __init__ (line 17) | def __init__(self, data, ctx_len, epoch_length_fixed): method __len__ (line 42) | def __len__(self): method __getitem__ (line 45) | def __getitem__(self, idx): class TOKENIZER (line 57) | class TOKENIZER(): method __init__ (line 58) | def __init__(self, WORD_NAME, UNKNOWN_CHAR='\ue083'): method refine_context (line 69) | def refine_context(self, context): method sample_logits (line 80) | def sample_logits(self, out, x, ctx_len, temperature=1.0, top_p_usual=... function to_float (line 114) | def to_float(x): function set_seed (line 118) | def set_seed(seed): FILE: RWKV-v4/cuda/wkv_op.cpp function forward (line 6) | void forward(int64_t B, int64_t T, int64_t C, torch::Tensor &w, torch::T... function backward (line 9) | void backward(int64_t B, int64_t T, int64_t C, torch::Tensor &w, torch::... function PYBIND11_MODULE (line 13) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { function TORCH_LIBRARY (line 18) | TORCH_LIBRARY(wkv, m) { FILE: RWKV-v4/src/binidx.py function print_rank_0 (line 10) | def print_rank_0(*message): function _warmup_mmap_file (line 18) | def _warmup_mmap_file(path): function code (line 35) | def code(dtype): function index_file_path (line 41) | def index_file_path(prefix_path): function data_file_path (line 44) | def data_file_path(prefix_path): class MMapIndexedDataset (line 47) | class MMapIndexedDataset(torch.utils.data.Dataset): class Index (line 48) | class Index(object): method __init__ (line 51) | def __init__(self, path, skip_warmup=False): method __del__ (line 96) | def __del__(self): method dtype (line 101) | def dtype(self): method sizes (line 105) | def sizes(self): method doc_idx (line 109) | def doc_idx(self): method __getitem__ (line 113) | def __getitem__(self, i): method __len__ (line 116) | def __len__(self): method __init__ (line 119) | def __init__(self, path, skip_warmup=False): method __getstate__ (line 128) | def __getstate__(self): method __setstate__ (line 131) | def __setstate__(self, state): method _do_init (line 134) | def _do_init(self, path, skip_warmup): method __del__ (line 148) | def __del__(self): method __len__ (line 153) | def __len__(self): method __getitem__ (line 157) | def __getitem__(self, idx): method get (line 179) | def get(self, idx, offset=0, length=None): method sizes (line 195) | def sizes(self): method doc_idx (line 199) | def doc_idx(self): method get_doc_idx (line 202) | def get_doc_idx(self): method set_doc_idx (line 205) | def set_doc_idx(self, doc_idx_): method supports_prefetch (line 209) | def supports_prefetch(self): method exists (line 213) | def exists(path): FILE: RWKV-v4/src/model.py class L2Wrap (line 21) | class L2Wrap(torch.autograd.Function): method forward (line 23) | def forward(ctx, loss, y): method backward (line 27) | def backward(ctx, grad_output): class WKV (line 47) | class WKV(torch.autograd.Function): method forward (line 49) | def forward(ctx, B, T, C, w, u, k, v): method backward (line 76) | def backward(ctx, gy): function RUN_CUDA (line 100) | def RUN_CUDA(B, T, C, w, u, k, v): function RWKV_Init (line 107) | def RWKV_Init(model, args): # fancy initialization of all lin & emb lay... class RWKV_TimeMix (line 164) | class RWKV_TimeMix(torch.jit.ScriptModule): method __init__ (line 165) | def __init__(self, config, layer_id): method jit_func (line 209) | def jit_func(self, x): method forward (line 225) | def forward(self, x): class RWKV_ChannelMix (line 235) | class RWKV_ChannelMix(torch.jit.ScriptModule): method __init__ (line 236) | def __init__(self, config, layer_id): method forward (line 261) | def forward(self, x): class GPTConfig (line 278) | class GPTConfig: method __init__ (line 279) | def __init__(self, vocab_size, ctx_len, **kwargs): class Block (line 286) | class Block(nn.Module): method __init__ (line 287) | def __init__(self, config, layer_id): method forward (line 305) | def forward(self, x): class GPT (line 316) | class GPT(nn.Module): method __init__ (line 317) | def __init__(self, config): method get_ctx_len (line 349) | def get_ctx_len(self): method _init_weights (line 352) | def _init_weights(self, module): method configure_optimizers (line 360) | def configure_optimizers(self, train_config): method forward (line 382) | def forward(self, idx, targets=None): FILE: RWKV-v4/src/model_run.py class WKV (line 29) | class WKV(torch.autograd.Function): method forward (line 31) | def forward(ctx, B, T, C, w, u, k, v): method backward (line 58) | def backward(ctx, gy): function RUN_CUDA (line 82) | def RUN_CUDA(B, T, C, w, u, k, v): class RWKV_ChannelMix (line 89) | class RWKV_ChannelMix(nn.Module): method __init__ (line 90) | def __init__(self, layer_id): method forward (line 103) | def forward(self, x): class RWKV_TimeMix (line 115) | class RWKV_TimeMix(nn.Module): method __init__ (line 116) | def __init__(self, layer_id): method forward (line 133) | def forward(self, x): class Block (line 150) | class Block(nn.Module): method __init__ (line 151) | def __init__(self, layer_id): method forward (line 167) | def forward(self, x): class RWKV_GPT (line 177) | class RWKV_GPT(nn.Module): method __init__ (line 178) | def __init__(self, MODEL_NAME, RUN_DEVICE, model_type, vocab_size, n_l... method forward (line 211) | def forward(self, idx): class RWKV_RNN (line 240) | class RWKV_RNN(): # this is running in FP32 at this moment method __init__ (line 241) | def __init__(self, MODEL_NAME, RUN_DEVICE, model_type, n_layer, n_embd... method clear (line 281) | def clear(self): method save (line 288) | def save(self, target): method load (line 295) | def load(self, target): method LN (line 302) | def LN(self, xx, w): method FF (line 305) | def FF(self, xx, w, name): method SA (line 318) | def SA(self, xx, w, name): method run (line 356) | def run(self, ctx): FILE: RWKV-v4/src/trainer.py class TrainerConfig (line 28) | class TrainerConfig: method __init__ (line 40) | def __init__(self, **kwargs): class Trainer (line 46) | class Trainer(LightningLite): method get_run_name (line 48) | def get_run_name(self): method run (line 56) | def run(self, m_cfg, train_dataset, test_dataset, config): FILE: RWKV-v4/src/utils.py class Dataset (line 18) | class Dataset(Dataset): method __init__ (line 19) | def __init__(self, data, ctx_len, epoch_length_fixed): method __len__ (line 55) | def __len__(self): method __getitem__ (line 58) | def __getitem__(self, idx): class TOKENIZER (line 75) | class TOKENIZER(): method __init__ (line 76) | def __init__(self, WORD_NAME, UNKNOWN_CHAR='\ue083'): method refine_context (line 98) | def refine_context(self, context): method sample_logits (line 108) | def sample_logits(self, out, x, ctx_len, temperature=1.0, top_p_usual=... function to_float (line 145) | def to_float(x): function set_seed (line 149) | def set_seed(seed): FILE: RWKV-v4neo/chat.py function run_rnn (line 145) | def run_rnn(tokens, newline_adj = 0): function save_all_stat (line 163) | def save_all_stat(srv, name, last_out): function load_all_stat (line 170) | def load_all_stat(srv, name): function reply_msg (line 194) | def reply_msg(msg): function on_message (line 197) | def on_message(message): FILE: RWKV-v4neo/cuda/wkv5_op.cpp function forward (line 8) | void forward(int64_t B, int64_t T, int64_t C, int64_t H, torch::Tensor &... function backward (line 11) | void backward(int64_t B, int64_t T, int64_t C, int64_t H, torch::Tensor ... function PYBIND11_MODULE (line 14) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { function TORCH_LIBRARY (line 19) | TORCH_LIBRARY(wkv5, m) { FILE: RWKV-v4neo/cuda/wkv_op.cpp function forward (line 6) | void forward(int64_t B, int64_t T, int64_t C, torch::Tensor &w, torch::T... function backward (line 9) | void backward(int64_t B, int64_t T, int64_t C, torch::Tensor &w, torch::... function PYBIND11_MODULE (line 13) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { function TORCH_LIBRARY (line 18) | TORCH_LIBRARY(wkv, m) { FILE: RWKV-v4neo/cuda/wkv_op_bf16.cpp function forward (line 8) | void forward(int64_t B, int64_t T, int64_t C, torch::Tensor &w, torch::T... function backward (line 11) | void backward(int64_t B, int64_t T, int64_t C, torch::Tensor &w, torch::... function PYBIND11_MODULE (line 17) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { function TORCH_LIBRARY (line 22) | TORCH_LIBRARY(wkv, m) { FILE: RWKV-v4neo/img_demoAE.py class ToBinary (line 22) | class ToBinary(torch.autograd.Function): method forward (line 24) | def forward(ctx, x): method backward (line 28) | def backward(ctx, grad_output): class R_ENCODER (line 31) | class R_ENCODER(nn.Module): method __init__ (line 32) | def __init__(self, args): method forward (line 62) | def forward(self, img): class R_DECODER (line 84) | class R_DECODER(nn.Module): method __init__ (line 85) | def __init__(self, args): method forward (line 113) | def forward(self, code): FILE: RWKV-v4neo/math_demo/run.py class TOKENIZER (line 23) | class TOKENIZER(): method __init__ (line 24) | def __init__(self): method encode (line 30) | def encode(self, x): method decode (line 33) | def decode(self, x): class RWKV_RNN (line 40) | class RWKV_RNN(torch.jit.ScriptModule): method __init__ (line 41) | def __init__(self, args): method layer_norm (line 68) | def layer_norm(self, x, w): method channel_mixing (line 72) | def channel_mixing(self, x, state, i:int, time_mix_k, time_mix_r, kw, ... method time_mixing (line 81) | def time_mixing(self, x, state, i:int, time_mix_k, time_mix_v, time_mi... method forward (line 109) | def forward(self, token, state): FILE: RWKV-v4neo/run.py function record_time (line 168) | def record_time(name): FILE: RWKV-v4neo/src/binidx.py function print_rank_0 (line 10) | def print_rank_0(*message): function _warmup_mmap_file (line 19) | def _warmup_mmap_file(path): function code (line 36) | def code(dtype): function index_file_path (line 42) | def index_file_path(prefix_path): function data_file_path (line 45) | def data_file_path(prefix_path): class MMapIndexedDataset (line 48) | class MMapIndexedDataset(torch.utils.data.Dataset): class Index (line 49) | class Index(object): method writer (line 53) | def writer(cls, path, dtype): method __init__ (line 104) | def __init__(self, path, skip_warmup=False): method __del__ (line 149) | def __del__(self): method dtype (line 154) | def dtype(self): method sizes (line 158) | def sizes(self): method doc_idx (line 162) | def doc_idx(self): method __getitem__ (line 166) | def __getitem__(self, i): method __len__ (line 169) | def __len__(self): method __init__ (line 172) | def __init__(self, path, skip_warmup=False): method __getstate__ (line 181) | def __getstate__(self): method __setstate__ (line 184) | def __setstate__(self, state): method _do_init (line 187) | def _do_init(self, path, skip_warmup): method __del__ (line 201) | def __del__(self): method __len__ (line 206) | def __len__(self): method __getitem__ (line 210) | def __getitem__(self, idx): method get (line 232) | def get(self, idx, offset=0, length=None): method sizes (line 248) | def sizes(self): method doc_idx (line 252) | def doc_idx(self): method get_doc_idx (line 255) | def get_doc_idx(self): method set_doc_idx (line 258) | def set_doc_idx(self, doc_idx_): method supports_prefetch (line 262) | def supports_prefetch(self): method exists (line 266) | def exists(path): FILE: RWKV-v4neo/src/dataset.py class MyDataset (line 14) | class MyDataset(Dataset): method __init__ (line 15) | def __init__(self, args): method __len__ (line 104) | def __len__(self): method __getitem__ (line 107) | def __getitem__(self, idx): FILE: RWKV-v4neo/src/model.py function __nop (line 25) | def __nop(ob): class WKV (line 47) | class WKV(torch.autograd.Function): method forward (line 49) | def forward(ctx, B, T, C, w, u, k, v): method backward (line 64) | def backward(ctx, gy): method forward (line 83) | def forward(ctx, B, T, C, w, u, k, v): method backward (line 109) | def backward(ctx, gy): class WKV (line 81) | class WKV(torch.autograd.Function): method forward (line 49) | def forward(ctx, B, T, C, w, u, k, v): method backward (line 64) | def backward(ctx, gy): method forward (line 83) | def forward(ctx, B, T, C, w, u, k, v): method backward (line 109) | def backward(ctx, gy): function RUN_CUDA (line 134) | def RUN_CUDA(B, T, C, w, u, k, v): class RWKV_TimeMix_RWKV5_Preview (line 139) | class RWKV_TimeMix_RWKV5_Preview(MyModule): method __init__ (line 140) | def __init__(self, args, layer_id): method jit_func (line 194) | def jit_func(self, x): method jit_func_2 (line 211) | def jit_func_2(self, r, k, v, g, w, wk, wb, ws): method jit_func (line 232) | def jit_func(self, x): method jit_func_2 (line 247) | def jit_func_2(self, r, k, v, w, wk, wb, ws): method forward (line 267) | def forward(self, x): class WKV_5 (line 320) | class WKV_5(torch.autograd.Function): method forward (line 322) | def forward(ctx, B, T, C, H, r, k, v, w, u): method backward (line 347) | def backward(ctx, gy): function RUN_CUDA_RWKV5 (line 366) | def RUN_CUDA_RWKV5(B, T, C, H, r, k, v, w, u): class RWKV_TimeMix_RWKV5 (line 371) | class RWKV_TimeMix_RWKV5(MyModule): method __init__ (line 372) | def __init__(self, args, layer_id): method jit_func (line 420) | def jit_func(self, x): method jit_func_2 (line 437) | def jit_func_2(self, x, g): method forward (line 445) | def forward(self, x): class RWKV_TimeMix (line 460) | class RWKV_TimeMix(MyModule): method __init__ (line 461) | def __init__(self, args, layer_id): method jit_func (line 511) | def jit_func(self, x): method forward (line 522) | def forward(self, x): method QKV (line 530) | def QKV(self, q, k, v): method jit_funcQKV (line 538) | def jit_funcQKV(self, x): method forward (line 555) | def forward(self, x): class RWKV_ChannelMix (line 564) | class RWKV_ChannelMix(MyModule): method __init__ (line 565) | def __init__(self, args, layer_id): method forward (line 584) | def forward(self, x): class MishGLU (line 593) | class MishGLU(MyModule): method __init__ (line 594) | def __init__(self, args, layer_id): method forward (line 614) | def forward(self, x): class Block (line 627) | class Block(nn.Module): method __init__ (line 628) | def __init__(self, args, layer_id): method forward (line 668) | def forward(self, x, x_emb=None): class L2Wrap (line 700) | class L2Wrap(torch.autograd.Function): method forward (line 702) | def forward(ctx, loss, y): method backward (line 707) | def backward(ctx, grad_output): class RWKV (line 717) | class RWKV(pl.LightningModule): method __init__ (line 718) | def __init__(self, args): method configure_optimizers (line 747) | def configure_optimizers(self): method deepspeed_offload (line 815) | def deepspeed_offload(self) -> bool: method forward (line 822) | def forward(self, idx): method training_step (line 866) | def training_step(self, batch, batch_idx): method training_step_end (line 907) | def training_step_end(self, batch_parts): method generate_init_weight (line 913) | def generate_init_weight(self): FILE: RWKV-v4neo/src/model_img.py function __nop (line 18) | def __nop(ob): class L2pooling (line 27) | class L2pooling(nn.Module): method __init__ (line 28) | def __init__(self, filter_size=5, stride=2, channels=None, pad_off=0): method forward (line 40) | def forward(self, input): class DISTS (line 52) | class DISTS(torch.nn.Module): method __init__ (line 53) | def __init__(self, load_weights=True): method forward_once (line 99) | def forward_once(self, x): method forward (line 113) | def forward(self, x, y, require_grad=False, batch_average=False): class ToBinary (line 150) | class ToBinary(torch.autograd.Function): method forward (line 152) | def forward(ctx, x):#, noise_scale): method backward (line 161) | def backward(ctx, grad_output): class R_ENCODER (line 166) | class R_ENCODER(MyModule): method __init__ (line 167) | def __init__(self, args): method forward (line 203) | def forward(self, img): class R_DECODER (line 229) | class R_DECODER(MyModule): method __init__ (line 230) | def __init__(self, args): method forward (line 264) | def forward(self, code): function cosine_loss (line 289) | def cosine_loss(x, y): class RWKV_IMG (line 294) | class RWKV_IMG(pl.LightningModule): method __init__ (line 295) | def __init__(self, args): method configure_optimizers (line 330) | def configure_optimizers(self): method deepspeed_offload (line 359) | def deepspeed_offload(self) -> bool: method forward (line 366) | def forward(self, img): method training_step (line 372) | def training_step(self, batch, batch_idx): method training_step_end (line 401) | def training_step_end(self, batch_parts): method generate_init_weight (line 406) | def generate_init_weight(self): FILE: RWKV-v4neo/src/model_run.py function __nop (line 13) | def __nop(ob): class RWKV_RNN (line 35) | class RWKV_RNN(MyModule): method __init__ (line 36) | def __init__(self, args): method LN (line 116) | def LN(self, x, w): method FF (line 122) | def FF(self, x, state, i:int, time_mix_k, time_mix_r, kw, vw, rw): method SA (line 143) | def SA(self, x, state, i:int, time_mix_k, time_mix_v, time_mix_r, time... method forward (line 195) | def forward(self, ctx, state, preprocess_only = False): FILE: RWKV-v4neo/src/trainer.py function my_save (line 7) | def my_save(args, trainer, dd, ff): class train_callback (line 25) | class train_callback(pl.Callback): method __init__ (line 26) | def __init__(self, args): method on_train_batch_start (line 30) | def on_train_batch_start(self, trainer, pl_module, batch, batch_idx): method on_train_batch_end (line 116) | def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch... method on_train_epoch_start (line 159) | def on_train_epoch_start(self, trainer, pl_module): method on_train_epoch_end (line 171) | def on_train_epoch_end(self, trainer, pl_module): function generate_init_weight (line 203) | def generate_init_weight(model, init_weight_name): FILE: RWKV-v4neo/src/utils.py function record_time (line 9) | def record_time(name): class TOKENIZER (line 16) | class TOKENIZER(): method __init__ (line 17) | def __init__(self, WORD_NAME, UNKNOWN_CHAR='\ue083'): method refine_context (line 39) | def refine_context(self, context): method sample_logits (line 49) | def sample_logits(self, out, x, ctx_len, temperature=1.0, top_p_usual=... function MaybeIsPrime (line 84) | def MaybeIsPrime(number): function FermatPrimalityTest (line 91) | def FermatPrimalityTest(number): function MillerRabinPrimalityTest (line 102) | def MillerRabinPrimalityTest(number): FILE: RWKV-v5/compute_magic_prime.py function is_prime (line 5) | def is_prime(n): FILE: RWKV-v5/cuda/wkv5_op.cpp function forward (line 8) | void forward(int64_t B, int64_t T, int64_t C, int64_t H, torch::Tensor &... function backward (line 11) | void backward(int64_t B, int64_t T, int64_t C, int64_t H, torch::Tensor ... function PYBIND11_MODULE (line 14) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { function TORCH_LIBRARY (line 19) | TORCH_LIBRARY(wkv5, m) { FILE: RWKV-v5/cuda/wkv6_op.cpp function forward (line 8) | void forward(int64_t B, int64_t T, int64_t C, int64_t H, torch::Tensor &... function backward (line 11) | void backward(int64_t B, int64_t T, int64_t C, int64_t H, torch::Tensor ... function TORCH_LIBRARY (line 15) | TORCH_LIBRARY(wkv6, m) { FILE: RWKV-v5/cuda/wkv6state_op.cpp function forward (line 8) | void forward(int64_t B, int64_t T, int64_t C, int64_t H, torch::Tensor &... function backward (line 11) | void backward(int64_t B, int64_t T, int64_t C, int64_t H, torch::Tensor ... function PYBIND11_MODULE (line 14) | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { function TORCH_LIBRARY (line 19) | TORCH_LIBRARY(wkv6state, m) { FILE: RWKV-v5/cuda/wkv7_op.cpp function forward (line 7) | void forward(torch::Tensor &w, torch::Tensor &q, torch::Tensor &k, torch... function backward (line 14) | void backward(torch::Tensor &w, torch::Tensor &q, torch::Tensor &k, torc... function TORCH_LIBRARY (line 21) | TORCH_LIBRARY(wind_backstepping, m) { function TORCH_LIBRARY_IMPL (line 26) | TORCH_LIBRARY_IMPL(wind_backstepping, CUDA, m) { FILE: RWKV-v5/make_data.py function index_file_path (line 36) | def index_file_path(prefix_path): function data_file_path (line 38) | def data_file_path(prefix_path): class MMapIndexedDatasetBuilder (line 40) | class MMapIndexedDatasetBuilder(object): method __init__ (line 41) | def __init__(self, out_file, dtype=np.uint16): method add_item (line 46) | def add_item(self, np_array): method end_document (line 50) | def end_document(self): method finalize (line 52) | def finalize(self, index_file): function add_raw (line 57) | def add_raw(raw): function is_prime (line 69) | def is_prime(n): FILE: RWKV-v5/rwkv_v6_demo.py class WKV_6 (line 274) | class WKV_6(torch.autograd.Function): method forward (line 276) | def forward(ctx, B, T, C, H, r, k, v, w, u): # forward: r, k, v, w, u ... method backward (line 299) | def backward(ctx, gy): # backward: gy => gr, gk, gv, gw, gu function RUN_CUDA_RWKV6 (line 317) | def RUN_CUDA_RWKV6(B, T, C, H, r, k, v, w, u): class RWKV_Tmix_x060 (line 324) | class RWKV_Tmix_x060(nn.Module): method __init__ (line 325) | def __init__(self, args, layer_id): method jit_func (line 377) | def jit_func(self, x): method jit_func_2 (line 403) | def jit_func_2(self, x, g): method forward (line 411) | def forward(self, x): class RWKV_CMix_x060 (line 424) | class RWKV_CMix_x060(nn.Module): method __init__ (line 425) | def __init__(self, args, layer_id): method forward (line 443) | def forward(self, x): class Block (line 457) | class Block(nn.Module): method __init__ (line 458) | def __init__(self, args, layer_id): method forward (line 472) | def forward(self, x): class RWKV (line 486) | class RWKV(nn.Module): method __init__ (line 487) | def __init__(self, args): method forward (line 506) | def forward(self, idx): method init_params (line 518) | def init_params(self): class RWKV_TOKENIZER (line 583) | class RWKV_TOKENIZER(): method __init__ (line 587) | def __init__(self, file_name): method encodeBytes (line 618) | def encodeBytes(self, src: bytes) -> list[int]: method decodeBytes (line 639) | def decodeBytes(self, tokens): method encode (line 642) | def encode(self, src: str): method decode (line 645) | def decode(self, tokens): method printTokens (line 648) | def printTokens(self, tokens): FILE: RWKV-v5/src/binidx.py function print_rank_0 (line 10) | def print_rank_0(*message): function _warmup_mmap_file (line 19) | def _warmup_mmap_file(path): function code (line 36) | def code(dtype): function index_file_path (line 42) | def index_file_path(prefix_path): function data_file_path (line 45) | def data_file_path(prefix_path): class MMapIndexedDataset (line 48) | class MMapIndexedDataset(torch.utils.data.Dataset): class Index (line 49) | class Index(object): method writer (line 53) | def writer(cls, path, dtype): method __init__ (line 104) | def __init__(self, path, skip_warmup=False): method __del__ (line 149) | def __del__(self): method dtype (line 154) | def dtype(self): method sizes (line 158) | def sizes(self): method doc_idx (line 162) | def doc_idx(self): method __getitem__ (line 166) | def __getitem__(self, i): method __len__ (line 169) | def __len__(self): method __init__ (line 172) | def __init__(self, path, skip_warmup=False): method __getstate__ (line 181) | def __getstate__(self): method __setstate__ (line 184) | def __setstate__(self, state): method _do_init (line 187) | def _do_init(self, path, skip_warmup): method __del__ (line 201) | def __del__(self): method __len__ (line 206) | def __len__(self): method __getitem__ (line 210) | def __getitem__(self, idx): method get (line 232) | def get(self, idx, offset=0, length=None): method sizes (line 248) | def sizes(self): method doc_idx (line 252) | def doc_idx(self): method get_doc_idx (line 255) | def get_doc_idx(self): method set_doc_idx (line 258) | def set_doc_idx(self, doc_idx_): method supports_prefetch (line 262) | def supports_prefetch(self): method exists (line 266) | def exists(path): FILE: RWKV-v5/src/dataset.py class MyDataset (line 14) | class MyDataset(Dataset): method __init__ (line 15) | def __init__(self, args): method __len__ (line 99) | def __len__(self): method __getitem__ (line 102) | def __getitem__(self, idx): FILE: RWKV-v5/src/model.py function __nop (line 25) | def __nop(ob): class WindBackstepping (line 50) | class WindBackstepping(torch.autograd.Function): method forward (line 52) | def forward(ctx, w,q,k,v,z,b): method backward (line 64) | def backward(ctx, dy): function RUN_CUDA_RWKV7g (line 72) | def RUN_CUDA_RWKV7g(q,w,k,v,a,b): class WKV_6STATE (line 82) | class WKV_6STATE(torch.autograd.Function): method forward (line 84) | def forward(ctx, B, T, C, H, r, k, v, w, u, s): method backward (line 109) | def backward(ctx, gy): function RUN_CUDA_RWKV6_STATE (line 129) | def RUN_CUDA_RWKV6_STATE(B, T, C, H, r, k, v, w, u, s): class WKV_6 (line 135) | class WKV_6(torch.autograd.Function): method forward (line 137) | def forward(ctx, r, k, v, w, u): method backward (line 162) | def backward(ctx, gy): function RUN_CUDA_RWKV6 (line 180) | def RUN_CUDA_RWKV6(r, k, v, w, u): class WKV_5 (line 187) | class WKV_5(torch.autograd.Function): method forward (line 189) | def forward(ctx, B, T, C, H, r, k, v, w, u): method backward (line 214) | def backward(ctx, gy): function RUN_CUDA_RWKV5 (line 233) | def RUN_CUDA_RWKV5(B, T, C, H, r, k, v, w, u): class RWKV_Tmix_x052 (line 241) | class RWKV_Tmix_x052(MyModule): method __init__ (line 242) | def __init__(self, args, layer_id): method jit_func (line 290) | def jit_func(self, x): method jit_func_2 (line 307) | def jit_func_2(self, x, g): method forward (line 315) | def forward(self, x): class RWKV_Tmix_x060 (line 325) | class RWKV_Tmix_x060(MyModule): method __init__ (line 326) | def __init__(self, args, layer_id): method forward (line 381) | def forward(self, x): class RWKV_Tmix_x060_state (line 414) | class RWKV_Tmix_x060_state(MyModule): method __init__ (line 415) | def __init__(self, args, layer_id): method jit_func (line 471) | def jit_func(self, x): method jit_func_2 (line 498) | def jit_func_2(self, x, g): method forward (line 506) | def forward(self, x): class RWKV_Tmix_x060a (line 517) | class RWKV_Tmix_x060a(MyModule): method __init__ (line 518) | def __init__(self, args, layer_id): method forward (line 576) | def forward(self, x): class RWKV_Tmix_x060b (line 609) | class RWKV_Tmix_x060b(MyModule): method __init__ (line 610) | def __init__(self, args, layer_id): method forward (line 658) | def forward(self, x): class RWKV_Tmix_x060c (line 686) | class RWKV_Tmix_x060c(MyModule): method __init__ (line 687) | def __init__(self, args, layer_id): method forward (line 735) | def forward(self, x): class RWKV_Tmix_x070 (line 766) | class RWKV_Tmix_x070(MyModule): method __init__ (line 767) | def __init__(self, args, layer_id): method forward (line 859) | def forward(self, x, v_first): class RWKV_CMix_x052 (line 895) | class RWKV_CMix_x052(MyModule): method __init__ (line 896) | def __init__(self, args, layer_id): method forward (line 915) | def forward(self, x): class RWKV_CMix_x060 (line 924) | class RWKV_CMix_x060(MyModule): method __init__ (line 925) | def __init__(self, args, layer_id): method forward (line 944) | def forward(self, x): class RWKV_CMix_x070 (line 954) | class RWKV_CMix_x070(MyModule): method __init__ (line 955) | def __init__(self, args, layer_id): method forward (line 976) | def forward(self, x): class MishGLU (line 986) | class MishGLU(MyModule): method __init__ (line 987) | def __init__(self, args, layer_id): method forward (line 1007) | def forward(self, x): class Block (line 1020) | class Block(nn.Module): method __init__ (line 1021) | def __init__(self, args, layer_id): method forward (line 1077) | def forward(self, x, v_first): method forward (line 1087) | def forward(self, x, x_emb=None): class L2Wrap (line 1119) | class L2Wrap(torch.autograd.Function): method forward (line 1121) | def forward(ctx, loss, y): method backward (line 1126) | def backward(ctx, grad_output): class RWKV (line 1136) | class RWKV(pl.LightningModule): method __init__ (line 1137) | def __init__(self, args): method configure_optimizers (line 1169) | def configure_optimizers(self): method deepspeed_offload (line 1251) | def deepspeed_offload(self) -> bool: method forward (line 1258) | def forward(self, idx): method training_step (line 1310) | def training_step(self, batch, batch_idx): method training_step_end (line 1351) | def training_step_end(self, batch_parts): method generate_init_weight (line 1357) | def generate_init_weight(self): FILE: RWKV-v5/src/trainer.py function my_save (line 7) | def my_save(args, trainer, dd, ff): class train_callback (line 32) | class train_callback(pl.Callback): method __init__ (line 33) | def __init__(self, args): method on_train_batch_start (line 37) | def on_train_batch_start(self, trainer, pl_module, batch, batch_idx): method on_train_batch_end (line 123) | def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch... method on_train_epoch_start (line 166) | def on_train_epoch_start(self, trainer, pl_module): method on_train_epoch_end (line 178) | def on_train_epoch_end(self, trainer, pl_module): function generate_init_weight (line 210) | def generate_init_weight(model, init_weight_name): FILE: RWKV-v5/src/utils.py function record_time (line 9) | def record_time(name): class TOKENIZER (line 16) | class TOKENIZER(): method __init__ (line 17) | def __init__(self, WORD_NAME, UNKNOWN_CHAR='\ue083'): method refine_context (line 39) | def refine_context(self, context): method sample_logits (line 49) | def sample_logits(self, out, x, ctx_len, temperature=1.0, top_p_usual=... function MaybeIsPrime (line 84) | def MaybeIsPrime(number): function FermatPrimalityTest (line 91) | def FermatPrimalityTest(number): function MillerRabinPrimalityTest (line 102) | def MillerRabinPrimalityTest(number): FILE: RWKV-v5/tokenizer/rwkv_tokenizer.py class TRIE (line 5) | class TRIE: method __init__ (line 9) | def __init__(self, front=None, ch=None): method __repr__ (line 15) | def __repr__(self): method add (line 24) | def add(self, key:bytes, idx:int=0, val=None): method find_longest (line 35) | def find_longest(self, key:bytes, idx:int=0): class TRIE_TOKENIZER (line 49) | class TRIE_TOKENIZER(): method __init__ (line 50) | def __init__(self, file_name): method encodeBytes (line 72) | def encodeBytes(self, src:bytes): method decodeBytes (line 83) | def decodeBytes(self, tokens): method encode (line 86) | def encode(self, src): method decode (line 89) | def decode(self, tokens): method printTokens (line 95) | def printTokens(self, tokens): FILE: RWKV-v7/cuda/wkv7_op.cpp function forward (line 9) | void forward(int64_t B, int64_t T, int64_t C, int64_t H, torch::Tensor &... function TORCH_LIBRARY (line 13) | TORCH_LIBRARY(wkv7, m) { FILE: RWKV-v7/cuda/wkv7s_op.cpp function forward (line 9) | void forward(int64_t B, int64_t T, int64_t C, int64_t H, torch::Tensor &... function TORCH_LIBRARY (line 13) | TORCH_LIBRARY(wkv7s, m) { FILE: RWKV-v7/rwkv_v7_demo.py class RWKV_TOKENIZER (line 53) | class RWKV_TOKENIZER(): method __init__ (line 57) | def __init__(self, file_name): method encodeBytes (line 88) | def encodeBytes(self, src: bytes) -> list[int]: method decodeBytes (line 109) | def decodeBytes(self, tokens): method encode (line 112) | def encode(self, src: str): method decode (line 115) | def decode(self, tokens): method printTokens (line 118) | def printTokens(self, tokens): class WKV_7 (line 141) | class WKV_7(torch.autograd.Function): method forward (line 143) | def forward(ctx, r, w, k, v, a, b): function RWKV7_OP (line 165) | def RWKV7_OP(r, w, k, v, a, b): function RWKV7_OP (line 170) | def RWKV7_OP(r, w, k, v, a, b): class RWKV_Tmix_x070 (line 209) | class RWKV_Tmix_x070(MyModule): method __init__ (line 210) | def __init__(self, args, layer_id): method forward (line 257) | def forward(self, x, v_first): class RWKV_CMix_x070 (line 295) | class RWKV_CMix_x070(MyModule): method __init__ (line 296) | def __init__(self, args, layer_id): method forward (line 309) | def forward(self, x): class Block (line 320) | class Block(MyModule): method __init__ (line 321) | def __init__(self, args, layer_id): method forward (line 334) | def forward(self, x, v_first): class RWKV (line 349) | class RWKV(nn.Module): method __init__ (line 350) | def __init__(self, args): method forward (line 361) | def forward(self, idx): FILE: RWKV-v7/rwkv_v7_demo_fast.py class WKV_7 (line 63) | class WKV_7(torch.autograd.Function): method forward (line 65) | def forward(ctx, state, r, w, k, v, a, b): function RWKV7_OP (line 76) | def RWKV7_OP(state, r, w, k, v, a, b): class RWKV_x070 (line 81) | class RWKV_x070(MyModule): method __init__ (line 82) | def __init__(self, args): method forward (line 106) | def forward(self, idx, state, full_output=False): method forward_one (line 123) | def forward_one(self, idx:int, state:List[torch.Tensor]): method forward_seq (line 154) | def forward_seq(self, idx:List[int], state:List[torch.Tensor], full_ou... function RWKV_x070_TMix_one (line 188) | def RWKV_x070_TMix_one(layer_id: int, H:int, N:int, x, x_prev, v_first, ... function RWKV_x070_TMix_seq (line 215) | def RWKV_x070_TMix_seq(layer_id: int, H:int, N:int, x, x_prev, v_first, ... function RWKV_x070_CMix_one (line 251) | def RWKV_x070_CMix_one(x, x_prev, x_k, K_, V_): function RWKV_x070_CMix_seq (line 258) | def RWKV_x070_CMix_seq(x, x_prev, x_k, K_, V_): function sample_logits (line 271) | def sample_logits(logits, temperature:float=1.0, top_p:float=1.0, top_k:... class RWKV_TOKENIZER (line 299) | class RWKV_TOKENIZER(): method __init__ (line 303) | def __init__(self, file_name): method encodeBytes (line 334) | def encodeBytes(self, src: bytes) -> list[int]: method decodeBytes (line 355) | def decodeBytes(self, tokens): method encode (line 358) | def encode(self, src: str): method decode (line 361) | def decode(self, tokens): method printTokens (line 364) | def printTokens(self, tokens): FILE: RWKV-v7/rwkv_v7_demo_rnn.py class RWKV_RNN (line 51) | class RWKV_RNN(MyModule): method __init__ (line 52) | def __init__(self, args): method forward (line 79) | def forward(self, token:int, state:List[torch.Tensor]): function time_mixing__ (line 112) | def time_mixing__(layer_id:int, H:int, N:int, x, x_prev, v_first, state,... function channel_mixing__ (line 158) | def channel_mixing__(x, x_prev, x_k, kw, vw): function sample_logits (line 171) | def sample_logits(logits, temperature:float=1.0, top_p:float=1.0, top_k:... class RWKV_TOKENIZER (line 199) | class RWKV_TOKENIZER(): method __init__ (line 203) | def __init__(self, file_name): method encodeBytes (line 234) | def encodeBytes(self, src: bytes) -> list[int]: method decodeBytes (line 255) | def decodeBytes(self, tokens): method encode (line 258) | def encode(self, src: str): method decode (line 261) | def decode(self, tokens): method printTokens (line 264) | def printTokens(self, tokens): FILE: RWKV-v7/rwkv_v7_numpy.py function time_mixing (line 13) | def time_mixing(x, v0, last_x, S, params): function channel_mixing (line 46) | def channel_mixing(x, last_x, mix, Wk, Wv): function RWKV7 (line 52) | def RWKV7(params, token, state): FILE: RWKV-v7/rwkv_v7a_demo.py class WKV_7 (line 65) | class WKV_7(torch.autograd.Function): method forward (line 67) | def forward(ctx, state, r, w, k, v, a, b): function RWKV7_OP (line 78) | def RWKV7_OP(state, r, w, k, v, a, b): class RWKV_x070 (line 83) | class RWKV_x070(MyModule): method __init__ (line 84) | def __init__(self, args): method forward (line 112) | def forward(self, idx, state, full_output=False): method forward_one (line 129) | def forward_one(self, idx:int, state:List[torch.Tensor]): method forward_seq (line 160) | def forward_seq(self, idx:List[int], state:List[torch.Tensor], full_ou... function RWKV_x070_TMix_one (line 194) | def RWKV_x070_TMix_one(layer_id: int, H:int, N:int, x, x_prev, v_first, ... function RWKV_x070_TMix_seq (line 221) | def RWKV_x070_TMix_seq(layer_id: int, H:int, N:int, x, x_prev, v_first, ... function RWKV_x070_CMix_one (line 257) | def RWKV_x070_CMix_one(x, x_prev, x_k, K_, V_, semb_, s1_, s2_, s0_): function RWKV_x070_CMix_seq (line 266) | def RWKV_x070_CMix_seq(x, x_prev, x_k, K_, V_, semb_, s1_, s2_, s0_): function sample_logits (line 282) | def sample_logits(logits, temperature:float=1.0, top_p:float=1.0, top_k:... class RWKV_TOKENIZER (line 310) | class RWKV_TOKENIZER(): method __init__ (line 314) | def __init__(self, file_name): method encodeBytes (line 345) | def encodeBytes(self, src: bytes) -> list[int]: method decodeBytes (line 366) | def decodeBytes(self, tokens): method encode (line 369) | def encode(self, src: str): method decode (line 372) | def decode(self, tokens): method printTokens (line 375) | def printTokens(self, tokens): FILE: RWKV-v7/rwkv_v7b_demo.py class WKV_7 (line 65) | class WKV_7(torch.autograd.Function): method forward (line 67) | def forward(ctx, state, r, w, k, v, a, b): function RWKV7_OP (line 78) | def RWKV7_OP(state, r, w, k, v, a, b): class RWKV_x070 (line 83) | class RWKV_x070(MyModule): method __init__ (line 84) | def __init__(self, args): method forward (line 114) | def forward(self, idx, state, full_output=False): method forward_seq (line 138) | def forward_seq(self, idx:List[int], state:List[torch.Tensor], full_ou... function RWKV_x070_TMix_seq (line 196) | def RWKV_x070_TMix_seq(layer_id: int, H:int, N:int, x, x_prev, v_first, ... function RWKV_x070_CMix_seq (line 232) | def RWKV_x070_CMix_seq(x, x_prev, x_k, K_, V_, semb_, s1_, s2_, s0_): function sample_logits (line 248) | def sample_logits(logits, temperature:float=1.0, top_p:float=1.0, top_k:... class RWKV_TOKENIZER (line 276) | class RWKV_TOKENIZER(): method __init__ (line 280) | def __init__(self, file_name): method encodeBytes (line 311) | def encodeBytes(self, src: bytes) -> list[int]: method decodeBytes (line 332) | def decodeBytes(self, tokens): method encode (line 335) | def encode(self, src: str): method decode (line 338) | def decode(self, tokens): method printTokens (line 341) | def printTokens(self, tokens): FILE: RWKV-v7/rwkv_v8_rc00_demo.py class WKV_7 (line 67) | class WKV_7(torch.autograd.Function): method forward (line 69) | def forward(ctx, state, r, w, k, v, a, b): function RWKV7_OP (line 80) | def RWKV7_OP(state, r, w, k, v, a, b): class RWKV_x070 (line 85) | class RWKV_x070(MyModule): method __init__ (line 86) | def __init__(self, args): method forward (line 110) | def forward(self, idx, state, full_output=False): method forward_one (line 127) | def forward_one(self, idx:int, state:List[torch.Tensor]): method forward_seq (line 158) | def forward_seq(self, idx:List[int], state:List[torch.Tensor], full_ou... function RWKV_x070_TMix_one (line 192) | def RWKV_x070_TMix_one(layer_id: int, H:int, N:int, x, x_prev, v_first, ... function RWKV_x070_TMix_seq (line 219) | def RWKV_x070_TMix_seq(layer_id: int, H:int, N:int, x, x_prev, v_first, ... function RWKV_x080_CMix_one (line 255) | def RWKV_x080_CMix_one(x, x_prev, x_k, K_, V_, E_): function RWKV_x080_CMix_seq (line 262) | def RWKV_x080_CMix_seq(x, x_prev, x_k, K_, V_, E_): function sample_logits (line 275) | def sample_logits(logits, temperature:float=1.0, top_p:float=1.0, top_k:... class RWKV_TOKENIZER (line 304) | class RWKV_TOKENIZER(): method __init__ (line 308) | def __init__(self, file_name): method encodeBytes (line 339) | def encodeBytes(self, src: bytes) -> list[int]: method decodeBytes (line 360) | def decodeBytes(self, tokens): method encode (line 363) | def encode(self, src: str): method decode (line 366) | def decode(self, tokens): method printTokens (line 369) | def printTokens(self, tokens): method __init__ (line 383) | def __init__(self): method encode (line 385) | def encode(self, x): method decode (line 387) | def decode(self, x): class RWKV_TOKENIZER (line 382) | class RWKV_TOKENIZER(): method __init__ (line 308) | def __init__(self, file_name): method encodeBytes (line 339) | def encodeBytes(self, src: bytes) -> list[int]: method decodeBytes (line 360) | def decodeBytes(self, tokens): method encode (line 363) | def encode(self, src: str): method decode (line 366) | def decode(self, tokens): method printTokens (line 369) | def printTokens(self, tokens): method __init__ (line 383) | def __init__(self): method encode (line 385) | def encode(self, x): method decode (line 387) | def decode(self, x): FILE: RWKV-v7/rwkv_v8_rc00_hybrid_demo.py class WKV_7 (line 67) | class WKV_7(torch.autograd.Function): method forward (line 69) | def forward(ctx, state, r, w, k, v, a, b): function RWKV7_OP (line 80) | def RWKV7_OP(state, r, w, k, v, a, b): class RWKV_x070 (line 85) | class RWKV_x070(MyModule): method __init__ (line 86) | def __init__(self, args): method forward (line 109) | def forward(self, idx, state, full_output=False): method forward_seq (line 134) | def forward_seq(self, idx:List[int], state:List[torch.Tensor], full_ou... function RWKV_x070_TMix_seq (line 191) | def RWKV_x070_TMix_seq(layer_id: int, H:int, N:int, x, x_prev, v_first, ... function RWKV_x080_CMix_seq (line 218) | def RWKV_x080_CMix_seq(x, x_prev, x_k, K_, V_, E_): function sample_logits (line 231) | def sample_logits(logits, temperature:float=1.0, top_p:float=1.0, top_k:... class RWKV_TOKENIZER (line 260) | class RWKV_TOKENIZER(): method __init__ (line 264) | def __init__(self, file_name): method encodeBytes (line 295) | def encodeBytes(self, src: bytes) -> list[int]: method decodeBytes (line 316) | def decodeBytes(self, tokens): method encode (line 319) | def encode(self, src: str): method decode (line 322) | def decode(self, tokens): method printTokens (line 325) | def printTokens(self, tokens): method __init__ (line 339) | def __init__(self): method encode (line 341) | def encode(self, x): method decode (line 343) | def decode(self, x): class RWKV_TOKENIZER (line 338) | class RWKV_TOKENIZER(): method __init__ (line 264) | def __init__(self, file_name): method encodeBytes (line 295) | def encodeBytes(self, src: bytes) -> list[int]: method decodeBytes (line 316) | def decodeBytes(self, tokens): method encode (line 319) | def encode(self, src: str): method decode (line 322) | def decode(self, tokens): method printTokens (line 325) | def printTokens(self, tokens): method __init__ (line 339) | def __init__(self): method encode (line 341) | def encode(self, x): method decode (line 343) | def decode(self, x): FILE: RWKV-v7/train_temp/cuda/rwkv7_clampw.cpp function forward (line 12) | void forward(torch::Tensor &r, torch::Tensor &w, torch::Tensor &k, torch... function backward (line 19) | void backward(torch::Tensor &r, torch::Tensor &w, torch::Tensor &k, torc... function TORCH_LIBRARY (line 26) | TORCH_LIBRARY(rwkv7_clampw, m) { FILE: RWKV-v7/train_temp/cuda/wkv7_op.cpp function forward (line 7) | void forward(torch::Tensor &w, torch::Tensor &q, torch::Tensor &k, torch... function backward (line 14) | void backward(torch::Tensor &w, torch::Tensor &q, torch::Tensor &k, torc... function TORCH_LIBRARY (line 21) | TORCH_LIBRARY(wind_backstepping, m) { function TORCH_LIBRARY_IMPL (line 26) | TORCH_LIBRARY_IMPL(wind_backstepping, CUDA, m) { FILE: RWKV-v7/train_temp/cuda/wkv7_op_fp32.cpp function forward (line 7) | void forward(torch::Tensor &w, torch::Tensor &q, torch::Tensor &k, torch... function backward (line 14) | void backward(torch::Tensor &w, torch::Tensor &q, torch::Tensor &k, torc... function TORCH_LIBRARY (line 21) | TORCH_LIBRARY(wind_backstepping, m) { function TORCH_LIBRARY_IMPL (line 26) | TORCH_LIBRARY_IMPL(wind_backstepping, CUDA, m) { FILE: RWKV-v7/train_temp/rwkv7_train_simplified.py function set_seed_all (line 14) | def set_seed_all(seed): class WindBackstepping (line 39) | class WindBackstepping(torch.autograd.Function): method forward (line 41) | def forward(ctx, w,q,k,v,z,b): method backward (line 53) | def backward(ctx, dy): function RUN_CUDA_RWKV7g (line 60) | def RUN_CUDA_RWKV7g(q,w,k,v,a,b): class RWKV_Tmix_x070 (line 66) | class RWKV_Tmix_x070(MyModule): method __init__ (line 67) | def __init__(self, args, layer_id): method forward (line 147) | def forward(self, x, v_first): function _digits (line 186) | def _digits(n): return [TOK[c] for c in str(n)] function batch (line 188) | def batch(B,T, device=None): class FFN (line 200) | class FFN(nn.Module): method __init__ (line 201) | def __init__(self, C): method forward (line 210) | def forward(self, x): class MODEL (line 216) | class MODEL(nn.Module): method __init__ (line 217) | def __init__(s): method forward (line 243) | def forward(s,x): FILE: RWKV-v7/train_temp/src/binidx.py function print_rank_0 (line 8) | def print_rank_0(*message): function _warmup_mmap_file (line 17) | def _warmup_mmap_file(path): function code (line 34) | def code(dtype): function index_file_path (line 40) | def index_file_path(prefix_path): function data_file_path (line 43) | def data_file_path(prefix_path): class MMapIndexedDataset (line 46) | class MMapIndexedDataset(torch.utils.data.Dataset): class Index (line 47) | class Index(object): method writer (line 51) | def writer(cls, path, dtype): method __init__ (line 102) | def __init__(self, path, skip_warmup=True): method __del__ (line 147) | def __del__(self): method dtype (line 152) | def dtype(self): method sizes (line 156) | def sizes(self): method doc_idx (line 160) | def doc_idx(self): method __getitem__ (line 164) | def __getitem__(self, i): method __len__ (line 167) | def __len__(self): method __init__ (line 170) | def __init__(self, path, skip_warmup=True): method __getstate__ (line 179) | def __getstate__(self): method __setstate__ (line 182) | def __setstate__(self, state): method _do_init (line 185) | def _do_init(self, path, skip_warmup=True): method __del__ (line 199) | def __del__(self): method __len__ (line 204) | def __len__(self): method __getitem__ (line 208) | def __getitem__(self, idx): method get (line 230) | def get(self, idx, offset=0, length=None): method sizes (line 246) | def sizes(self): method doc_idx (line 250) | def doc_idx(self): method get_doc_idx (line 253) | def get_doc_idx(self): method set_doc_idx (line 256) | def set_doc_idx(self, doc_idx_): method supports_prefetch (line 260) | def supports_prefetch(self): method exists (line 264) | def exists(path): FILE: RWKV-v7/train_temp/src/dataset.py function is_prime (line 12) | def is_prime(n): class MyDataset (line 26) | class MyDataset(Dataset): method __init__ (line 27) | def __init__(self, args): method __len__ (line 46) | def __len__(self): method __getitem__ (line 49) | def __getitem__(self, idx): FILE: RWKV-v7/train_temp/src/model.py function __nop (line 21) | def __nop(ob): class RWKV7_CLAMPW_CUDA_OP (line 48) | class RWKV7_CLAMPW_CUDA_OP(torch.autograd.Function): method forward (line 50) | def forward(ctx,r,w,k,v,a,b): method backward (line 62) | def backward(ctx,dy): function RWKV7_CLAMPW_CUDA (line 69) | def RWKV7_CLAMPW_CUDA(r,w,k,v,a,b): class RWKV_Tmix_x070 (line 76) | class RWKV_Tmix_x070(MyModule): method __init__ (line 77) | def __init__(self, args, layer_id): method forward (line 164) | def forward(self, x, v_first): class RWKV_CMix_x070 (line 200) | class RWKV_CMix_x070(MyModule): method __init__ (line 201) | def __init__(self, args, layer_id): method forward (line 221) | def forward(self, x): class Block (line 234) | class Block(nn.Module): method __init__ (line 235) | def __init__(self, args, layer_id): method forward (line 249) | def forward(self, x, v_first): class L2Wrap (line 260) | class L2Wrap(torch.autograd.Function): method forward (line 262) | def forward(ctx, loss, y): method backward (line 267) | def backward(ctx, grad_output): class RWKV (line 277) | class RWKV(pl.LightningModule): method __init__ (line 278) | def __init__(self, args): method configure_optimizers (line 296) | def configure_optimizers(self): method deepspeed_offload (line 337) | def deepspeed_offload(self) -> bool: method forward (line 344) | def forward(self, idx): method training_step (line 362) | def training_step(self, batch, batch_idx): method training_step_end (line 368) | def training_step_end(self, batch_parts): method generate_init_weight (line 373) | def generate_init_weight(self): FILE: RWKV-v7/train_temp/src/trainer.py function my_save (line 7) | def my_save(args, trainer, dd, ff): class train_callback (line 13) | class train_callback(pl.Callback): method __init__ (line 14) | def __init__(self, args): method on_train_batch_start (line 18) | def on_train_batch_start(self, trainer, pl_module, batch, batch_idx): method on_train_batch_end (line 81) | def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch... method on_train_epoch_start (line 121) | def on_train_epoch_start(self, trainer, pl_module): method on_train_epoch_end (line 130) | def on_train_epoch_end(self, trainer, pl_module): function generate_init_weight (line 159) | def generate_init_weight(model, init_weight_name): FILE: RWKV-v8/251014_rosa_1bit_layer.py function rosa (line 6) | def rosa(x): class ROSA_1bit (line 27) | class ROSA_1bit(torch.autograd.Function): # !!! extremely slow !!! method forward (line 29) | def forward(ctx, x, emb0, emb1, tau: float): method backward (line 42) | def backward(ctx, gy): class ROSA_1bit_LAYER (line 86) | class ROSA_1bit_LAYER(nn.Module): # !!! extremely slow !!! method __init__ (line 87) | def __init__(self, C: int, tau: float = 1e-3): method forward (line 92) | def forward(self, x: torch.Tensor) -> torch.Tensor: FILE: RWKV-v8/251014_rosa_1bit_train.py function rosa (line 9) | def rosa(x): function rosa_torch (line 30) | def rosa_torch(z: torch.Tensor) -> torch.Tensor: class Emb_ROSA (line 35) | class Emb_ROSA(nn.Module): method __init__ (line 36) | def __init__(s,V,C): method forward (line 39) | def forward(s,idx): class ROSA_1bit (line 46) | class ROSA_1bit(torch.autograd.Function): # !!! extremely slow !!! method forward (line 48) | def forward(ctx, x, emb0, emb1, tau: float): method backward (line 61) | def backward(ctx, gy): class ROSA_1bit_LAYER (line 106) | class ROSA_1bit_LAYER(nn.Module): # !!! extremely slow !!! method __init__ (line 107) | def __init__(self, C: int, tau: float = 1e-3): method forward (line 112) | def forward(self, x: torch.Tensor) -> torch.Tensor: function batch (line 122) | def batch(B,T,nn=None): class MODEL (line 136) | class MODEL(nn.Module): method __init__ (line 137) | def __init__(s): method forward (line 143) | def forward(s,x): FILE: RWKV-v8/251014_rosa_onlyemb_train.py function rosa (line 9) | def rosa(x): function rosa_torch (line 30) | def rosa_torch(z: torch.Tensor) -> torch.Tensor: class Emb_ROSA (line 35) | class Emb_ROSA(nn.Module): method __init__ (line 36) | def __init__(s,V,C): method forward (line 39) | def forward(s,idx): function batch (line 51) | def batch(B,T,nn=None): class MODEL (line 65) | class MODEL(nn.Module): method __init__ (line 66) | def __init__(s): method forward (line 71) | def forward(s,x): FILE: RWKV-v8/251016_rosa_1bit_run.py function rosa (line 9) | def rosa(x): function rosa_torch (line 30) | def rosa_torch(z: torch.Tensor) -> torch.Tensor: class Emb_ROSA (line 35) | class Emb_ROSA(nn.Module): method __init__ (line 36) | def __init__(s,V,C): method forward (line 39) | def forward(s,idx): class ROSA_1bit (line 46) | class ROSA_1bit(torch.autograd.Function): method forward (line 48) | def forward(ctx, x, emb0, emb1, tau: float): class ROSA_1bit_LAYER (line 67) | class ROSA_1bit_LAYER(nn.Module): method __init__ (line 68) | def __init__(self, C: int, tau: float = 1e-3): method forward (line 73) | def forward(self, x: torch.Tensor) -> torch.Tensor: function batch (line 83) | def batch(B,T,nn=None): class MODEL (line 97) | class MODEL(nn.Module): method __init__ (line 98) | def __init__(s): method forward (line 106) | def forward(s,x): FILE: RWKV-v8/251018_rosa_4bit_run.py function rosa (line 9) | def rosa(x): function rosa_batch_python_orig (line 30) | def rosa_batch_python_orig(z: torch.Tensor) -> torch.Tensor: function rosa_batch_python (line 35) | def rosa_batch_python(z: torch.Tensor) -> torch.Tensor: class rosa_emb_layer (line 40) | class rosa_emb_layer(nn.Module): method __init__ (line 41) | def __init__(s,V,C): method forward (line 44) | def forward(s,idx): class rosa_4bit_layer (line 49) | class rosa_4bit_layer(nn.Module): method __init__ (line 50) | def __init__(self, C: int, eps: float = 1e-5): method forward (line 55) | def forward(self, x: torch.Tensor) -> torch.Tensor: function batch (line 77) | def batch(B,T,nn=None): class MODEL (line 91) | class MODEL(nn.Module): method __init__ (line 92) | def __init__(s): method forward (line 104) | def forward(s,x): FILE: RWKV-v8/251024_rosaQKV_run.py function set_seed_all (line 9) | def set_seed_all(seed): function samx_qkv_slow (line 29) | def samx_qkv_slow(qqq, kkk, vvv): # slow, only for reference function samx_qkv_batch_ref (line 50) | def samx_qkv_batch_ref(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor... class samx_qkv_1bit_layer_op (line 62) | class samx_qkv_1bit_layer_op(torch.autograd.Function): method forward (line 64) | def forward(ctx, q, k, v, e): class samx_qkv_1bit_layer (line 72) | class samx_qkv_1bit_layer(nn.Module): method __init__ (line 73) | def __init__(self, C: int): method forward (line 76) | def forward(self, q: torch.Tensor, k: torch.Tensor, v: torch.Tensor) -... class ROSA_QKV_B_1bit (line 79) | class ROSA_QKV_B_1bit(nn.Module): method __init__ (line 80) | def __init__(s,C): method forward (line 91) | def forward(s,x): class WindBackstepping (line 106) | class WindBackstepping(torch.autograd.Function): method forward (line 108) | def forward(ctx, w,q,k,v,z,b): method backward (line 120) | def backward(ctx, dy): function RUN_CUDA_RWKV7g (line 127) | def RUN_CUDA_RWKV7g(q,w,k,v,a,b): class RWKV_Tmix_x070 (line 132) | class RWKV_Tmix_x070(MyModule): method __init__ (line 133) | def __init__(self, args, layer_id): method forward (line 210) | def forward(self, x, v_first): class FFN (line 246) | class FFN(nn.Module): method __init__ (line 247) | def __init__(self, C): method forward (line 256) | def forward(self, x): class MODEL (line 263) | class MODEL(nn.Module): method __init__ (line 264) | def __init__(s): method forward (line 306) | def forward(s,x): method __init__ (line 331) | def __init__(s): method forward (line 359) | def forward(s,x): class MODEL (line 330) | class MODEL(nn.Module): method __init__ (line 264) | def __init__(s): method forward (line 306) | def forward(s,x): method __init__ (line 331) | def __init__(s): method forward (line 359) | def forward(s,x): function get_randint (line 378) | def get_randint(digits): FILE: RWKV-v8/251105_reverse_run.py function set_seed_all (line 9) | def set_seed_all(seed): function samx_qkv_slow (line 41) | def samx_qkv_slow(qqq, kkk, vvv): # slow, only for reference function samx_qkv_batch_ref (line 62) | def samx_qkv_batch_ref(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor... class samx_qkv_1bit_layer_op (line 74) | class samx_qkv_1bit_layer_op(torch.autograd.Function): method forward (line 76) | def forward(ctx, q, k, v, e): class samx_qkv_1bit_layer (line 84) | class samx_qkv_1bit_layer(nn.Module): method __init__ (line 85) | def __init__(self, C: int): method forward (line 88) | def forward(self, q: torch.Tensor, k: torch.Tensor, v: torch.Tensor) -... class ROSA_QKV_B_1bit (line 91) | class ROSA_QKV_B_1bit(nn.Module): method __init__ (line 92) | def __init__(s,C): method forward (line 103) | def forward(s,x): class WindBackstepping (line 118) | class WindBackstepping(torch.autograd.Function): method forward (line 120) | def forward(ctx, w,q,k,v,z,b): method backward (line 132) | def backward(ctx, dy): function RUN_CUDA_RWKV7g (line 139) | def RUN_CUDA_RWKV7g(q,w,k,v,a,b): class RWKV_Tmix_x070 (line 144) | class RWKV_Tmix_x070(MyModule): method __init__ (line 145) | def __init__(self, args, layer_id): method forward (line 222) | def forward(self, x, v_first): class FFN (line 258) | class FFN(nn.Module): method __init__ (line 259) | def __init__(self, C): method forward (line 268) | def forward(self, x): class MODEL (line 275) | class MODEL(nn.Module): method __init__ (line 276) | def __init__(s): method forward (line 318) | def forward(s,x): method __init__ (line 343) | def __init__(s): method forward (line 371) | def forward(s,x): class MODEL (line 342) | class MODEL(nn.Module): method __init__ (line 276) | def __init__(s): method forward (line 318) | def forward(s,x): method __init__ (line 343) | def __init__(s): method forward (line 371) | def forward(s,x): function get_randint (line 390) | def get_randint(digits): FILE: RWKV-v8/260212_rosa1bitLM_L12.py class RWKV_TOKENIZER (line 43) | class RWKV_TOKENIZER(): method __init__ (line 47) | def __init__(self, file_name): method encodeBytes (line 78) | def encodeBytes(self, src: bytes) -> list[int]: method decodeBytes (line 99) | def decodeBytes(self, tokens): method encode (line 102) | def encode(self, src: str): method decode (line 105) | def decode(self, tokens): method printTokens (line 108) | def printTokens(self, tokens): function sample_logits (line 122) | def sample_logits(logits, temperature:float=1.0, top_p:float=1.0, top_k:... function rosa_qkv_ref (line 150) | def rosa_qkv_ref(qqq, kkk, vvv): function rosa_qkv_batch_ref (line 171) | def rosa_qkv_batch_ref(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor... class rosa_qkv_1bit_layer_op (line 183) | class rosa_qkv_1bit_layer_op(torch.autograd.Function): method forward (line 185) | def forward(ctx, q, k, v, e): class rosa_qkv_1bit_layer (line 193) | class rosa_qkv_1bit_layer(nn.Module): method __init__ (line 194) | def __init__(self, C: int): method forward (line 197) | def forward(self, q: torch.Tensor, k: torch.Tensor, v: torch.Tensor) -... class RWKV_ROSA_1bit (line 200) | class RWKV_ROSA_1bit(nn.Module): method __init__ (line 201) | def __init__(s,C): method forward (line 212) | def forward(s,x): class RWKV_CMix_x070 (line 224) | class RWKV_CMix_x070(MyModule): method __init__ (line 225) | def __init__(self, args, layer_id): method forward (line 238) | def forward(self, x): class Block (line 249) | class Block(MyModule): method __init__ (line 250) | def __init__(self, args, layer_id): method forward (line 263) | def forward(self, x, v_first): class RWKV (line 277) | class RWKV(nn.Module): method __init__ (line 278) | def __init__(self, args): method forward (line 289) | def forward(self, idx): FILE: RWKV-v8/260222_rosa4bitLM_L12.py function __nop (line 39) | def __nop(ob): class RWKV_TOKENIZER (line 49) | class RWKV_TOKENIZER(): method __init__ (line 53) | def __init__(self, file_name): method encodeBytes (line 84) | def encodeBytes(self, src: bytes) -> list[int]: method decodeBytes (line 105) | def decodeBytes(self, tokens): method encode (line 108) | def encode(self, src: str): method decode (line 111) | def decode(self, tokens): method printTokens (line 114) | def printTokens(self, tokens): function sample_logits (line 128) | def sample_logits(logits, temperature:float=1.0, top_p:float=1.0, top_k:... function rosa_slow_ref (line 156) | def rosa_slow_ref(q, k, v): class rosa_slow_4bit_layer (line 175) | class rosa_slow_4bit_layer(nn.Module): # !!! matched 1 => e, matched 0 =... method __init__ (line 176) | def __init__(self, C): method forward (line 179) | def forward(self, q, k, v): class RWKV_ROSA_4bit (line 215) | class RWKV_ROSA_4bit(nn.Module): method __init__ (line 216) | def __init__(s,C): method forward (line 227) | def forward(s,x): class RWKV_CMix_x070 (line 239) | class RWKV_CMix_x070(MyModule): method __init__ (line 240) | def __init__(self, args, layer_id): method forward (line 253) | def forward(self, x): class Block (line 264) | class Block(MyModule): method __init__ (line 265) | def __init__(self, args, layer_id): method forward (line 278) | def forward(self, x, v_first): class RWKV (line 292) | class RWKV(nn.Module): method __init__ (line 293) | def __init__(self, args): method forward (line 304) | def forward(self, idx): FILE: RWKV-v8/cuda/wkv7_op.cpp function forward (line 7) | void forward(torch::Tensor &w, torch::Tensor &q, torch::Tensor &k, torch... function backward (line 14) | void backward(torch::Tensor &w, torch::Tensor &q, torch::Tensor &k, torc... function TORCH_LIBRARY (line 21) | TORCH_LIBRARY(wind_backstepping, m) { function TORCH_LIBRARY_IMPL (line 26) | TORCH_LIBRARY_IMPL(wind_backstepping, CUDA, m) {