SYMBOL INDEX (82 symbols across 3 files) FILE: examples/base_streaming_gen.py function parse_args (line 29) | def parse_args(): function set_seed (line 42) | def set_seed(seed): function generate (line 49) | def generate(inputs): FILE: finetune_demo/finetune.py function parse_args (line 62) | def parse_args(): function get_num_lines (line 291) | def get_num_lines(file_path): class SFTDataset (line 300) | class SFTDataset(Dataset): method __init__ (line 301) | def __init__(self, indexed_dataset): method __len__ (line 304) | def __len__(self): method __getitem__ (line 307) | def __getitem__(self, idx): class Encoder (line 316) | class Encoder(object): method __init__ (line 317) | def __init__(self, path, max_length=4096): method concat_encode (line 324) | def concat_encode(self, fin, file_path, stop_token_ids): function preprocess_data (line 385) | def preprocess_data(args, key, level, dataset_impl='mmap'): function main (line 423) | def main(): FILE: finetune_demo/plugins/indexed_dataset.py function print_rank_0 (line 28) | def print_rank_0(message): function __best_fitting_dtype (line 37) | def __best_fitting_dtype(vocab_size=None): function get_available_dataset_impl (line 41) | def get_available_dataset_impl(): function infer_dataset_impl (line 45) | def infer_dataset_impl(path): function make_builder (line 63) | def make_builder(out_file, impl, vocab_size=None): function make_dataset (line 71) | def make_dataset(path, impl, skip_warmup=False): function dataset_exists (line 90) | def dataset_exists(path, impl): function read_longs (line 97) | def read_longs(f, n): function write_longs (line 103) | def write_longs(f, a): function code (line 119) | def code(dtype): function index_file_path (line 126) | def index_file_path(prefix_path): function data_file_path (line 130) | def data_file_path(prefix_path): function create_doc_idx (line 134) | def create_doc_idx(sizes): class IndexedDataset (line 142) | class IndexedDataset(torch.utils.data.Dataset): method __init__ (line 146) | def __init__(self, path): method read_index (line 152) | def read_index(self, path): method read_data (line 169) | def read_data(self, path): method check_index (line 172) | def check_index(self, i): method __del__ (line 176) | def __del__(self): method __getitem__ (line 181) | def __getitem__(self, idx): method __len__ (line 207) | def __len__(self): method num_tokens (line 210) | def num_tokens(self, index): method size (line 213) | def size(self, index): method exists (line 217) | def exists(path): method supports_prefetch (line 222) | def supports_prefetch(self): class IndexedCachedDataset (line 226) | class IndexedCachedDataset(IndexedDataset): method __init__ (line 227) | def __init__(self, path): method supports_prefetch (line 233) | def supports_prefetch(self): method prefetch (line 236) | def prefetch(self, indices): method __getitem__ (line 261) | def __getitem__(self, idx): class IndexedDatasetBuilder (line 279) | class IndexedDatasetBuilder(object): method __init__ (line 290) | def __init__(self, out_file, dtype=np.int32): method add_item (line 299) | def add_item(self, tensor): method end_document (line 307) | def end_document(self): method merge_file_ (line 310) | def merge_file_(self, another_file): method finalize (line 335) | def finalize(self, index_file): function _warmup_mmap_file (line 351) | def _warmup_mmap_file(path): class MMapIndexedDataset (line 357) | class MMapIndexedDataset(torch.utils.data.Dataset): class Index (line 358) | class Index(object): method writer (line 362) | def writer(cls, path, dtype): method __init__ (line 407) | def __init__(self, path, skip_warmup=False): method __del__ (line 449) | def __del__(self): method dtype (line 454) | def dtype(self): method sizes (line 458) | def sizes(self): method doc_idx (line 462) | def doc_idx(self): method __getitem__ (line 466) | def __getitem__(self, i): method __len__ (line 469) | def __len__(self): method __init__ (line 472) | def __init__(self, path, skip_warmup=False): method __getstate__ (line 481) | def __getstate__(self): method __setstate__ (line 484) | def __setstate__(self, state): method _do_init (line 487) | def _do_init(self, path, skip_warmup): method __del__ (line 500) | def __del__(self): method __len__ (line 505) | def __len__(self): method __getitem__ (line 509) | def __getitem__(self, idx): method get (line 538) | def get(self, idx, offset=0, length=None): method sizes (line 556) | def sizes(self): method doc_idx (line 560) | def doc_idx(self): method get_doc_idx (line 563) | def get_doc_idx(self): method set_doc_idx (line 566) | def set_doc_idx(self, doc_idx_): method supports_prefetch (line 570) | def supports_prefetch(self): method exists (line 574) | def exists(path): class MMapIndexedDatasetBuilder (line 579) | class MMapIndexedDatasetBuilder(object): method __init__ (line 580) | def __init__(self, out_file, dtype=np.int64): method add_item (line 586) | def add_item(self, tensor): method add_doc (line 591) | def add_doc(self, tensor, sizes): method end_document (line 597) | def end_document(self): method merge_file_ (line 600) | def merge_file_(self, another_file): method finalize (line 613) | def finalize(self, index_file):