SYMBOL INDEX (140 symbols across 16 files) FILE: scripts/benchmark.py function benchmark_batch (line 15) | def benchmark_batch(documents: list[str]) -> None: FILE: scripts/redact.py function redact_file (line 7) | def redact_file(path: Path, dry_run: bool) -> None: function redact (line 42) | def redact(dry_run: bool) -> None: function main (line 57) | def main() -> None: FILE: scripts/wheel_download.py function download_artifacts (line 8) | def download_artifacts(token, owner, repo, run_id, output_dir): FILE: src/lib.rs type Rank (line 13) | pub type Rank = u32; type Merge (line 18) | struct Merge { method cmp (line 25) | fn cmp(&self, other: &Self) -> std::cmp::Ordering { method partial_cmp (line 34) | fn partial_cmp(&self, other: &Self) -> Option { type State (line 39) | struct State { function _byte_pair_merge_large (line 47) | fn _byte_pair_merge_large(ranks: &HashMap, Rank>, piece: &[u8]) ... function _byte_pair_merge (line 140) | fn _byte_pair_merge(ranks: &HashMap, Rank>, piece: &[u8]) -> Vec... function byte_pair_encode (line 198) | pub fn byte_pair_encode(piece: &[u8], ranks: &HashMap, Rank>) ->... function byte_pair_split (line 213) | pub fn byte_pair_split<'a>(piece: &'a [u8], ranks: &HashMap, Ran... type FakeThreadId (line 262) | struct FakeThreadId(NonZeroU64); function hash_current_thread (line 264) | fn hash_current_thread() -> usize { type DecodeKeyError (line 278) | pub struct DecodeKeyError { method fmt (line 283) | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { type DecodeError (line 291) | pub struct DecodeError { method fmt (line 296) | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { type EncodeError (line 304) | pub struct EncodeError { method fmt (line 309) | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { constant MAX_NUM_THREADS (line 316) | const MAX_NUM_THREADS: usize = 128; type CoreBPE (line 320) | pub struct CoreBPE { method _get_tl_regex (line 331) | fn _get_tl_regex(&self) -> &Regex { method _get_tl_special_regex (line 338) | fn _get_tl_special_regex(&self) -> &Regex { method decode_bytes (line 345) | fn decode_bytes(&self, tokens: &[Rank]) -> Result, DecodeKeyEr... method encode_ordinary (line 360) | pub fn encode_ordinary(&self, text: &str) -> Vec { method encode (line 375) | pub fn encode( method _increase_last_piece_token_len (line 444) | fn _increase_last_piece_token_len( method _encode_unstable_native (line 483) | pub fn _encode_unstable_native( method new (line 601) | pub fn new( method new_internal (line 618) | fn new_internal( method special_tokens (line 665) | pub fn special_tokens(&self) -> HashSet<&str> { method encode_with_special_tokens (line 672) | pub fn encode_with_special_tokens(&self, text: &str) -> Vec { function setup_ranks (line 685) | fn setup_ranks() -> HashMap, Rank> { function test_simple_characters (line 690) | fn test_simple_characters() { function test_repeated_characters (line 697) | fn test_repeated_characters() { FILE: src/py.rs method py_new (line 16) | fn py_new( method py_encode_ordinary (line 30) | fn py_encode_ordinary(&self, py: Python, text: &str) -> Vec { method py_encode (line 35) | fn py_encode( method encode_to_tiktoken_buffer (line 51) | fn encode_to_tiktoken_buffer( method _encode_bytes (line 72) | fn _encode_bytes(&self, py: Python, bytes: &[u8]) -> Vec { method py_encode_with_unstable (line 118) | fn py_encode_with_unstable( method encode_single_token (line 133) | fn encode_single_token(&self, piece: &[u8]) -> PyResult { method encode_single_piece (line 145) | fn encode_single_piece(&self, piece: &[u8]) -> Vec { method py_decode_bytes (line 157) | fn py_decode_bytes(&self, py: Python, tokens: Vec) -> Result PyResult... method token_byte_values (line 178) | fn token_byte_values(&self, py: Python) -> Vec> { type TiktokenBuffer (line 187) | struct TiktokenBuffer { method __getbuffer__ (line 194) | unsafe fn __getbuffer__( method __releasebuffer__ (line 240) | unsafe fn __releasebuffer__(&self, view: *mut pyo3::ffi::Py_buffer) { function _tiktoken (line 252) | fn _tiktoken(_py: Python, m: &Bound) -> PyResult<()> { FILE: tests/test_encoding.py function test_simple (line 14) | def test_simple(): function test_simple_repeated (line 31) | def test_simple_repeated(): function test_large_repeated (line 52) | def test_large_repeated(): function test_simple_regex (line 60) | def test_simple_regex(): function test_basic_encode (line 69) | def test_basic_encode(): function test_encode_empty (line 81) | def test_encode_empty(): function test_encode_bytes (line 86) | def test_encode_bytes(): function test_hyp_encode_bytes (line 97) | def test_hyp_encode_bytes(make_enc: Callable[[], tiktoken.Encoding], byt... function test_encode_surrogate_pairs (line 102) | def test_encode_surrogate_pairs(): function test_catastrophically_repetitive (line 114) | def test_catastrophically_repetitive(make_enc: Callable[[], tiktoken.Enc... function test_basic_roundtrip (line 133) | def test_basic_roundtrip(make_enc): function test_hyp_roundtrip (line 152) | def test_hyp_roundtrip(make_enc: Callable[[], tiktoken.Encoding], text): function test_single_token_roundtrip (line 159) | def test_single_token_roundtrip(make_enc: Callable[[], tiktoken.Encoding]): function test_special_token (line 175) | def test_special_token(): function test_hyp_special_ordinary (line 229) | def test_hyp_special_ordinary(make_enc, text: str): function test_batch_encode (line 240) | def test_batch_encode(make_enc: Callable[[], tiktoken.Encoding]): function test_hyp_batch_roundtrip (line 258) | def test_hyp_batch_roundtrip(make_enc: Callable[[], tiktoken.Encoding], ... FILE: tests/test_misc.py function test_encoding_for_model (line 7) | def test_encoding_for_model(): function test_optional_blobfile_dependency (line 24) | def test_optional_blobfile_dependency(): FILE: tests/test_offsets.py function _common_prefix_len (line 12) | def _common_prefix_len(a, b): function _token_offsets_reference (line 19) | def _token_offsets_reference(enc, tokens): function test_hyp_offsets (line 31) | def test_hyp_offsets(make_enc: Callable[[], tiktoken.Encoding], data): function test_basic_offsets (line 49) | def test_basic_offsets(): FILE: tests/test_pickle.py function test_pickle (line 4) | def test_pickle(): FILE: tests/test_simple_public.py function test_simple (line 7) | def test_simple(): function test_encoding_for_model (line 25) | def test_encoding_for_model(): function test_optional_blobfile_dependency (line 36) | def test_optional_blobfile_dependency(): FILE: tiktoken/_educational.py class SimpleBytePairEncoding (line 12) | class SimpleBytePairEncoding: method __init__ (line 13) | def __init__(self, *, pat_str: str, mergeable_ranks: dict[bytes, int])... method encode (line 23) | def encode(self, text: str, visualise: str | None = "colour") -> list[... method decode_bytes (line 39) | def decode_bytes(self, tokens: list[int]) -> bytes: method decode (line 47) | def decode(self, tokens: list[int]) -> str: method decode_tokens_bytes (line 58) | def decode_tokens_bytes(self, tokens: list[int]) -> list[bytes]: method train (line 69) | def train(training_data: str, vocab_size: int, pat_str: str): method from_tiktoken (line 75) | def from_tiktoken(encoding): function bpe_encode (line 83) | def bpe_encode( function bpe_train (line 119) | def bpe_train( function visualise_tokens (line 188) | def visualise_tokens(token_values: list[bytes]) -> None: function train_simple_encoding (line 208) | def train_simple_encoding(): FILE: tiktoken/core.py class Encoding (line 16) | class Encoding: method __init__ (line 17) | def __init__( method __repr__ (line 56) | def __repr__(self) -> str: method encode_ordinary (line 63) | def encode_ordinary(self, text: str) -> list[int]: method encode (line 79) | def encode( method encode_to_numpy (line 135) | def encode_to_numpy( method encode_ordinary_batch (line 161) | def encode_ordinary_batch(self, text: list[str], *, num_threads: int =... method encode_batch (line 175) | def encode_batch( method encode_with_unstable (line 205) | def encode_with_unstable( method encode_single_token (line 242) | def encode_single_token(self, text_or_bytes: str | bytes) -> int: method decode_bytes (line 262) | def decode_bytes(self, tokens: Sequence[int]) -> bytes: method decode (line 272) | def decode(self, tokens: Sequence[int], errors: str = "replace") -> str: method decode_single_token_bytes (line 286) | def decode_single_token_bytes(self, token: int) -> bytes: method decode_tokens_bytes (line 300) | def decode_tokens_bytes(self, tokens: Sequence[int]) -> list[bytes]: method decode_with_offsets (line 309) | def decode_with_offsets(self, tokens: Sequence[int]) -> tuple[str, lis... method decode_batch (line 334) | def decode_batch( method decode_bytes_batch (line 342) | def decode_bytes_batch( method token_byte_values (line 353) | def token_byte_values(self) -> list[bytes]: method eot_token (line 358) | def eot_token(self) -> int: method special_tokens_set (line 362) | def special_tokens_set(self) -> set[str]: method is_special_token (line 365) | def is_special_token(self, token: int) -> bool: method n_vocab (line 370) | def n_vocab(self) -> int: method _encode_single_piece (line 378) | def _encode_single_piece(self, text_or_bytes: str | bytes) -> list[int]: method _encode_only_native_bpe (line 392) | def _encode_only_native_bpe(self, text: str) -> list[int]: method _encode_bytes (line 403) | def _encode_bytes(self, text: bytes) -> list[int]: method __getstate__ (line 406) | def __getstate__(self) -> object: method __setstate__ (line 419) | def __setstate__(self, value: object) -> None: function _special_token_regex (line 429) | def _special_token_regex(tokens: frozenset[str]) -> re.Pattern[str]: function raise_disallowed_special_token (line 438) | def raise_disallowed_special_token(token: str) -> NoReturn: FILE: tiktoken/load.py function read_file (line 8) | def read_file(blobpath: str) -> bytes: function check_hash (line 30) | def check_hash(data: bytes, expected_hash: str) -> bool: function read_file_cached (line 35) | def read_file_cached(blobpath: str, expected_hash: str | None = None) ->... function data_gym_to_mergeable_bpe_ranks (line 89) | def data_gym_to_mergeable_bpe_ranks( function dump_tiktoken_bpe (line 147) | def dump_tiktoken_bpe(bpe_ranks: dict[bytes, int], tiktoken_bpe_file: st... function load_tiktoken_bpe (line 159) | def load_tiktoken_bpe(tiktoken_bpe_file: str, expected_hash: str | None ... FILE: tiktoken/model.py function encoding_name_for_model (line 88) | def encoding_name_for_model(model_name: str) -> str: function encoding_for_model (line 113) | def encoding_for_model(model_name: str) -> Encoding: FILE: tiktoken/registry.py function _available_plugin_modules (line 20) | def _available_plugin_modules() -> Sequence[str]: function _find_constructors (line 33) | def _find_constructors() -> None: function get_encoding (line 63) | def get_encoding(encoding_name: str) -> Encoding: function list_encoding_names (line 91) | def list_encoding_names() -> list[str]: FILE: tiktoken_ext/openai_public.py function gpt2 (line 17) | def gpt2(): function r50k_base (line 33) | def r50k_base(): function p50k_base (line 47) | def p50k_base(): function p50k_edit (line 61) | def p50k_edit(): function cl100k_base (line 75) | def cl100k_base(): function o200k_base (line 95) | def o200k_base(): function o200k_harmony (line 123) | def o200k_harmony():