SYMBOL INDEX (140 symbols across 16 files)

FILE: scripts/benchmark.py
  function benchmark_batch (line 15) | def benchmark_batch(documents: list[str]) -> None:

FILE: scripts/redact.py
  function redact_file (line 7) | def redact_file(path: Path, dry_run: bool) -> None:
  function redact (line 42) | def redact(dry_run: bool) -> None:
  function main (line 57) | def main() -> None:

FILE: scripts/wheel_download.py
  function download_artifacts (line 8) | def download_artifacts(token, owner, repo, run_id, output_dir):

FILE: src/lib.rs
  type Rank (line 13) | pub type Rank = u32;
  type Merge (line 18) | struct Merge {
  method cmp (line 25) | fn cmp(&self, other: &Self) -> std::cmp::Ordering {
  method partial_cmp (line 34) | fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
  type State (line 39) | struct State {
  function _byte_pair_merge_large (line 47) | fn _byte_pair_merge_large(ranks: &HashMap<Vec<u8>, Rank>, piece: &[u8]) ...
  function _byte_pair_merge (line 140) | fn _byte_pair_merge(ranks: &HashMap<Vec<u8>, Rank>, piece: &[u8]) -> Vec...
  function byte_pair_encode (line 198) | pub fn byte_pair_encode(piece: &[u8], ranks: &HashMap<Vec<u8>, Rank>) ->...
  function byte_pair_split (line 213) | pub fn byte_pair_split<'a>(piece: &'a [u8], ranks: &HashMap<Vec<u8>, Ran...
  type FakeThreadId (line 262) | struct FakeThreadId(NonZeroU64);
  function hash_current_thread (line 264) | fn hash_current_thread() -> usize {
  type DecodeKeyError (line 278) | pub struct DecodeKeyError {
    method fmt (line 283) | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
  type DecodeError (line 291) | pub struct DecodeError {
    method fmt (line 296) | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
  type EncodeError (line 304) | pub struct EncodeError {
    method fmt (line 309) | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
  constant MAX_NUM_THREADS (line 316) | const MAX_NUM_THREADS: usize = 128;
  type CoreBPE (line 320) | pub struct CoreBPE {
    method _get_tl_regex (line 331) | fn _get_tl_regex(&self) -> &Regex {
    method _get_tl_special_regex (line 338) | fn _get_tl_special_regex(&self) -> &Regex {
    method decode_bytes (line 345) | fn decode_bytes(&self, tokens: &[Rank]) -> Result<Vec<u8>, DecodeKeyEr...
    method encode_ordinary (line 360) | pub fn encode_ordinary(&self, text: &str) -> Vec<Rank> {
    method encode (line 375) | pub fn encode(
    method _increase_last_piece_token_len (line 444) | fn _increase_last_piece_token_len(
    method _encode_unstable_native (line 483) | pub fn _encode_unstable_native(
    method new (line 601) | pub fn new<E, SE, NSE>(
    method new_internal (line 618) | fn new_internal(
    method special_tokens (line 665) | pub fn special_tokens(&self) -> HashSet<&str> {
    method encode_with_special_tokens (line 672) | pub fn encode_with_special_tokens(&self, text: &str) -> Vec<Rank> {
  function setup_ranks (line 685) | fn setup_ranks() -> HashMap<Vec<u8>, Rank> {
  function test_simple_characters (line 690) | fn test_simple_characters() {
  function test_repeated_characters (line 697) | fn test_repeated_characters() {

FILE: src/py.rs
  method py_new (line 16) | fn py_new(
  method py_encode_ordinary (line 30) | fn py_encode_ordinary(&self, py: Python, text: &str) -> Vec<Rank> {
  method py_encode (line 35) | fn py_encode(
  method encode_to_tiktoken_buffer (line 51) | fn encode_to_tiktoken_buffer(
  method _encode_bytes (line 72) | fn _encode_bytes(&self, py: Python, bytes: &[u8]) -> Vec<Rank> {
  method py_encode_with_unstable (line 118) | fn py_encode_with_unstable(
  method encode_single_token (line 133) | fn encode_single_token(&self, piece: &[u8]) -> PyResult<Rank> {
  method encode_single_piece (line 145) | fn encode_single_piece(&self, piece: &[u8]) -> Vec<Rank> {
  method py_decode_bytes (line 157) | fn py_decode_bytes(&self, py: Python, tokens: Vec<Rank>) -> Result<Py<Py...
  method decode_single_token_bytes (line 164) | fn decode_single_token_bytes(&self, py: Python, token: Rank) -> PyResult...
  method token_byte_values (line 178) | fn token_byte_values(&self, py: Python) -> Vec<Py<PyBytes>> {
  type TiktokenBuffer (line 187) | struct TiktokenBuffer {
    method __getbuffer__ (line 194) | unsafe fn __getbuffer__(
    method __releasebuffer__ (line 240) | unsafe fn __releasebuffer__(&self, view: *mut pyo3::ffi::Py_buffer) {
  function _tiktoken (line 252) | fn _tiktoken(_py: Python, m: &Bound<PyModule>) -> PyResult<()> {

FILE: tests/test_encoding.py
  function test_simple (line 14) | def test_simple():
  function test_simple_repeated (line 31) | def test_simple_repeated():
  function test_large_repeated (line 52) | def test_large_repeated():
  function test_simple_regex (line 60) | def test_simple_regex():
  function test_basic_encode (line 69) | def test_basic_encode():
  function test_encode_empty (line 81) | def test_encode_empty():
  function test_encode_bytes (line 86) | def test_encode_bytes():
  function test_hyp_encode_bytes (line 97) | def test_hyp_encode_bytes(make_enc: Callable[[], tiktoken.Encoding], byt...
  function test_encode_surrogate_pairs (line 102) | def test_encode_surrogate_pairs():
  function test_catastrophically_repetitive (line 114) | def test_catastrophically_repetitive(make_enc: Callable[[], tiktoken.Enc...
  function test_basic_roundtrip (line 133) | def test_basic_roundtrip(make_enc):
  function test_hyp_roundtrip (line 152) | def test_hyp_roundtrip(make_enc: Callable[[], tiktoken.Encoding], text):
  function test_single_token_roundtrip (line 159) | def test_single_token_roundtrip(make_enc: Callable[[], tiktoken.Encoding]):
  function test_special_token (line 175) | def test_special_token():
  function test_hyp_special_ordinary (line 229) | def test_hyp_special_ordinary(make_enc, text: str):
  function test_batch_encode (line 240) | def test_batch_encode(make_enc: Callable[[], tiktoken.Encoding]):
  function test_hyp_batch_roundtrip (line 258) | def test_hyp_batch_roundtrip(make_enc: Callable[[], tiktoken.Encoding], ...

FILE: tests/test_misc.py
  function test_encoding_for_model (line 7) | def test_encoding_for_model():
  function test_optional_blobfile_dependency (line 24) | def test_optional_blobfile_dependency():

FILE: tests/test_offsets.py
  function _common_prefix_len (line 12) | def _common_prefix_len(a, b):
  function _token_offsets_reference (line 19) | def _token_offsets_reference(enc, tokens):
  function test_hyp_offsets (line 31) | def test_hyp_offsets(make_enc: Callable[[], tiktoken.Encoding], data):
  function test_basic_offsets (line 49) | def test_basic_offsets():

FILE: tests/test_pickle.py
  function test_pickle (line 4) | def test_pickle():

FILE: tests/test_simple_public.py
  function test_simple (line 7) | def test_simple():
  function test_encoding_for_model (line 25) | def test_encoding_for_model():
  function test_optional_blobfile_dependency (line 36) | def test_optional_blobfile_dependency():

FILE: tiktoken/_educational.py
  class SimpleBytePairEncoding (line 12) | class SimpleBytePairEncoding:
    method __init__ (line 13) | def __init__(self, *, pat_str: str, mergeable_ranks: dict[bytes, int])...
    method encode (line 23) | def encode(self, text: str, visualise: str | None = "colour") -> list[...
    method decode_bytes (line 39) | def decode_bytes(self, tokens: list[int]) -> bytes:
    method decode (line 47) | def decode(self, tokens: list[int]) -> str:
    method decode_tokens_bytes (line 58) | def decode_tokens_bytes(self, tokens: list[int]) -> list[bytes]:
    method train (line 69) | def train(training_data: str, vocab_size: int, pat_str: str):
    method from_tiktoken (line 75) | def from_tiktoken(encoding):
  function bpe_encode (line 83) | def bpe_encode(
  function bpe_train (line 119) | def bpe_train(
  function visualise_tokens (line 188) | def visualise_tokens(token_values: list[bytes]) -> None:
  function train_simple_encoding (line 208) | def train_simple_encoding():

FILE: tiktoken/core.py
  class Encoding (line 16) | class Encoding:
    method __init__ (line 17) | def __init__(
    method __repr__ (line 56) | def __repr__(self) -> str:
    method encode_ordinary (line 63) | def encode_ordinary(self, text: str) -> list[int]:
    method encode (line 79) | def encode(
    method encode_to_numpy (line 135) | def encode_to_numpy(
    method encode_ordinary_batch (line 161) | def encode_ordinary_batch(self, text: list[str], *, num_threads: int =...
    method encode_batch (line 175) | def encode_batch(
    method encode_with_unstable (line 205) | def encode_with_unstable(
    method encode_single_token (line 242) | def encode_single_token(self, text_or_bytes: str | bytes) -> int:
    method decode_bytes (line 262) | def decode_bytes(self, tokens: Sequence[int]) -> bytes:
    method decode (line 272) | def decode(self, tokens: Sequence[int], errors: str = "replace") -> str:
    method decode_single_token_bytes (line 286) | def decode_single_token_bytes(self, token: int) -> bytes:
    method decode_tokens_bytes (line 300) | def decode_tokens_bytes(self, tokens: Sequence[int]) -> list[bytes]:
    method decode_with_offsets (line 309) | def decode_with_offsets(self, tokens: Sequence[int]) -> tuple[str, lis...
    method decode_batch (line 334) | def decode_batch(
    method decode_bytes_batch (line 342) | def decode_bytes_batch(
    method token_byte_values (line 353) | def token_byte_values(self) -> list[bytes]:
    method eot_token (line 358) | def eot_token(self) -> int:
    method special_tokens_set (line 362) | def special_tokens_set(self) -> set[str]:
    method is_special_token (line 365) | def is_special_token(self, token: int) -> bool:
    method n_vocab (line 370) | def n_vocab(self) -> int:
    method _encode_single_piece (line 378) | def _encode_single_piece(self, text_or_bytes: str | bytes) -> list[int]:
    method _encode_only_native_bpe (line 392) | def _encode_only_native_bpe(self, text: str) -> list[int]:
    method _encode_bytes (line 403) | def _encode_bytes(self, text: bytes) -> list[int]:
    method __getstate__ (line 406) | def __getstate__(self) -> object:
    method __setstate__ (line 419) | def __setstate__(self, value: object) -> None:
  function _special_token_regex (line 429) | def _special_token_regex(tokens: frozenset[str]) -> re.Pattern[str]:
  function raise_disallowed_special_token (line 438) | def raise_disallowed_special_token(token: str) -> NoReturn:

FILE: tiktoken/load.py
  function read_file (line 8) | def read_file(blobpath: str) -> bytes:
  function check_hash (line 30) | def check_hash(data: bytes, expected_hash: str) -> bool:
  function read_file_cached (line 35) | def read_file_cached(blobpath: str, expected_hash: str | None = None) ->...
  function data_gym_to_mergeable_bpe_ranks (line 89) | def data_gym_to_mergeable_bpe_ranks(
  function dump_tiktoken_bpe (line 147) | def dump_tiktoken_bpe(bpe_ranks: dict[bytes, int], tiktoken_bpe_file: st...
  function load_tiktoken_bpe (line 159) | def load_tiktoken_bpe(tiktoken_bpe_file: str, expected_hash: str | None ...

FILE: tiktoken/model.py
  function encoding_name_for_model (line 88) | def encoding_name_for_model(model_name: str) -> str:
  function encoding_for_model (line 113) | def encoding_for_model(model_name: str) -> Encoding:

FILE: tiktoken/registry.py
  function _available_plugin_modules (line 20) | def _available_plugin_modules() -> Sequence[str]:
  function _find_constructors (line 33) | def _find_constructors() -> None:
  function get_encoding (line 63) | def get_encoding(encoding_name: str) -> Encoding:
  function list_encoding_names (line 91) | def list_encoding_names() -> list[str]:

FILE: tiktoken_ext/openai_public.py
  function gpt2 (line 17) | def gpt2():
  function r50k_base (line 33) | def r50k_base():
  function p50k_base (line 47) | def p50k_base():
  function p50k_edit (line 61) | def p50k_edit():
  function cl100k_base (line 75) | def cl100k_base():
  function o200k_base (line 95) | def o200k_base():
  function o200k_harmony (line 123) | def o200k_harmony():