SYMBOL INDEX (160 symbols across 22 files) FILE: Tokenizer_C#/PerfBenchmark/Program.cs class Tokenization (line 8) | public class Tokenization method Tokenization (line 14) | public Tokenization() method Encode (line 31) | [Benchmark] method GlobalSetup (line 34) | [GlobalSetup] class Program (line 41) | public class Program method Main (line 43) | public static void Main(string[] args) FILE: Tokenizer_C#/Tokenizer/Program.cs class Program (line 7) | internal class Program method Main (line 14) | static async Task Main(string[] args) FILE: Tokenizer_C#/TokenizerLib/ITokenizer.cs type ITokenizer (line 7) | public interface ITokenizer method Encode (line 12) | public List Encode(string text, IReadOnlyCollection allow... method EncodeTrimSuffix (line 17) | public (List TokenIds, string Text) EncodeTrimSuffix(string text,... method EncodeTrimPrefix (line 23) | public (List TokenIds, string Text) EncodeTrimPrefix(string text,... method Encode (line 28) | public List Encode(string text, bool applySpecialTokens = true); method EncodeTrimSuffix (line 33) | public (List TokenIds, string Text) EncodeTrimSuffix(string text,... method EncodeTrimPrefix (line 39) | public (List TokenIds, string Text) EncodeTrimPrefix(string text,... method Decode (line 45) | public string Decode(int[] tokens); FILE: Tokenizer_C#/TokenizerLib/TikTokenizer.cs class TikTokenizer (line 20) | public class TikTokenizer : ITokenizer method TikTokenizer (line 48) | public TikTokenizer(IReadOnlyDictionary encoder, IReadOnl... method TikTokenizer (line 60) | public TikTokenizer(Stream tikTokenBpeFileStream, IReadOnlyDictionary<... method Init (line 74) | private void Init(IReadOnlyDictionary encoder, IReadOnlyD... method LoadTikTokenBpe (line 99) | private Dictionary LoadTikTokenBpe(Stream tikTokenBpeFile... method EncodeInternal (line 141) | private List EncodeInternal(string text, IReadOnlyCollection Encode(string text, IReadOnlyCollection allow... method Encode (line 193) | public List Encode(string text, bool applySpecialTokens = true) method EncodeSpecialToken (line 215) | private int EncodeSpecialToken(List tokenIds, Match nextSpecial) method FindNextSpecialToken (line 230) | private void FindNextSpecialToken(string text, IReadOnlyCollection tokenIds, int start, int end) method EncodeTrimSuffix (line 288) | private (int TokenCount, int EncodeLength) EncodeTrimSuffix(string tex... method EncodeTrimSuffixInternal (line 344) | private (List TokenIds, string Text) EncodeTrimSuffixInternal(str... method EncodeTrimSuffix (line 402) | public (List TokenIds, string Text) EncodeTrimSuffix(string text,... method EncodeTrimSuffix (line 420) | public (List TokenIds, string Text) EncodeTrimSuffix(string text,... method EncodeTrimPrefixInternal (line 437) | private (List TokenIds, string Text) EncodeTrimPrefixInternal(str... method TrimPrefix (line 479) | private static (List TokenIds, string Text) TrimPrefix(string tex... method Encode (line 502) | private void Encode(string text, List tokenIds, int start, ref in... method EncodeTrimPrefix (line 546) | public (List TokenIds, string Text) EncodeTrimPrefix(string text,... method EncodeTrimPrefix (line 562) | public (List TokenIds, string Text) EncodeTrimPrefix(string text,... method Decode (line 586) | public string Decode(int[] tokens) FILE: Tokenizer_C#/TokenizerLib/TokenizerBuilder.cs class TokenizerBuilder (line 14) | public static class TokenizerBuilder method CreateByModelNameAsync (line 82) | public static async Task CreateByModelNameAsync(string mod... method CreateByEncoderNameAsync (line 107) | public static async Task CreateByEncoderNameAsync(string e... method CreateTokenizerAsync (line 193) | private static async Task CreateTokenizerAsync(string rege... method CreateTokenizer (line 210) | public static ITokenizer CreateTokenizer(Stream tikTokenBpeFileStream,... FILE: Tokenizer_C#/TokenizerLib/Utils/BytePairComparer.cs class ByteArrayComparer (line 8) | internal class ByteArrayComparer : IEqualityComparer method Equals (line 10) | public bool Equals(byte[] x, byte[] y) method GetHashCode (line 30) | public int GetHashCode(byte[] bytes) FILE: Tokenizer_C#/TokenizerLib/Utils/BytePairEncoder.cs class BytePairEncoder (line 10) | internal class BytePairEncoder method BytePairEncode (line 13) | public static List BytePairEncode(byte[] mergingBytes, IReadOnlyD... FILE: Tokenizer_C#/TokenizerLib/Utils/LRUCache.cs class LruCache (line 7) | internal class LruCache class CacheItem (line 16) | private class CacheItem method CacheItem (line 21) | public CacheItem(TKey key, TValue value) method LruCache (line 41) | public LruCache(int cacheSize = DefaultCacheSize) method Lookup (line 59) | public bool Lookup(TKey key, out TValue value) method OnEviction (line 77) | protected virtual void OnEviction(TValue evictedValue) { } method EvictIfNeeded (line 79) | private void EvictIfNeeded() method Add (line 95) | public void Add(TKey key, TValue value) => Replace(key, value, out _); method Replace (line 97) | public bool Replace(TKey key, TValue value, out TValue oldValue) method ReplaceInternal (line 105) | private bool ReplaceInternal(TKey key, TValue value, out TValue oldValue) method Clear (line 131) | public void Clear() FILE: Tokenizer_C#/TokenizerLib/Utils/Range.cs type Index (line 18) | internal readonly struct Index : IEquatable method Index (line 28) | [MethodImpl(MethodImplOptions.AggressiveInlining)] method Index (line 43) | private Index(int value) method FromStart (line 56) | [MethodImpl(MethodImplOptions.AggressiveInlining)] method FromEnd (line 69) | [MethodImpl(MethodImplOptions.AggressiveInlining)] method GetOffset (line 107) | [MethodImpl(MethodImplOptions.AggressiveInlining)] method Equals (line 120) | public override bool Equals(object? value) => value is Index && _value... method Equals (line 124) | public bool Equals(Index other) => _value == other._value; method GetHashCode (line 127) | public override int GetHashCode() => _value; method ToString (line 133) | public override string ToString() type Range (line 151) | internal readonly struct Range : IEquatable method Range (line 162) | public Range(Index start, Index end) method Equals (line 170) | public override bool Equals(object? value) => method Equals (line 177) | public bool Equals(Range other) => other.Start.Equals(Start) && other.... method GetHashCode (line 180) | public override int GetHashCode() method ToString (line 186) | public override string ToString() method StartAt (line 192) | public static Range StartAt(Index start) => new Range(start, Index.End); method EndAt (line 195) | public static Range EndAt(Index end) => new Range(Index.Start, end); method GetOffsetAndLength (line 207) | [MethodImpl(MethodImplOptions.AggressiveInlining)] class RuntimeHelpers (line 236) | internal static class RuntimeHelpers method GetSubArray (line 241) | public static T[] GetSubArray(T[] array, Range range) FILE: Tokenizer_C#/TokenizerTest/TikTokenizerUnitTest.cs class TikTokenizerUnitTest (line 12) | [TestClass] method TikTokenizerUnitTestInitialize (line 29) | [TestInitialize] method TestEncode0 (line 39) | [TestMethod] method TestEncode1 (line 52) | [TestMethod] method TestEncode2 (line 66) | [TestMethod] method TestEncode3 (line 89) | [TestMethod] method TestEncode4 (line 103) | [TestMethod] method TestEncode5 (line 112) | [TestMethod] method TestEncodeTrimSuffix (line 128) | [TestMethod] method TestEncodeTrimSuffix2 (line 156) | [TestMethod] method TestEncodeTrimPrefix (line 178) | [TestMethod] method TestEncodeTrimPrefix2 (line 207) | [TestMethod] method TestEncodeGpt2 (line 227) | [TestMethod] method TestEncodeP50kbase (line 247) | [TestMethod] method TestEncodeP50kedit (line 267) | [TestMethod] method TestEncodeR50kbase (line 287) | [TestMethod] FILE: tokenizer_ts/perf/benchmark-folder.js function readAllFilesInFolder (line 13) | async function readAllFilesInFolder(folderPath) { FILE: tokenizer_ts/src/bytePairEncode.ts type Constant (line 4) | const enum Constant { class BinaryMap (line 33) | class BinaryMap { method get (line 37) | public get(key: Uint8Array, start: number = 0, end: number = key.lengt... method set (line 47) | public set(key: Uint8Array, value: V): void { function bytePairEncode (line 75) | function bytePairEncode( FILE: tokenizer_ts/src/lru.ts type ILRUCache (line 1) | interface ILRUCache { class LRUCache (line 14) | class LRUCache { method constructor (line 19) | constructor(public readonly size: number) {} method get (line 21) | public get(key: string): T | undefined { method set (line 30) | public set(key: string, value: T): void { method moveToHead (line 46) | private moveToHead(node: Node): void { method addNode (line 53) | private addNode(node: Node): void { method removeNode (line 64) | private removeNode(node: Node): void { class Node (line 79) | class Node { method constructor (line 83) | constructor(public key: string, public value: T) {} FILE: tokenizer_ts/src/textEncoder.ts type ITextEncoder (line 7) | interface ITextEncoder { class UniversalTextEncoder (line 22) | class UniversalTextEncoder implements ITextEncoder { method encode (line 26) | public encode(text: string): Uint8Array { class NodeTextEncoder (line 33) | class NodeTextEncoder implements ITextEncoder { method encode (line 37) | public encode(text: string): Uint8Array { FILE: tokenizer_ts/src/tikTokenizer.ts function loadTikTokenBpe (line 13) | function loadTikTokenBpe(tikTokenBpeFile: string): Map = new Map([ constant MODEL_TO_ENCODING (line 14) | const MODEL_TO_ENCODING: ReadonlyMap = new Map([ constant ENDOFTEXT (line 57) | const ENDOFTEXT: string = "<|endoftext|>"; constant FIM_PREFIX (line 58) | const FIM_PREFIX: string = "<|fim_prefix|>"; constant FIM_MIDDLE (line 59) | const FIM_MIDDLE: string = "<|fim_middle|>"; constant FIM_SUFFIX (line 60) | const FIM_SUFFIX: string = "<|fim_suffix|>"; constant ENDOFPROMPT (line 61) | const ENDOFPROMPT: string = "<|endofprompt|>"; constant REGEX_PATTERN_1 (line 66) | const REGEX_PATTERN_1: string = constant REGEX_PATTERN_2 (line 72) | const REGEX_PATTERN_2: string = constant REGEX_PATTERN_3 (line 89) | const REGEX_PATTERN_3: string = patterns.join("|"); function getEncoderFromModelName (line 91) | function getEncoderFromModelName(modelName: string): string { function fetchAndSaveFile (line 106) | async function fetchAndSaveFile( function getSpecialTokensByEncoder (line 128) | function getSpecialTokensByEncoder( function getSpecialTokensByModel (line 167) | function getSpecialTokensByModel( function getRegexByEncoder (line 182) | function getRegexByEncoder(encoder: string): string { function getRegexByModel (line 199) | function getRegexByModel(modelName: string): string { function createByModelName (line 210) | async function createByModelName( function createByEncoderName (line 226) | async function createByEncoderName( function createTokenizer (line 296) | function createTokenizer( FILE: tokenizer_ts/test/tikTokenizer.test.ts constant IM_START (line 9) | const IM_START = "<|im_start|>"; constant IM_END (line 10) | const IM_END = "<|im_end|>"; FILE: tokenizer_ts/test/tikTokenizerGpt2.test.ts constant IM_START (line 9) | const IM_START = "<|im_start|>"; constant IM_END (line 10) | const IM_END = "<|im_end|>"; FILE: tokenizer_ts/test/tikTokenizerGpt4o.test.ts constant ENDOFTEXT (line 9) | const ENDOFTEXT: string = "<|endoftext|>"; constant ENDOFPROMPT (line 10) | const ENDOFPROMPT: string = "<|endofprompt|>"; FILE: tokenizer_ts/test/tikTokenizer_p50k_base.test.ts constant IM_START (line 9) | const IM_START = "<|im_start|>"; constant IM_END (line 10) | const IM_END = "<|im_end|>"; FILE: tokenizer_ts/test/tikTokenizer_p50k_edit.test.ts constant IM_START (line 9) | const IM_START = "<|im_start|>"; constant IM_END (line 10) | const IM_END = "<|im_end|>"; FILE: tokenizer_ts/test/tikTokenizer_r50k_base.test.ts constant IM_START (line 9) | const IM_START = "<|im_start|>"; constant IM_END (line 10) | const IM_END = "<|im_end|>";