SYMBOL INDEX (157 symbols across 21 files) FILE: Extra-Chapter/generation-method/llm_generation.py function test_decoding_strategies (line 4) | def test_decoding_strategies(): function test_original_generation (line 117) | def test_original_generation(): FILE: Extra-Chapter/s1-vllm-thinking-budget/s1.py function build_input (line 5) | def build_input(prompt, tokenizer): function count_thinking_token (line 18) | def count_thinking_token(outputs, tokenizer): function count_token (line 24) | def count_token(string, tokenizer): function run_thinking_budget_sample (line 28) | def run_thinking_budget_sample(llm_model, tokenizer, user_input, thinkin... function run_sample (line 87) | def run_sample(llm_model, tokenizer, user_input): FILE: docs/chapter2/code/transformer.py class ModelArgs (line 9) | class ModelArgs: class MultiHeadAttention (line 21) | class MultiHeadAttention(nn.Module): method __init__ (line 23) | def __init__(self, args: ModelArgs, is_causal=False): method forward (line 55) | def forward(self, q: torch.Tensor, k: torch.Tensor, v: torch.Tensor): class LayerNorm (line 100) | class LayerNorm(nn.Module): method __init__ (line 102) | def __init__(self, features, eps=1e-6): method forward (line 109) | def forward(self, x): class MLP (line 116) | class MLP(nn.Module): method __init__ (line 118) | def __init__(self, dim: int, hidden_dim: int, dropout: float): method forward (line 127) | def forward(self, x): class EncoderLayer (line 134) | class EncoderLayer(nn.Module): method __init__ (line 135) | def __init__(self, args): method forward (line 144) | def forward(self, x): class Encoder (line 153) | class Encoder(nn.Module): method __init__ (line 155) | def __init__(self, args): method forward (line 161) | def forward(self, x): class DecoderLayer (line 167) | class DecoderLayer(nn.Module): method __init__ (line 169) | def __init__(self, args): method forward (line 182) | def forward(self, x, enc_out): class Decoder (line 194) | class Decoder(nn.Module): method __init__ (line 196) | def __init__(self, args): method forward (line 202) | def forward(self, x, enc_out): class PositionalEncoding (line 208) | class PositionalEncoding(nn.Module): method __init__ (line 211) | def __init__(self, args): method forward (line 229) | def forward(self, x): class Transformer (line 235) | class Transformer(nn.Module): method __init__ (line 238) | def __init__(self, args): method get_num_params (line 262) | def get_num_params(self, non_embedding=False): method _init_weights (line 272) | def _init_weights(self, module): method forward (line 283) | def forward(self, idx, targets=None): function main (line 322) | def main(): FILE: docs/chapter5/code/dataset.py class PretrainDataset (line 10) | class PretrainDataset(Dataset): method __init__ (line 11) | def __init__(self, data_path, tokenizer, max_length=512): method __len__ (line 25) | def __len__(self): method __getitem__ (line 28) | def __getitem__(self, index: int): class SFTDataset (line 48) | class SFTDataset(Dataset): method __init__ (line 49) | def __init__(self, data_path, tokenizer, max_length=512): method __len__ (line 62) | def __len__(self): method generate_loss_mask (line 65) | def generate_loss_mask(self, input_ids): method __getitem__ (line 101) | def __getitem__(self, index: int): FILE: docs/chapter5/code/ddp_pretrain.py function Logger (line 25) | def Logger(content): function get_lr (line 34) | def get_lr(it, all): function train_epoch (line 68) | def train_epoch(epoch): function init_model (line 171) | def init_model(): FILE: docs/chapter5/code/ddp_sft_full.py function Logger (line 24) | def Logger(content): function get_lr (line 28) | def get_lr(it, all): function train_epoch (line 51) | def train_epoch(epoch): function init_model (line 122) | def init_model(): FILE: docs/chapter5/code/deal_dataset.py function split_text (line 14) | def split_text(text, chunk_size=512): function convert_message (line 29) | def convert_message(data): FILE: docs/chapter5/code/export_model.py function count_parameters (line 9) | def count_parameters(model): function export_model (line 13) | def export_model(tokenizer_path, model_config, model_ckpt_path, save_dir... FILE: docs/chapter5/code/k_model.py class ModelConfig (line 14) | class ModelConfig(PretrainedConfig): method __init__ (line 16) | def __init__( class RMSNorm (line 46) | class RMSNorm(nn.Module): method __init__ (line 47) | def __init__(self, dim: int, eps: float): method _norm (line 54) | def _norm(self, x): method forward (line 61) | def forward(self, x): function precompute_freqs_cis (line 70) | def precompute_freqs_cis(dim: int, end: int, theta: float = 10000.0): function reshape_for_broadcast (line 85) | def reshape_for_broadcast(freqs_cis: torch.Tensor, x: torch.Tensor): function apply_rotary_emb (line 97) | def apply_rotary_emb( function repeat_kv (line 124) | def repeat_kv(x: torch.Tensor, n_rep: int) -> torch.Tensor: class Attention (line 139) | class Attention(nn.Module): method __init__ (line 140) | def __init__(self, args: ModelConfig): method forward (line 182) | def forward(self, x: torch.Tensor, freqs_cos: torch.Tensor, freqs_sin:... class MLP (line 250) | class MLP(nn.Module): method __init__ (line 251) | def __init__(self, dim: int, hidden_dim: int, multiple_of: int, dropou... method forward (line 268) | def forward(self, x): class DecoderLayer (line 276) | class DecoderLayer(nn.Module): method __init__ (line 277) | def __init__(self, layer_id: int, args: ModelConfig): method forward (line 301) | def forward(self, x, freqs_cos, freqs_sin, attention_mask: Optional[to... class Transformer (line 309) | class Transformer(PreTrainedModel): method __init__ (line 313) | def __init__(self, args: ModelConfig = None): method _init_weights (line 355) | def _init_weights(self, module): method _prepare_attention_mask (line 364) | def _prepare_attention_mask(self, attention_mask: Optional[torch.Tenso... method _left_pad_by_attention_mask (line 378) | def _left_pad_by_attention_mask( method forward (line 403) | def forward(self, tokens: torch.Tensor, targets: Optional[torch.Tensor... method generate (line 463) | def generate( method _greedy_decode (line 527) | def _greedy_decode(self, logits: torch.Tensor) -> torch.Tensor: method _random_sample (line 540) | def _random_sample(self, logits: torch.Tensor, temperature: float = 1.... method _beam_search (line 566) | def _beam_search(self, idx: torch.Tensor, max_new_tokens: int, num_bea... method generate_super (line 677) | def generate_super(self, FILE: docs/chapter5/code/model_sample.py class TextGenerator (line 9) | class TextGenerator: method __init__ (line 10) | def __init__(self, method chat_template (line 62) | def chat_template(self, prompt): method sft_sample (line 69) | def sft_sample(self, method pretrain_sample (line 99) | def pretrain_sample(self, FILE: docs/chapter5/code/train_tokenizer.py function read_texts_from_jsonl (line 17) | def read_texts_from_jsonl(file_path: str) -> Generator[str, None, None]: function create_tokenizer_config (line 33) | def create_tokenizer_config(save_dir: str) -> None: function train_tokenizer (line 77) | def train_tokenizer(data_path: str, save_dir: str, vocab_size: int = 819... function eval_tokenizer (line 128) | def eval_tokenizer(tokenizer_path: str) -> None: function main (line 174) | def main(): FILE: docs/chapter6/code/finetune.py class ModelArguments (line 45) | class ModelArguments: class DataTrainingArguments (line 70) | class DataTrainingArguments: function preprocess (line 87) | def preprocess(sources, tokenizer, max_len, system_message: str = "You a... class SupervisedDataset (line 157) | class SupervisedDataset(Dataset): method __init__ (line 159) | def __init__(self, raw_data, tokenizer, max_len: int): method __len__ (line 169) | def __len__(self): method __getitem__ (line 172) | def __getitem__(self, i) -> Dict[str, torch.Tensor]: function main (line 180) | def main(): FILE: docs/chapter6/code/pretrain.py class ModelArguments (line 41) | class ModelArguments: class DataTrainingArguments (line 72) | class DataTrainingArguments: function main (line 92) | def main(): FILE: docs/chapter7/Agent/src/core.py class Agent (line 14) | class Agent: method __init__ (line 15) | def __init__(self, client: OpenAI, model: str = "Qwen/Qwen2.5-32B-Inst... method get_tool_schema (line 24) | def get_tool_schema(self) -> List[Dict[str, Any]]: method handle_tool_call (line 28) | def handle_tool_call(self, tool_call): method get_completion (line 42) | def get_completion(self, prompt) -> str: FILE: docs/chapter7/Agent/src/tools.py function get_current_datetime (line 6) | def get_current_datetime() -> str: function add (line 15) | def add(a: float, b: float): function mul (line 24) | def mul(a: float, b: float): function compare (line 33) | def compare(a: float, b: float): function count_letter_in_string (line 47) | def count_letter_in_string(a: str, b: str): function search_wikipedia (line 60) | def search_wikipedia(query: str) -> str: function get_current_temperature (line 84) | def get_current_temperature(latitude: float, longitude: float) -> str: FILE: docs/chapter7/Agent/src/utils.py function function_to_json (line 5) | def function_to_json(func) -> dict: FILE: docs/chapter7/Agent/web_demo.py function load_agent (line 22) | def load_agent(): FILE: docs/chapter7/RAG/Embeddings.py class BaseEmbeddings (line 21) | class BaseEmbeddings: method __init__ (line 25) | def __init__(self, path: str, is_api: bool) -> None: method get_embedding (line 35) | def get_embedding(self, text: str, model: str) -> List[float]: method cosine_similarity (line 49) | def cosine_similarity(cls, vector1: List[float], vector2: List[float])... class OpenAIEmbedding (line 82) | class OpenAIEmbedding(BaseEmbeddings): method __init__ (line 86) | def __init__(self, path: str = '', is_api: bool = True) -> None: method get_embedding (line 95) | def get_embedding(self, text: str, model: str = "BAAI/bge-m3") -> List... FILE: docs/chapter7/RAG/LLM.py class BaseModel (line 29) | class BaseModel: method __init__ (line 30) | def __init__(self, model) -> None: method chat (line 33) | def chat(self, prompt: str, history: List[dict], content: str) -> str: method load_model (line 36) | def load_model(self): class OpenAIChat (line 39) | class OpenAIChat(BaseModel): method __init__ (line 40) | def __init__(self, model: str = "Qwen/Qwen2.5-32B-Instruct") -> None: method chat (line 43) | def chat(self, prompt: str, history: List[dict], content: str) -> str: FILE: docs/chapter7/RAG/VectorBase.py class VectorStore (line 19) | class VectorStore: method __init__ (line 20) | def __init__(self, document: List[str] = ['']) -> None: method get_vector (line 23) | def get_vector(self, EmbeddingModel: BaseEmbeddings) -> List[List[floa... method persist (line 30) | def persist(self, path: str = 'storage'): method load_vector (line 39) | def load_vector(self, path: str = 'storage'): method get_similarity (line 45) | def get_similarity(self, vector1: List[float], vector2: List[float]) -... method query (line 48) | def query(self, query: str, EmbeddingModel: BaseEmbeddings, k: int = 1... FILE: docs/chapter7/RAG/utils.py class ReadFiles (line 25) | class ReadFiles: method __init__ (line 30) | def __init__(self, path: str) -> None: method get_files (line 34) | def get_files(self): method get_content (line 50) | def get_content(self, max_token_len: int = 600, cover_content: int = 1... method get_chunk (line 61) | def get_chunk(cls, text: str, max_token_len: int = 600, cover_content:... method read_file_content (line 136) | def read_file_content(cls, file_path: str): method read_pdf (line 148) | def read_pdf(cls, file_path: str): method read_markdown (line 158) | def read_markdown(cls, file_path: str): method read_text (line 171) | def read_text(cls, file_path: str): class Documents (line 177) | class Documents: method __init__ (line 181) | def __init__(self, path: str = '') -> None: method get_content (line 184) | def get_content(self):