SYMBOL INDEX (43 symbols across 6 files) FILE: evaluator.py class RewardEvaluator (line 11) | class RewardEvaluator(ABC): method compute_rewards (line 25) | def compute_rewards( method get_reward_breakdown (line 52) | def get_reward_breakdown(self, reward_scores: torch.Tensor) -> Dict[st... function get_evaluator (line 65) | def get_evaluator(name: str) -> RewardEvaluator: class GSM8kEvaluator (line 85) | class GSM8kEvaluator(RewardEvaluator): method __init__ (line 96) | def __init__(self): method _extract_xml_answer (line 99) | def _extract_xml_answer(self, text: str) -> str: method _correctness_reward (line 105) | def _correctness_reward(self, prompts, completions, answer) -> List[fl... method _int_format_reward (line 111) | def _int_format_reward(self, completions) -> List[float]: method _strict_format_reward (line 117) | def _strict_format_reward(self, completions) -> List[float]: method _soft_format_reward (line 124) | def _soft_format_reward(self, completions) -> List[float]: method _xml_count_reward (line 131) | def _xml_count_reward(self, completions) -> List[float]: method compute_rewards (line 148) | def compute_rewards( method get_reward_breakdown (line 193) | def get_reward_breakdown(self, reward_scores: torch.Tensor) -> Dict[st... FILE: llms.py function get_llm_tokenizer (line 9) | def get_llm_tokenizer(model_name: str, device: str) -> tuple[PreTrainedM... FILE: main.py function eval_on_test_set (line 17) | def eval_on_test_set( function generate_completions (line 111) | def generate_completions( function score_completions (line 191) | def score_completions( function compute_loss (line 270) | def compute_loss( function grpo_loss (line 328) | def grpo_loss( function parse_args (line 387) | def parse_args(): function get_lr (line 481) | def get_lr(step): FILE: plotter.py function moving_average (line 9) | def moving_average(data, window_size=5): function plot_metrics (line 14) | def plot_metrics(output_dir): FILE: rldatasets.py class DataLoader (line 15) | class DataLoader(ABC): method __init__ (line 28) | def __init__(self, random: bool = False) -> None: method __len__ (line 33) | def __len__(self) -> int: method __iter__ (line 38) | def __iter__(self) -> 'DataLoader': method __next__ (line 43) | def __next__(self) -> Any: function extract_hash_answer (line 48) | def extract_hash_answer(text: str) -> str | None: class GSM8KLoader (line 67) | class GSM8KLoader(DataLoader): method __init__ (line 82) | def __init__(self, questions: list[str], answers: list[str], random: b... method __len__ (line 103) | def __len__(self) -> int: method __iter__ (line 106) | def __iter__(self) -> 'GSM8KLoader': method __next__ (line 109) | def __next__(self) -> tuple[str, str]: method reset (line 121) | def reset(self): function build_gsm8k_dataloaders (line 125) | def build_gsm8k_dataloaders() -> Tuple[GSM8KLoader, GSM8KLoader]: function get_dataloaders (line 168) | def get_dataloaders(dataset_name: str) -> Tuple[DataLoader, DataLoader]: FILE: utils.py function clean_spaces_preserve_newlines (line 14) | def clean_spaces_preserve_newlines(text): function seed_everything (line 22) | def seed_everything(seed: int) -> None: function write_generation_log (line 44) | def write_generation_log(log_data: Dict[str, Any], log_file: str) -> None: function selective_log_softmax (line 79) | def selective_log_softmax(logits, index): function get_per_token_logps (line 113) | def get_per_token_logps(model, input_ids, attention_mask, logits_to_keep):