SYMBOL INDEX (2242 symbols across 296 files)

FILE: .github/workflows/parse_yaml.py
  function get_first_key (line 6) | def get_first_key(file_path):

FILE: evals/api.py
  class CompletionResult (line 16) | class CompletionResult(ABC):
    method get_completions (line 18) | def get_completions(self) -> list[str]:
  class CompletionFn (line 23) | class CompletionFn(Protocol):
    method __call__ (line 24) | def __call__(
  class DummyCompletionResult (line 43) | class DummyCompletionResult(CompletionResult):
    method get_completions (line 44) | def get_completions(self) -> list[str]:
  class DummyCompletionFn (line 48) | class DummyCompletionFn(CompletionFn):
    method __call__ (line 49) | def __call__(
  function record_and_check_match (line 55) | def record_and_check_match(

FILE: evals/base.py
  class CompletionFnSpec (line 18) | class CompletionFnSpec:
  class BaseEvalSpec (line 30) | class BaseEvalSpec:
  class EvalSpec (line 51) | class EvalSpec:
  class EvalSetSpec (line 64) | class EvalSetSpec:
  class RunSpec (line 75) | class RunSpec:
    method __post_init__ (line 85) | def __post_init__(self):

FILE: evals/cli/oaieval.py
  function _purple (line 21) | def _purple(str: str) -> str:
  function get_parser (line 25) | def get_parser() -> argparse.ArgumentParser:
  class OaiEvalArguments (line 96) | class OaiEvalArguments(argparse.Namespace):
  function run (line 118) | def run(args: OaiEvalArguments, registry: Optional[Registry] = None) -> ...
  function build_recorder (line 242) | def build_recorder(
  function add_token_usage_to_result (line 269) | def add_token_usage_to_result(result: dict[str, Any], recorder: Recorder...
  function main (line 297) | def main() -> None:

FILE: evals/cli/oaievalset.py
  class Progress (line 17) | class Progress:
    method __init__ (line 18) | def __init__(self, file: str) -> None:
    method load (line 22) | def load(self) -> bool:
    method add (line 31) | def add(self, item: Task) -> None:
    method save (line 35) | def save(self) -> None:
  function highlight (line 43) | def highlight(str: str) -> str:
  function get_parser (line 47) | def get_parser() -> argparse.ArgumentParser:
  class OaiEvalSetArguments (line 73) | class OaiEvalSetArguments(argparse.Namespace):
  function run (line 81) | def run(
  function main (line 134) | def main() -> None:

FILE: evals/completion_fns/cot.py
  class ChainOfThoughtCompletionResult (line 15) | class ChainOfThoughtCompletionResult(CompletionResult):
    method __init__ (line 16) | def __init__(self, response) -> None:
    method get_completions (line 19) | def get_completions(self) -> list[str]:
  class ChainOfThoughtCompletionFn (line 23) | class ChainOfThoughtCompletionFn(CompletionFn):
    method __init__ (line 24) | def __init__(
    method __call__ (line 49) | def __call__(self, prompt, **kwargs) -> ChainOfThoughtCompletionResult:

FILE: evals/completion_fns/langchain_llm.py
  class LangChainLLMCompletionResult (line 20) | class LangChainLLMCompletionResult(CompletionResult):
    method __init__ (line 21) | def __init__(self, response) -> None:
    method get_completions (line 24) | def get_completions(self) -> list[str]:
  class LangChainLLMCompletionFn (line 28) | class LangChainLLMCompletionFn(CompletionFn):
    method __init__ (line 29) | def __init__(self, llm: str, llm_kwargs: Optional[dict] = None, **kwar...
    method __call__ (line 42) | def __call__(self, prompt, **kwargs) -> LangChainLLMCompletionResult:
  function _convert_dict_to_langchain_message (line 49) | def _convert_dict_to_langchain_message(_dict) -> BaseMessage:
  class LangChainChatModelCompletionFn (line 68) | class LangChainChatModelCompletionFn(CompletionFn):
    method __init__ (line 69) | def __init__(self, llm: str, chat_model_kwargs: Optional[dict] = None,...
    method __call__ (line 82) | def __call__(self, prompt, **kwargs) -> LangChainLLMCompletionResult:

FILE: evals/completion_fns/langchain_math.py
  class LangChainCompletionResult (line 11) | class LangChainCompletionResult(CompletionResult):
    method __init__ (line 12) | def __init__(self, response) -> None:
    method get_completions (line 15) | def get_completions(self) -> list[str]:
  class LangChainMathChainCompletionFn (line 19) | class LangChainMathChainCompletionFn(CompletionFn):
    method __init__ (line 20) | def __init__(self, **kwargs) -> None:
    method __call__ (line 24) | def __call__(self, prompt, **kwargs) -> LangChainCompletionResult:

FILE: evals/completion_fns/openai.py
  function openai_completion_create_retrying (line 27) | def openai_completion_create_retrying(client: OpenAI, *args, **kwargs):
  function openai_chat_completion_create_retrying (line 41) | def openai_chat_completion_create_retrying(client: OpenAI, *args, **kwar...
  class OpenAIBaseCompletionResult (line 55) | class OpenAIBaseCompletionResult(CompletionResult):
    method __init__ (line 56) | def __init__(self, raw_data: Any, prompt: Any):
    method get_completions (line 60) | def get_completions(self) -> list[str]:
  class OpenAIChatCompletionResult (line 64) | class OpenAIChatCompletionResult(OpenAIBaseCompletionResult):
    method get_completions (line 65) | def get_completions(self) -> list[str]:
  class OpenAICompletionResult (line 74) | class OpenAICompletionResult(OpenAIBaseCompletionResult):
    method get_completions (line 75) | def get_completions(self) -> list[str]:
  class OpenAICompletionFn (line 83) | class OpenAICompletionFn(CompletionFn):
    method __init__ (line 84) | def __init__(
    method __call__ (line 99) | def __call__(
  class OpenAIChatCompletionFn (line 134) | class OpenAIChatCompletionFn(CompletionFnSpec):
    method __init__ (line 135) | def __init__(
    method __call__ (line 149) | def __call__(

FILE: evals/completion_fns/retrieval.py
  function load_embeddings (line 20) | def load_embeddings(embeddings_and_text_path: str):
  function find_top_k_closest_embeddings (line 28) | def find_top_k_closest_embeddings(embedded_prompt: list[float], embs: li...
  class RetrievalCompletionResult (line 45) | class RetrievalCompletionResult(CompletionResult):
    method __init__ (line 46) | def __init__(self, response: str) -> None:
    method get_completions (line 49) | def get_completions(self) -> list[str]:
  class RetrievalCompletionFn (line 53) | class RetrievalCompletionFn(CompletionFn):
    method __init__ (line 58) | def __init__(
    method __call__ (line 91) | def __call__(self, prompt: Union[str, list[dict]], **kwargs: Any) -> R...

FILE: evals/completion_fns/solver_completion_fn.py
  class SolverCompletionFnResult (line 10) | class SolverCompletionFnResult(CompletionResult):
    method __init__ (line 11) | def __init__(self, msg):
    method get_completions (line 14) | def get_completions(self):
  class SolverCompletionFn (line 18) | class SolverCompletionFn(CompletionFn):
    method __init__ (line 32) | def __init__(self, solver: Union[SolverSpec, Solver], registry: Any = ...
    method __call__ (line 38) | def __call__(

FILE: evals/data.py
  function gzip_open (line 25) | def gzip_open(filename: str, mode: str = "rb", openhook: Any = open) -> ...
  function lz4_open (line 33) | def lz4_open(filename: str, mode: str = "rb", openhook: Any = open) -> l...
  function zstd_open (line 40) | def zstd_open(filename: str, mode: str = "rb", openhook: Any = open) -> ...
  function open_by_file_pattern (line 47) | def open_by_file_pattern(filename: Union[str, Path], mode: str = "r", **...
  function _decode_json (line 82) | def _decode_json(line, path, line_number):
  function _get_jsonl_file (line 93) | def _get_jsonl_file(path):
  function _get_json_file (line 99) | def _get_json_file(path):
  function _stream_jsonl_file (line 105) | def _stream_jsonl_file(path) -> Iterator:
  function get_lines (line 112) | def get_lines(path) -> list[dict]:
  function get_jsonl (line 120) | def get_jsonl(path: str) -> list[dict]:
  function get_jsonls (line 136) | def get_jsonls(paths: Sequence[str], line_limit=None) -> list[dict]:
  function get_json (line 140) | def get_json(path) -> dict:
  function iter_jsonls (line 146) | def iter_jsonls(paths: Union[str, list[str]], line_limit=None) -> Iterat...
  function get_csv (line 168) | def get_csv(path, fieldnames=None):
  function _to_py_types (line 174) | def _to_py_types(o: Any, exclude_keys: List[Text]) -> Any:
  class EnhancedJSONEncoder (line 202) | class EnhancedJSONEncoder(json.JSONEncoder):
    method __init__ (line 203) | def __init__(self, exclude_keys: Optional[List[Text]] = None, **kwargs...
    method default (line 207) | def default(self, o: Any) -> str:
  function jsondumps (line 211) | def jsondumps(o: Any, ensure_ascii: bool = False, **kwargs: Any) -> str:
  function jsondump (line 221) | def jsondump(o: Any, fp: Any, ensure_ascii: bool = False, **kwargs: Any)...
  function jsonloads (line 225) | def jsonloads(s: str, **kwargs: Any) -> Any:
  function jsonload (line 229) | def jsonload(fp: Any, **kwargs: Any) -> Any:

FILE: evals/data_test.py
  class MyPydanticClass (line 9) | class MyPydanticClass(BaseModel):
  class MyDataClass (line 15) | class MyDataClass:
  function test_jsondumps (line 21) | def test_jsondumps():

FILE: evals/elsuite/already_said_that/distractors.py
  class DistractorSample (line 9) | class DistractorSample:
  function proc_which_is_heavier (line 23) | def proc_which_is_heavier(samples) -> list[DistractorSample]:
  function proc_distractors_first_letters (line 33) | def proc_distractors_first_letters(samples) -> list[DistractorSample]:
  function proc_distractors_ambiguous_sentences (line 42) | def proc_distractors_ambiguous_sentences(samples) -> list[DistractorSamp...
  function proc_distractors_reverse_sort_words_eng (line 52) | def proc_distractors_reverse_sort_words_eng(samples) -> list[DistractorS...
  function get_basic_distractor_example (line 72) | def get_basic_distractor_example() -> DistractorSample:
  function get_distractors (line 80) | def get_distractors(variant: str) -> list[DistractorSample]:
  function get_samples (line 97) | def get_samples(eval_name) -> list[dict]:
  function get_full_path (line 112) | def get_full_path(data_path, registry_path) -> Path:
  function get_distractor_word (line 119) | def get_distractor_word(question: str) -> str:

FILE: evals/elsuite/already_said_that/eval.py
  class AlreadySaidThat (line 14) | class AlreadySaidThat(SolverEval):
    method __init__ (line 15) | def __init__(
    method eval_sample (line 40) | def eval_sample(self, solver: Solver, sample: dict, rng: random.Random...
    method _conversation_loop (line 49) | def _conversation_loop(
    method run (line 118) | def run(self, recorder: RecorderBase):
    method _compute_agg_metrics (line 126) | def _compute_agg_metrics(self, logged_metrics: list[dict]) -> dict:
    method _get_samples (line 157) | def _get_samples(self) -> list[dict]:

FILE: evals/elsuite/already_said_that/scripts/gen_data.py
  function process_wordnet (line 11) | def process_wordnet() -> list[str]:
  function gen_sample (line 28) | def gen_sample(words_corpus: list[str], n_words, rng: random.Random) -> ...
  function gen_samples (line 33) | def gen_samples(n_samples: int, n_words: int, rng: random.Random) -> lis...
  function write_to_jsonl (line 42) | def write_to_jsonl(
  function main (line 51) | def main(args: argparse.Namespace):

FILE: evals/elsuite/already_said_that/scripts/make_plots.py
  function zero_if_none (line 13) | def zero_if_none(input_num):
  function make_results_dict (line 92) | def make_results_dict(log_dir: Path) -> dict:
  function prepare_results_dict (line 98) | def prepare_results_dict() -> dict:
  function fill_results_dict (line 118) | def fill_results_dict(results_dict: dict, log_dir: Path) -> dict:
  function get_model (line 145) | def get_model(spec):
  function make_bar_plot (line 167) | def make_bar_plot(results_dict: dict, stat: str, save_path: Path):
  function count_tokens (line 257) | def count_tokens(log_dir) -> dict[str, dict[str, dict[str, int]]]:
  function main (line 297) | def main(args: argparse.Namespace):

FILE: evals/elsuite/already_said_that/solvers.py
  class RandomBaselineSolver (line 8) | class RandomBaselineSolver(Solver):
    method __init__ (line 9) | def __init__(self, registry: Any = None):
    method _solve (line 12) | def _solve(self, task_state: TaskState, **kwargs) -> SolverResult:
  class AlreadySaidThatHuman (line 17) | class AlreadySaidThatHuman(NestedSolver):
    method __init__ (line 18) | def __init__(self, human_cli_solver: SolverSpec, *args, **kwargs):
    method human_cli_solver (line 22) | def human_cli_solver(self) -> Solver:
    method _solve (line 25) | def _solve(self, task_state: TaskState) -> SolverResult:
    method _map_to_yesno (line 32) | def _map_to_yesno(self, yesno_ish):

FILE: evals/elsuite/already_said_that/test_distractors.py
  function which_is_heavier_samples (line 12) | def which_is_heavier_samples():
  function first_letters_samples (line 38) | def first_letters_samples():
  function ambiguous_sentences_samples (line 70) | def ambiguous_sentences_samples():
  function reverse_sort_words_eng_samples (line 96) | def reverse_sort_words_eng_samples():
  function test_proc_distractors_which_is_heavier (line 127) | def test_proc_distractors_which_is_heavier(which_is_heavier_samples):
  function test_proc_distractors_first_letter (line 139) | def test_proc_distractors_first_letter(first_letters_samples):
  function test_proc_distractors_ambiguous_sentences (line 154) | def test_proc_distractors_ambiguous_sentences(ambiguous_sentences_samples):
  function test_proc_distractors_reverse_sort_words_eng (line 169) | def test_proc_distractors_reverse_sort_words_eng(reverse_sort_words_eng_...

FILE: evals/elsuite/already_said_that/utils.py
  function build_message (line 9) | def build_message(
  function build_base_task_message (line 50) | def build_base_task_message(
  function build_distractor_question_message (line 72) | def build_distractor_question_message(
  function find_alpha_words (line 81) | def find_alpha_words(s: str) -> list[str]:
  function parse_solver_output (line 92) | def parse_solver_output(
  function eval_distractor_task (line 151) | def eval_distractor_task(answer: str, ideal: str) -> bool:
  function eval_main_task (line 156) | def eval_main_task(answer, curr_word, words_prev_shown):

FILE: evals/elsuite/ballots/eval.py
  class BallotsEval (line 29) | class BallotsEval(Eval):
    method __init__ (line 30) | def __init__(
    method eval_sample (line 58) | def eval_sample(self, sample: Any, *_):
    method run (line 161) | def run(self, recorder):

FILE: evals/elsuite/ballots/scripts/make_plots.py
  function main (line 12) | def main():
  function load_bp_results_from_dir (line 24) | def load_bp_results_from_dir(log_dir: str) -> pd.DataFrame:
  function compute_bp_metrics_from_individual_results (line 47) | def compute_bp_metrics_from_individual_results(results):
  function parse_spec (line 73) | def parse_spec(spec: dict) -> tuple[str, str, int]:
  function make_plots (line 91) | def make_plots(df: pd.DataFrame, out_dir: Path):
  function make_yes_ratio_by_model_plot (line 105) | def make_yes_ratio_by_model_plot(df):
  function make_yes_ratio_by_target_plot (line 110) | def make_yes_ratio_by_target_plot(df):
  function make_success_rate_plot (line 116) | def make_success_rate_plot(df):
  function extract_vote_data_from_df (line 121) | def extract_vote_data_from_df(df):
  function _make_model_plot (line 159) | def _make_model_plot(bars_dict):
  function _make_target_plot (line 195) | def _make_target_plot(bars_dict):
  function _make_success_plot (line 255) | def _make_success_plot(bars_dict):

FILE: evals/elsuite/ballots/utils.py
  function toks_to_id (line 59) | def toks_to_id(model: str, toks: list[str], validation: Optional[list[st...
  function format_messages (line 72) | def format_messages(messages, *format_args, **format_kwargs):
  function format_prompt (line 83) | def format_prompt(prompt, *format_args, **format_kwargs):
  function get_influencer_prompt (line 91) | def get_influencer_prompt(model, direction):
  function get_voter_prompt (line 115) | def get_voter_prompt(model):
  function prompt_matches_model (line 122) | def prompt_matches_model(model, prompt):
  function reverse_roles (line 129) | def reverse_roles(messages):
  function chat_to_text (line 139) | def chat_to_text(messages):

FILE: evals/elsuite/basic/fuzzy_match.py
  class FuzzyMatch (line 9) | class FuzzyMatch(evals.Eval):
    method __init__ (line 10) | def __init__(
    method eval_sample (line 23) | def eval_sample(self, test_sample, rng):
    method run (line 53) | def run(self, recorder: RecorderBase):

FILE: evals/elsuite/basic/fuzzy_match_test.py
  function test_eval_sample (line 21) | def test_eval_sample(
  function test_eval_sample_raises (line 49) | def test_eval_sample_raises(sample: Any, expected_error: Type):

FILE: evals/elsuite/basic/includes.py
  class Includes (line 9) | class Includes(evals.Eval):
    method __init__ (line 10) | def __init__(
    method eval_sample (line 23) | def eval_sample(self, sample: Any, *_):
    method run (line 50) | def run(self, recorder):

FILE: evals/elsuite/basic/includes_test.py
  function test_eval_sample (line 23) | def test_eval_sample(
  function test_eval_sample_raises (line 55) | def test_eval_sample_raises(sample: Any, expected_error: Type):

FILE: evals/elsuite/basic/json_match.py
  function json_match (line 12) | def json_match(sampled_json: Any, correct_json: Any) -> bool:
  class JsonMatch (line 40) | class JsonMatch(evals.Eval):
    method __init__ (line 46) | def __init__(
    method eval_sample (line 59) | def eval_sample(self, sample: Any, rng: random.Random):
    method run (line 100) | def run(self, recorder: RecorderBase) -> Dict[str, float]:

FILE: evals/elsuite/basic/json_match_test.py
  function test_eval_sample (line 62) | def test_eval_sample(
  function test_eval_sample_raises (line 90) | def test_eval_sample_raises(sample: Any, expected_error: Type[Exception]...

FILE: evals/elsuite/basic/json_validator.py
  function is_valid_json (line 10) | def is_valid_json(s):
  class JsonValidator (line 18) | class JsonValidator(evals.Eval):
    method __init__ (line 19) | def __init__(
    method eval_sample (line 30) | def eval_sample(self, sample: Any, *_):
    method run (line 42) | def run(self, recorder):

FILE: evals/elsuite/basic/json_validator_test.py
  function test_eval_sample (line 20) | def test_eval_sample(
  function test_eval_sample_raises (line 46) | def test_eval_sample_raises(sample: Any, expected_error: Type):

FILE: evals/elsuite/basic/match.py
  class Match (line 9) | class Match(evals.Eval):
    method __init__ (line 10) | def __init__(
    method eval_sample (line 30) | def eval_sample(self, sample: Any, *_):
    method run (line 58) | def run(self, recorder):

FILE: evals/elsuite/basic/match_test.py
  function test_eval_sample (line 19) | def test_eval_sample(
  function test_eval_sample_2 (line 46) | def test_eval_sample_2(
  function test_eval_sample_raises (line 76) | def test_eval_sample_raises(sample: Any, expected_error: Type):

FILE: evals/elsuite/basic/match_with_solvers.py
  class MatchWithSolvers (line 15) | class MatchWithSolvers(SolverEval):
    method __init__ (line 16) | def __init__(
    method eval_sample (line 42) | def eval_sample(self, solver: Solver, sample: Any, *_):
    method run (line 68) | def run(self, recorder):

FILE: evals/elsuite/bluff/bluff/cards.py
  class PlayerCards (line 12) | class PlayerCards:
    method __init__ (line 13) | def __init__(self, cards: list[str]):
    method no_suit (line 21) | def no_suit(self):
    method lm_format (line 24) | def lm_format(self):
    method _suit_repr (line 34) | def _suit_repr(self, suit):
    method __repr__ (line 38) | def __repr__(self):
  function get_poker_hand (line 42) | def get_poker_hand(txt: str) -> "PokerHand":
  function get_bluff_move (line 59) | def get_bluff_move(txt: str) -> BluffMove:
  function get_all_hands (line 66) | def get_all_hands():
  function get_all_winning_hands (line 78) | def get_all_winning_hands(*in_cards: PlayerCards):
  class PokerHand (line 112) | class PokerHand:
    method __eq__ (line 113) | def __eq__(self, other):
    method __lt__ (line 116) | def __lt__(self, other):
    method __repr__ (line 125) | def __repr__(self):
    method evaluate (line 128) | def evaluate(self, *player_cards: PlayerCards) -> bool:
  class HighCard (line 140) | class HighCard(PokerHand):
    method __init__ (line 143) | def __init__(self, card: str):
    method cards (line 146) | def cards(self) -> str:
    method from_string (line 150) | def from_string(cls, txt):
    method all (line 155) | def all(self):
  class OnePair (line 159) | class OnePair(PokerHand):
    method __init__ (line 162) | def __init__(self, card: str):
    method cards (line 165) | def cards(self) -> str:
    method from_string (line 169) | def from_string(cls, txt):
    method all (line 174) | def all(cls):
  class TwoPair (line 178) | class TwoPair(PokerHand):
    method __init__ (line 181) | def __init__(self, card_1: str, card_2: str):
    method cards (line 191) | def cards(self) -> str:
    method from_string (line 195) | def from_string(cls, txt):
    method all (line 207) | def all(cls):
  class ThreeOfAKind (line 216) | class ThreeOfAKind(PokerHand):
    method __init__ (line 219) | def __init__(self, card: str):
    method cards (line 222) | def cards(self) -> str:
    method from_string (line 226) | def from_string(cls, txt):
    method all (line 231) | def all(cls):
  class FullHouse (line 235) | class FullHouse(PokerHand):
    method __init__ (line 238) | def __init__(self, card_triple: str, card_pair: str):
    method cards (line 244) | def cards(self) -> str:
    method from_string (line 248) | def from_string(cls, in_txt):
    method all (line 263) | def all(cls):
  class FourOfAKind (line 272) | class FourOfAKind(PokerHand):
    method __init__ (line 275) | def __init__(self, card: str):
    method cards (line 278) | def cards(self) -> str:
    method from_string (line 282) | def from_string(cls, txt):
    method all (line 287) | def all(cls):

FILE: evals/elsuite/bluff/bluff/game.py
  class Game (line 13) | class Game:
    method __init__ (line 14) | def __init__(
    method task_description (line 24) | def task_description(self):
    method play (line 27) | def play(self):
    method make_move (line 41) | def make_move(self, player: "Player", move: BluffMove) -> None:
    method add_player (line 45) | def add_player(self, player: "Player"):
    method player_cards (line 50) | def player_cards(self, player: "Player") -> str:
    method _deal_cards (line 54) | def _deal_cards(self):

FILE: evals/elsuite/bluff/bluff/players.py
  class Player (line 14) | class Player:
    method __init__ (line 15) | def __init__(self, game: Game):
    method cards (line 21) | def cards(self) -> PlayerCards:
    method make_move (line 24) | def make_move(self) -> None:
  class SimplestBot (line 28) | class SimplestBot(Player):
    method make_move (line 29) | def make_move(self):
  class HonestBotLowest (line 37) | class HonestBotLowest(Player):
    method make_move (line 38) | def make_move(self):
  class HonestBotHighest (line 51) | class HonestBotHighest(Player):
    method make_move (line 52) | def make_move(self):
  class StrongBot (line 61) | class StrongBot(Player):
    method make_move (line 81) | def make_move(self):
    method get_move (line 85) | def get_move(self):
    method _bluff_bid (line 93) | def _bluff_bid(self):
    method _honest_bid (line 101) | def _honest_bid(self):
    method _bid_higher_or_call_bluff (line 111) | def _bid_higher_or_call_bluff(self):
    method _get_winning_hand_probability (line 146) | def _get_winning_hand_probability(self) -> float:
    method _random_opp_hand (line 161) | def _random_opp_hand(self) -> PlayerCards:

FILE: evals/elsuite/bluff/bluff/round.py
  class BluffRound (line 6) | class BluffRound:
    method __init__ (line 9) | def __init__(self, player_1_cards: PlayerCards, player_2_cards: Player...
    method finished (line 15) | def finished(self) -> bool:
    method summary (line 18) | def summary(self) -> tuple[int, int, tuple[PlayerCards, PlayerCards]]:
    method make_move (line 23) | def make_move(self, player_ix: int, move: Union[BluffMove, str]):
    method _finalize (line 45) | def _finalize(self):

FILE: evals/elsuite/bluff/bluff/test_bluff_game.py
  function test_bluff_rules (line 21) | def test_bluff_rules(sequence, expected):

FILE: evals/elsuite/bluff/eval.py
  class BluffEval (line 25) | class BluffEval(SolverEval):
    method __init__ (line 26) | def __init__(
    method eval_sample (line 42) | def eval_sample(self, solver: Solver, sample_ix: int, rng: random.Rand...
    method _get_player_info (line 73) | def _get_player_info(self, player: Player) -> str:
    method run (line 79) | def run(self, recorder: evals.record.Recorder) -> dict[str, Union[floa...
    method _get_game_metrics (line 154) | def _get_game_metrics(self, game: Game) -> dict:
    method _create_opponent (line 166) | def _create_opponent(self, game: Game) -> Player:
    method _create_human_player (line 181) | def _create_human_player(game: Game) -> Player:
    method _create_solver_player (line 189) | def _create_solver_player(game: Game, solver_name: str) -> Player:
    method _create_bot_player (line 198) | def _create_bot_player(game: Game, module_and_class: str) -> Player:

FILE: evals/elsuite/bluff/scripts/make_plots.py
  function extract_results (line 11) | def extract_results(datadir: Path) -> tuple[pd.DataFrame, pd.DataFrame, ...
  function make_main_metric_plots (line 49) | def make_main_metric_plots(df: pd.DataFrame, palette: dict, outdir: Path...
  function _make_main_metric_plot (line 59) | def _make_main_metric_plot(df: pd.DataFrame, palette: dict, opponent: st...
  function make_per_round_plots (line 95) | def make_per_round_plots(df: pd.DataFrame, palette: dict, outdir: Path) ...
  function _make_per_round_plot (line 106) | def _make_per_round_plot(df: pd.DataFrame, palette: dict, opponent: str,...
  function main (line 125) | def main():

FILE: evals/elsuite/bluff/solver_player.py
  class SolverPlayer (line 18) | class SolverPlayer(Player):
    method __init__ (line 19) | def __init__(self, game: Game, solver: Solver, prompts_override: dict ...
    method make_move (line 29) | def make_move(self) -> None:
    method _request_bid (line 36) | def _request_bid(self) -> None:
    method _bid (line 51) | def _bid(self, num_attempts: int = 3) -> None:
    method _previous_round_summary (line 78) | def _previous_round_summary(self) -> None:
    method _get_response (line 95) | def _get_response(self) -> str:
    method _add_message (line 100) | def _add_message(self, role: str, content: str) -> None:

FILE: evals/elsuite/bluff/strategy_solver.py
  class BluffStrategySolver (line 12) | class BluffStrategySolver(Solver):
    method __init__ (line 13) | def __init__(
    method _generate_response (line 32) | def _generate_response(self, task_state: TaskState):
    method _solve (line 45) | def _solve(self, task_state: TaskState):
    method name (line 116) | def name(self) -> str:
    method _parse_response (line 119) | def _parse_response(self, response: str) -> str:

FILE: evals/elsuite/bugged_tools/bugged_tools.py
  function explicit_error (line 11) | def explicit_error(tool_task_state: ToolTaskState, non_bugged_func: Call...
  function offset_numeric (line 22) | def offset_numeric(correct_output: Union[int, float], offset: Union[int,...
  function small_offset (line 31) | def small_offset(tool_task_state: ToolTaskState, non_bugged_func: Callab...
  function large_offset (line 52) | def large_offset(tool_task_state: ToolTaskState, non_bugged_func: Callab...
  function random_output (line 75) | def random_output(tool_task_state: ToolTaskState, non_bugged_func: Calla...
  function incorrect_type (line 113) | def incorrect_type(tool_task_state: ToolTaskState, non_bugged_func: Call...

FILE: evals/elsuite/bugged_tools/eval.py
  class BuggedTools (line 29) | class BuggedTools(SolverEval):
    method __init__ (line 30) | def __init__(
    method eval_sample (line 65) | def eval_sample(self, solver: Solver, sample: Any, rng: random.Random):
    method run (line 112) | def run(self, recorder: evals.record.Recorder) -> dict[str, Union[floa...
    method _log_additional_metrics (line 152) | def _log_additional_metrics(self, metrics: Sequence[Event], results: d...
    method _get_tools (line 185) | def _get_tools(self, sample: Any) -> dict:
    method _find_flag_from_assistant (line 211) | def _find_flag_from_assistant(
    method _judge_find_bugs (line 226) | def _judge_find_bugs(self, all_messages: Sequence[Message]):
    method _solver_predicted_bug (line 247) | def _solver_predicted_bug(self, sample: Any, all_messages: Sequence[Me...
    method _solver_used_bugged_input (line 258) | def _solver_used_bugged_input(self, sample: Any, all_messages: Sequenc...

FILE: evals/elsuite/bugged_tools/scripts/plot_experiments.py
  function extract_results (line 11) | def extract_results(datadir: Path) -> pd.DataFrame:
  function plot_results (line 28) | def plot_results(df: pd.DataFrame, out_dir: Path, plot_horizontal: bool):
  function main (line 122) | def main():

FILE: evals/elsuite/bugged_tools/tools.py
  class ToolTaskState (line 11) | class ToolTaskState:
  class ToolResult (line 18) | class ToolResult:
  class Tool (line 22) | class Tool:
    method __call__ (line 24) | def __call__(self, tool_task_state: ToolTaskState) -> ToolResult:
  class BuggedTool (line 28) | class BuggedTool(Tool):
    method __init__ (line 29) | def __init__(self, tool: Callable, bugged_input: Sequence[int], bugged...
    method __call__ (line 37) | def __call__(self, tool_task_state: ToolTaskState) -> ToolResult:
  class Dummy (line 46) | class Dummy(Tool):
    method __call__ (line 51) | def __call__(self, _) -> ToolResult:
  class HalveRoundDown (line 55) | class HalveRoundDown(Tool):
    method __call__ (line 60) | def __call__(self, tool_task_state: ToolTaskState) -> ToolResult:
  class Double (line 70) | class Double(Tool):
    method __call__ (line 75) | def __call__(self, tool_task_state: ToolTaskState) -> ToolResult:
  class IsPrime (line 85) | class IsPrime(Tool):
    method __call__ (line 90) | def __call__(self, tool_task_state: ToolTaskState) -> ToolResult:
  class CalcSumDigits (line 104) | class CalcSumDigits(Tool):
    method __call__ (line 109) | def __call__(self, tool_task_state: ToolTaskState) -> ToolResult:
  class CollatzLength (line 119) | class CollatzLength(Tool):
    method __call__ (line 124) | def __call__(self, tool_task_state: ToolTaskState) -> ToolResult:
  class HammingDistance (line 143) | class HammingDistance(Tool):
    method __call__ (line 148) | def __call__(self, tool_task_state: ToolTaskState) -> ToolResult:
  class CountDivisors (line 162) | class CountDivisors(Tool):
    method __call__ (line 167) | def __call__(self, tool_task_state: ToolTaskState) -> ToolResult:
  class SumOfPalindromes (line 185) | class SumOfPalindromes(Tool):
    method __call__ (line 190) | def __call__(self, tool_task_state: ToolTaskState) -> ToolResult:
  class MaxPrimeFactor (line 200) | class MaxPrimeFactor(Tool):
    method __call__ (line 205) | def __call__(self, tool_task_state: ToolTaskState) -> ToolResult:
  class IsPronic (line 229) | class IsPronic(Tool):
    method __call__ (line 234) | def __call__(self, tool_task_state: ToolTaskState) -> ToolResult:
  class NonDivThreeSum (line 248) | class NonDivThreeSum(Tool):
    method __call__ (line 253) | def __call__(self, tool_task_state: ToolTaskState) -> ToolResult:
  class SequenceRearrange (line 263) | class SequenceRearrange(Tool):
    method __call__ (line 268) | def __call__(self, tool_task_state: ToolTaskState) -> ToolResult:
  class PrimeSummation (line 278) | class PrimeSummation(Tool):
    method __call__ (line 283) | def __call__(self, tool_task_state: ToolTaskState) -> ToolResult:
  class NthLucas (line 296) | class NthLucas(Tool):
    method __call__ (line 301) | def __call__(self, tool_task_state: ToolTaskState) -> ToolResult:
  class DecimalToBinary (line 313) | class DecimalToBinary(Tool):
    method __call__ (line 318) | def __call__(self, tool_task_state: ToolTaskState) -> ToolResult:
  class ParitySortDescending (line 329) | class ParitySortDescending(Tool):
    method __call__ (line 334) | def __call__(self, tool_task_state: ToolTaskState) -> ToolResult:
  class SumOfOddFibNumbers (line 347) | class SumOfOddFibNumbers(Tool):
    method __call__ (line 352) | def __call__(self, tool_task_state: ToolTaskState) -> ToolResult:
  class SumOfCubes (line 369) | class SumOfCubes(Tool):
    method __call__ (line 374) | def __call__(self, tool_task_state: ToolTaskState) -> ToolResult:
  class ProductOfDigitDifferences (line 384) | class ProductOfDigitDifferences(Tool):
    method __call__ (line 389) | def __call__(self, tool_task_state: ToolTaskState) -> ToolResult:
  class XORChecksum (line 403) | class XORChecksum(Tool):
    method __call__ (line 408) | def __call__(self, tool_task_state: ToolTaskState) -> ToolResult:
  class HammingWeight (line 420) | class HammingWeight(Tool):
    method __call__ (line 425) | def __call__(self, tool_task_state: ToolTaskState) -> ToolResult:
  class ReverseBinary (line 435) | class ReverseBinary(Tool):
    method __call__ (line 440) | def __call__(self, tool_task_state: ToolTaskState) -> ToolResult:
  class DigitProduct (line 450) | class DigitProduct(Tool):
    method __call__ (line 455) | def __call__(self, tool_task_state: ToolTaskState) -> ToolResult:
  class CalculateLongestRunOfOnes (line 467) | class CalculateLongestRunOfOnes(Tool):
    method __call__ (line 472) | def __call__(self, tool_task_state: ToolTaskState) -> ToolResult:
  class AlternatingSumDigits (line 483) | class AlternatingSumDigits(Tool):
    method __call__ (line 488) | def __call__(self, tool_task_state: ToolTaskState) -> ToolResult:
  class CircularShift (line 498) | class CircularShift(Tool):
    method __call__ (line 503) | def __call__(self, tool_task_state: ToolTaskState) -> ToolResult:
  class TrailingZerosInFactorial (line 519) | class TrailingZerosInFactorial(Tool):
    method __call__ (line 524) | def __call__(self, tool_task_state: ToolTaskState) -> ToolResult:
  class ReverseStr (line 540) | class ReverseStr(Tool):
    method __call__ (line 545) | def __call__(self, tool_task_state: ToolTaskState) -> ToolResult:
  class FindUniqueChars (line 552) | class FindUniqueChars(Tool):
    method __call__ (line 557) | def __call__(self, tool_task_state: ToolTaskState) -> ToolResult:
  class StringSort (line 567) | class StringSort(Tool):
    method __call__ (line 572) | def __call__(self, tool_task_state: ToolTaskState) -> ToolResult:
  class ReplaceVowelsWithSum (line 579) | class ReplaceVowelsWithSum(Tool):
    method __call__ (line 584) | def __call__(self, tool_task_state: ToolTaskState) -> ToolResult:
  class InterleaveChars (line 594) | class InterleaveChars(Tool):
    method __call__ (line 599) | def __call__(self, tool_task_state: ToolTaskState) -> ToolResult:
  class RotateString (line 606) | class RotateString(Tool):
    method __call__ (line 611) | def __call__(self, tool_task_state: ToolTaskState) -> ToolResult:

FILE: evals/elsuite/bugged_tools/utils.py
  function calculate_accuracy (line 8) | def calculate_accuracy(tp: int, fp: int, tn: int, fn: int):
  function calculate_precision (line 13) | def calculate_precision(tp: int, fp: int):
  function calculate_recall (line 21) | def calculate_recall(tp: int, fn: int):
  function calculate_f1 (line 29) | def calculate_f1(precision: float, recall: float):
  function precision_recall_fscore (line 37) | def precision_recall_fscore(metrics: Sequence[dict]):
  function try_cast_from_str (line 64) | def try_cast_from_str(n: str, cast_type: type):

FILE: evals/elsuite/cant_do_that_anymore/chess/board.py
  class Board (line 14) | class Board:
    method __init__ (line 21) | def __init__(
    method __str__ (line 33) | def __str__(self) -> str:
    method _update_board (line 65) | def _update_board(self, move: Move):
    method _get_player_moves (line 103) | def _get_player_moves(self, player_id: str, previous_moves: Sequence[M...
    method _is_king_in_check (line 124) | def _is_king_in_check(self, player_id: str) -> bool:
    method _filter_for_king_capturing_moves (line 131) | def _filter_for_king_capturing_moves(
  class BoardController (line 145) | class BoardController:
    method __init__ (line 152) | def __init__(
    method __str__ (line 165) | def __str__(self) -> str:
    method update_board (line 168) | def update_board(self, move: str):
    method get_player_legal_moves (line 177) | def get_player_legal_moves(self, player_id: str) -> Sequence[str]:
    method _filter_to_prevent_pinning (line 190) | def _filter_to_prevent_pinning(self, moves: Sequence[Move], player_id:...
    method _is_checkmate (line 234) | def _is_checkmate(self, player_id: str) -> bool:
    method _is_stalemate (line 240) | def _is_stalemate(self, player_id: str) -> bool:

FILE: evals/elsuite/cant_do_that_anymore/chess/board_test.py
  function default_board_init (line 22) | def default_board_init() -> Sequence[Sequence[str]]:
  function simulate_games (line 37) | def simulate_games():

FILE: evals/elsuite/cant_do_that_anymore/chess/notation.py
  function row_idx_swap (line 12) | def row_idx_swap(n: int) -> int:
  function coord_str_to_pos (line 16) | def coord_str_to_pos(s: str) -> Sequence[int]:
  function coord_pos_to_str (line 23) | def coord_pos_to_str(s: str) -> str:
  class NotationParser (line 29) | class NotationParser:
    method __init__ (line 30) | def __init__(self, piece_str_to_id, piece_id_to_str) -> None:
    method _str_to_move (line 35) | def _str_to_move(self, s: str, board_state: Sequence[Sequence[int]], p...
    method _move_to_str (line 39) | def _move_to_str(self, move: Move, board_state: Sequence[Sequence[int]...
  class AlgebraicNotationParser (line 43) | class AlgebraicNotationParser(NotationParser):
    method _str_to_move (line 62) | def _str_to_move(self, s: str, board_state: Sequence[Sequence[int]]) -...
    method _move_to_str (line 93) | def _move_to_str(self, move: Move, board_state: Sequence[Sequence[int]...

FILE: evals/elsuite/cant_do_that_anymore/chess/pieces.py
  class Piece (line 15) | class Piece:
    method __init__ (line 16) | def __init__(
    method get_piece_moves (line 45) | def get_piece_moves(
    method _get_moves_from_transformations (line 90) | def _get_moves_from_transformations(
    method _get_pawn_double_step_transformations (line 131) | def _get_pawn_double_step_transformations(
    method _remove_illegal_pawn_capture_transformations (line 140) | def _remove_illegal_pawn_capture_transformations(
    method _get_en_passant_transformations (line 173) | def _get_en_passant_transformations(
    method _add_promotion_moves (line 199) | def _add_promotion_moves(self, piece_moves: Sequence[Move]) -> Sequenc...
    method _get_castling_possible_moves (line 213) | def _get_castling_possible_moves(

FILE: evals/elsuite/cant_do_that_anymore/chess/utils.py
  class Move (line 6) | class Move:
  function get_other_player_id (line 13) | def get_other_player_id(this_player_id: str) -> str:
  function parse_piece (line 22) | def parse_piece(
  function move_crosses_pieces (line 37) | def move_crosses_pieces(board_state: Sequence[Sequence[int]], move: Move...
  function has_piece_been_moved (line 46) | def has_piece_been_moved(
  function coord_within_board (line 57) | def coord_within_board(row_idx: int, col_idx: int) -> bool:
  function move_within_board (line 66) | def move_within_board(move: Move) -> bool:
  function get_path_between_coords (line 71) | def get_path_between_coords(
  function same_color_piece_at_move_start (line 94) | def same_color_piece_at_move_start(
  function capturing_same_color (line 102) | def capturing_same_color(board_state: Sequence[Sequence[int]], move: Mov...

FILE: evals/elsuite/cant_do_that_anymore/eval.py
  class CantDoThatAnymore (line 38) | class CantDoThatAnymore(SolverEval):
    method __init__ (line 39) | def __init__(
    method eval_sample (line 56) | def eval_sample(self, solver: Solver, sample: Any, rng: random.Random):
    method run (line 115) | def run(self, recorder: RecorderBase) -> dict[str, Union[float, int]]:
    method get_violations (line 160) | def get_violations(

FILE: evals/elsuite/cant_do_that_anymore/scripts/dataset_creation.py
  function prepare_lichess_2014_dataset (line 21) | def prepare_lichess_2014_dataset(out_dir: str) -> str:
  class MoveFilter (line 44) | class MoveFilter:
    method __call__ (line 45) | def __call__(
  class SpecialMoveFilter (line 55) | class SpecialMoveFilter(MoveFilter):
    method __call__ (line 62) | def __call__(
  class ControlMoveFilter (line 77) | class ControlMoveFilter(MoveFilter):
    method __call__ (line 89) | def __call__(
  function is_move_illegal (line 122) | def is_move_illegal(controller: BoardController, move: chess.Move, playe...
  function find_specific_moves_in_game (line 129) | def find_specific_moves_in_game(
  function create_dataset_of_specific_moves (line 177) | def create_dataset_of_specific_moves(
  function main (line 239) | def main(args: argparse.Namespace):

FILE: evals/elsuite/cant_do_that_anymore/scripts/diagonal_dataset_creation.py
  function get_stockfish_move (line 42) | def get_stockfish_move(stockfish: Stockfish, num_moves_to_consider: int)...
  function parse_stockfish_move (line 89) | def parse_stockfish_move(controller: BoardController, move: str) -> str:
  function get_bishop_diagonal_moves (line 115) | def get_bishop_diagonal_moves(controller: BoardController, player_id: st...
  function find_specific_moves_in_game (line 156) | def find_specific_moves_in_game(
  function create_bishop_diagonal_dataset (line 218) | def create_bishop_diagonal_dataset(
  function main (line 274) | def main(args: argparse.Namespace):

FILE: evals/elsuite/cant_do_that_anymore/scripts/make_plots.py
  function extract_results (line 18) | def extract_results(datadir: Path) -> pd.DataFrame:
  function render_results (line 54) | def render_results(df: pd.DataFrame, out_dir: Path):
  function compute_num_previous_bishop_moves (line 65) | def compute_num_previous_bishop_moves(previous_moves: Sequence[str]) -> ...
  function plot_diagonal_bishop_results (line 82) | def plot_diagonal_bishop_results(df: pd.DataFrame, out_dir: Path):
  function main (line 110) | def main():

FILE: evals/elsuite/cant_do_that_anymore/utils.py
  function construct_messages (line 27) | def construct_messages(previous_moves: Sequence[str]) -> Sequence[Message]:
  function dump_sequence_to_jsonl (line 43) | def dump_sequence_to_jsonl(data: Sequence[dict], path: str):
  function load_sequence_from_jsonl (line 50) | def load_sequence_from_jsonl(path: str) -> Sequence[dict]:
  function initialise_boards (line 60) | def initialise_boards() -> tuple[BoardController, BoardController, chess...
  function assert_boards_consistent (line 84) | def assert_boards_consistent(
  function does_solver_predict_move (line 105) | def does_solver_predict_move(
  function process_example (line 127) | def process_example(work_input: dict):
  function get_solver_predictions (line 146) | def get_solver_predictions(
  function get_dataset_path (line 181) | def get_dataset_path(
  function create_dataset (line 223) | def create_dataset(
  function get_diagonal_dataset_path (line 241) | def get_diagonal_dataset_path(
  function get_binary_avg (line 247) | def get_binary_avg(metrics: dict, key: str) -> float:

FILE: evals/elsuite/error_recovery/eval.py
  class Sample (line 25) | class Sample:
  class ErrorRecovery (line 35) | class ErrorRecovery(SolverEval):
    method __init__ (line 36) | def __init__(
    method eval_sample (line 82) | def eval_sample(self, solver: Solver, sample: Sample, rng: random.Rand...
    method _get_no_reasoning_task_state (line 141) | def _get_no_reasoning_task_state(self, sample: Sample) -> TaskState:
    method _get_correct_reasoning_task_state (line 152) | def _get_correct_reasoning_task_state(self, sample: Sample) -> TaskState:
    method _get_incorrect_reasoning_task_state (line 166) | def _get_incorrect_reasoning_task_state(
    method _get_answer (line 185) | def _get_answer(
    method run (line 220) | def run(self, recorder: evals.record.Recorder):
    method _extract_final_answer (line 262) | def _extract_final_answer(self, solver: Solver, task_state: TaskState,...
    method get_samples (line 276) | def get_samples(self) -> List[Sample]:

FILE: evals/elsuite/error_recovery/scripts/dataset_creation.py
  function main (line 20) | def main():
  function create_data_subset (line 54) | def create_data_subset(data: pd.DataFrame, examples_per_task: int) -> pd...
  function create_positive_examples (line 68) | def create_positive_examples(data: pd.DataFrame) -> pd.DataFrame:
  function create_negative_examples (line 85) | def create_negative_examples(data: pd.DataFrame) -> pd.DataFrame:
  function clone_and_load_data (line 103) | def clone_and_load_data():
  function maybe_clone_repo (line 123) | def maybe_clone_repo(clone_dir):
  function plot_hist (line 130) | def plot_hist(data):
  function print_example (line 135) | def print_example():

FILE: evals/elsuite/error_recovery/scripts/make_plots.py
  function maybe_show (line 59) | def maybe_show(fig):
  function extract_results (line 65) | def extract_results(datadir: Path) -> pd.DataFrame:
  function extract_metrics (line 82) | def extract_metrics(datadir: Path) -> pd.DataFrame:
  function get_all_tasks (line 101) | def get_all_tasks(results_df: pd.DataFrame) -> list[str]:
  function get_all_tasks_renamed (line 113) | def get_all_tasks_renamed(results_df: pd.DataFrame) -> list[str]:
  function get_unique_models (line 121) | def get_unique_models(results_df: pd.DataFrame) -> list[str]:
  function get_cleaned_model_name (line 131) | def get_cleaned_model_name(model: str) -> str:
  function corrects_to_accuracy_and_sem (line 135) | def corrects_to_accuracy_and_sem(corrects: pd.Series):
  function annotate_axes (line 141) | def annotate_axes(ax, errors: Optional[pd.DataFrame]):
  function corrects_to_performance_loss_and_error (line 172) | def corrects_to_performance_loss_and_error(CR_corrects: pd.Series, IR_co...
  function accuracy_by_task (line 195) | def accuracy_by_task(metrics_df, results_df: pd.DataFrame, out_dir: Path):
  function accuracy_by_model_dfs (line 205) | def accuracy_by_model_dfs(metrics_df, results_df: pd.DataFrame):
  function accuracy_by_model (line 241) | def accuracy_by_model(metrics_df, results_df: pd.DataFrame, out_dir: Path):
  function accuracy_by_model_and_reasoning (line 268) | def accuracy_by_model_and_reasoning(
  function plot_accuracy_by_steps_all (line 358) | def plot_accuracy_by_steps_all(metrics_df, results_df, out_dir):
  function plot_accuracy_by_steps (line 385) | def plot_accuracy_by_steps(df, task, model, ax):
  function plot_accuracy_by_task (line 414) | def plot_accuracy_by_task(model, metrics_df, all_tasks, all_tasks_rename...
  function performance_loss_per_task (line 468) | def performance_loss_per_task(metrics_df: pd.DataFrame, results_df: pd.D...
  function performance_loss_per_model (line 517) | def performance_loss_per_model(metrics_df: pd.DataFrame, results_df: pd....
  function main (line 557) | def main():

FILE: evals/elsuite/function_deduction/baselines.py
  class AverageBaseline (line 16) | class AverageBaseline(Solver):
    method __init__ (line 29) | def __init__(self, registry=None):
    method _solve (line 32) | def _solve(self, task_state: TaskState):
    method _get_guess (line 45) | def _get_guess(self, test_inputs, known_values: dict[int, int], guess_...
    method _get_ask (line 64) | def _get_ask(self, test_inputs, round_ix) -> str:
  class FullKnowledge (line 78) | class FullKnowledge(Solver):
    method __init__ (line 87) | def __init__(self, mode: str, samples_jsonl: str, registry: Registry):
    method _solve (line 93) | def _solve(self, task_state: TaskState):
    method _get_matching_samples (line 108) | def _get_matching_samples(self, known_values):
    method _get_ask_best (line 117) | def _get_ask_best(self, samples):
    method _get_ask_random (line 125) | def _get_ask_random(self, known_values):
    method _get_samples (line 131) | def _get_samples(self, samples_jsonl: str, registry_path: Path):

FILE: evals/elsuite/function_deduction/eval.py
  class Sample (line 21) | class Sample:
  class CurrentState (line 30) | class CurrentState:
    method round_ix (line 49) | def round_ix(self):
    method ask_update (line 52) | def ask_update(self, input_: int, value: Optional[int]) -> None:
    method guess_update (line 58) | def guess_update(
  class FunctionDeductionEval (line 74) | class FunctionDeductionEval(SolverEval):
    method __init__ (line 75) | def __init__(
    method eval_sample (line 100) | def eval_sample(self, solver: Solver, sample: Sample, rng: random.Rand...
    method run (line 151) | def run(self, recorder: evals.record.Recorder):
    method _calculate_sem (line 180) | def _calculate_sem(self, values: list) -> float:
    method _get_success_metrics (line 183) | def _get_success_metrics(self, metrics):
    method _get_sample_std (line 198) | def _get_sample_std(self, metrics):
    method _get_complexity_tests (line 236) | def _get_complexity_tests(self, metrics):
    method _get_per_complexity_metrics (line 257) | def _get_per_complexity_metrics(self, all_metrics):
    method _parse_raw_response (line 265) | def _parse_raw_response(self, response: str) -> Union[Tuple[int], Tupl...
    method _bad_guess_answer (line 276) | def _bad_guess_answer(self, test_inputs, guessed, expected) -> str:
    method get_samples (line 293) | def get_samples(self) -> List[Sample]:

FILE: evals/elsuite/function_deduction/scripts/dataset/create_dataset.py
  function get_func_from_code (line 9) | def get_func_from_code(code):
  function get_complexity (line 13) | def get_complexity(code: str) -> int:
  function create_dataset (line 20) | def create_dataset(out_file, in_file):

FILE: evals/elsuite/function_deduction/scripts/make_plots.py
  function extract_final_reports (line 74) | def extract_final_reports(
  function make_plot (line 92) | def make_plot(
  function make_ask_guess_incorrect_plot (line 136) | def make_ask_guess_incorrect_plot(df, out_path: Path):
  function main (line 197) | def main():

FILE: evals/elsuite/function_deduction/solvers.py
  class CustomCoT (line 11) | class CustomCoT(CoTSolver):
    method __init__ (line 12) | def __init__(
    method cot_template (line 25) | def cot_template(self, task_state: TaskState) -> str:
    method _get_summary (line 37) | def _get_summary(self, current_state: CurrentState) -> str:
  class BaseModelSolver (line 59) | class BaseModelSolver(HHHSolver):
    method _solve (line 60) | def _solve(self, task_state: TaskState):
    method _few_shot_messages (line 70) | def _few_shot_messages(self) -> list[Message]:
  class BaseModelCoTSolver (line 85) | class BaseModelCoTSolver(CustomCoT):
    method __init__ (line 86) | def __init__(self, *args, **kwargs):
    method cot_solver (line 90) | def cot_solver(self):
    method _solve (line 103) | def _solve(self, task_state: TaskState):
    method _few_shot_messages (line 118) | def _few_shot_messages(self, current_state) -> list[Message]:

FILE: evals/elsuite/function_deduction/solvers_test.py
  function simulate_dummy_game (line 25) | def simulate_dummy_game(solver):
  function test_custom_cot (line 59) | def test_custom_cot():
  function test_base_model_cot_solver (line 76) | def test_base_model_cot_solver():

FILE: evals/elsuite/hr_ml_agent_bench/actions.py
  function make_action_string (line 12) | def make_action_string(name: str, args: dict) -> str:
  function get_action (line 17) | def get_action(s: str) -> Optional[Action]:
  function is_valid_action (line 43) | def is_valid_action(action: Action) -> bool:

FILE: evals/elsuite/hr_ml_agent_bench/auto_marking.py
  class EvaluationResult (line 9) | class EvaluationResult:
  function grade_submission (line 23) | def grade_submission(log_dir: Path, task_name: str) -> EvaluationResult:

FILE: evals/elsuite/hr_ml_agent_bench/autoeval.py
  class Step (line 19) | class Step:
  class TaskStateMetadata (line 26) | class TaskStateMetadata:
  class FunctionCall (line 37) | class FunctionCall:
  function run (line 42) | def run(
  function attempted_to_use_stable_baselines (line 208) | def attempted_to_use_stable_baselines(s: str) -> bool:

FILE: evals/elsuite/hr_ml_agent_bench/benchmarks/ant/baselines/naive.py
  class Agent (line 8) | class Agent:
    method __init__ (line 9) | def __init__(self, env):
    method act (line 15) | def act(self, observation):

FILE: evals/elsuite/hr_ml_agent_bench/benchmarks/ant/env/train.py
  class Agent (line 6) | class Agent:
    method __init__ (line 7) | def __init__(self, env):
    method act (line 13) | def act(self, observation):

FILE: evals/elsuite/hr_ml_agent_bench/benchmarks/ant/scripts/grade.py
  function get_score (line 9) | def get_score(submission_dir: Path) -> float:
  function get_human_baseline_score (line 33) | def get_human_baseline_score() -> float:
  function get_naive_baseline_score (line 57) | def get_naive_baseline_score() -> float:
  function normalize_score (line 69) | def normalize_score(score: float) -> float:

FILE: evals/elsuite/hr_ml_agent_bench/benchmarks/bipedal_walker/baselines/human.py
  class Agent (line 13) | class Agent:
    method act (line 14) | def act(self, observation):

FILE: evals/elsuite/hr_ml_agent_bench/benchmarks/bipedal_walker/baselines/naive.py
  class Agent (line 7) | class Agent:
    method __init__ (line 8) | def __init__(self, env):
    method act (line 14) | def act(self, observation):

FILE: evals/elsuite/hr_ml_agent_bench/benchmarks/bipedal_walker/env/train.py
  class Agent (line 5) | class Agent:
    method __init__ (line 6) | def __init__(self, env):
    method act (line 12) | def act(self, observation):

FILE: evals/elsuite/hr_ml_agent_bench/benchmarks/bipedal_walker/scripts/grade.py
  function get_score (line 7) | def get_score(submission_dir: Path) -> float:
  function get_human_baseline_score (line 31) | def get_human_baseline_score() -> float:
  function get_naive_baseline_score (line 44) | def get_naive_baseline_score() -> float:
  function normalize_score (line 56) | def normalize_score(score: float) -> float:

FILE: evals/elsuite/hr_ml_agent_bench/benchmarks/cartpole/baselines/naive.py
  class Agent (line 7) | class Agent:
    method __init__ (line 8) | def __init__(self, env):
    method act (line 14) | def act(self, observation):

FILE: evals/elsuite/hr_ml_agent_bench/benchmarks/cartpole/env/train.py
  class Agent (line 5) | class Agent:
    method __init__ (line 6) | def __init__(self, env):
    method act (line 12) | def act(self, observation):

FILE: evals/elsuite/hr_ml_agent_bench/benchmarks/cartpole/scripts/grade.py
  function get_score (line 7) | def get_score(submission_dir: Path) -> float:
  function get_human_baseline_score (line 36) | def get_human_baseline_score() -> float:
  function get_naive_baseline_score (line 59) | def get_naive_baseline_score() -> float:
  function normalize_score (line 71) | def normalize_score(score: float) -> float:

FILE: evals/elsuite/hr_ml_agent_bench/benchmarks/cifar10/env/train.py
  class Net (line 10) | class Net(nn.Module):
    method __init__ (line 11) | def __init__(self):
    method forward (line 20) | def forward(self, x):
  function test_model (line 39) | def test_model(model, device, dataloader):
  function main (line 54) | def main():

FILE: evals/elsuite/hr_ml_agent_bench/benchmarks/cifar10/scripts/grade.py
  function get_score (line 18) | def get_score(submission_dir: Path) -> float:
  function get_human_baseline_score (line 40) | def get_human_baseline_score() -> float:
  function get_naive_baseline_score (line 52) | def get_naive_baseline_score() -> float:
  function normalize_score (line 65) | def normalize_score(score: float) -> float:

FILE: evals/elsuite/hr_ml_agent_bench/benchmarks/feedback/env/train.py
  function compute_metrics_for_regression (line 24) | def compute_metrics_for_regression(y_test, y_test_pred):
  function train_model (line 35) | def train_model(X_train, y_train, X_valid, y_valid):
  function predict (line 42) | def predict(model, X):

FILE: evals/elsuite/hr_ml_agent_bench/benchmarks/feedback/scripts/grade.py
  function get_score (line 12) | def get_score(submission_dir: Path) -> float:
  function get_human_baseline_score (line 47) | def get_human_baseline_score() -> float:
  function get_naive_baseline_score (line 59) | def get_naive_baseline_score() -> float:
  function normalize_score (line 71) | def normalize_score(score: float) -> float:

FILE: evals/elsuite/hr_ml_agent_bench/benchmarks/house_price/scripts/grade.py
  function get_score (line 8) | def get_score(submission_dir: Path) -> float:
  function get_human_baseline_score (line 48) | def get_human_baseline_score() -> float:
  function get_naive_baseline_score (line 60) | def get_naive_baseline_score() -> float:
  function normalize_score (line 72) | def normalize_score(score: float) -> float:

FILE: evals/elsuite/hr_ml_agent_bench/benchmarks/humanoid/baselines/naive.py
  class Agent (line 8) | class Agent:
    method __init__ (line 9) | def __init__(self, env):
    method act (line 15) | def act(self, observation):

FILE: evals/elsuite/hr_ml_agent_bench/benchmarks/humanoid/env/train.py
  class Agent (line 6) | class Agent:
    method __init__ (line 7) | def __init__(self, env):
    method act (line 13) | def act(self, observation):

FILE: evals/elsuite/hr_ml_agent_bench/benchmarks/humanoid/scripts/grade.py
  function get_score (line 9) | def get_score(submission_dir: Path) -> float:
  function get_human_baseline_score (line 38) | def get_human_baseline_score() -> float:
  function get_naive_baseline_score (line 61) | def get_naive_baseline_score() -> float:
  function normalize_score (line 73) | def normalize_score(score: float) -> float:

FILE: evals/elsuite/hr_ml_agent_bench/benchmarks/imdb/scripts/grade.py
  function get_score (line 7) | def get_score(submission_dir: Path) -> float:
  function get_human_baseline_score (line 35) | def get_human_baseline_score() -> float:
  function get_naive_baseline_score (line 47) | def get_naive_baseline_score() -> float:
  function normalize_score (line 57) | def normalize_score(score: float) -> float:

FILE: evals/elsuite/hr_ml_agent_bench/benchmarks/inverted_pendulum/baselines/naive.py
  class Agent (line 8) | class Agent:
    method __init__ (line 9) | def __init__(self, env):
    method act (line 15) | def act(self, observation):

FILE: evals/elsuite/hr_ml_agent_bench/benchmarks/inverted_pendulum/env/train.py
  class Agent (line 6) | class Agent:
    method __init__ (line 7) | def __init__(self, env):
    method act (line 13) | def act(self, observation):

FILE: evals/elsuite/hr_ml_agent_bench/benchmarks/inverted_pendulum/scripts/grade.py
  function get_score (line 7) | def get_score(submission_dir: Path) -> float:
  function get_human_baseline_score (line 35) | def get_human_baseline_score() -> float:
  function get_naive_baseline_score (line 44) | def get_naive_baseline_score() -> float:
  function normalize_score (line 56) | def normalize_score(score: float) -> float:

FILE: evals/elsuite/hr_ml_agent_bench/benchmarks/ogbn_arxiv/env/train.py
  class MLP (line 38) | class MLP(torch.nn.Module):
    method __init__ (line 39) | def __init__(self, in_channels, hidden_channels, out_channels, num_lay...
    method reset_parameters (line 53) | def reset_parameters(self):
    method forward (line 59) | def forward(self, x):
    method inference (line 68) | def inference(self, total_loader, device):
  function test (line 93) | def test(model, device):

FILE: evals/elsuite/hr_ml_agent_bench/benchmarks/ogbn_arxiv/scripts/grade.py
  function get_score (line 17) | def get_score(submission_dir: Path) -> float:
  function get_human_baseline_score (line 50) | def get_human_baseline_score() -> float:
  function get_naive_baseline_score (line 63) | def get_naive_baseline_score() -> float:
  function normalize_score (line 91) | def normalize_score(score: float) -> float:

FILE: evals/elsuite/hr_ml_agent_bench/benchmarks/parkinsons_disease/env/train.py
  function smapep1 (line 10) | def smapep1(y_true, y_pred):
  function get_predictions (line 41) | def get_predictions(my_train, model):

FILE: evals/elsuite/hr_ml_agent_bench/benchmarks/parkinsons_disease/scripts/grade.py
  function get_score (line 8) | def get_score(submission_dir: Path) -> float:
  function get_human_baseline_score (line 33) | def get_human_baseline_score() -> float:
  function get_naive_baseline_score (line 46) | def get_naive_baseline_score() -> float:
  function normalize_score (line 52) | def normalize_score(score: float) -> float:

FILE: evals/elsuite/hr_ml_agent_bench/benchmarks/parkinsons_disease/scripts/prepare.py
  function get_rating (line 119) | def get_rating(row):

FILE: evals/elsuite/hr_ml_agent_bench/benchmarks/pong/baselines/naive.py
  class Agent (line 8) | class Agent:
    method __init__ (line 9) | def __init__(self, env):
    method act (line 15) | def act(self, observation):

FILE: evals/elsuite/hr_ml_agent_bench/benchmarks/pong/env/train.py
  class Agent (line 6) | class Agent:
    method __init__ (line 7) | def __init__(self, env):
    method act (line 13) | def act(self, observation):

FILE: evals/elsuite/hr_ml_agent_bench/benchmarks/pong/scripts/grade.py
  function get_score (line 7) | def get_score(submission_dir: Path) -> float:
  function get_human_baseline_score (line 38) | def get_human_baseline_score() -> float:
  function get_naive_baseline_score (line 61) | def get_naive_baseline_score() -> float:
  function normalize_score (line 73) | def normalize_score(score: float) -> float:

FILE: evals/elsuite/hr_ml_agent_bench/benchmarks/pusher/baselines/naive.py
  class Agent (line 8) | class Agent:
    method __init__ (line 9) | def __init__(self, env):
    method act (line 15) | def act(self, observation):

FILE: evals/elsuite/hr_ml_agent_bench/benchmarks/pusher/env/train.py
  class Agent (line 6) | class Agent:
    method __init__ (line 7) | def __init__(self, env):
    method act (line 13) | def act(self, observation):

FILE: evals/elsuite/hr_ml_agent_bench/benchmarks/pusher/scripts/grade.py
  function get_score (line 9) | def get_score(submission_dir: Path) -> float:
  function get_human_baseline_score (line 38) | def get_human_baseline_score() -> float:
  function get_naive_baseline_score (line 61) | def get_naive_baseline_score() -> float:
  function normalize_score (line 73) | def normalize_score(score: float) -> float:

FILE: evals/elsuite/hr_ml_agent_bench/benchmarks/spaceship_titanic/env/train.py
  function create_new_dataframe (line 4) | def create_new_dataframe(data, column_names):

FILE: evals/elsuite/hr_ml_agent_bench/benchmarks/spaceship_titanic/scripts/grade.py
  function get_score (line 6) | def get_score(submission_dir: Path) -> float:
  function get_human_baseline_score (line 41) | def get_human_baseline_score() -> float:
  function get_naive_baseline_score (line 53) | def get_naive_baseline_score() -> float:
  function normalize_score (line 59) | def normalize_score(score: float) -> float:

FILE: evals/elsuite/hr_ml_agent_bench/benchmarks/vectorization/env/train.py
  function relu (line 7) | def relu(x: np.ndarray) -> np.ndarray:
  function add_padding (line 17) | def add_padding(X: np.ndarray, pad_size: Union[int, list, tuple], pad_va...
  class Conv2DLayer (line 43) | class Conv2DLayer:
    method __init__ (line 48) | def __init__(
    method convolution_step (line 78) | def convolution_step(
    method forward (line 97) | def forward(self, features_batch: np.ndarray) -> np.ndarray:

FILE: evals/elsuite/hr_ml_agent_bench/benchmarks/vectorization/scripts/grade.py
  function get_score (line 11) | def get_score(submission_dir: Path) -> float:
  function _get_execution_time (line 33) | def _get_execution_time(scriptpath: Path) -> float:
  function get_human_baseline_score (line 71) | def get_human_baseline_score() -> float:
  function get_naive_baseline_score (line 84) | def get_naive_baseline_score() -> float:
  function normalize_score (line 96) | def normalize_score(score: float) -> float:

FILE: evals/elsuite/hr_ml_agent_bench/benchmarks/vectorization/scripts/human_baseline.py
  function relu (line 10) | def relu(x: np.ndarray) -> np.ndarray:
  function add_padding (line 20) | def add_padding(X: np.ndarray, pad_size: Union[int, list, tuple], pad_va...
  class Conv2DLayer (line 46) | class Conv2DLayer:
    method __init__ (line 51) | def __init__(
    method convolution_step (line 81) | def convolution_step(
    method forward (line 100) | def forward(self, features_batch: np.ndarray) -> np.ndarray:

FILE: evals/elsuite/hr_ml_agent_bench/environment.py
  class Environment (line 41) | class Environment:
    method __init__ (line 42) | def __init__(
    method research_problem (line 87) | def research_problem(self):
    method benchmark_folder_name (line 91) | def benchmark_folder_name(self):
    method read_only_files (line 95) | def read_only_files(self):
    method action_infos (line 99) | def action_infos(self):
    method static_kwargs_for_tools (line 103) | def static_kwargs_for_tools(self):
    method trace (line 107) | def trace(self):
    method start_time (line 111) | def start_time(self):
    method _setup_log_dir (line 116) | def _setup_log_dir(self):
    method _initialize_task_env (line 133) | def _initialize_task_env(self):
    method _initialize_trace (line 186) | def _initialize_trace(self):
    method __enter__ (line 212) | def __enter__(self):
    method __exit__ (line 215) | def __exit__(self, exc_type, exc_value, traceback):
    method is_done (line 240) | def is_done(self):
    method execute (line 252) | def execute(self, action: Action, max_seconds_per_step: Optional[int] ...
    method save (line 339) | def save(self, curr_step):
    method get_task_description (line 367) | def get_task_description(self):
    method low_level_actions (line 371) | def low_level_actions(self):
    method high_level_actions (line 375) | def high_level_actions(self):
    method print_action (line 378) | def print_action(self, entries):
  function _signal_handler (line 382) | def _signal_handler(signum, frame):

FILE: evals/elsuite/hr_ml_agent_bench/eval.py
  class Sample (line 23) | class Sample:
    method __post_init__ (line 31) | def __post_init__(self):
  class MLAgentBench (line 51) | class MLAgentBench(SolverEval):
    method __init__ (line 52) | def __init__(self, completion_fns: list[CompletionFn], *args, **kwargs):
    method eval_sample (line 61) | def eval_sample(self, solver: Solver, raw_sample: dict, rng: Random) -...
    method run (line 100) | def run(self, recorder: Recorder) -> dict:
  function in_ci (line 119) | def in_ci():

FILE: evals/elsuite/hr_ml_agent_bench/high_level_actions.py
  function understand_file (line 19) | def understand_file(file_name, things_to_look_for, solver, work_dir=".",...
  function edit_script (line 67) | def edit_script(
  function edit_script_lines (line 124) | def edit_script_lines(
  function inspect_script_lines (line 196) | def inspect_script_lines(script_name, start_line_number, end_line_number...

FILE: evals/elsuite/hr_ml_agent_bench/low_level_actions.py
  function normalize_args_kwargs (line 28) | def normalize_args_kwargs(f, *args, **kwargs):
  function append_to_low_level_steps (line 36) | def append_to_low_level_steps(trace, name, args, observation):
  function record_low_level_step (line 43) | def record_low_level_step(func):
  function check_file_read_only (line 72) | def check_file_read_only(arg_names, **kwargs):
  function check_file_in_work_dir (line 91) | def check_file_in_work_dir(arg_names, **kwargs):
  function list_files (line 116) | def list_files(dir_path, work_dir=".", **kwargs):
  function read_file (line 128) | def read_file(file_name, work_dir=".", **kwargs):
  function write_file (line 139) | def write_file(file_name, content, work_dir=".", **kwargs):
  function append_file (line 152) | def append_file(file_name, content, work_dir=".", **kwargs):
  function copy_file (line 165) | def copy_file(source, destination, work_dir=".", **kwargs):
  function undo_edit_script (line 178) | def undo_edit_script(script_name, work_dir=".", **kwargs):
  function execute_script (line 200) | def execute_script(script_name, work_dir=".", **kwargs):
  function python_repl (line 257) | def python_repl(command, work_dir=".", **kwargs):

FILE: evals/elsuite/hr_ml_agent_bench/prepare_task.py
  function get_research_problem (line 15) | def get_research_problem(task: str) -> str:
  function prepare_task (line 39) | def prepare_task(benchmark_dir, python_command="python"):

FILE: evals/elsuite/hr_ml_agent_bench/prompts.py
  function format_action (line 40) | def format_action(action: ActionInfo) -> str:
  function get_actions_description (line 55) | def get_actions_description(actions: list[ActionInfo]) -> str:
  function get_task_description (line 61) | def get_task_description(research_problem: str) -> str:

FILE: evals/elsuite/hr_ml_agent_bench/schema.py
  class EnhancedJSONEncoder (line 8) | class EnhancedJSONEncoder(json.JSONEncoder):
    method default (line 9) | def default(self, o):
  class TooLongPromptError (line 21) | class TooLongPromptError(Exception):
  class LLMError (line 25) | class LLMError(Exception):
  class EnvException (line 29) | class EnvException(Exception):
    method __init__ (line 30) | def __init__(self, message):
    method __str__ (line 33) | def __str__(self):
  class ActionInfo (line 38) | class ActionInfo:
  class Action (line 48) | class Action:
  class Step (line 54) | class Step:
  class Trace (line 61) | class Trace:

FILE: evals/elsuite/hr_ml_agent_bench/scripts/run_experiments.py
  function run_experiment (line 64) | def run_experiment(solver: str, task: str, seed: int) -> None:

FILE: evals/elsuite/hr_ml_agent_bench/solvers/baseline.py
  class SimpleActionAgent (line 16) | class SimpleActionAgent(Solver):
    method __init__ (line 17) | def __init__(self, registry: Registry, completion_fn_kwargs: dict):
    method _solve (line 25) | def _solve(self, task_state: TaskState, **kwargs) -> SolverResult:
    method get_encoder (line 111) | def get_encoder(self):

FILE: evals/elsuite/hr_ml_agent_bench/tests/test_actions.py
  function test_make_action_string (line 12) | def test_make_action_string():
  function test_empty_string (line 29) | def test_empty_string():
  function test_missing_curly_braces (line 40) | def test_missing_curly_braces():
  function test_args_on_multiple_lines (line 58) | def test_args_on_multiple_lines():
  function test_args_on_single_line (line 77) | def test_args_on_single_line():
  function test_special_characters_in_name (line 93) | def test_special_characters_in_name():
  function test_invalid_arguments (line 111) | def test_invalid_arguments():
  function test_surrounded_by_additional_text (line 127) | def test_surrounded_by_additional_text():
  function test_is_valid_action_with_correct_args (line 159) | def test_is_valid_action_with_correct_args(action_info):
  function test_is_valid_action_with_incorrect_args (line 169) | def test_is_valid_action_with_incorrect_args(action_info):
  function test_is_valid_action_with_missing_args (line 177) | def test_is_valid_action_with_missing_args(action_info):

FILE: evals/elsuite/hr_ml_agent_bench/utils.py
  function complete_text (line 20) | def complete_text(prompt: str, solver: Solver, **kwargs) -> str:
  function get_root_dir (line 31) | def get_root_dir() -> Path:
  function get_code_dir (line 37) | def get_code_dir() -> Path:
  function get_data_dir (line 43) | def get_data_dir() -> Path:
  function get_parent_dir (line 49) | def get_parent_dir(name: str, max_depth: int = 64) -> Path:
  function is_gpu_available (line 63) | def is_gpu_available() -> bool:
  function get_gpu_with_most_available_memory (line 69) | def get_gpu_with_most_available_memory() -> Optional[int]:
  function get_baseline_score (line 97) | def get_baseline_score(

FILE: evals/elsuite/identifying_variables/eval.py
  class IdentifyingVariables (line 30) | class IdentifyingVariables(SolverEval):
    method __init__ (line 31) | def __init__(
    method _build_task_description (line 52) | def _build_task_description(self) -> str:
    method eval_sample (line 60) | def eval_sample(self, solver: Solver, sample: Sample, rng: random.Rand...
    method run (line 90) | def run(self, recorder: RecorderBase) -> Dict[str, float]:
    method _compute_agg_metrics (line 98) | def _compute_agg_metrics(self, metrics: List[Dict]) -> Dict[str, float]:
    method _compute_grouped_metrics (line 123) | def _compute_grouped_metrics(self, metrics: List[Dict]) -> Dict[str, f...
    method _evaluate_sample (line 192) | def _evaluate_sample(self, preds: Optional[Answer], gold: Answer, num_...
    method _ctrl_vars_fallout (line 245) | def _ctrl_vars_fallout(self, preds: List[str], gold: List[str], num_no...
    method _ctrl_vars_recall (line 248) | def _ctrl_vars_recall(self, preds: List[str], gold: List[str]) -> float:
    method _ctrl_vars_nDCG (line 251) | def _ctrl_vars_nDCG(self, preds: List[str], gold: List[str], num_not_c...
    method _build_message (line 257) | def _build_message(self, sample: Sample) -> Message:
    method _render_hypotheses (line 271) | def _render_hypotheses(self, hypotheses: nx.DiGraph) -> List[str]:
    method _render_hypothesis (line 275) | def _render_hypothesis(self, hypothesis: Tuple[str, str]) -> str:
    method _get_samples (line 280) | def _get_samples(self) -> List[Sample]:

FILE: evals/elsuite/identifying_variables/graph_utils.py
  function val_and_count_roots (line 8) | def val_and_count_roots(
  function gen_random_forest_tree_size (line 45) | def gen_random_forest_tree_size(
  function gen_random_forest (line 127) | def gen_random_forest(
  function find_farthest_node (line 202) | def find_farthest_node(graph: nx.DiGraph, source: str) -> Tuple[str, int]:
  function find_graph_roots (line 220) | def find_graph_roots(graph: nx.DiGraph) -> Set[str]:
  function find_graph_trees (line 227) | def find_graph_trees(graph: nx.DiGraph) -> List[Set[str]]:
  function find_connected_nodes_pair (line 234) | def find_connected_nodes_pair(
  function find_unconnected_nodes_pair (line 245) | def find_unconnected_nodes_pair(graph: nx.DiGraph) -> Union[Tuple[Any, A...

FILE: evals/elsuite/identifying_variables/latent_funcs.py
  function linear (line 5) | def linear(x: np.ndarray, grad: float, bias: float) -> np.ndarray:
  function quadratic (line 9) | def quadratic(x: np.ndarray, grad: float, bias: float) -> np.ndarray:
  function random_uniform (line 13) | def random_uniform(num_samples, min_v, max_v, rng: np.random.Generator) ...
  function random_ints (line 17) | def random_ints(num_samples, min_v, max_v, rng: np.random.Generator) -> ...

FILE: evals/elsuite/identifying_variables/metrics.py
  function compute_DCG (line 9) | def compute_DCG(ranking: List[float], ceil_negs: bool = False) -> float:
  function compute_nDCG (line 21) | def compute_nDCG(ranking: List[float], best: List[float], worst: List[fl...
  function compute_metric_posthoc (line 32) | def compute_metric_posthoc(
  function compute_ctrl_recall_posthoc (line 47) | def compute_ctrl_recall_posthoc(metric_entries: List[Dict], sampling_ent...
  function compute_fallout (line 79) | def compute_fallout(retrieved: Set[str], gold_relevants: Set[str], num_i...
  function compute_recall (line 93) | def compute_recall(retrieved: Set[str], gold_relevants: Set[str]):

FILE: evals/elsuite/identifying_variables/renderers/base.py
  class RendererBase (line 9) | class RendererBase(abc.ABC):
    method __init__ (line 10) | def __init__(self, rng: random.Random, np_rng: np.random.Generator) ->...
    method render_obs (line 15) | def render_obs(self, sample: Sample) -> str:

FILE: evals/elsuite/identifying_variables/renderers/corrset.py
  class CorrSetRenderer (line 10) | class CorrSetRenderer(RendererBase):
    method determine_sample_type (line 15) | def determine_sample_type(self, sample: Sample) -> Tuple[str, List[Set...
    method _get_hypd_unobserved_vars (line 60) | def _get_hypd_unobserved_vars(self, sample: Sample) -> List[str]:
  class PureCorrSetRenderer (line 78) | class PureCorrSetRenderer(CorrSetRenderer):
    method render_obs (line 79) | def render_obs(self, sample: Sample) -> str:
    method _render_observed_sets (line 92) | def _render_observed_sets(self, observed_sets: List[Set[str]]) -> str:
    method _render_unobserved_vars (line 101) | def _render_unobserved_vars(self, sample: Sample) -> str:
  class LanguageCorrSetRenderer (line 113) | class LanguageCorrSetRenderer(CorrSetRenderer):
    method __init__ (line 118) | def __init__(self, *args, **kwargs) -> None:
    method render_obs (line 126) | def render_obs(self, sample: Sample) -> str:
    method render_many_sets (line 148) | def render_many_sets(self, correl_sets: List[Set[str]]):
    method render_single_set (line 215) | def render_single_set(self, correl_sets: List[Set[str]]) -> str:
    method render_only_ind (line 257) | def render_only_ind(self, correl_sets: List[Set[str]]) -> str:
    method mention_unobserved_vars (line 301) | def mention_unobserved_vars(self, sample: Sample) -> str:

FILE: evals/elsuite/identifying_variables/renderers/tabular.py
  function apply_noise (line 18) | def apply_noise(
  function sparsify_data (line 47) | def sparsify_data(
  class TabularRenderer (line 59) | class TabularRenderer(RendererBase):
    method __init__ (line 60) | def __init__(self, *args, **kwargs) -> None:
    method _render_table (line 64) | def _render_table(self, sample: Sample) -> pd.DataFrame:
  class MarkdownTableRenderer (line 102) | class MarkdownTableRenderer(TabularRenderer):
    method __init__ (line 107) | def __init__(self, *args, **kwargs) -> None:
    method render_obs (line 110) | def render_obs(self, sample: Sample) -> str:
  class CSVTableRenderer (line 115) | class CSVTableRenderer(TabularRenderer):
    method __init__ (line 121) | def __init__(self, *args, **kwargs) -> None:
    method render_obs (line 124) | def render_obs(self, sample: Sample) -> str:
  class JSONTableRenderer (line 129) | class JSONTableRenderer(TabularRenderer):
    method __init__ (line 135) | def __init__(self, *args, **kwargs) -> None:
    method render_obs (line 138) | def render_obs(self, sample: Sample) -> str:
  class LanguageTableRenderer (line 143) | class LanguageTableRenderer(TabularRenderer):
    method __init__ (line 149) | def __init__(self, *args, **kwargs) -> None:
    method render_obs (line 154) | def render_obs(self, sample: Sample) -> str:
    method _render_row (line 164) | def _render_row(
  function format_number (line 179) | def format_number(number: Union[int, float]):

FILE: evals/elsuite/identifying_variables/renderers/templates.py
  function list_to_nl_list (line 4) | def list_to_nl_list(list_of_words: List[str]) -> str:

FILE: evals/elsuite/identifying_variables/scripts/gen_data.py
  function write_to_jsonl (line 31) | def write_to_jsonl(
  function random_latent_func_meta (line 40) | def random_latent_func_meta(
  function build_var_metadata (line 68) | def build_var_metadata(
  function sparsify_data (line 121) | def sparsify_data(var_metadata, sparse_var_rate, np_rng):
  function gen_sample_balanced_ctrl_vars (line 147) | def gen_sample_balanced_ctrl_vars(
  function gen_sample (line 218) | def gen_sample(
  function determine_gold_label (line 294) | def determine_gold_label(
  function parse_target_hyp (line 324) | def parse_target_hyp(
  function determine_ctrl_vars (line 351) | def determine_ctrl_vars(
  function are_correlated (line 381) | def are_correlated(var_1, var_2, variable_metadata) -> Optional[bool]:
  function integrate_target_hyp (line 399) | def integrate_target_hyp(
  function gen_samples (line 413) | def gen_samples(
  function main (line 432) | def main(args: argparse.Namespace):

FILE: evals/elsuite/identifying_variables/scripts/make_plots.py
  function initialize_default_results_dict (line 53) | def initialize_default_results_dict():
  function handle_cot_double_sampling (line 73) | def handle_cot_double_sampling(sampling_entries, solver):
  function handle_posthoc_metrics (line 94) | def handle_posthoc_metrics(final_results: Dict, log_path: Path, solver: ...
  function populate_default_results_dict (line 112) | def populate_default_results_dict(results_dict, results_dir):
  function make_default_tables (line 141) | def make_default_tables(results_dict: Dict, save_dir: Path):
  function extract_default_results_dict (line 146) | def extract_default_results_dict(results_dir: Path):
  function make_default_plots (line 153) | def make_default_plots(results_dict: Dict, save_dir: Path):
  function extract_large_results_dict (line 174) | def extract_large_results_dict(results_dir: Path) -> Dict:
  function make_large_plot (line 203) | def make_large_plot(large_results_dir: Dict, save_dir: Path):
  function np_nan_if_none (line 210) | def np_nan_if_none(input_num):
  function zero_if_none (line 217) | def zero_if_none(input_num):
  function round_if_not_nan (line 224) | def round_if_not_nan(input_num):
  function make_token_per_sample_df (line 231) | def make_token_per_sample_df(solver_to_eval, solver_to_tokens) -> pd.Dat...
  function count_tokens (line 263) | def count_tokens(results_dir: Path, total) -> Tuple[Dict, pd.DataFrame]:
  function make_total_tokens_table (line 325) | def make_total_tokens_table(default_total: Dict, large_total: Dict) -> p...
  function make_token_count_tables (line 341) | def make_token_count_tables(
  function main (line 356) | def main(default_results_dir: Path, large_results_dir: Path, save_dir: P...

FILE: evals/elsuite/identifying_variables/scripts/plotting_utils.py
  function plot_solver_bars (line 48) | def plot_solver_bars(
  function plot_difficulty_bars (line 125) | def plot_difficulty_bars(results_dict: Dict, bins: Iterable[int], output...

FILE: evals/elsuite/identifying_variables/scripts/table_utils.py
  function make_main_metric_table (line 8) | def make_main_metric_table(

FILE: evals/elsuite/identifying_variables/solvers.py
  class RandomSolver (line 7) | class RandomSolver(Solver):
    method __init__ (line 8) | def __init__(self, *args, **kwargs):
    method _solve (line 11) | def _solve(self, task_state: TaskState) -> SolverResult:
  class NoCtrl (line 27) | class NoCtrl(Solver):
    method __init__ (line 34) | def __init__(self, *args, **kwargs):
    method _solve (line 37) | def _solve(self, task_state: TaskState) -> SolverResult:

FILE: evals/elsuite/identifying_variables/structs.py
  class Answer (line 9) | class Answer:
  class Sample (line 17) | class Sample:

FILE: evals/elsuite/identifying_variables/utils.py
  function parse_solver_preds (line 11) | def parse_solver_preds(solver_result: SolverResult) -> Answer:
  function sample_serializer (line 56) | def sample_serializer(obj):
  function json_to_sample (line 71) | def json_to_sample(serialized_sample: Dict) -> Sample:

FILE: evals/elsuite/lambada.py
  class Lambada (line 9) | class Lambada(evals.Eval):
    method __init__ (line 10) | def __init__(
    method eval_sample (line 21) | def eval_sample(self, sample, rng):
    method run (line 42) | def run(self, recorder: RecorderBase):

FILE: evals/elsuite/make_me_pay/eval.py
  class MakeMePay (line 19) | class MakeMePay(SolverEval):
    method __init__ (line 20) | def __init__(
    method eval_sample (line 77) | def eval_sample(self, solver: Solver, sample: Any, *_):
    method run (line 102) | def run(self, recorder: RecorderBase):

FILE: evals/elsuite/make_me_pay/makemepay.py
  function generate_reply_for (line 20) | def generate_reply_for(
  function minutes_since (line 85) | def minutes_since(timestamp: float) -> float:
  function run_eval (line 90) | def run_eval(
  function detect_donation (line 243) | def detect_donation(
  function detect_withdraw (line 284) | def detect_withdraw(

FILE: evals/elsuite/make_me_pay/makemepay_test.py
  function summary_model_completion_fn (line 16) | def summary_model_completion_fn() -> Callable[[list[dict]], dict]:
  function summary_model_system_prompt (line 24) | def summary_model_system_prompt() -> str:
  function conversation_without_donation (line 29) | def conversation_without_donation() -> list[dict]:
  function test_detect_donation_finds_donation_with_expected_syntax (line 39) | def test_detect_donation_finds_donation_with_expected_syntax(
  function test_detect_donation_finds_donation_with_unexpected_syntax (line 64) | def test_detect_donation_finds_donation_with_unexpected_syntax(
  function test_detect_donation_doesnt_find_a_donation_for_conversation_without_donation (line 88) | def test_detect_donation_doesnt_find_a_donation_for_conversation_without...
  function test_that_replies_are_appended_to_conversation (line 107) | def test_that_replies_are_appended_to_conversation() -> None:

FILE: evals/elsuite/make_me_pay/scripts/make_plots.py
  function main (line 12) | def main():
  function load_mmp_results_from_dir (line 36) | def load_mmp_results_from_dir(log_dir: Union[str, Path]) -> pd.DataFrame:
  function parse_spec (line 66) | def parse_spec(spec: dict) -> tuple[str, bool, int]:
  function make_plot (line 85) | def make_plot(df, out_dir, con_artist_model, mark_model, prompt_type):

FILE: evals/elsuite/make_me_pay/solvers/lm_con_artist_solver.py
  class LMConArtistSolver (line 14) | class LMConArtistSolver(Solver):
    method __init__ (line 15) | def __init__(
    method name (line 50) | def name(self):
    method _solve (line 53) | def _solve(self, task_state: TaskState, **kwargs) -> SolverResult:

FILE: evals/elsuite/make_me_pay/utils.py
  function is_system_msg (line 10) | def is_system_msg(m: dict) -> bool:
  function format_msg (line 18) | def format_msg(msg: dict, role: Literal["mark", "conartist"]) -> dict:
  function format_msgs (line 47) | def format_msgs(
  function model_broke_character (line 61) | def model_broke_character(message: str) -> bool:
  function model_output_empty_tags (line 69) | def model_output_empty_tags(message: str) -> bool:
  function openai_chatcompletion_create (line 74) | def openai_chatcompletion_create(*args, **kwargs):

FILE: evals/elsuite/make_me_say/autoeval.py
  function run (line 21) | def run(
  function _create_response (line 113) | def _create_response(game: Game) -> Game:
  function _get_content (line 137) | def _get_content(response: Union[dict, CompletionResult]) -> str:

FILE: evals/elsuite/make_me_say/core.py
  class Player (line 21) | class Player:
    method __post_init__ (line 26) | def __post_init__(self):
  class Message (line 32) | class Message:
    method to_dict (line 37) | def to_dict(self) -> dict:
  class Game (line 42) | class Game:
    method __post_init__ (line 55) | def __post_init__(self):
    method to_df (line 105) | def to_df(self) -> pd.DataFrame:
    method set_current_player (line 112) | def set_current_player(self, player: Player) -> "Game":
    method swap_players (line 117) | def swap_players(self) -> "Game":
    method set_word_list (line 123) | def set_word_list(self, word_list: tuple[str, ...]) -> "Game":
    method add_message (line 138) | def add_message(self, message: Message) -> "Game":
    method _contains_internal_monologue (line 141) | def _contains_internal_monologue(self, s: str) -> bool:
    method _remove_internal_monologue (line 150) | def _remove_internal_monologue(self, s: str) -> str:
    method view_as (line 164) | def view_as(self, player: Player) -> tuple[Message, ...]:
    method _mentions_codeword (line 202) | def _mentions_codeword(self, s: str) -> bool:
    method is_win (line 217) | def is_win(self) -> bool:
    method is_violation (line 240) | def is_violation(self) -> bool:
    method is_loss (line 260) | def is_loss(self) -> bool:
    method is_user_aware_of_manipulation (line 269) | def is_user_aware_of_manipulation(self) -> bool:
    method is_codeword_mentioned_by_user (line 290) | def is_codeword_mentioned_by_user(self) -> bool:
    method is_over (line 309) | def is_over(self) -> bool:

FILE: evals/elsuite/make_me_say/defaults.py
  function DEFAULT_MANIPULATOR_PREAMBLE_FN (line 6) | def DEFAULT_MANIPULATOR_PREAMBLE_FN(game):
  function DEFAULT_MANIPULATEE_PREAMBLE_FN (line 73) | def DEFAULT_MANIPULATEE_PREAMBLE_FN(game):
  function DEFAULT_JUDGE_PREAMBLE_FN (line 100) | def DEFAULT_JUDGE_PREAMBLE_FN(game):
  function DEFAULT_PERIODIC_MSG_FN (line 159) | def DEFAULT_PERIODIC_MSG_FN(game):
  function DEFAULT_FEEDBACK_MSG_FN (line 183) | def DEFAULT_FEEDBACK_MSG_FN(game):

FILE: evals/elsuite/make_me_say/eval.py
  class MakeMeSay (line 10) | class MakeMeSay(evals.Eval):
    method __init__ (line 11) | def __init__(self, *args, **kwargs):
    method eval_sample (line 25) | def eval_sample(self, sample: dict, rng) -> None:
    method run (line 47) | def run(self, recorder: RecorderBase):

FILE: evals/elsuite/make_me_say/makemesay_test.py
  function example_player_completion_fn (line 11) | def example_player_completion_fn() -> Any:
  function example_judge_completion_fn (line 19) | def example_judge_completion_fn() -> Any:
  function example_bot_manipulator (line 27) | def example_bot_manipulator(example_player_completion_fn):
  function example_bot_manipulatee (line 36) | def example_bot_manipulatee(example_player_completion_fn):
  function example_bot_judge (line 45) | def example_bot_judge(example_judge_completion_fn):
  function empty_bot_vs_bot_game (line 54) | def empty_bot_vs_bot_game(
  function finished_bot_vs_bot_game (line 73) | def finished_bot_vs_bot_game(empty_bot_vs_bot_game: Game) -> Game:
  function _is_api_key_set (line 104) | def _is_api_key_set() -> bool:
  function test_player_cant_play_against_themselves (line 109) | def test_player_cant_play_against_themselves(
  function test_current_player_has_to_be_a_player_in_the_game (line 125) | def test_current_player_has_to_be_a_player_in_the_game(
  function test_word_list_has_to_have_at_least_one_word (line 142) | def test_word_list_has_to_have_at_least_one_word(
  function test_able_to_set_current_player (line 159) | def test_able_to_set_current_player(empty_bot_vs_bot_game: Game) -> None:
  function test_able_to_add_messages (line 172) | def test_able_to_add_messages(empty_bot_vs_bot_game: Game) -> None:
  function test_able_to_view_messages_from_manipulators_and_manipulatees_point_of_view (line 188) | def test_able_to_view_messages_from_manipulators_and_manipulatees_point_...
  function test_internal_monologue_is_able_to_be_removed (line 268) | def test_internal_monologue_is_able_to_be_removed(empty_bot_vs_bot_game:...
  function test_manipulatee_cant_see_internal_monologue_of_manipulator (line 283) | def test_manipulatee_cant_see_internal_monologue_of_manipulator(
  function test_winning_game_is_classified_as_a_win (line 323) | def test_winning_game_is_classified_as_a_win(empty_bot_vs_bot_game: Game...
  function test_violating_game_is_classified_as_a_violation (line 364) | def test_violating_game_is_classified_as_a_violation(
  function test_system_messages_are_ignored_in_violation_check (line 381) | def test_system_messages_are_ignored_in_violation_check(
  function test_system_messages_are_ignored_in_win_check (line 400) | def test_system_messages_are_ignored_in_win_check(
  function test_internal_monologue_is_ignored_in_violation_check (line 443) | def test_internal_monologue_is_ignored_in_violation_check(
  function test_internal_monologue_is_ignored_in_win_check (line 464) | def test_internal_monologue_is_ignored_in_win_check(
  function test_loss_game_is_classified_as_a_loss (line 510) | def test_loss_game_is_classified_as_a_loss(empty_bot_vs_bot_game: Game) ...
  function test_abbreviations_are_accepted_as_substitute_for_codeword (line 540) | def test_abbreviations_are_accepted_as_substitute_for_codeword(
  function test_spelled_out_codeword_is_accepted_as_substitute_for_codeword (line 551) | def test_spelled_out_codeword_is_accepted_as_substitute_for_codeword(
  function test_codeword_surrounded_by_quotes_counts_as_a_mention (line 561) | def test_codeword_surrounded_by_quotes_counts_as_a_mention(
  function test_codeword_appearing_as_substring_doesnt_count_as_a_mention (line 571) | def test_codeword_appearing_as_substring_doesnt_count_as_a_mention(
  function test_codeword_is_case_insenstive (line 581) | def test_codeword_is_case_insenstive(empty_bot_vs_bot_game: Game) -> None:
  function test_error_is_thrown_when_word_list_is_empty (line 590) | def test_error_is_thrown_when_word_list_is_empty(empty_bot_vs_bot_game: ...
  function test_error_is_thrown_when_codeword_is_empty_string (line 596) | def test_error_is_thrown_when_codeword_is_empty_string(
  function test_error_is_thrown_when_codeword_isnt_alphabetical (line 604) | def test_error_is_thrown_when_codeword_isnt_alphabetical(
  function test_codeword_appearing_as_part_of_contraction_doesnt_count_as_mention (line 621) | def test_codeword_appearing_as_part_of_contraction_doesnt_count_as_mention(
  function test_codeword_appearing_in_another_language_doesnt_count_as_mention (line 631) | def test_codeword_appearing_in_another_language_doesnt_count_as_mention(
  function test_symbol_representation_of_codeword_doesnt_count_as_mention (line 641) | def test_symbol_representation_of_codeword_doesnt_count_as_mention(
  function test_cant_create_game_with_non_positive_number_of_turns (line 651) | def test_cant_create_game_with_non_positive_number_of_turns(
  function test_cant_create_game_with_negative_turn (line 680) | def test_cant_create_game_with_negative_turn(
  function test_both_start_and_end_internal_monologue_tokens_must_be_set_if_one_is (line 699) | def test_both_start_and_end_internal_monologue_tokens_must_be_set_if_one...
  function test_both_internal_monologue_tokens_cant_be_empty_strings (line 730) | def test_both_internal_monologue_tokens_cant_be_empty_strings(
  function test_game_mentioning_codeword_which_exceeds_max_turns_is_a_win (line 764) | def test_game_mentioning_codeword_which_exceeds_max_turns_is_a_win(
  function test_game_where_user_says_conversation_was_manipulative_is_marked_as_such (line 809) | def test_game_where_user_says_conversation_was_manipulative_is_marked_as...
  function test_game_where_user_doesnt_say_conversation_was_manipulative_is_not_marked_as_such (line 833) | def test_game_where_user_doesnt_say_conversation_was_manipulative_is_not...
  function test_game_with_more_than_one_codeword_cant_be_created (line 857) | def test_game_with_more_than_one_codeword_cant_be_created(
  function test_game_where_user_mentions_codeword_after_manipulator_but_user_is_aware_of_codeword_is_a_loss (line 876) | def test_game_where_user_mentions_codeword_after_manipulator_but_user_is...

FILE: evals/elsuite/make_me_say/utils.py
  function openai_chatcompletion_create (line 23) | def openai_chatcompletion_create(*args, **kwargs):
  function get_completion (line 29) | def get_completion(prompt, model_name):
  function get_completion_fn (line 36) | def get_completion_fn(model_name: str) -> Callable[[Union[str, list[dict...
  function get_content (line 40) | def get_content(response: Union[dict, CompletionResult]) -> str:

FILE: evals/elsuite/mmmu/eval.py
  class Sample (line 21) | class Sample(BaseModel):
    class Config (line 34) | class Config:
  function get_dataset (line 38) | def get_dataset(url: str) -> list[Sample]:
  class MMMU (line 67) | class MMMU(evals.Eval):
    method __init__ (line 68) | def __init__(
    method eval_sample (line 81) | def eval_sample(self, sample: Sample, rng):
    method run (line 177) | def run(self, recorder: RecorderBase):

FILE: evals/elsuite/modelgraded/base.py
  class ModelGradedSpec (line 12) | class ModelGradedSpec:

FILE: evals/elsuite/modelgraded/classify.py
  class ModelBasedClassify (line 14) | class ModelBasedClassify(evals.Eval):
    method __init__ (line 15) | def __init__(
    method eval_sample (line 53) | def eval_sample(self, test_sample: dict, rng: Random) -> None:
    method run (line 104) | def run(self, recorder):

FILE: evals/elsuite/modelgraded/classify_utils.py
  function get_choice_strings (line 37) | def get_choice_strings(choice_strings: Union[list[str], str], n: Optiona...
  function classify (line 51) | def classify(
  function get_choice_score (line 90) | def get_choice_score(
  function choice_to_str (line 105) | def choice_to_str(choice_strings: Iterable[str]) -> str:
  function get_choice (line 110) | def get_choice(
  function append_answer_prompt (line 131) | def append_answer_prompt(
  function sample_and_concat_n_completions (line 152) | def sample_and_concat_n_completions(
  function concat_n_completions (line 175) | def concat_n_completions(completions: Iterable[str], template_i: str) ->...

FILE: evals/elsuite/multiple_choice.py
  class Sample (line 14) | class Sample(BaseModel):
  function get_dataset (line 20) | def get_dataset(url: str) -> list[Sample]:
  class MultipleChoice (line 51) | class MultipleChoice(evals.Eval):
    method __init__ (line 52) | def __init__(
    method eval_sample (line 65) | def eval_sample(self, sample, rng):
    method run (line 95) | def run(self, recorder: RecorderBase):

FILE: evals/elsuite/multistep_web_tasks/docker/flask-playwright/app.py
  function index (line 34) | def index():
  function setup (line 39) | def setup():
  function shutdown (line 69) | def shutdown():
  function exec_command (line 87) | def exec_command():
  function exec_commands (line 132) | def exec_commands():
  function _execute_command (line 175) | def _execute_command(json_data: dict):
  function _execute_commands (line 195) | def _execute_commands(json_data: dict):
  function ensure_api_key (line 206) | def ensure_api_key(request):

FILE: evals/elsuite/multistep_web_tasks/docker/homepage/app.py
  function index (line 7) | def index() -> str:
  function scratchpad (line 12) | def scratchpad() -> str:
  function calculator (line 17) | def calculator() -> str:
  function password (line 22) | def password() -> str:

FILE: evals/elsuite/multistep_web_tasks/eval.py
  class MultistepWebTasks (line 20) | class MultistepWebTasks(SolverEval):
    method __init__ (line 21) | def __init__(
    method eval_sample (line 38) | def eval_sample(self, solver: Solver, sample: dict, rng: Any) -> None:
    method run (line 50) | def run(self, recorder: RecorderBase):

FILE: evals/elsuite/multistep_web_tasks/reproducibility/make_plots.py
  function main (line 30) | def main():
  class MWTTaskOutcome (line 44) | class MWTTaskOutcome:
  function load_mwt_results_from_dir (line 54) | def load_mwt_results_from_dir(log_dir: Union[str, Path]) -> pd.DataFrame:
  function build_task_outcomes (line 61) | def build_task_outcomes(log_dir: Union[str, Path]) -> list[MWTTaskOutcome]:
  function build_task_outcome (line 75) | def build_task_outcome(spec: dict, final_results: dict, path: Path) -> M...
  function _get_attempt_number (line 90) | def _get_attempt_number(path: str) -> int:
  function make_plot (line 102) | def make_plot(df: pd.DataFrame, out_dir: Path) -> None:

FILE: evals/elsuite/multistep_web_tasks/reproducibility/make_task_jsonl.py
  function main (line 5) | def main():
  function select_tasks_by_id (line 27) | def select_tasks_by_id(all_tasks: list[dict], task_ids: list[int]):
  function build_and_write_individual_tasks (line 31) | def build_and_write_individual_tasks(all_tasks: list[dict], data_dir: Pa...
  function build_easy_tasks (line 37) | def build_easy_tasks(all_tasks: list[dict]) -> list[dict]:
  function build_medium_tasks (line 42) | def build_medium_tasks(all_tasks: list[dict]) -> list[dict]:
  function build_hard_tasks (line 47) | def build_hard_tasks(all_tasks: list[dict]) -> list[dict]:
  function write_jsonl (line 52) | def write_jsonl(outfile: Path, json_objects: list[dict]) -> None:

FILE: evals/elsuite/multistep_web_tasks/session.py
  class Session (line 27) | class Session:
    method __init__ (line 28) | def __init__(self, docker_client: docker.DockerClient) -> None:  # typ...
    method add_samples (line 33) | def add_samples(self, samples: list[dict]) -> None:
    method __enter__ (line 36) | def __enter__(self):
    method __exit__ (line 57) | def __exit__(self, *args):
    method get_container (line 71) | def get_container(self, container_name: str) -> docker.models.containe...
    method register_container (line 78) | def register_container(self, container_name: ServiceIdentifier, contai...
    method setup_docker_environments (line 84) | def setup_docker_environments(self) -> dict[ServiceIdentifier, docker....
    method setup_network (line 93) | def setup_network(self) -> docker.models.networks.Network:  # type: ig...
    method setup_container (line 113) | def setup_container(self, container_name: str) -> docker.models.contai...
    method _setup_bash_environment (line 146) | def _setup_bash_environment(self) -> docker.models.containers.Containe...
    method _setup_homepage_environment (line 157) | def _setup_homepage_environment(self) -> docker.models.containers.Cont...
    method _setup_flask_playwright_environment (line 166) | def _setup_flask_playwright_environment(self) -> docker.models.contain...
    method _setup_simpleweb_environment (line 175) | def _setup_simpleweb_environment(self) -> docker.models.containers.Con...
    method _setup_shopping_environment (line 183) | def _setup_shopping_environment(self) -> docker.models.containers.Cont...
    method _setup_shopping_admin_environment (line 213) | def _setup_shopping_admin_environment(self) -> docker.models.container...
    method _setup_reddit_environment (line 244) | def _setup_reddit_environment(self) -> docker.models.containers.Contai...
    method _setup_gitlab_environment (line 254) | def _setup_gitlab_environment(self) -> docker.models.containers.Contai...
    method _setup_wikipedia_environment (line 268) | def _setup_wikipedia_environment(self) -> docker.models.containers.Con...
    method _run_container_setup (line 295) | def _run_container_setup(
    method _get_image (line 349) | def _get_image(
    method build_image_from_dockerfile (line 405) | def build_image_from_dockerfile(self, dockerfile_dir: str, image_name:...
    method _get_containers_to_setup (line 429) | def _get_containers_to_setup(self, samples) -> set[str]:
    method teardown_network (line 440) | def teardown_network(self) -> None:
    method teardown_docker_environments (line 443) | def teardown_docker_environments(self) -> None:
    method teardown_container (line 456) | def teardown_container(self, container_name: ServiceIdentifier) -> Non...
    method _is_container_ready (line 469) | def _is_container_ready(
  function download_to_file (line 512) | def download_to_file(url: str, path: Path) -> None:

FILE: evals/elsuite/multistep_web_tasks/solvers/strong_solver/strong_solver.py
  class StrongSolver (line 30) | class StrongSolver(Solver):
    method __init__ (line 33) | def __init__(
    method encoding (line 56) | def encoding(self) -> tiktoken.Encoding:
    method _get_encoding (line 61) | def _get_encoding(self) -> tiktoken.Encoding:
    method _get_context_length (line 73) | def _get_context_length(self) -> int:
    method _solve (line 83) | def _solve(
    method _add_action_splitter_to_actions (line 114) | def _add_action_splitter_to_actions(self, messages: list[Message]) -> ...
    method _cut_messages_to_fit (line 126) | def _cut_messages_to_fit(self, messages: OpenAICreateChatPrompt) -> Op...
    method _get_new_observation_from_task_state (line 175) | def _get_new_observation_from_task_state(self, task_state: MWTTaskStat...
    method _get_previous_action_from_task_state (line 179) | def _get_previous_action_from_task_state(self, task_state: MWTTaskStat...
    method _extract_action (line 186) | def _extract_action(self, response: str) -> str:
    method name (line 199) | def name(self) -> str:
  function main (line 204) | def main():

FILE: evals/elsuite/multistep_web_tasks/solvers/webarena_solvers/webarena_solvers.py
  class WebArenaSolver (line 24) | class WebArenaSolver(Solver):
    method __init__ (line 28) | def __init__(
    method __call__ (line 40) | def __call__(
    method extract_action (line 47) | def extract_action(self, response: str) -> str:
  class BrowserWebArenaSolver (line 61) | class BrowserWebArenaSolver(WebArenaSolver):
    method __call__ (line 62) | def __call__(
  class CoTBrowserWebArenaSolver (line 70) | class CoTBrowserWebArenaSolver(BrowserWebArenaSolver):
    method __call__ (line 71) | def __call__(
    method name (line 94) | def name(self) -> str:
  class CoTBashBrowserWebArenaSolver (line 98) | class CoTBashBrowserWebArenaSolver(BrowserWebArenaSolver):
    method __call__ (line 99) | def __call__(
    method name (line 122) | def name(self) -> str:

FILE: evals/elsuite/multistep_web_tasks/utils.py
  class MWTTaskState (line 19) | class MWTTaskState(TaskState):
  function load_experiment_config_from_file (line 28) | def load_experiment_config_from_file(experiment_config_path: str) -> Exp...
  function load_experiment_config_from_dict (line 34) | def load_experiment_config_from_dict(experiment_config_dict: dict[str, A...

FILE: evals/elsuite/multistep_web_tasks/webarena/bash_browser_env/bash_browser_env.py
  class BashBrowserEnv (line 24) | class BashBrowserEnv(LLMAgentEnv):
    method __init__ (line 31) | def __init__(
    method page (line 65) | def page(self):
    method reset (line 69) | def reset(
    method setup (line 84) | def setup(self, experiment_config: BashBrowserExperimentConfig) -> None:
    method step (line 91) | def step(self, action: Union[BashAction, BrowserAction]) -> BashBrowse...
    method parse_action_string (line 100) | def parse_action_string(self, action_string: str) -> Union[BashAction,...
    method close (line 113) | def close(self):

FILE: evals/elsuite/multistep_web_tasks/webarena/bash_env/actions.py
  class BashAction (line 9) | class BashAction(Action):
  class BashCommandAction (line 14) | class BashCommandAction(BashAction):
  class BashStopAction (line 20) | class BashStopAction(BashAction):
  function bash_is_equivalent (line 26) | def bash_is_equivalent(a_action: BashAction, b_action: BashAction) -> bool:

FILE: evals/elsuite/multistep_web_tasks/webarena/bash_env/bash_utils.py
  class BashObservation (line 7) | class BashObservation(Observation):
    method data (line 11) | def data(self) -> str:
  class BashEnvOutput (line 16) | class BashEnvOutput(EnvOutput):

FILE: evals/elsuite/multistep_web_tasks/webarena/bash_env/basic_bash_env.py
  class BashEnv (line 27) | class BashEnv(LLMAgentEnv):
    method __init__ (line 28) | def __init__(
    method reset (line 41) | def reset(self, experiment_config: Optional[BashExperimentConfig] = No...
    method _create_container_wrapper (line 63) | def _create_container_wrapper(self, session: Session) -> "BashContaine...
    method setup (line 70) | def setup(self, experiment_config: BashExperimentConfig) -> None:
    method step (line 82) | def step(self, action: BashAction) -> BashEnvOutput:
    method parse_action_string (line 106) | def parse_action_string(self, action_string: str) -> BashAction:
    method close (line 139) | def close(self):
  class BashContainerWrapper (line 143) | class BashContainerWrapper:
    method __init__ (line 144) | def __init__(
    method _setup (line 157) | def _setup(self, container):
    method run_command (line 170) | def run_command(self, command: str) -> str:
    method run_commands (line 180) | def run_commands(self, commands: list[str]) -> list[str]:
    method _wrap_command (line 187) | def _wrap_command(self, command: str) -> str:
    method shutdown (line 194) | def shutdown(self):

FILE: evals/elsuite/multistep_web_tasks/webarena/browser_env/actions.py
  class ParsedPlaywrightCode (line 56) | class ParsedPlaywrightCode(TypedDict):
  function is_in_viewport (line 63) | def is_in_viewport(element: Locator, viewport: ViewportSize, threshold: ...
  function async_is_in_viewport (line 81) | async def async_is_in_viewport(
  class BrowserActionDict (line 99) | class BrowserActionDict(TypedDict):
  class BrowserAction (line 117) | class BrowserAction(Action):
  function action2str (line 122) | def action2str(
  function action2create_function (line 188) | def action2create_function(action: BrowserAction) -> str:
  class BrowserActionTypes (line 263) | class BrowserActionTypes(IntEnum):
    method __str__ (line 295) | def __str__(self) -> str:
  function is_equivalent (line 300) | def is_equivalent(action: Action, other_action: Action) -> bool:
  function bash_is_equivalent (line 319) | def bash_is_equivalent(action: BashAction, other_action: BashAction) -> ...
  function browser_is_equivalent (line 330) | def browser_is_equivalent(a_action: BrowserAction, b_action: BrowserActi...
  function _keys2ids (line 394) | def _keys2ids(keys: Union[list[Union[int, str]], str]) -> list[int]:
  function get_action_space (line 403) | def get_action_space() -> spaces.Dict:
  function create_random_action (line 435) | def create_random_action() -> BrowserAction:
  function create_none_action (line 473) | def create_none_action() -> BrowserAction:
  function create_stop_action (line 499) | def create_stop_action(answer: str) -> BrowserAction:
  function create_scroll_action (line 507) | def create_scroll_action(direction: str) -> BrowserAction:
  function create_mouse_hover_action (line 521) | def create_mouse_hover_action(
  function create_key_press_action (line 536) | def create_key_press_action(key_comb: str) -> BrowserAction:
  function create_page_focus_action (line 559) | def create_page_focus_action(page_number: int) -> BrowserAction:
  function create_new_tab_action (line 572) | def create_new_tab_action() -> BrowserAction:
  function create_go_back_action (line 584) | def create_go_back_action() -> BrowserAction:
  function create_go_forward_action (line 596) | def create_go_forward_action() -> BrowserAction:
  function create_goto_url_action (line 608) | def create_goto_url_action(url: str) -> BrowserAction:
  function create_page_close_action (line 621) | def create_page_close_action() -> BrowserAction:
  function create_mouse_click_action (line 633) | def create_mouse_click_action(
  function create_keyboard_type_action (line 657) | def create_keyboard_type_action(keys: Union[list[Union[int, str]], str])...
  function create_click_action (line 670) | def create_click_action(
  function create_hover_action (line 692) | def create_hover_action(
  function create_type_action (line 714) | def create_type_action(
  function create_check_action (line 738) | def create_check_action(pw_code: str) -> BrowserAction:
  function create_select_option_action (line 750) | def create_select_option_action(
  function create_focus_action (line 764) | def create_focus_action(
  function create_focus_and_click_action (line 783) | def create_focus_and_click_action(
  function create_focus_and_type_action (line 803) | def create_focus_and_type_action(
  function execute_scroll (line 826) | def execute_scroll(direction: str, page: PageForwarder) -> None:
  function execute_key_press (line 840) | def execute_key_press(key: str, page: PageForwarder) -> None:
  function execute_mouse_hover (line 848) | def execute_mouse_hover(left: float, top: float, page: PageForwarder) ->...
  function execute_mouse_click (line 855) | def execute_mouse_click(left: float, top: float, page: PageForwarder) ->...
  function execute_keyboard_type (line 863) | def execute_keyboard_type(text: str, page: PageForwarder) -> None:
  function execute_click_current (line 869) | def execute_click_current(page: PageForwarder) -> None:
  function execute_type (line 875) | def execute_type(keys: list[int], page: PageForwarder) -> None:
  function execute_focus (line 882) | def execute_focus(element_role: int, element_name: str, nth: int, page: ...
  function locate (line 888) | def locate(locator_calls: list[ParsedPlaywrightCode], page: PageForwarde...
  function execute_playwright_click (line 899) | def execute_playwright_click(
  function execute_playwright_hover (line 912) | def execute_playwright_hover(locator_code: list[ParsedPlaywrightCode], p...
  function execute_playwright_type (line 920) | def execute_playwright_type(
  function execute_playwright_select_option (line 934) | def execute_playwright_select_option(
  function execute_playwright_check (line 946) | def execute_playwright_check(locator_code: list[ParsedPlaywrightCode], p...
  function execute_action (line 953) | def execute_action(
  function parse_playwright_code (line 1060) | def parse_playwright_code(code: str) -> list[ParsedPlaywrightCode]:
  class ActionParsingError (line 1112) | class ActionParsingError(Exception):
    method __init__ (line 1113) | def __init__(self, message: str) -> None:
  function create_playwright_action (line 1119) | def create_playwright_action(playwright_code: str) -> BrowserAction:
  function create_id_based_action (line 1188) | def create_id_based_action(action_str: str) -> BrowserAction:

FILE: evals/elsuite/multistep_web_tasks/webarena/browser_env/auto_login.py
  function is_expired (line 23) | def is_expired(storage_state: Path, url: str, keyword: str, url_exact: b...
  function renew_comb (line 47) | def renew_comb(comb: list[str]) -> None:
  function main (line 94) | def main() -> None:

FILE: evals/elsuite/multistep_web_tasks/webarena/browser_env/basic_browser_env.py
  class BrowserEnv (line 36) | class BrowserEnv(LLMAgentEnv):
    method __init__ (line 49) | def __init__(
    method sync_playwright_api (line 91) | def sync_playwright_api(self, experiment_config: BrowserExperimentConf...
    method setup (line 112) | def setup(self, experiment_config: BrowserExperimentConfig) -> None:
    method parse_action_string (line 119) | def parse_action_string(self, action_string: str) -> BrowserAction:
    method get_page_client (line 126) | def get_page_client(self, page: PageForwarder) -> ClientForwarder:
    method _get_obs (line 130) | def _get_obs(self) -> BrowserObservation:
    method _get_obs_metadata (line 135) | def _get_obs_metadata(self) -> dict[str, ObservationMetadata]:
    method reset (line 140) | def reset(
    method save_trace (line 184) | def save_trace(self, trace_path: Union[str, Path]) -> None:
    method close (line 188) | def close(self) -> None:
    method step (line 193) | def step(self, action: BrowserAction) -> BrowserEnvOutput:

FILE: evals/elsuite/multistep_web_tasks/webarena/browser_env/browser_utils.py
  class DetachedPage (line 14) | class DetachedPage:
  function png_bytes_to_numpy (line 20) | def png_bytes_to_numpy(png: bytes) -> npt.NDArray[np.uint8]:
  class AccessibilityTreeNode (line 31) | class AccessibilityTreeNode(TypedDict):
  class BrowserWindowConfig (line 47) | class BrowserWindowConfig(TypedDict):
  class PageInfo (line 58) | class PageInfo(Info):
  class BrowserState (line 64) | class BrowserState(TypedDict):
  class BrowserObservation (line 73) | class BrowserObservation(Observation):
    method data (line 79) | def data(self):
    method __repr__ (line 82) | def __repr__(self):
  class HtmlBrowserObservation (line 87) | class HtmlBrowserObservation(BrowserObservation):
    method data (line 89) | def data(self):
  class AccTreeBrowserObservation (line 94) | class AccTreeBrowserObservation(BrowserObservation):
    method data (line 96) | def data(self):
  class ImageBrowserObservation (line 101) | class ImageBrowserObservation(BrowserObservation):
    method data (line 103) | def data(self):
  class BrowserEnvOutput (line 108) | class BrowserEnvOutput(EnvOutput):

FILE: evals/elsuite/multistep_web_tasks/webarena/browser_env/helper_functions.py
  function get_render_action (line 38) | def get_render_action(
  function get_action_description (line 63) | def get_action_description(
  class RenderHelper (line 105) | class RenderHelper(object):
    method __init__ (line 108) | def __init__(self, config_file: str, result_dir: str, action_set_tag: ...
    method render (line 126) | def render(
    method close (line 180) | def close(self) -> None:

FILE: evals/elsuite/multistep_web_tasks/webarena/browser_env/processors.py
  class ObservationProcessor (line 30) | class ObservationProcessor:
    method process (line 31) | def process(self, page: Page, client: CDPSession) -> Observation:
  class ObservationMetadata (line 35) | class ObservationMetadata(TypedDict):
  function create_empty_metadata (line 39) | def create_empty_metadata() -> ObservationMetadata:
  class TextObervationProcessor (line 45) | class TextObervationProcessor(ObservationProcessor):
    method __init__ (line 46) | def __init__(
    method fetch_browser_info (line 59) | def fetch_browser_info(
    method partially_in_viewport (line 110) | def partially_in_viewport(bound: list[float], config: BrowserWindowCon...
    method retrieve_viewport_info (line 127) | def retrieve_viewport_info(self, info: BrowserState) -> None:
    method current_viewport_html (line 204) | def current_viewport_html(self, info: BrowserState) -> str:
    method fetch_page_accessibility_tree (line 268) | def fetch_page_accessibility_tree(
    method current_viewport_accessibility_tree (line 361) | def current_viewport_accessibility_tree(
    method parse_accessibility_tree (line 392) | def parse_accessibility_tree(
    method clean_accesibility_tree (line 482) | def clean_accesibility_tree(tree_str: str) -> str:
    method process (line 501) | def process(self, page: PageForwarder, client: ClientForwarder) -> dic...
    method get_element_center (line 550) | def get_element_center(self, element_id: str) -> tuple[float, float]:
  class ImageObservationProcessor (line 567) | class ImageObservationProcessor(ObservationProcessor):
    method __init__ (line 568) | def __init__(self, observation_type: str):
    method process (line 573) | def process(self, page: PageForwarder, client: ClientForwarder) -> npt...
  class ObservationHandler (line 577) | class ObservationHandler:
    method __init__ (line 580) | def __init__(
    method get_observation_space (line 596) | def get_observation_space(self) -> type[BrowserObservation]:
    method get_observation (line 600) | def get_observation(self, page: PageForwarder, client: ClientForwarder...
    method get_observation_metadata (line 612) | def get_observation_metadata(self) -> dict[str, ObservationMetadata]:
    method action_processor (line 619) | def action_processor(self) -> ObservationProcessor:

FILE: evals/elsuite/multistep_web_tasks/webarena/core/env.py
  class Action (line 20) | class Action(ABC):
  class ParsingErrorAction (line 30) | class ParsingErrorAction(Action):
  class Observation (line 37) | class Observation(ABC):
    method data (line 39) | def data(self) -> Any:
  class DummyObservation (line 45) | class DummyObservation(Observation):
    method data (line 46) | def data(self) -> Any:
  class Info (line 50) | class Info(ABC):
  class EnvOutput (line 55) | class EnvOutput:
  class TrajectoryStep (line 66) | class TrajectoryStep(NamedTuple):
  class Trajectory (line 71) | class Trajectory(list[TrajectoryStep]):
    method __init__ (line 74) | def __init__(self, iterable: list[TrajectoryStep]):
    method pretty_string (line 78) | def pretty_string(self) -> str:
  class LLMAgentEnv (line 96) | class LLMAgentEnv(ABC, Env[Observation, Action]):
    method reset (line 102) | def reset(
    method step (line 112) | def step(self, action: Action) -> EnvOutput:
    method parse_action_string (line 116) | def parse_action_string(self, action_string: str) -> Action:
    method close (line 120) | def close(self) -> None:
  class ExperimentResult (line 125) | class ExperimentResult(ABC):

FILE: evals/elsuite/multistep_web_tasks/webarena/core/playwright_api.py
  class Forwarder (line 35) | class Forwarder(ABC):
    method __init__ (line 39) | def __init__(self, container: docker.models.containers.Container) -> N...
    method execute_command (line 43) | def execute_command(self, command: str, n_allowed_attempts: int = 1) -...
    method make_request (line 51) | def make_request(
    method _double_quotes_to_single_quotes (line 92) | def _double_quotes_to_single_quotes(self, expression: str) -> str:
    method _escape_newlines (line 101) | def _escape_newlines(self, command: str) -> str:
    method server_url_to_client_url (line 105) | def server_url_to_client_url(self, server_url: str) -> str:
    method client_url_to_server_url (line 135) | def client_url_to_server_url(self, client_url: str) -> str:
    method _process_response (line 158) | def _process_response(self, output: dict) -> Optional[dict]:
    method _escape_quotes_in_json_string (line 164) | def _escape_quotes_in_json_string(self, json_string: str) -> str:
  class PageForwarder (line 168) | class PageForwarder(Forwarder):
    method __init__ (line 171) | def __init__(
    method url (line 184) | def url(self) -> str:
    method setup (line 190) | def setup(self) -> None:
    method shutdown (line 198) | def shutdown(self) -> None:
    method content (line 206) | def content(self) -> str:
    method goto (line 213) | def goto(self, url: str) -> None:
    method title (line 232) | def title(self) -> str:
    method evaluate (line 238) | def evaluate(self, expression: str) -> str:
    method go_back (line 245) | def go_back(self) -> None:
    method go_forward (line 249) | def go_forward(self) -> None:
    method fetch_domtree (line 253) | def fetch_domtree(self) -> dict:
    method fetch_browser_window_config (line 275) | def fetch_browser_window_config(self) -> BrowserWindowConfig:
    method fetch_browser_info (line 310) | def fetch_browser_info(self) -> BrowserState:
    method wait_for_load_state (line 315) | def wait_for_load_state(self, state: str, timeout: int = 500) -> None:
    method wait_for_event (line 323) | def wait_for_event(self, event: str, timeout: int = 500) -> None:
  class ClientForwarder (line 331) | class ClientForwarder(Forwarder):
    method __init__ (line 334) | def __init__(self, page: PageForwarder) -> None:
    method send (line 338) | def send(self, method: str, params: dict) -> dict:
  class MouseForwarder (line 346) | class MouseForwarder(Forwarder):
    method __init__ (line 347) | def __init__(self, page: PageForwarder) -> None:
    method click (line 351) | def click(self, x: float, y: float) -> None:
    method move (line 355) | def move(self, x: float, y: float) -> None:
  class KeyboardForwarder (line 360) | class KeyboardForwarder(Forwarder):
    method __init__ (line 361) | def __init__(self, page: PageForwarder) -> None:
    method type (line 365) | def type(self, text: str) -> None:
    method press (line 371) | def press(self, key: str) -> None:
  class ViewportSize (line 376) | class ViewportSize(TypedDict):

FILE: evals/elsuite/multistep_web_tasks/webarena/core/utils.py
  class EarlyStopConfig (line 7) | class EarlyStopConfig:
  class ProgramHTML (line 13) | class ProgramHTML(TypedDict):
  class ReferenceAnswers (line 19) | class ReferenceAnswers(TypedDict):
  class EvaluatorConfig (line 26) | class EvaluatorConfig:
  class ExperimentConfig (line 37) | class ExperimentConfig(ABC):
  class BashExperimentConfig (line 44) | class BashExperimentConfig(ExperimentConfig):
    method from_dict (line 53) | def from_dict(cls, data: dict) -> "BashExperimentConfig":
    method to_dict (line 62) | def to_dict(self) -> dict:
  class BrowserExperimentConfig (line 73) | class BrowserExperimentConfig(ExperimentConfig):
    method from_dict (line 100) | def from_dict(cls, data: dict[str, Any]) -> "BrowserExperimentConfig":
    method to_dict (line 117) | def to_dict(self) -> dict[str, Any]:
  class BashBrowserExperimentConfig (line 136) | class BashBrowserExperimentConfig(ExperimentConfig):
    method to_separate_configs (line 165) | def to_separate_configs(self) -> tuple[BashExperimentConfig, BrowserEx...
    method from_dict (line 191) | def from_dict(cls, data: dict[str, Any]) -> "BashBrowserExperimentConf...
    method to_dict (line 208) | def to_dict(self) -> dict[str, Any]:

FILE: evals/elsuite/multistep_web_tasks/webarena/eval_run.py
  function config (line 45) | def config() -> argparse.Namespace:
  function run_experiment (line 111) | def run_experiment(
  function generate_trajectory (line 127) | def generate_trajectory(
  function _task_state_from_trajectory (line 164) | def _task_state_from_trajectory(
  function _messages_from_trajectory (line 202) | def _messages_from_trajectory(trajectory: Trajectory) -> list[Message]:
  function _episode_should_continue (line 218) | def _episode_should_continue(trajectory: Trajectory, early_stop_config: ...
  function evaluate_trajectory (line 236) | def evaluate_trajectory(
  function record_result (line 255) | def record_result(
  function setup_env (line 266) | def setup_env(
  function setup_browser_env (line 283) | def setup_browser_env(
  function setup_bash_env (line 303) | def setup_bash_env(
  function setup_bash_browser_env (line 311) | def setup_bash_browser_env(
  function should_early_stop (line 332) | def should_early_stop(trajectory: Trajectory, es_config: EarlyStopConfig...
  function _check_repeated_equivalent_actions (line 348) | def _check_repeated_equivalent_actions(trajectory: Trajectory, repeating...

FILE: evals/elsuite/multistep_web_tasks/webarena/evaluation_harness/evaluators.py
  class Evaluator (line 40) | class Evaluator(object):
    method __init__ (line 41) | def __init__(self, eval_tag: str = "") -> None:
    method __call__ (line 44) | def __call__(
    method get_last_action (line 53) | def get_last_action(trajectory: Trajectory) -> Action:
    method get_last_state (line 66) | def get_last_state(trajectory: Trajectory) -> EnvOutput:
  function get_answer_from_action (line 78) | def get_answer_from_action(action: Action) -> str:
  class StringEvaluator (line 91) | class StringEvaluator(Evaluator):
    method __call__ (line 98) | def __call__(
  class StringSoftEvaluator (line 137) | class StringSoftEvaluator(Evaluator):
    method __call__ (line 140) | def __call__(
  class BrowserEvaluator (line 155) | class BrowserEvaluator(Evaluator):
  class URLExactEvaluator (line 160) | class URLExactEvaluator(BrowserEvaluator):
    method __call__ (line 163) | def __call__(
  class HTMLContentExactEvaluator (line 202) | class HTMLContentExactEvaluator(BrowserEvaluator):
    method __call__ (line 205) | def __call__(
  class EvaluatorComb (line 263) | class EvaluatorComb(ABC):
    method __init__ (line 264) | def __init__(self, evaluators: list[Evaluator]) -> None:
    method __call__ (line 267) | def __call__(
  class BrowserEvaluatorComb (line 281) | class BrowserEvaluatorComb(EvaluatorComb):
    method __init__ (line 282) | def __init__(self, evaluators: list[BrowserEvaluator]) -> None:
  function evaluator_router (line 287) | def evaluator_router(experiment_config: ExperimentConfig) -> EvaluatorComb:
  function browser_evaluator_router (line 302) | def browser_evaluator_router(
  function bash_evaluator_router (line 323) | def bash_evaluator_router(
  function bash_browser_evaluator_router (line 339) | def bash_browser_evaluator_router(

FILE: evals/elsuite/multistep_web_tasks/webarena/evaluation_harness/helper_functions.py
  function shopping_get_auth_token (line 15) | def shopping_get_auth_token() -> str:
  function shopping_get_latest_order_url (line 31) | def shopping_get_latest_order_url() -> str:
  function shopping_get_sku_latest_review_author (line 54) | def shopping_get_sku_latest_review_author(sku: str) -> str:
  function shopping_get_sku_latest_review_rating (line 70) | def shopping_get_sku_latest_review_rating(sku: str) -> str:
  function reddit_get_post_url (line 87) | def reddit_get_post_url(url: str) -> str:
  function gitlab_get_project_memeber_role (line 106) | def gitlab_get_project_memeber_role(page: Page, account_name: str) -> str:
  function llm_fuzzy_match (line 138) | def llm_fuzzy_match(pred: str, reference: str, question: str) -> float:

FILE: evals/elsuite/sandbagging/mmlu_eval.py
  class MMLU (line 14) | class MMLU(SolverEval):
    method __init__ (line 15) | def __init__(
    method eval_sample (line 27) | def eval_sample(
    method run (line 64) | def run(self, recorder: evals.record.Recorder):

FILE: evals/elsuite/sandbagging/sandbagging_eval.py
  class Sandbagging (line 16) | class Sandbagging(SolverEval):
    method __init__ (line 22) | def __init__(
    method eval_sample (line 46) | def eval_sample(self, solver: Solver, sample: Dict[str, Any], rng: ran...
    method run (line 56) | def run(self, recorder: evals.record.Recorder):

FILE: evals/elsuite/sandbagging/scripts/consistency_plots.py
  function load_jsonl_log (line 11) | def load_jsonl_log(path):
  function get_questions (line 33) | def get_questions(files: Sequence[Path]) -> dict:
  function get_answers_for_each_question (line 54) | def get_answers_for_each_question(files: Sequence[Path], questions: dict):
  function create_total_no_correct_hist (line 79) | def create_total_no_correct_hist(files: Sequence[Path], questions: dict,...
  function calculate_consistency (line 96) | def calculate_consistency(files: Sequence[Path]):
  function main (line 124) | def main():

FILE: evals/elsuite/sandbagging/scripts/sandbagging_all_plots.py
  function main (line 7) | def main():

FILE: evals/elsuite/sandbagging/scripts/sandbagging_subset_plots.py
  function main (line 7) | def main():

FILE: evals/elsuite/sandbagging/scripts/utils.py
  function extract_results (line 12) | def extract_results(datadir: Path) -> pd.DataFrame:
  function compute_all_model_scores (line 29) | def compute_all_model_scores(model_log: dict, thresh_targets: bool) -> f...
  function make_sandbagging_plot (line 51) | def make_sandbagging_plot(

FILE: evals/elsuite/sandbagging/solvers.py
  class StrategySolver (line 26) | class StrategySolver(Solver):
    method __init__ (line 32) | def __init__(
    method name (line 48) | def name(self) -> str:
    method _predict_answer (line 52) | def _predict_answer(self, prompt: Sequence[Dict], **kwargs) -> str:
    method _produce_scratchpad (line 56) | def _produce_scratchpad(self, prompt: Sequence[Dict]):
    method modify_target (line 62) | def modify_target(self, target: int):
    method _add_ans_to_memory (line 70) | def _add_ans_to_memory(self, prompt: Sequence[Dict], response: str):
    method _construct_prompt (line 82) | def _construct_prompt(self, task_state: TaskState) -> Sequence[Dict]:
    method _solve (line 129) | def _solve(self, task_state: TaskState, **kwargs) -> (Sequence[Dict], ...
  class LMSolver (line 140) | class LMSolver(StrategySolver):
    method __init__ (line 141) | def __init__(
    method name (line 167) | def name(self) -> str:
    method _produce_scratchpad (line 170) | def _produce_scratchpad(self, prompt: Sequence[Dict]):
    method _predict_answer (line 176) | def _predict_answer(self, prompt: Sequence[Dict], **kwargs) -> str:
  class CLISolver (line 185) | class CLISolver(StrategySolver):
    method __init__ (line 190) | def __init__(
    method name (line 208) | def name(self) -> str:
    method _produce_scratchpad (line 211) | def _produce_scratchpad(self, prompt: Sequence[Dict]):
    method _predict_answer (line 217) | def _predict_answer(self, prompt: Sequence[Dict], **kwargs) -> str:

FILE: evals/elsuite/sandbagging/utils.py
  function get_violation_events (line 7) | def get_violation_events(events: Sequence[Event], valid_samples: Sequenc...
  function record_mmlu_sample (line 20) | def record_mmlu_sample(
  function get_difficulty (line 51) | def get_difficulty(events: Sequence[Event]) -> float:
  function compute_sandbagging_score (line 64) | def compute_sandbagging_score(

FILE: evals/elsuite/schelling_point/eval.py
  class SchellingPoint (line 14) | class SchellingPoint(Eval):
    method __init__ (line 15) | def __init__(
    method eval_sample (line 42) | def eval_sample(self, sample: Any, *_):
    method run (line 78) | def run(self, recorder: evals.record.Recorder) -> dict[str, Union[floa...

FILE: evals/elsuite/schelling_point/utils.py
  function replace_last (line 10) | def replace_last(s: str, old: str, new: str) -> str:
  function get_response (line 15) | def get_response(

FILE: evals/elsuite/self_prompting/eval.py
  class SelfPrompting (line 22) | class SelfPrompting(SolverEval):
    method __init__ (line 23) | def __init__(
    method eval_sample (line 59) | def eval_sample(self, solver: Solver, sample: Any, rng: random.Random):
    method _run_prompting (line 67) | def _run_prompting(self, solver: Solver, sample: Any, *_):
    method _run_tasking (line 97) | def _run_tasking(self, sample: Any, *_):
    method _calculate_improvement_wrt_baseline (line 121) | def _calculate_improvement_wrt_baseline(
    method run (line 180) | def run(self, recorder: evals.record.Recorder) -> dict[str, Union[floa...

FILE: evals/elsuite/self_prompting/scripts/make_plots.py
  function extract_metrics (line 13) | def extract_metrics(datadir: Path) -> pd.DataFrame:
  function make_plot (line 44) | def make_plot(df: pd.DataFrame, outpath: Path, metric="exact"):
  function main (line 79) | def main():

FILE: evals/elsuite/self_prompting/solvers/baselines.py
  class BaselineNoPromptSolver (line 5) | class BaselineNoPromptSolver(Solver):
    method __init__ (line 6) | def __init__(
    method _solve (line 14) | def _solve(
    method name (line 22) | def name(self) -> str:
  class BaselineOriginalPromptSolver (line 26) | class BaselineOriginalPromptSolver(Solver):
    method __init__ (line 27) | def __init__(
    method _solve (line 35) | def _solve(
    method name (line 44) | def name(self) -> str:
  class BaselineFewShotSolver (line 48) | class BaselineFewShotSolver(Solver):
    method __init__ (line 49) | def __init__(
    method _solve (line 57) | def _solve(
    method name (line 69) | def name(self) -> str:

FILE: evals/elsuite/self_prompting/solvers/custom_cot_solver.py
  class CustomCoTSolver (line 14) | class CustomCoTSolver(OpenAISolver):
    method __init__ (line 15) | def __init__(
    method _solve (line 39) | def _solve(
    method name (line 69) | def name(self) -> str:

FILE: evals/elsuite/skill_acquisition/eval.py
  class SkillAcquisition (line 39) | class SkillAcquisition(SolverEval):
    method __init__ (line 40) | def __init__(
    method eval_sample (line 69) | def eval_sample(self, solver: Solver, sample: Dict, rng: random.Random...
    method _eval_non_retrieval_sample (line 93) | def _eval_non_retrieval_sample(self, solver: Solver, sample: Dict, *_)...
    method _eval_retrieval_sample (line 133) | def _eval_retrieval_sample(self, solver: Solver, sample: Dict, *_) -> ...
    method run (line 189) | def run(self, recorder: evals.record.Recorder) -> dict[str, Union[floa...
    method _view_content (line 248) | def _view_content(
    method _conversation_loop (line 302) | def _conversation_loop(

FILE: evals/elsuite/skill_acquisition/scraping/scrape_distractor_articles.py
  function clean_soup (line 20) | def clean_soup(content):
  function clean_heading_text (line 35) | def clean_heading_text(

FILE: evals/elsuite/skill_acquisition/scraping/scrape_miskito.py
  function process_practice_section_div (line 15) | def process_practice_section_div(practice_div: bs4.element.Tag):
  function extract_toc_sections (line 34) | def extract_toc_sections(content: bs4.element.Tag):
  function process_miskito_page (line 43) | def process_miskito_page():

FILE: evals/elsuite/skill_acquisition/scripts/make_plots.py
  function extract_metrics (line 32) | def extract_metrics(datadir: Path) -> pd.DataFrame:
  function make_plot (line 47) | def make_plot(
  function make_side_bar_plot (line 82) | def make_side_bar_plot(

FILE: evals/elsuite/skill_acquisition/solvers.py
  class SkillAcquisitionAssistantsSolver (line 6) | class SkillAcquisitionAssistantsSolver(OpenAIAssistantsSolver):
    method _solve (line 7) | def _solve(

FILE: evals/elsuite/skill_acquisition/test_skill_acquisition.py
  function test_answer_detected (line 27) | def test_answer_detected():
  function test_view_instruction_detected (line 36) | def test_view_instruction_detected():
  function test_process_answer (line 49) | def test_process_answer():
  function test_process_view_instruction (line 59) | def test_process_view_instruction():
  function test_process_view_instruction_spaces_and_quotes (line 85) | def test_process_view_instruction_spaces_and_quotes():
  function test_view_content (line 96) | def test_view_content():

FILE: evals/elsuite/skill_acquisition/utils.py
  function answer_detected (line 39) | def answer_detected(output: str) -> bool:
  function view_instruction_detected (line 43) | def view_instruction_detected(output: str) -> bool:
  function process_answer (line 47) | def process_answer(output: str) -> str:
  function process_view_instruction (line 73) | def process_view_instruction(output: str) -> Union[tuple[str, str], tupl...
  function _get_average_metric (line 104) | def _get_average_metric(
  function get_bootstrap_accuracy_std (line 115) | def get_bootstrap_accuracy_std(results: List[Dict[str, str]], num_sample...
  function render_intermediate_prompt (line 121) | def render_intermediate_prompt(sections_viewed: Dict[str, Set]) -> str:
  function get_question_type (line 129) | def get_question_type(question: str) -> str:
  function get_average_bleu_score (line 133) | def get_average_bleu_score(results: List[Dict[str, str]]) -> float:
  function get_bleu_score (line 143) | def get_bleu_score(expected: str, sampled: str) -> float:
  function get_accuracy (line 153) | def get_accuracy(results: List[Dict[str, str]]) -> float:
  function get_average_retrieval_calls (line 160) | def get_average_retrieval_calls(results: List[Dict[str, str]]) -> float:
  function get_average_invalid_retrieval_calls (line 166) | def get_average_invalid_retrieval_calls(results: List[Dict[str, str]]) -...
  function get_average_retrieval_precision (line 172) | def get_average_retrieval_precision(results: List[Dict[str, str]]) -> fl...
  function get_std_of_difference (line 178) | def get_std_of_difference(baseline_std: float, retrieval_std: float) -> ...

FILE: evals/elsuite/solver_tools_convo.py
  class ToolCall (line 15) | class ToolCall:
  class ParsedSolverResult (line 22) | class ParsedSolverResult:
  class RunnerResult (line 28) | class RunnerResult:
  class Runner (line 34) | class Runner:
    method __init__ (line 35) | def __init__(
    method run (line 51) | def run(self) -> RunnerResult:
    method _get_tool_names_and_descriptions (line 102) | def _get_tool_names_and_descriptions(self, tools: list[Tool]):
    method _parse_solver_result (line 112) | def _parse_solver_result(self, solver_result: SolverResult) -> ParsedS...
    method _parse_tool_calls (line 118) | def _parse_tool_calls(self, output: str) -> Optional[list[ToolCall]]:
    method _find_tool_messages (line 136) | def _find_tool_messages(self, text: str) -> list[tuple[str, str]]:
    method _parse_final_answer (line 145) | def _parse_final_answer(self, output: str) -> Optional[str]:
    method _run_tool_call (line 153) | def _run_tool_call(self, tool_call: ToolCall) -> ToolCall:
    method _add_eval_message (line 182) | def _add_eval_message(
    method _add_tool_outputs (line 200) | def _add_tool_outputs(
    method _finish_run (line 215) | def _finish_run(

FILE: evals/elsuite/steganography/eval.py
  class Steganography (line 12) | class Steganography(Eval):
    method __init__ (line 13) | def __init__(
    method eval_sample (line 36) | def eval_sample(self, sample: Any, *_):
    method run (line 68) | def run(self, recorder: RecorderBase):

FILE: evals/elsuite/steganography/monitor.py
  function detect_suspicious_output (line 52) | def detect_suspicious_output(

FILE: evals/elsuite/steganography/prompts.py
  class PromptTemplate (line 4) | class PromptTemplate:
    method __init__ (line 5) | def __init__(self, encode_sys_prompt: str, decode_sys_prompt: str) -> ...

FILE: evals/elsuite/steganography/reconstruction_metrics.py
  function reconstruction_metrics (line 9) | def reconstruction_metrics(original: str, decompressed: str) -> Mapping:
  function semantic_distance (line 28) | def semantic_distance(original: str, decompressed: str) -> float:

FILE: evals/elsuite/steganography/scripts/dataset/complexity_metrics.py
  function calculate_entropy (line 8) | def calculate_entropy(text):
  function calculate_compression_ratio (line 14) | def calculate_compression_ratio(text):
  function calculate_brevity_score (line 20) | def calculate_brevity_score(text):

FILE: evals/elsuite/steganography/scripts/dataset/csv2jsonl.py
  function csv_to_jsonl (line 5) | def csv_to_jsonl(csv_path, jsonl_path):

FILE: evals/elsuite/steganography/scripts/dataset/custom_datasets.py
  class BaseTextDataset (line 16) | class BaseTextDataset:
    method __init__ (line 17) | def __init__(self, seed=0):
    method __len__ (line 23) | def __len__(self):
    method __getitem__ (line 26) | def __getitem__(self, idx):
  class HFTextDataset (line 57) | class HFTextDataset(BaseTextDataset):
    method __init__ (line 58) | def __init__(
  class RandomCharDataset (line 104) | class RandomCharDataset(BaseTextDataset):
    method __init__ (line 105) | def __init__(self, n_samples, seed=0, lengths=[5, 10, 20, 50, 100]):
  class RandomNumberDataset (line 118) | class RandomNumberDataset(BaseTextDataset):
    method __init__ (line 119) | def __init__(self, n_samples, seed=0, lengths=[5, 10, 20, 50, 100]):
  class RandomCharAndNumberDataset (line 131) | class RandomCharAndNumberDataset(BaseTextDataset):
    method __init__ (line 132) | def __init__(self, n_samples, seed=0, lengths=[5, 10, 20, 50, 100]):
  class RandomWordsDataset (line 141) | class RandomWordsDataset(BaseTextDataset):
    method __init__ (line 142) | def __init__(self, n_samples, seed=0, lengths=[5, 10, 20, 50, 100]):
  class BaseTaskDataset (line 160) | class BaseTaskDataset:
    method __init__ (line 161) | def __init__(self, seed=0):
    method __len__ (line 167) | def __len__(self):
    method __getitem__ (line 170) | def __getitem__(self, idx):
  class HFTaskDataset (line 195) | class HFTaskDataset(BaseTaskDataset):
    method __init__ (line 196) | def __init__(

FILE: evals/elsuite/steganography/scripts/dataset/dataset.py
  function make_task_data (line 13) | def make_task_data():
  function make_payload_data (line 61) | def make_payload_data():

FILE: evals/elsuite/steganography/scripts/dataset/utils.py
  function num_tokens_from_messages (line 4) | def num_tokens_from_messages(messages, model="gpt-3.5-turbo-0301"):

FILE: evals/elsuite/steganography/scripts/make_plots.py
  function extract_results (line 11) | def extract_results(datadir: Path) -> pd.DataFrame:
  function make_undetected_acc_plot (line 28) | def make_undetected_acc_plot(df: pd.DataFrame, outpath: Path):
  function make_reconstruction_vs_detectability_plot (line 42) | def make_reconstruction_vs_detectability_plot(df: pd.DataFrame, outpath:...
  function main (line 77) | def main():

FILE: evals/elsuite/steganography/steganography.py
  function run_completion_fn (line 10) | def run_completion_fn(sys_prompt: str, completion_fn: CompletionFn) -> T...
  function run_eval (line 24) | def run_eval(

FILE: evals/elsuite/test/match.py
  class TestMatch (line 4) | class TestMatch(Match):
    method __init__ (line 5) | def __init__(self, *args, **kwargs):
    method get_samples (line 8) | def get_samples(self):

FILE: evals/elsuite/text_compression/compression.py
  function run_completion (line 10) | def run_completion(sample: str, instruction: str, completion_fn: Complet...
  function run_eval (line 27) | def run_eval(

FILE: evals/elsuite/text_compression/eval.py
  class TextCompression (line 12) | class TextCompression(Eval):
    method __init__ (line 13) | def __init__(
    method eval_sample (line 28) | def eval_sample(self, sample: Any, *_):
    method run (line 49) | def run(self, recorder: RecorderBase):

FILE: evals/elsuite/text_compression/prompts.py
  class PromptPair (line 1) | class PromptPair:
    method __init__ (line 2) | def __init__(self, encode_prompt: str, decode_prompt: str) -> None:

FILE: evals/elsuite/text_compression/reconstruction_metrics.py
  function reconstruction_metrics (line 9) | def reconstruction_metrics(original: str, decompressed: str) -> Mapping:
  function semantic_distance (line 28) | def semantic_distance(original: str, decompressed: str) -> float:

FILE: evals/elsuite/text_compression/scripts/dataset/complexity_metrics.py
  function calculate_entropy (line 8) | def calculate_entropy(text):
  function calculate_compression_ratio (line 14) | def calculate_compression_ratio(text):
  function calculate_brevity_score (line 20) | def calculate_brevity_score(text):

FILE: evals/elsuite/text_compression/scripts/dataset/csv2jsonl.py
  function csv_to_jsonl (line 5) | def csv_to_jsonl(csv_path, jsonl_path):

FILE: evals/elsuite/text_compression/scripts/dataset/custom_datasets.py
  class BaseTextDataset (line 16) | class BaseTextDataset:
    method __init__ (line 17) | def __init__(self, seed=0):
    method __len__ (line 23) | def __len__(self):
    method __getitem__ (line 26) | def __getitem__(self, idx):
  class HFTextDataset (line 57) | class HFTextDataset(BaseTextDataset):
    method __init__ (line 58) | def __init__(
  class RandomCharDataset (line 104) | class RandomCharDataset(BaseTextDataset):
    method __init__ (line 105) | def __init__(self, n_samples, seed=0, lengths=[5, 10, 20, 50, 100]):
  class RandomNumberDataset (line 118) | class RandomNumberDataset(BaseTextDataset):
    method __init__ (line 119) | def __init__(self, n_samples, seed=0, lengths=[5, 10, 20, 50, 100]):
  class RandomCharAndNumberDataset (line 131) | class RandomCharAndNumberDataset(BaseTextDataset):
    method __init__ (line 132) | def __init__(self, n_samples, seed=0, lengths=[5, 10, 20, 50, 100]):
  class RandomWordsDataset (line 141) | class RandomWordsDataset(BaseTextDataset):
    method __init__ (line 142) | def __init__(self, n_samples, seed=0, lengths=[5, 10, 20, 50, 100]):

FILE: evals/elsuite/text_compression/scripts/dataset/utils.py
  function num_tokens_from_messages (line 4) | def num_tokens_from_messages(messages, model="gpt-3.5-turbo-0301"):

FILE: evals/elsuite/text_compression/scripts/make_plots.py
  function extract_results (line 10) | def extract_results(datadir: Path) -> pd.DataFrame:
  function make_reconstruction_vs_compression_plot (line 27) | def make_reconstruction_vs_compression_plot(df: pd.DataFrame, outpath: P...
  function main (line 64) | def main():

FILE: evals/elsuite/theory_of_mind/scripts/data_generation.py
  function convert_datapoints_to_eval_dataset (line 23) | def convert_datapoints_to_eval_dataset(datapoints: list) -> list:

FILE: evals/elsuite/theory_of_mind/scripts/make_plots.py
  function main (line 14) | def main():
  function load_tom_results_from_dir (line 26) | def load_tom_results_from_dir(log_dir: Union[str, Path]) -> pd.DataFrame:
  function parse_spec (line 45) | def parse_spec(spec: dict) -> tuple[str, bool, int]:
  function make_plot (line 54) | def make_plot(df, out_dir):

FILE: evals/elsuite/track_the_stat/eval.py
  class TrackTheStat (line 17) | class TrackTheStat(SolverEval):
    method __init__ (line 18) | def __init__(self, task: str, n_samples: Optional[int] = 250, *args, *...
    method eval_sample (line 43) | def eval_sample(self, solver: Solver, sample: Any, rng: random.Random)...
    method _eval_sample (line 49) | def _eval_sample(self, solver: Solver, capped_inf_list: list[int]) -> ...
    method run (line 70) | def run(self, recorder: RecorderBase):
    method _compute_agg_metrics (line 78) | def _compute_agg_metrics(self, logged_metrics: list[dict]) -> dict:
    method _get_samples (line 93) | def _get_samples(self) -> list[dict]:

FILE: evals/elsuite/track_the_stat/scripts/make_plots.py
  function zero_if_none (line 13) | def zero_if_none(input_num):
  function make_results_dict (line 43) | def make_results_dict(log_dir: Path) -> dict:
  function get_model (line 49) | def get_model(spec):
  function get_state_tracking (line 69) | def get_state_tracking(spec):
  function fill_results_dict (line 76) | def fill_results_dict(results_dict, log_dir):
  function prepare_results_dict (line 109) | def prepare_results_dict():
  function make_bar_plot (line 126) | def make_bar_plot(results_dict: dict, task: str, stat: str, save_path: P...
  function count_tokens (line 221) | def count_tokens(log_dir) -> dict[str, dict[str, dict[str, int]]]:
  function main (line 266) | def main(args: argparse.Namespace):

FILE: evals/elsuite/track_the_stat/solvers.py
  class ExplicitStateSolver (line 9) | class ExplicitStateSolver(NestedSolver):
    method __init__ (line 10) | def __init__(
    method underlying_solver (line 21) | def underlying_solver(self) -> Solver:
    method _render_state (line 24) | def _render_state(self, current_state: dict) -> str:
    method _build_message (line 28) | def _build_message(self, task_state: TaskState) -> str:
    method _solve (line 34) | def _solve(self, task_state: TaskState) -> SolverResult:
  class RandomBaselineSolver (line 42) | class RandomBaselineSolver(Solver):
    method __init__ (line 43) | def __init__(self, registry: Any = None, *args, **kwargs):
    method _solve (line 46) | def _solve(self, task_state: TaskState) -> SolverResult:
    method _task_solve (line 52) | def _task_solve(self, task: str, task_state: TaskState) -> str:
    method _mode_solve (line 58) | def _mode_solve(self, task_state: TaskState) -> str:
    method _median_solve (line 66) | def _median_solve(self, task_state: TaskState) -> str:
  class TrackTheStatHuman (line 80) | class TrackTheStatHuman(NestedSolver):
    method __init__ (line 81) | def __init__(self, human_cli_solver: SolverSpec, *args, **kwargs):
    method human_cli_solver (line 85) | def human_cli_solver(self) -> Solver:
    method _solve (line 88) | def _solve(self, task_state: TaskState) -> SolverResult:

FILE: evals/elsuite/track_the_stat/utils.py
  function yellow_string (line 8) | def yellow_string(str: str) -> str:
  function median (line 12) | def median(numbers: list[int]) -> int:
  function mode (line 21) | def mode(numbers: list[int]) -> int:
  function parse_solver_output (line 39) | def parse_solver_output(solver_output: str, task: str) -> Union[int, None]:
  function compute_mode_state (line 56) | def compute_mode_state(curr_list: list[int]) -> dict:
  function compute_median_state (line 61) | def compute_median_state(curr_list: list[int]) -> dict:
  function compute_state (line 66) | def compute_state(curr_list: list[int], task) -> dict:

FILE: evals/elsuite/translate.py
  class Translate (line 11) | class Translate(evals.Eval):
    method __init__ (line 12) | def __init__(
    method eval_sample (line 35) | def eval_sample(self, sample: Any, *_):
    method run (line 69) | def run(self, recorder):

FILE: evals/elsuite/twenty_questions/eval.py
  class TwentyQuestions (line 22) | class TwentyQuestions(SolverEval):
    method __init__ (line 23) | def __init__(
    method eval_sample (line 56) | def eval_sample(self, solver: Solver, sample: Dict, rng: random.Random...
    method run (line 78) | def run(self, recorder: Recorder) -> Dict[str, Union[float, int]]:
    method _conversation_loop (line 111) | def _conversation_loop(

FILE: evals/elsuite/twenty_questions/scripts/make_plots.py
  function extract_metrics (line 48) | def extract_metrics(datadir: Path) -> pd.DataFrame:
  function make_plot (line 67) | def make_plot(df: pd.DataFrame, outpath: Path, metric="score", variant="...

FILE: evals/elsuite/twenty_questions/test_utils.py
  function test_format_msg (line 4) | def test_format_msg():
  function test_format_msgs (line 10) | def test_format_msgs():

FILE: evals/elsuite/twenty_questions/utils.py
  function generate_task_state_for (line 12) | def generate_task_state_for(role: Literal["guesser", "gamemaster"], conv...
  function format_msgs (line 29) | def format_msgs(
  function format_msg (line 42) | def format_msg(msg: Message, role: Literal["guesser", "gamemaster"]) -> ...
  function is_system_msg (line 64) | def is_system_msg(m: Message) -> bool:

FILE: evals/elsuite/utils.py
  function get_answer (line 17) | def get_answer(text, answer_prompt, ignore_case=False):
  function get_consensus (line 28) | def get_consensus(answers):
  function normalize (line 36) | def normalize(s: str) -> str:
  function fuzzy_match (line 46) | def fuzzy_match(s1: str, s2: str) -> bool:
  function get_scores_from_text (line 56) | def get_scores_from_text(text: str) -> dict:
  function get_yesno_from_text (line 62) | def get_yesno_from_text(text: str) -> dict:
  function get_letter_from_data (line 68) | def get_letter_from_data(data: str) -> str:
  function f1_score (line 75) | def f1_score(prediction: str, answers: list[str]) -> float:
  function scrub_formatting_from_prompt (line 91) | def scrub_formatting_from_prompt(prompt):
  function format_necessary (line 103) | def format_necessary(template: str, allow_missing: bool = False, **kwarg...
  function format_prompt (line 119) | def format_prompt(
  class PromptFn (line 144) | class PromptFn:
    method __init__ (line 150) | def __init__(
    method __call__ (line 166) | def __call__(self, **kwargs):

FILE: evals/elsuite/utils_test.py
  function test_normalize (line 14) | def test_normalize(s: str, expected: str):
  function test_fuzzy_match (line 32) | def test_fuzzy_match(s1: str, s2: str, expected: bool):

FILE: evals/eval.py
  function _index_samples (line 30) | def _index_samples(samples: List[Any]) -> List[Tuple[Any, int]]:
  function set_max_samples (line 41) | def set_max_samples(max_samples: int):
  class Eval (line 46) | class Eval(abc.ABC):
    method __init__ (line 56) | def __init__(
    method eval_sample (line 77) | def eval_sample(self, sample: Any, rng: random.Random):
    method completion_fn (line 81) | def completion_fn(self) -> CompletionFn:
    method run (line 86) | def run(self, recorder: RecorderBase) -> Dict[str, float]:
    method async_eval_all_samples (line 90) | async def async_eval_all_samples(
    method eval_all_samples (line 112) | def eval_all_samples(
    method get_samples (line 149) | def get_samples(self):
    method _get_samples_path (line 158) | def _get_samples_path(self) -> Path:
    method _prefix_registry_path (line 161) | def _prefix_registry_path(self, data_path: str) -> Path:
  class SolverEval (line 168) | class SolverEval(Eval):
    method __init__ (line 186) | def __init__(self, *args, **kwargs):
    method eval_sample (line 197) | def eval_sample(self, solver: Solver, sample: Any, rng: random.Random)...
    method eval_all_samples (line 200) | def eval_all_samples(

FILE: evals/formatting.py
  function make_abc (line 8) | def make_abc(answers, *, correct_idx=0, shuffle=True, rng: Optional[rand...

FILE: evals/metrics.py
  function get_accuracy (line 12) | def get_accuracy(events: Sequence[Event]) -> float:
  function get_bootstrap_accuracy_std (line 21) | def get_bootstrap_accuracy_std(events: Sequence[Event], num_samples: int...
  function get_confusion_matrix (line 26) | def get_confusion_matrix(
  function compute_matthew_corr (line 43) | def compute_matthew_corr(confusion_matrix: np.ndarray) -> float:
  function compute_precision (line 52) | def compute_precision(confusion_matrix: np.ndarray, idx: int = 0) -> float:
  function compute_recall (line 56) | def compute_recall(confusion_matrix: np.ndarray, idx: int = 0) -> float:
  function compute_f_score (line 60) | def compute_f_score(confusion_matrix: np.ndarray, idx: int = 0, beta: fl...
  function compute_averaged_f_score (line 66) | def compute_averaged_f_score(

FILE: evals/prompt/base.py
  function chat_prompt_to_text_prompt (line 22) | def chat_prompt_to_text_prompt(
  function text_prompt_to_chat_prompt (line 59) | def text_prompt_to_chat_prompt(prompt: str, role: str = "system") -> Ope...
  class Prompt (line 67) | class Prompt(ABC):
    method to_formatted_prompt (line 74) | def to_formatted_prompt(self):
  function is_chat_prompt (line 81) | def is_chat_prompt(prompt: Prompt) -> bool:
  class CompletionPrompt (line 86) | class CompletionPrompt(Prompt):
    method _render_chat_prompt_as_text (line 93) | def _render_chat_prompt_as_text(self, prompt: OpenAICreateChatPrompt) ...
    method to_formatted_prompt (line 96) | def to_formatted_prompt(self) -> str:
  class ChatCompletionPrompt (line 103) | class ChatCompletionPrompt(Prompt):
    method _render_text_as_chat_prompt (line 112) | def _render_text_as_chat_prompt(self, prompt: str) -> OpenAICreateChat...
    method to_formatted_prompt (line 119) | def to_formatted_prompt(self) -> OpenAICreateChatPrompt:

FILE: evals/record.py
  function default_recorder (line 39) | def default_recorder() -> Optional["RecorderBase"]:
  class Event (line 44) | class Event:
  class RecorderBase (line 54) | class RecorderBase:
    method __init__ (line 75) | def __init__(
    method as_default_recorder (line 91) | def as_default_recorder(self, sample_id: str):
    method current_sample_id (line 98) | def current_sample_id(self) -> Optional[str]:
    method pause (line 101) | def pause(self):
    method unpause (line 107) | def unpause(self):
    method is_paused (line 113) | def is_paused(self, sample_id: str = None):
    method get_events (line 119) | def get_events(self, type: str) -> Sequence[Event]:
    method get_metrics (line 123) | def get_metrics(self):
    method get_scores (line 126) | def get_scores(self, key: str):
    method _create_event (line 129) | def _create_event(self, type, data=None, sample_id=None):
    method _flush_events_internal (line 145) | def _flush_events_internal(self, events_to_write: Sequence[Event]):
    method flush_events (line 148) | def flush_events(self):
    method record_event (line 157) | def record_event(self, type, data=None, sample_id=None):
    method record_match (line 187) | def record_match(self, correct: bool, *, expected=None, picked=None, s...
    method record_embedding (line 202) | def record_embedding(self, prompt, embedding_type, sample_id=None, **e...
    method record_sampling (line 210) | def record_sampling(self, prompt, sampled, sample_id=None, **extra):
    method record_function_call (line 218) | def record_function_call(self, name, arguments, return_value, sample_i...
    method record_cond_logp (line 227) | def record_cond_logp(self, prompt, completion, logp, sample_id=None, *...
    method record_pick_option (line 236) | def record_pick_option(self, prompt, options, picked, sample_id=None, ...
    method record_raw (line 245) | def record_raw(self, data):
    method record_metrics (line 248) | def record_metrics(self, **kwargs):
    method record_error (line 251) | def record_error(self, msg: str, error: Exception, **kwargs):
    method record_extra (line 259) | def record_extra(self, data, sample_id=None):
    method record_final_report (line 262) | def record_final_report(self, final_report: Any):
  function _green (line 266) | def _green(str):
  function _red (line 270) | def _red(str):
  class DummyRecorder (line 274) | class DummyRecorder(RecorderBase):
    method __init__ (line 280) | def __init__(self, run_spec: RunSpec, log: bool = True):
    method record_event (line 284) | def record_event(self, type, data, sample_id=None):
  class LocalRecorder (line 316) | class LocalRecorder(RecorderBase):
    method __init__ (line 322) | def __init__(
    method _flush_events_internal (line 346) | def _flush_events_internal(self, events_to_write: Sequence[Event]):
    method record_final_report (line 367) | def record_final_report(self, final_report: Any):
  class HttpRecorder (line 374) | class HttpRecorder(RecorderBase):
    method __init__ (line 375) | def __init__(
    method _flush_events_internal (line 392) | def _flush_events_internal(self, events_to_write: Sequence[Event]):
    method _send_event (line 405) | def _send_event(self, events: List[Event]):
    method record_final_report (line 446) | def record_final_report(self, final_report: Any):
  class Recorder (line 468) | class Recorder(RecorderBase):
    method __init__ (line 474) | def __init__(
    method _flush_events_internal (line 514) | def _flush_events_internal(self, events_to_write: Sequence[Event]):
    method record_final_report (line 561) | def record_final_report(self, final_report: Any):
    method record_event (line 578) | def record_event(self, type, data=None, sample_id=None):
  function current_sample_id (line 589) | def current_sample_id() -> str:
  function record_match (line 593) | def record_match(correct: bool, *, expected=None, picked=None, **extra):
  function record_embedding (line 597) | def record_embedding(prompt, embedding_type, **extra):
  function record_sampling (line 601) | def record_sampling(prompt, sampled, **extra):
  function record_function_call (line 605) | def record_function_call(name, arguments, return_value, **extra):
  function record_cond_logp (line 609) | def record_cond_logp(prompt, completion, logp, **extra):
  function record_pick_option (line 613) | def record_pick_option(prompt, options, picked, **extra):
  function record_raw (line 617) | def record_raw(data):
  function record_metrics (line 621) | def record_metrics(**extra):
  function record_error (line 625) | def record_error(msg: str, error: Exception = None, **extra):
  function record_extra (line 629) | def record_extra(data):
  function record_event (line 633) | def record_event(type, data=None, sample_id=None):
  function pause (line 637) | def pause():
  function unpause (line 641) | def unpause():

FILE: evals/record_test.py
  function test_passes_hidden_data_field_to_jsondumps (line 8) | def test_passes_hidden_data_field_to_jsondumps() -> None:

FILE: evals/registry.py
  function n_ctx_from_model_name (line 37) | def n_ctx_from_model_name(model_name: str) -> Optional[int]:
  function is_chat_model (line 83) | def is_chat_model(model_name: str) -> bool:
  class Registry (line 103) | class Registry:
    method __init__ (line 104) | def __init__(self, registry_paths: Sequence[Union[str, Path]] = DEFAUL...
    method add_registry_paths (line 107) | def add_registry_paths(self, paths: Sequence[Union[str, Path]]) -> None:
    method api_model_ids (line 111) | def api_model_ids(self) -> list[str]:
    method make_completion_fn (line 120) | def make_completion_fn(
    method get_class (line 153) | def get_class(self, spec: EvalSpec) -> Any:
    method _dereference (line 156) | def _dereference(
    method get_modelgraded_spec (line 193) | def get_modelgraded_spec(self, name: str, **kwargs: dict) -> Optional[...
    method get_completion_fn (line 202) | def get_completion_fn(self, name: str) -> Optional[CompletionFnSpec]:
    method get_solver (line 207) | def get_solver(self, name: str) -> Optional[CompletionFnSpec]:
    method get_eval (line 210) | def get_eval(self, name: str) -> Optional[EvalSpec]:
    method get_eval_set (line 213) | def get_eval_set(self, name: str) -> Optional[EvalSetSpec]:
    method get_evals (line 216) | def get_evals(self, patterns: Sequence[str]) -> Iterator[Optional[Eval...
    method get_base_evals (line 229) | def get_base_evals(self) -> list[Optional[BaseEvalSpec]]:
    method get_base_eval (line 236) | def get_base_eval(self, name: str) -> Optional[BaseEvalSpec]:
    method _load_file (line 251) | def _load_file(self, path: Path) -> Generator[Tuple[str, Path, dict], ...
    method _load_directory (line 262) | def _load_directory(self, path: Path) -> Generator[Tuple[str, Path, di...
    method _load_resources (line 267) | def _load_resources(
    method _validate_reserved_keywords (line 280) | def _validate_reserved_keywords(spec: dict, name: str, path: Path) -> ...
    method _load_registry (line 287) | def _load_registry(self, registry_paths: Sequence[Path], resource_type...
    method _completion_fns (line 313) | def _completion_fns(self) -> RawRegistry:
    method _solvers (line 317) | def _solvers(self) -> RawRegistry:
    method _eval_sets (line 321) | def _eval_sets(self) -> RawRegistry:
    method _evals (line 325) | def _evals(self) -> RawRegistry:
    method _modelgraded_specs (line 329) | def _modelgraded_specs(self) -> RawRegistry:

FILE: evals/registry/data/german-part-of-speech/buildDataDe.py
  function generate_combinations (line 59) | def generate_combinations(words):

FILE: evals/registry/data/hr_ml_agent_bench/parkinsons_disease/dataset/public_timeseries_testing_util.py
  class MockApi (line 13) | class MockApi:
    method __init__ (line 14) | def __init__(self):
    method iter_test (line 36) | def iter_test(self) -> Tuple[pd.DataFrame]:
    method predict (line 77) | def predict(self, user_predictions: pd.DataFrame):
  function make_env (line 92) | def make_env():

FILE: evals/registry/data/mapping_to_matricies/data_generator.py
  function generate_binary_array_and_factors (line 4) | def generate_binary_array_and_factors(N):
  function generate_one_sample_json_string (line 13) | def generate_one_sample_json_string(binary_array_str, dimensions_str, an...
  function write_lines_to_file (line 26) | def write_lines_to_file(min_array_len, max_array_len, filename, max_line...

FILE: evals/registry/data/mazes/nxn_maze_eval_generator.py
  function recursive_backtracker (line 66) | def recursive_backtracker(maze: np.ndarray, pos: Tuple[int, int]) -> None:
  function generate_maze (line 85) | def generate_maze(width: int, height: int) -> np.ndarray:
  function random_outer_pos (line 103) | def random_outer_pos(maze: np.ndarray) -> Tuple[int, int]:
  function generate_start_end (line 123) | def generate_start_end(maze: np.ndarray) -> Tuple[Tuple[int, int], Tuple...
  function build_graph (line 140) | def build_graph(maze: np.ndarray) -> nx.Graph:
  function generate_example_files (line 162) | def generate_example_files(
  function create_move_line (line 251) | def create_move_line(
  function plot_maze (line 327) | def plot_maze(maze: np.ndarray, show=False, save_img=False) -> None:

FILE: evals/registry/data/medmcqa/convert.js
  method transform (line 22) | transform(line, _, done) {

FILE: evals/registry/data/nfl-point-combinations/combinations_generator.py
  function ways_to_score (line 11) | def ways_to_score(n):

FILE: evals/registry/data/points_on_line/eval_generator.py
  function tuple_to_string (line 12) | def tuple_to_string(float_tuple: tuple, n_decimals: int) -> str:
  function comp_float_mul (line 18) | def comp_float_mul(vector: tuple, n_decimals: int, factor: float = 0.1) ...
  function random_divisible_line (line 43) | def random_divisible_line(comp_min: int, comp_max: int, n_decimals: int)...
  function construct_messages (line 92) | def construct_messages(start: tuple, end: tuple) -> list[dict]:
  function assemble_test_format (line 102) | def assemble_test_format(n_samples: int) -> list[dict]:

FILE: evals/registry/data/poker_analysis/poker_analysis_sample_generator.py
  function randomize_num_players_and_community_cards (line 8) | def randomize_num_players_and_community_cards() -> tuple:
  function generate_hands (line 19) | def generate_hands(num_players: int, num_community_cards: int) -> tuple:
  function calculate_probabilities (line 42) | def calculate_probabilities(hole_cards_list: list, community_cards: list...
  function generate_example (line 89) | def generate_example(num_players: int = None, num_community_cards: int =...
  function format_example (line 116) | def format_example(hole_cards: list, community_cards: list, winning_play...
  function save_examples_to_json_file (line 147) | def save_examples_to_json_file(

FILE: evals/registry/data/simple_physics_engine/samples_generator.py
  function create_row (line 46) | def create_row(initial_state, ideal_state):
  function format_wave_as_string (line 60) | def format_wave_as_string(wave):
  function generate_samples (line 69) | def generate_samples():

FILE: evals/registry/data/simple_physics_engine/solver.py
  function solve_diagram (line 17) | def solve_diagram(diagram):

FILE: evals/registry/data/simple_physics_engine/wave_function_collapse.py
  class ContradictionException (line 9) | class ContradictionException(Exception):
  function get_rules (line 31) | def get_rules():
  function init_possibilities (line 86) | def init_possibilities():
  function create_wave_array (line 95) | def create_wave_array(height, width):
  function get_final_state (line 126) | def get_final_state(tile) -> str:
  function calculate_entropy (line 136) | def calculate_entropy(tile):
  function collapse_tile (line 143) | def collapse_tile(tile):
  function find_lowest_entropy_tile (line 155) | def find_lowest_entropy_tile(wave):
  function get_above_tile (line 182) | def get_above_tile(wave, i, j):
  function get_below_tile (line 186) | def get_below_tile(wave, i, j):
  function get_left_tile (line 190) | def get_left_tile(wave, i, j):
  function get_right_tile (line 194) | def get_right_tile(wave, i, j):
  function place_ball (line 198) | def place_ball(wave):
  function generate_collapsed_wave (line 204) | def generate_collapsed_wave(height, width):
  function get_valid_directions (line 222) | def get_valid_directions(wave, coords):
  function get_possible_neighbors_in_direction (line 236) | def get_possible_neighbors_in_direction(tile, direction):
  function propagate (line 246) | def propagate(wave, coords):
  function print_wave (line 272) | def print_wave(wave):

FILE: evals/registry/data/solve-for-variable/tools/main.py
  class Template (line 9) | class Template:
    method load (line 10) | def load(self, fname):
    method write (line 14) | def write(self, fname):
    class Writer (line 17) | class Writer:
      method __init__ (line 18) | def __init__(self, fname, template):
      method __enter__ (line 24) | def __enter__(self):
      method __exit__ (line 28) | def __exit__(self, *args):
      method add_instance (line 31) | def add_instance(self, question, answers, correct):
  function main (line 64) | def main():

FILE: evals/registry/data/solve-for-variable/tools/problem.py
  class MistakesGenerator (line 10) | class MistakesGenerator:
    method generate (line 21) | def generate(self, location):
  class ProblemGenerator (line 61) | class ProblemGenerator:
    method __init__ (line 66) | def __init__(self):
    method _generate (line 70) | def _generate(self):
    method generate (line 116) | def generate(self):
  function main (line 176) | def main():

FILE: evals/registry/data/solve-for-variable/tools/solve.py
  class Num (line 6) | class Num:
    method __init__ (line 11) | def __init__(self, value):
    method clone (line 14) | def clone(self):
    method __str__ (line 17) | def __str__(self):
    method tree (line 20) | def tree(self):
  class Var (line 24) | class Var:
    method __init__ (line 29) | def __init__(self, name):
    method clone (line 32) | def clone(self):
    method var_location (line 35) | def var_location(self, name):
    method __str__ (line 42) | def __str__(self):
    method tree (line 45) | def tree(self):
  class Prio (line 49) | class Prio:
  class Equation (line 59) | class Equation:
    method __init__ (line 65) | def __init__(self, left, right):
    method clone (line 72) | def clone(self):
    method solve (line 81) | def solve(self, location, mistake=None):
    method __str__ (line 168) | def __str__(self):
    method tree (line 171) | def tree(self):
  class Expression (line 178) | class Expression:
    method __init__ (line 183) | def __init__(self, prio, op, *args):
    method clone (line 197) | def clone(self):
    method var_location (line 203) | def var_location(self, name, past=[]):
    method _simplify_minus (line 220) | def _simplify_minus(self):
    method _simplify_in_situ (line 247) | def _simplify_in_situ(self):
    method __str__ (line 276) | def __str__(self):
    method tree (line 282) | def tree(self):
    method _to_string (line 291) | def _to_string(self, pos, parent_prio, parent_assoc):
    method _arg_string (line 317) | def _arg_string(self, pos):
  class EquationGenerator (line 326) | class EquationGenerator:
    method generate (line 329) | def generate(self):
    method _unary (line 382) | def _unary(self, expr):
    method _binary (line 392) | def _binary(self, expr, prio, op):
    method _replace (line 406) | def _replace(self, e, placeholder, klass, choices, i=None):
    method _term (line 427) | def _term(self):
    method _make_var (line 437) | def _make_var(self):
    method _make_num (line 444) | def _make_num(self):

FILE: evals/registry/data/solve-for-variable/tools/tester.py
  class Vars (line 9) | class Vars:
    method __init__ (line 14) | def __init__(self, module):
    method _get_vars (line 27) | def _get_vars(self, tree):
  class ValueGenerator (line 31) | class ValueGenerator:
    method generate (line 41) | def generate(self, rhs_variables):
    method _gen (line 44) | def _gen(self, variables, values):
  class Code (line 61) | class Code:
    method __init__ (line 66) | def __init__(self, expr):
    method _compile (line 70) | def _compile(self, expr):
  class Evaluator (line 74) | class Evaluator:
    method __init__ (line 84) | def __init__(self, eq, answers):
    method _variables (line 97) | def _variables(self):
    method test (line 120) | def test(self):

FILE: evals/registry/data/unsolvable_questions/convert.js
  method transform (line 19) | transform(line, _, done) {

FILE: evals/registry/data/unsolvable_questions/findFailures.js
  method transform (line 19) | transform(line, _, done) {

FILE: evals/registry/data/word_association/corpus_tools/corpus.py
  class Corpus (line 12) | class Corpus(ABC):
    method __init__ (line 18) | def __init__(self, name: str) -> None:
    method _get_corpus (line 23) | def _get_corpus(self) -> List[str]:
    method get_frequency_distribution (line 27) | def get_frequency_distribution(self) -> Dict[str, int]:
    method get_pos_tagged_words (line 31) | def get_pos_tagged_words(self) -> List[Tuple[str, str]]:
    method __len__ (line 35) | def __len__(self) -> int:
    method __getitem__ (line 39) | def __getitem__(self, index: int) -> str:
    method __setitem__ (line 43) | def __setitem__(self, index: int, value: str) -> None:
    method __delitem__ (line 47) | def __delitem__(self, index: int) -> None:
    method __iter__ (line 51) | def __iter__(self) -> Iterator[str]:
    method __contains__ (line 55) | def __contains__(self, word: str) -> bool:
    method __repr__ (line 59) | def __repr__(self) -> str:
  class NltkCorpus (line 64) | class NltkCorpus(Corpus):
    method __init__ (line 73) | def __init__(self, nltk_corpus: str) -> None:
    method _get_corpus (line 81) | def _get_corpus(self) -> List[str]:
    method get_frequency_distribution (line 86) | def get_frequency_distribution(self) -> nltk.FreqDist:
    method get_pos_tagged_words (line 92) | def get_pos_tagged_words(self) -> List[Tuple[str, str]]:

FILE: evals/registry/data/word_association/corpus_tools/pipelines.py
  class CorpusPipeline (line 6) | class CorpusPipeline:
    method __init__ (line 7) | def __init__(self, corpus: Corpus) -> None:
    method add_operation (line 11) | def add_operation(self, operation: Callable[Corpus, ...]) -> "CorpusPi...
    method run (line 16) | def run(self) -> Corpus:

FILE: evals/registry/data/word_association/corpus_tools/processor.py
  class WordCollectionProcessor (line 20) | class WordCollectionProcessor:
    method __init__ (line 29) | def __init__(self, words: Union[Corpus, RelatedWords]) -> None:
    method parts_of_speech_filter (line 32) | def parts_of_speech_filter(self, parts_of_speech: List[str]) -> None:
    method frequency_filter (line 44) | def frequency_filter(
    method char_length_filter (line 63) | def char_length_filter(self, length_bounds: LengthBounds) -> None:
    method sub_word_filter (line 73) | def sub_word_filter(self, subword: str) -> None:
    method str_max_word_count_filter (line 82) | def str_max_word_count_filter(self, max_num_words: int = 1) -> None:
    method __iter__ (line 92) | def __iter__(self) -> Iterator[str]:
    method __len__ (line 96) | def __len__(self) -> int:
    method __getitem__ (line 100) | def __getitem__(self, index: int) -> str:

FILE: evals/registry/data/word_association/corpus_tools/related_words.py
  class RelatedWords (line 17) | class RelatedWords(ABC):
    method __init__ (line 23) | def __init__(self, word: str, **kwargs: Optional[Union[str, int]]) -> ...
    method _get_related_words (line 30) | def _get_related_words(self) -> List[Dict[str, Any]]:
    method get_pos_tagged_words (line 39) | def get_pos_tagged_words(self) -> List[Tuple[str, str]]:
    method __repr__ (line 47) | def __repr__(self) -> str:
    method __len__ (line 54) | def __len__(self) -> int:
    method __getitem__ (line 63) | def __getitem__(self, index: int) -> str:
    method __contains__ (line 69) | def __contains__(self, item: str) -> bool:
    method __iter__ (line 75) | def __iter__(self) -> Generator[str, None, None]:
  class DataMuseRelatedWords (line 83) | class DataMuseRelatedWords(RelatedWords):
    method __init__ (line 125) | def __init__(
    method get_pos_tagged_words (line 134) | def get_pos_tagged_words(self) -> List[Tuple[str, str]]:
    method get_metadata (line 148) | def get_metadata(self, word: str) -> Dict[str, Union[str, int, List[st...
    method _get_related_words (line 166) | def _get_related_words(self) -> List[Dict[str, str]]:
  class GPTGeneratedRelatedWords (line 182) | class GPTGeneratedRelatedWords(RelatedWords):
    method _get_related_words (line 185) | def _get_related_words(self) -> List[Dict[str, Any]]:

FILE: evals/registry/data/word_association/corpus_tools/sample_generators.py
  class IncludesEvalTemplate (line 12) | class IncludesEvalTemplate:
    method create_sample (line 15) | def create_sample(
    method export_to_jsonl (line 33) | def export_to_jsonl(self, filename: str = "samples.jsonl") -> None:
  function generate_additional_choices (line 39) | def generate_additional_choices(
  function generate_word_association_system_message (line 76) | def generate_word_association_system_message(
  function generate_word_association_user_message (line 106) | def generate_word_association_user_message(
  function taboo_clue_guesser_system_message (line 119) | def taboo_clue_guesser_system_message() -> None:
  function taboo_clue_giver_system_message (line 126) | def taboo_clue_giver_system_message() -> None:
  function main (line 134) | def main(

FILE: evals/registry/data/word_association/corpus_tools/validators.py
  class Embedding (line 32) | class Embedding(NamedTuple):
  class RelatedWordsPair (line 39) | class RelatedWordsPair(NamedTuple):
  class EmbeddingPair (line 46) | class EmbeddingPair(NamedTuple):
  class SimilarityTuple (line 53) | class SimilarityTuple(NamedTuple):
  class QualityValidator (line 61) | class QualityValidator(ABC):
    method __init__ (line 64) | def __init__(self, target_score: int) -> None:
    method validate (line 68) | def validate(self, related_words_pair: List[RelatedWordsPair]) -> List...
  class EmbeddingsValidator (line 72) | class EmbeddingsValidator(QualityValidator):
    method validate (line 77) | def validate(
    method calculate_cosine_similarity (line 125) | def calculate_cosine_similarity(vec1: List[float], vec2: List[float]) ...
    method calculate_euclidean_distance (line 143) | def calculate_euclidean_distance(vec1: List[float], vec2: List[float])...
    method get_embeddings (line 162) | def get_embeddings(
  class GPTValidator (line 185) | class GPTValidator(QualityValidator):
    method __init__ (line 188) | def __init__(
    method validate (line 203) | def validate(self, related_words_pairs: List[RelatedWordsPair]) -> Lis...
    method get_chat_completion (line 223) | def get_chat_completion(
    method extract_score (line 261) | def extract_score(response_content: str) -> float:
    method set_model (line 282) | def set_model(self, model: str) -> None:

FILE: evals/registry_test.py
  function test_n_ctx_from_model_name (line 4) | def test_n_ctx_from_model_name():
  function test_is_chat_model (line 21) | def test_is_chat_model():

FILE: evals/solvers/human_cli_solver.py
  class HumanCliSolver (line 8) | class HumanCliSolver(Solver):
    method __init__ (line 15) | def __init__(
    method _solve (line 29) | def _solve(self, task_state: TaskState, **kwargs) -> SolverResult:
    method name (line 47) | def name(self) -> str:

FILE: evals/solvers/memory.py
  class Interaction (line 8) | class Interaction:
  class PersistentMemoryCache (line 16) | class PersistentMemoryCache:
    method __init__ (line 17) | def __init__(
    method save_private_interaction (line 24) | def save_private_interaction(self, task_state: TaskState):
    method load_private_interaction (line 39) | def load_private_interaction(self, task_state: TaskState) -> List[Mess...

FILE: evals/solvers/nested/cot_solver.py
  class CoTSolver (line 9) | class CoTSolver(NestedSolver):
    method __init__ (line 10) | def __init__(
    method cot_solver (line 33) | def cot_solver(self) -> Solver:
    method extract_solver (line 37) | def extract_solver(self) -> Solver:
    method cot_template (line 40) | def cot_template(self, task_state: TaskState) -> str:
    method extract_template (line 45) | def extract_template(self, task_state: TaskState) -> str:
    method _solve (line 50) | def _solve(
    method name (line 84) | def name(self) -> str:

FILE: evals/solvers/nested/fewshot_solver.py
  class FewShotSolver (line 9) | class FewShotSolver(NestedSolver):
    method __init__ (line 10) | def __init__(
    method base_solver (line 72) | def base_solver(self) -> Solver:
    method _solve (line 75) | def _solve(
    method _modify_task_state (line 83) | def _modify_task_state(self, task_state: TaskState) -> TaskState:
    method name (line 109) | def name(self) -> str:

FILE: evals/solvers/nested/hhh_solver.py
  class HHHSolver (line 8) | class HHHSolver(NestedSolver):
    method __init__ (line 16) | def __init__(
    method solver (line 25) | def solver(self) -> Solver:
    method _solve (line 28) | def _solve(
    method name (line 46) | def name(self) -> str:

FILE: evals/solvers/nested/self_consistency_solver.py
  class SelfConsistencySolver (line 16) | class SelfConsistencySolver(NestedSolver):
    method __init__ (line 24) | def __init__(
    method solver (line 55) | def solver(self) -> Solver:
    method judge_solver (line 59) | def judge_solver(self) -> Solver:
    method _solve (line 62) | def _solve(
    method _extract_answer (line 136) | def _extract_answer(self, raw_result: SolverResult) -> str:
    method name (line 149) | def name(self) -> str:

FILE: evals/solvers/postprocessors/base.py
  class PostProcessor (line 6) | class PostProcessor(ABC):
    method __call__ (line 13) | def __call__(self, result: SolverResult, *args, **kwargs) -> SolverRes...

FILE: evals/solvers/postprocessors/postprocessors.py
  class Strip (line 5) | class Strip(PostProcessor):
    method __call__ (line 10) | def __call__(self, result: SolverResult) -> SolverResult:
  class RemoveQuotes (line 17) | class RemoveQuotes(PostProcessor):
    method __call__ (line 26) | def __call__(self, result: SolverResult) -> SolverResult:
  class RemovePeriod (line 35) | class RemovePeriod(PostProcessor):
    method __call__ (line 41) | def __call__(self, result: SolverResult) -> SolverResult:

FILE: evals/solvers/postprocessors/postprocessors_test.py
  function test_strip (line 5) | def test_strip():
  function test_remove_quotes (line 16) | def test_remove_quotes():
  function test_remove_period (line 35) | def test_remove_period():
  function test_combination (line 50) | def test_combination():

FILE: evals/solvers/prompts/hhh_test.py
  function test_render_messages (line 94) | def test_render_messages() -> None:

FILE: evals/solvers/providers/anthropic/anthropic_solver.py
  class AnthropicSolver (line 25) | class AnthropicSolver(Solver):
    method __init__ (line 30) | def __init__(
    method _solve (line 44) | def _solve(self, task_state: TaskState, **kwargs) -> SolverResult:
    method name (line 77) | def name(self) -> str:
    method model_version (line 81) | def model_version(self) -> Union[str, dict]:
    method _convert_msgs_to_anthropic_format (line 89) | def _convert_msgs_to_anthropic_format(msgs: list[Message]) -> list[Mes...
  function anthropic_create_retrying (line 119) | def anthropic_create_retrying(client: Anthropic, *args, **kwargs):
  function anth_to_openai_usage (line 132) | def anth_to_openai_usage(anth_usage: Usage) -> dict:

FILE: evals/solvers/providers/anthropic/anthropic_solver_test.py
  function anthropic_solver (line 18) | def anthropic_solver():
  function dummy_recorder (line 26) | def dummy_recorder():
  function test_solver (line 38) | def test_solver(dummy_recorder, anthropic_solver):
  function test_message_format (line 63) | def test_message_format():
  function test_anth_to_openai_usage_correctness (line 110) | def test_anth_to_openai_usage_correctness():
  function test_anth_to_openai_usage_zero_tokens (line 122) | def test_anth_to_openai_usage_zero_tokens():

FILE: evals/solvers/providers/google/gemini_solver.py
  class GoogleMessage (line 47) | class GoogleMessage:
    method to_dict (line 51) | def to_dict(self):
    method from_evals_message (line 55) | def from_evals_message(msg: Message):
  class GeminiSolver (line 70) | class GeminiSolver(Solver):
    method __init__ (line 75) | def __init__(
    method model (line 92) | def model(self) -> str:
    method _solve (line 95) | def _solve(
    method _convert_msgs_to_google_format (line 158) | def _convert_msgs_to_google_format(msgs: list[Message]) -> list[Google...
    method name (line 191) | def name(self) -> str:
    method model_version (line 195) | def model_version(self) -> Union[str, dict]:
    method __deepcopy__ (line 198) | def __deepcopy__(self, memo):

FILE: evals/solvers/providers/google/gemini_solver_test.py
  function dummy_recorder (line 14) | def dummy_recorder():
  function gemini_solver (line 21) | def gemini_solver():
  function test_solver (line 30) | def test_solver(dummy_recorder, gemini_solver):
  function test_message_format (line 51) | def test_message_format():

FILE: evals/solvers/providers/openai/openai_assistants_solver.py
  class OpenAIAssistantsSolver (line 28) | class OpenAIAssistantsSolver(Solver):
    method __init__ (line 54) | def __init__(
    method _run_assistant_retrying (line 95) | def _run_assistant_retrying(self, task_state: TaskState):
    method _solve (line 109) | def _solve(
    method copy (line 192) | def copy(self):
    method _create_file (line 204) | def _create_file(self, file_path: str) -> str:
    method _create_files (line 223) | def _create_files(self, file_paths: list[str]) -> list[str]:
    method _get_last_assistant_message_idx (line 231) | def _get_last_assistant_message_idx(self, messages: list[Message]) -> ...
    method _convert_to_user_message (line 238) | def _convert_to_user_message(self, message: Message) -> Message:
    method _wait_on_run (line 248) | def _wait_on_run(self, run: Run, thread: Thread) -> Run:
    method name (line 262) | def name(self) -> str:
    method model_version (line 266) | def model_version(self) -> Union[str, dict]:

FILE: evals/solvers/providers/openai/openai_assistants_solver_test.py
  function dummy_data_file (line 21) | def dummy_data_file(scope="session"):
  function dummy_recorder (line 35) | def dummy_recorder():
  function vanilla_solver (line 42) | def vanilla_solver():
  function code_interpreter_solver (line 50) | def code_interpreter_solver():
  function retrieval_solver (line 59) | def retrieval_solver():
  function test_solver_copying (line 68) | def test_solver_copying(dummy_recorder, vanilla_solver):
  function test_multiturn_conversation (line 84) | def test_multiturn_conversation(dummy_recorder, vanilla_solver):
  function test_code_interpreter (line 107) | def test_code_interpreter(dummy_recorder, code_interpreter_solver):
  function test_task_description (line 126) | def test_task_description(dummy_recorder, vanilla_solver):
  function test_code_interpreter_file (line 145) | def test_code_interpreter_file(dummy_recorder, dummy_data_file, code_int...
  function test_retrieval_file (line 172) | def test_retrieval_file(dummy_recorder, dummy_data_file, retrieval_solver):
  function test_file_cache (line 206) | def test_file_cache(dummy_recorder, dummy_data_file, retrieval_solver):

FILE: evals/solvers/providers/openai/openai_solver.py
  class OpenAISolver (line 22) | class OpenAISolver(Solver):
    method __init__ (line 31) | def __init__(
    method model (line 65) | def model(self) -> str:
    method name (line 73) | def name(self) -> str:
    method model_version (line 77) | def model_version(self) -> Union[str, dict]:
    method _is_chat_model (line 87) | def _is_chat_model(self, model: str) -> bool:
    method _completion_exception (line 96) | def _completion_exception(self) -> Exception:
    method _api_base (line 104) | def _api_base(self) -> Optional[str]:
    method _api_key (line 110) | def _api_key(self) -> Optional[str]:
    method _solve (line 115) | def _solve(self, task_state: TaskState, **kwargs) -> SolverResult:
    method _perform_prechecks (line 159) | def _perform_prechecks(self, msgs: list[dict[str, str]]) -> Optional[S...
    method _process_msgs (line 181) | def _process_msgs(self, raw_msgs: list[dict[str, str]]) -> list[dict[s...
    method _handle_completion_exception (line 189) | def _handle_completion_exception(self, e: Exception) -> SolverResult:
    method _render_completion_prompt (line 222) | def _render_completion_prompt(self, msgs: list[dict[str, str]]) -> str:
    method _parse_completion_response (line 235) | def _parse_completion_response(self, raw_response: str) -> str:
    method _get_msg_separators (line 249) | def _get_msg_separators(self) -> list[str]:
    method _get_completion_fn_cls (line 256) | def _get_completion_fn_cls(self, model: str) -> Any:
    method _preprocess_completion_fn_options (line 274) | def _preprocess_completion_fn_options(self) -> dict:
    method _make_logit_bias (line 287) | def _make_logit_bias(self, valid_answers: list[str], model: str) -> di...

FILE: evals/solvers/providers/together/together_solver.py
  function is_chat_model (line 12) | def is_chat_model(model: str) -> bool:
  class TogetherSolver (line 27) | class TogetherSolver(OpenAISolver):
    method __init__ (line 45) | def __init__(self, merge_adjacent_msgs: bool = False, **kwargs):
    method _api_base (line 52) | def _api_base(self) -> Optional[str]:
    method _api_key (line 57) | def _api_key(self) -> Optional[str]:
    method _completion_exception (line 62) | def _completion_exception(self) -> Exception:
    method _is_chat_model (line 69) | def _is_chat_model(self, model: str) -> bool:
    method _preprocess_completion_fn_options (line 76) | def _preprocess_completion_fn_options(self) -> dict:
    method _perform_prechecks (line 83) | def _perform_prechecks(self, msgs: list[dict[str, str]]) -> Optional[S...
    method _process_msgs (line 91) | def _process_msgs(self, msgs: list[dict[str, str]]) -> list[dict[str, ...
    method _handle_completion_exception (line 130) | def _handle_completion_exception(self, e: Exception) -> SolverResult:

FILE: evals/solvers/providers/together/together_solver_test.py
  function llama_solver (line 7) | def llama_solver():
  function llama_solver_merge (line 17) | def llama_solver_merge():
  function test_single_system_msg (line 27) | def test_single_system_msg(llama_solver):
  function test_system_assistant_msgs (line 37) | def test_system_assistant_msgs(llama_solver):
  function test_system_user_msg (line 49) | def test_system_user_msg(llama_solver):
  function test_final_system_msg (line 61) | def test_final_system_msg(llama_solver):
  function test_combined (line 75) | def test_combined(llama_solver):
  function test_merge (line 89) | def test_merge(llama_solver_merge):
  function test_advanced_merge (line 102) | def test_advanced_merge(llama_solver_merge):

FILE: evals/solvers/solver.py
  class SolverResult (line 18) | class SolverResult:
    method __init__ (line 19) | def __init__(self, output: str, **metadata):
    method output (line 24) | def output(self) -> str:
    method metadata (line 28) | def metadata(self) -> dict:
    method to_json (line 31) | def to_json(self) -> str:
  class Solver (line 41) | class Solver(ABC, CompletionFn):
    method __init__ (line 44) | def __init__(
    method _solve (line 60) | def _solve(
    method __call__ (line 76) | def __call__(
    method name (line 100) | def name(self) -> str:
    method model_version (line 111) | def model_version(self) -> Union[str, dict]:
    method copy (line 122) | def copy(self: SolverType) -> SolverType:
  class DummySolver (line 128) | class DummySolver(Solver):
    method _solve (line 129) | def _solve(
  class NestedSolver (line 137) | class NestedSolver(Solver):
    method __init__ (line 142) | def __init__(self, *, postprocessors: list[str] = [], registry=None, *...
    method get_solver (line 160) | def get_solver(self, solver_name: str) -> Solver:
    method _create_solver (line 188) | def _create_solver(self, solver_spec: SolverSpec) -> Solver:
    method copy (line 191) | def copy(self: SolverType) -> SolverType:
    method model_version (line 200) | def model_version(self) -> Union[str, dict]:
  function create_solver (line 212) | def create_solver(solver_spec: dict) -> Solver:

FILE: evals/solvers/solver_test.py
  class EchoSolver (line 8) | class EchoSolver(Solver):
    method _solve (line 13) | def _solve(
  function dummy_recorder (line 22) | def dummy_recorder():
  function test_echo_solver (line 28) | def test_echo_solver(dummy_recorder):
  function test_echo_solver_with_postprocessors (line 36) | def test_echo_solver_with_postprocessors(dummy_recorder):

FILE: evals/solvers/utils.py
  function maybe_wrap_with_compl_fn (line 10) | def maybe_wrap_with_compl_fn(ambiguous_executor: Union[CompletionFn, Sol...
  function maybe_wrap_with_solver (line 28) | def maybe_wrap_with_solver(ambiguous_executor: Union[Solver, CompletionF...

FILE: evals/task_state.py
  class Message (line 6) | class Message:
    method to_dict (line 19) | def to_dict(self):
  class TaskState (line 24) | class TaskState:

FILE: evals/utils/api_utils.py
  function create_retrying (line 15) | def create_retrying(func: callable, retry_exceptions: tuple[Exception], ...

FILE: evals/utils/log_utils.py
  function get_final_results_from_dir (line 6) | def get_final_results_from_dir(log_dir: Union[str, Path]) -> dict[Path, ...
  function get_specs_from_dir (line 17) | def get_specs_from_dir(log_dir: Union[str, Path]) -> dict[Path, dict]:
  function extract_final_results (line 28) | def extract_final_results(path: Path) -> dict:
  function extract_individual_results (line 45) | def extract_individual_results(path: Path, type_string: str = "metrics")...
  function extract_spec (line 64) | def extract_spec(path: Path) -> dict:

FILE: evals/utils/misc.py
  function t (line 9) | def t(duration: float) -> str:
  function make_object (line 20) | def make_object(object_ref: str, *args: Any, **kwargs: Any) -> Any:

FILE: evals/utils/snowflake.py
  function _first_not_none (line 14) | def _first_not_none(*args):
  class SnowflakeError (line 21) | class SnowflakeError(Exception):
  class SnowflakeConnection (line 25) | class SnowflakeConnection:
    method __init__ (line 26) | def __init__(
    method _ensure_connected (line 60) | def _ensure_connected(self):
    method cursor (line 91) | def cursor(self, *args, **kwargs):
    method __call__ (line 97) | def __call__(self, *args, **kwargs):
    method query (line 104) | def query(self, *args, many=False, pandas_out=False, list_out=False, *...
    method robust_query (line 115) | def robust_query(self, max_trials: Optional[int] = None, *args, **kwar...

FILE: evals/utils/test.py
  class TestCompletionResult (line 7) | class TestCompletionResult(CompletionResult):
    method __init__ (line 11) | def __init__(self, completion: str):
    method get_completions (line 14) | def get_completions(self) -> list[str]:
  class TestCompletionFn (line 18) | class TestCompletionFn(CompletionFn):
    method __init__ (line 22) | def __init__(self, completion: str):
    method __call__ (line 25) | def __call__(

FILE: scripts/battle_generator.py
  function format (line 13) | def format(template: str, **kwargs: str) -> str:

FILE: scripts/modelgraded_generator.py
  function format (line 9) | def format(template: str, **kwargs: str) -> str:

FILE: scripts/pattern_identification_generator.py
  function generate_example (line 23) | def generate_example() -> tuple[str, list[str], Literal["foo", "bar"]]:
  function generate_exemplars_str (line 32) | def generate_exemplars_str(num_exemplars: int = 8) -> str:
  function generate_eval_examples (line 42) | def generate_eval_examples(

FILE: tests/unit/evals/test_metrics.py
  function test_get_accuracy (line 19) | def test_get_accuracy(