SYMBOL INDEX (4474 symbols across 720 files)

FILE: examples/transformer-lens.py
  function evaluate_lm_eval (line 12) | def evaluate_lm_eval(lens_model: HookedTransformer, tasks: list[str], **...

FILE: lm_eval/__init__.py
  function __getattr__ (line 17) | def __getattr__(name):

FILE: lm_eval/__main__.py
  function cli_evaluate (line 5) | def cli_evaluate() -> None:

FILE: lm_eval/_cli/harness.py
  class HarnessCLI (line 10) | class HarnessCLI:
    method __init__ (line 13) | def __init__(self):
    method parse_args (line 46) | def parse_args(self) -> argparse.Namespace:
    method execute (line 58) | def execute(self, args: argparse.Namespace) -> None:

FILE: lm_eval/_cli/ls.py
  class List (line 7) | class List(SubCommand):
    method __init__ (line 10) | def __init__(self, subparsers: argparse._SubParsersAction, *args, **kw...
    method _add_args (line 51) | def _add_args(self) -> None:
    method _execute (line 66) | def _execute(self, args: argparse.Namespace) -> None:

FILE: lm_eval/_cli/run.py
  class Run (line 18) | class Run(SubCommand):
    method __init__ (line 21) | def __init__(self, subparsers: argparse._SubParsersAction, *args, **kw...
    method _add_args (line 49) | def _add_args(self) -> None:
    method _execute (line 338) | def _execute(args: argparse.Namespace) -> None:

FILE: lm_eval/_cli/subcommand.py
  class SubCommand (line 5) | class SubCommand(ABC):
    method __init__ (line 8) | def __init__(self, *args, **kwargs):
    method create (line 12) | def create(cls, subparsers: argparse._SubParsersAction):
    method _add_args (line 17) | def _add_args(self) -> None:

FILE: lm_eval/_cli/utils.py
  function try_parse_json (line 12) | def try_parse_json(value: str | dict[str, Any] | None) -> str | dict[str...
  function _int_or_none_list_arg_type (line 28) | def _int_or_none_list_arg_type(
  function request_caching_arg_to_dict (line 66) | def request_caching_arg_to_dict(cache_requests: str | None) -> dict[str,...
  function check_argument_types (line 81) | def check_argument_types(parser: argparse.ArgumentParser) -> None:
  function handle_cli_value_string (line 95) | def handle_cli_value_string(arg: str) -> bool | int | float | str:
  function key_val_to_dict (line 111) | def key_val_to_dict(args: str) -> dict[str, Any]:
  class MergeDictAction (line 125) | class MergeDictAction(argparse.Action):
    method __call__ (line 128) | def __call__(
  class SplitArgs (line 159) | class SplitArgs(argparse.Action):
    method __call__ (line 160) | def __call__(self, parser, namespace, values, option_string=None):

FILE: lm_eval/_cli/validate.py
  class Validate (line 8) | class Validate(SubCommand):
    method __init__ (line 11) | def __init__(self, subparsers: argparse._SubParsersAction, *args, **kw...
    method _add_args (line 78) | def _add_args(self) -> None:
    method _execute (line 95) | def _execute(self, args: argparse.Namespace) -> None:

FILE: lm_eval/api/filter.py
  class Filter (line 8) | class Filter(ABC):
    method __init__ (line 17) | def __init__(self, **kwargs) -> None:
    method apply (line 23) | def apply(self, resps: Union[List, Iterable], docs: List[dict]) -> Ite...
  class FilterEnsemble (line 34) | class FilterEnsemble:
    method apply (line 45) | def apply(self, instances: List[Instance]) -> None:

FILE: lm_eval/api/group.py
  class Group (line 34) | class Group:
    method add (line 61) | def add(self, item: Task | Group) -> None:
    method pop (line 69) | def pop(self, name: str) -> Group | Task | None:
    method get (line 73) | def get(self, name: str) -> Task | Group | None:
    method __contains__ (line 77) | def __contains__(self, name: str) -> bool:
    method __iter__ (line 81) | def __iter__(self):
    method __len__ (line 85) | def __len__(self) -> int:
    method get_all_tasks (line 91) | def get_all_tasks(self, recursive: bool = True) -> list[Task]:
    method get_all_groups (line 112) | def get_all_groups(self, recursive: bool = True) -> list[Group]:
    method child_names (line 132) | def child_names(self) -> list[str]:
    method version (line 137) | def version(self) -> str:
    method has_aggregation (line 142) | def has_aggregation(self) -> bool:
    method _discover_filters_for_metric (line 149) | def _discover_filters_for_metric(
    method aggregate (line 183) | def aggregate(self, task_metrics: dict[str, _TaskMetrics]) -> _TaskMet...
    method to_dict (line 285) | def to_dict(self) -> dict[str, Any] | None:
    method from_config (line 303) | def from_config(cls, config: GroupConfig | dict[str, Any]) -> Group:
    method __repr__ (line 323) | def __repr__(self):
  class ConfigurableGroup (line 333) | class ConfigurableGroup(Group):
    method __init__ (line 336) | def __init__(self, config: dict | GroupConfig | None = None) -> None:
    method group (line 350) | def group(self):
    method group_alias (line 354) | def group_alias(self):
    method version (line 358) | def version(self) -> str:
    method config (line 364) | def config(self):
    method group_name (line 368) | def group_name(self):
    method from_group (line 372) | def from_group(cls, group: Group) -> ConfigurableGroup:
    method __eq__ (line 385) | def __eq__(self, other):
    method __hash__ (line 390) | def __hash__(self):
    method __repr__ (line 393) | def __repr__(self):

FILE: lm_eval/api/instance.py
  class Instance (line 11) | class Instance:
    method __post_init__ (line 27) | def __post_init__(self) -> None:
    method args (line 32) | def args(self):

FILE: lm_eval/api/metrics.py
  function bypass_agg (line 23) | def bypass_agg(arr):
  function nanmean (line 28) | def nanmean(arr):
  function mean (line 35) | def mean(arr):
  function median (line 40) | def median(arr):
  function perplexity (line 47) | def perplexity(items):
  function weighted_perplexity (line 52) | def weighted_perplexity(items):
  function bits_per_byte (line 57) | def bits_per_byte(items):
  function f1_score (line 62) | def f1_score(items):
  function matthews_corrcoef (line 74) | def matthews_corrcoef(items):
  function bleu (line 84) | def bleu(items):
  function chrf (line 102) | def chrf(items):
  function ter (line 117) | def ter(items):
  function brier_score (line 133) | def brier_score(items):  # This is a passthrough function
  function brier_score_fn (line 148) | def brier_score_fn(items):  # This is a passthrough function
  function acc_fn (line 158) | def acc_fn(items):  # This is a passthrough function
  function acc_norm_fn (line 168) | def acc_norm_fn(items):  # This is a passthrough function
  function acc_mutual_info_fn (line 178) | def acc_mutual_info_fn(items):  # This is a passthrough function
  function acc_bytes_fn (line 188) | def acc_bytes_fn(items):  # This is a passthrough function
  function exact_match_hf_evaluate (line 210) | def exact_match_hf_evaluate(
  function exact_match_fn (line 254) | def exact_match_fn(**kwargs):
  function perplexity_fn (line 264) | def perplexity_fn(items):  # This is a passthrough function
  function likelihood_fn (line 274) | def likelihood_fn(items):  # This is a passthrough function
  function word_perplexity_fn (line 284) | def word_perplexity_fn(items):  # This is a passthrough function
  function byte_perplexity_fn (line 294) | def byte_perplexity_fn(items):  # This is a passthrough function
  function bits_per_byte_fn (line 304) | def bits_per_byte_fn(items):  # This is a passthrough function
  function pop_stddev (line 308) | def pop_stddev(arr):
  function sample_stddev (line 313) | def sample_stddev(arr: Sequence[T]) -> float:
  function mean_stderr (line 318) | def mean_stderr(arr):
  function bypass (line 328) | def bypass(items):
  function mcc_fn (line 338) | def mcc_fn(items):  # This is a passthrough function
  function f1_fn (line 348) | def f1_fn(items):  # This is a passthrough function
  function bleu_fn (line 358) | def bleu_fn(items):  # This is a passthrough function
  function chrf_fn (line 368) | def chrf_fn(items):  # This is a passthrough function
  function ter_fn (line 378) | def ter_fn(items):  # This is a passthrough function
  function acc_all (line 388) | def acc_all(items):
  function acc_all_stderr (line 407) | def acc_all_stderr(items):
  function metric_max_over_ground_truths (line 425) | def metric_max_over_ground_truths(metric_fn, prediction, ground_truths):
  function weighted_mean (line 434) | def weighted_mean(items):
  function is_non_str_iterable (line 439) | def is_non_str_iterable(obj):
  function _sacreformat (line 443) | def _sacreformat(refs, preds):
  class _bootstrap_internal (line 474) | class _bootstrap_internal:
    method __init__ (line 480) | def __init__(self, f: Callable[[Sequence[T]], float], n: int) -> None:
    method __call__ (line 484) | def __call__(self, v: tuple[int, Sequence[T]]) -> list[float]:
  function _bootstrap_internal_no_mp (line 494) | def _bootstrap_internal_no_mp(
  function bootstrap_stderr (line 516) | def bootstrap_stderr(
  function stderr_for_metric (line 555) | def stderr_for_metric(
  function pooled_sample_stderr (line 590) | def pooled_sample_stderr(stderrs: List[float], sizes: List[int]):
  function combined_sample_stderr (line 608) | def combined_sample_stderr(stderrs: List[float], sizes: List[int], metri...
  function aggregate_subtask_metrics (line 640) | def aggregate_subtask_metrics(metrics, sizes, weight_by_size=True):

FILE: lm_eval/api/model.py
  class LM (line 25) | class LM(abc.ABC):
    method __init__ (line 32) | def __init__(self) -> None:
    method loglikelihood (line 40) | def loglikelihood(self, requests: list["Instance"]) -> list[tuple[floa...
    method loglikelihood_rolling (line 58) | def loglikelihood_rolling(self, requests: list["Instance"]) -> list[fl...
    method generate_until (line 100) | def generate_until(self, requests: list["Instance"]) -> list[str]:
    method apply_chat_template (line 113) | def apply_chat_template(
    method create_from_arg_string (line 131) | def create_from_arg_string(
    method create_from_arg_obj (line 149) | def create_from_arg_obj(
    method device (line 176) | def device(self):
    method rank (line 180) | def rank(self) -> int:
    method world_size (line 185) | def world_size(self) -> int:
    method all_gather (line 189) | def all_gather(self, tensor):
    method gather_object (line 196) | def gather_object(self, obj, dst=0):
    method barrier (line 203) | def barrier(self) -> None:
    method tokenizer_name (line 208) | def tokenizer_name(self) -> str:
    method chat_template (line 217) | def chat_template(self, chat_template: bool | str = False) -> str | None:
    method set_cache_hook (line 225) | def set_cache_hook(self, cache_hook: "CacheHook") -> None:
  function hash_args (line 230) | def hash_args(attr: str, args: Iterable[Any]) -> str:
  class CacheHook (line 235) | class CacheHook:
    method __init__ (line 236) | def __init__(self, cachinglm: Optional["CachingLM"]) -> None:
    method add_partial (line 243) | def add_partial(self, attr: str, req: Iterable[Any], res: Any) -> None:
  class CachingLM (line 250) | class CachingLM:
    method __init__ (line 251) | def __init__(self, lm: LM, cache_db: str) -> None:
    method __getattr__ (line 269) | def __getattr__(self, attr: str) -> Any:
    method get_cache_hook (line 327) | def get_cache_hook(self) -> "CacheHook":
  class TemplateLM (line 331) | class TemplateLM(LM):
    method eot_token_id (line 343) | def eot_token_id(self) -> int:
    method prefix_token_id (line 347) | def prefix_token_id(self):
    method tok_encode (line 352) | def tok_encode(
    method _loglikelihood_tokens (line 363) | def _loglikelihood_tokens(
    method _encode_pair (line 368) | def _encode_pair(
    method loglikelihood (line 408) | def loglikelihood(
    method loglikelihood_rolling (line 449) | def loglikelihood_rolling(
    method generate_until (line 455) | def generate_until(self, requests, disable_tqdm: bool = False) -> list...
    method chat_template (line 458) | def chat_template(self, chat_template: bool | str = False) -> str | None:

FILE: lm_eval/api/registry.py
  function _materialise_placeholder (line 101) | def _materialise_placeholder(ph: Placeholder) -> Any:
  function _suggest_similar (line 125) | def _suggest_similar(
  function _build_key_error_msg (line 142) | def _build_key_error_msg(name: str, alias: str, keys: Iterable[str]) -> ...
  class Registry (line 156) | class Registry(Generic[T]):
    method __init__ (line 164) | def __init__(
    method register (line 183) | def register(
    method _materialise (line 261) | def _materialise(self, ph: Placeholder) -> T:
    method get (line 273) | def get(self, alias: str) -> T: ...
    method get (line 276) | def get(self, alias: str, default: D) -> T | D: ...
    method get (line 278) | def get(self, alias: str, default: D | Any = _MISSING) -> T | D:
    method __getitem__ (line 329) | def __getitem__(self, alias: str) -> T:
    method __contains__ (line 333) | def __contains__(self, alias: str) -> bool:
    method __iter__ (line 337) | def __iter__(self):
    method __len__ (line 341) | def __len__(self):
    method __repr__ (line 345) | def __repr__(self) -> str:
    method keys (line 352) | def keys(self):
    method values (line 356) | def values(self):
    method items (line 363) | def items(self):
    method origin (line 372) | def origin(self, alias: str) -> str | None:
    method freeze (line 391) | def freeze(self):
    method _clear (line 402) | def _clear(self):  # pragma: no cover
  function freeze_all (line 426) | def freeze_all():
  function register_model (line 465) | def register_model(*names):
  function get_model (line 491) | def get_model(model_name: str):
  function register_filter (line 525) | def register_filter(name: str):
  function get_filter (line 545) | def get_filter(filter_name: str | Callable) -> Callable:
  function register_metric (line 575) | def register_metric(**args):
  function get_metric (line 609) | def get_metric(name: str, hf_evaluate_metric: bool = False) -> Callable ...
  function register_aggregation (line 643) | def register_aggregation(name: str):
  function get_aggregation (line 660) | def get_aggregation(name: str) -> Callable[..., float] | None:
  function get_metric_aggregation (line 680) | def get_metric_aggregation(name: str) -> Callable[..., float] | None:
  function is_higher_better (line 700) | def is_higher_better(metric_name: str) -> bool | None:

FILE: lm_eval/api/samplers.py
  class ContextSampler (line 17) | class ContextSampler:
    method __init__ (line 18) | def __init__(
    method sample (line 31) | def sample(
    method set_rnd (line 69) | def set_rnd(self, rnd: int | None):
    method replace_df (line 73) | def replace_df(self, df: Sequence[dict[str, Any]]):
    method fewshot_docs (line 78) | def fewshot_docs(self):
    method rm_eval_doc (line 88) | def rm_eval_doc(doc: _T, _iter: Iterable[_T], n=None) -> Sequence[_T]:
  class FirstNSampler (line 96) | class FirstNSampler(ContextSampler):
    method sample (line 97) | def sample(self, n: int, eval_doc=None, df=None, **kwargs):
  class BalancedSampler (line 108) | class BalancedSampler(ContextSampler):
    method sample (line 109) | def sample(self, n: int, eval_doc=None, df=None, **kwargs):
  class ManualSampler (line 118) | class ManualSampler(ContextSampler):
    method sample (line 119) | def sample(self, n: int, eval_doc=None, df=None, **kwargs):
  function get_sampler (line 130) | def get_sampler(name: str):

FILE: lm_eval/api/task.py
  class Task (line 64) | class Task(abc.ABC):
    method __init__ (line 85) | def __init__(
    method download (line 125) | def download(
    method config (line 164) | def config(self) -> TaskConfig:
    method has_training_docs (line 169) | def has_training_docs(self):
    method has_validation_docs (line 174) | def has_validation_docs(self):
    method has_test_docs (line 179) | def has_test_docs(self):
    method training_docs (line 183) | def training_docs(self) -> Iterable:
    method validation_docs (line 190) | def validation_docs(self) -> Iterable:
    method test_docs (line 197) | def test_docs(self) -> Iterable:
    method fewshot_docs (line 204) | def fewshot_docs(self) -> Iterable:
    method _process_doc (line 221) | def _process_doc(self, doc: dict) -> dict:
    method instances (line 233) | def instances(self) -> list[Instance]:
    method fewshot_examples (line 239) | def fewshot_examples(self, k, rnd):
    method doc_to_decontamination_query (line 245) | def doc_to_decontamination_query(self, doc):
    method doc_to_text (line 251) | def doc_to_text(self, doc):
    method doc_to_target (line 255) | def doc_to_target(self, doc):
    method doc_to_image (line 259) | def doc_to_image(self, doc):
    method doc_to_audio (line 262) | def doc_to_audio(self, doc):
    method doc_to_prefix (line 265) | def doc_to_prefix(self, doc):
    method build_all_requests (line 268) | def build_all_requests(
    method construct_requests (line 382) | def construct_requests(self, doc, ctx, **kwargs):
    method process_results (line 403) | def process_results(self, doc, results):
    method aggregation (line 416) | def aggregation(self):
    method higher_is_better (line 425) | def higher_is_better(self):
    method get_config (line 433) | def get_config(self, key: str) -> Any:
    method count_bytes (line 437) | def count_bytes(cls, doc):
    method count_words (line 442) | def count_words(cls, doc):
    method fewshot_context (line 447) | def fewshot_context(self, doc, num_fewshot, rnd=None, description=None...
    method apply_filters (line 505) | def apply_filters(self) -> list[Instance] | None:
    method dump_config (line 514) | def dump_config(self) -> dict:
    method set_config (line 520) | def set_config(self, key: str, value: Any, update: bool = False) -> None:
    method override_metric (line 535) | def override_metric(self, metric_name: str) -> None:
    method set_fewshot_seed (line 560) | def set_fewshot_seed(self, seed: int | None = None) -> None:
    method eval_docs (line 566) | def eval_docs(self) -> datasets.Dataset | list[dict]:
    method doc_iterator (line 576) | def doc_iterator(
    method resolve_field (line 609) | def resolve_field(doc: dict[str, Any], field: str | None = None):
    method task_name (line 614) | def task_name(self) -> str:
  class ConfigurableTask (line 618) | class ConfigurableTask(Task):
    method __init__ (line 623) | def __init__(
    method download (line 855) | def download(self, dataset_kwargs: dict[str, Any] | None = None, **kwa...
    method has_training_docs (line 875) | def has_training_docs(self) -> bool:
    method has_validation_docs (line 878) | def has_validation_docs(self) -> bool:
    method has_test_docs (line 881) | def has_test_docs(self) -> bool:
    method training_docs (line 884) | def training_docs(self) -> datasets.Dataset:
    method validation_docs (line 892) | def validation_docs(self) -> datasets.Dataset:
    method test_docs (line 900) | def test_docs(self) -> datasets.Dataset:
    method fewshot_docs (line 906) | def fewshot_docs(self):
    method fewshot_context (line 933) | def fewshot_context(
    method build_qa_turn (line 1044) | def build_qa_turn(
    method multiple_input_context (line 1109) | def multiple_input_context(
    method apply_filters (line 1160) | def apply_filters(self) -> list[Instance] | None:
    method should_decontaminate (line 1169) | def should_decontaminate(self):
    method doc_to_decontamination_query (line 1172) | def doc_to_decontamination_query(self, doc: dict):
    method _process_doc (line 1189) | def _process_doc(self, doc: dict) -> dict:
    method doc_to_text (line 1200) | def doc_to_text(self, doc, doc_to_text=None):
    method doc_to_target (line 1236) | def doc_to_target(self, doc: Mapping, doc_to_target=None) -> int | str...
    method doc_to_choice (line 1282) | def doc_to_choice(self, doc: Any, doc_to_choice=None) -> list[str]:
    method doc_to_image (line 1308) | def doc_to_image(self, doc: Any, doc_to_image=None) -> int | str | lis...
    method doc_to_audio (line 1331) | def doc_to_audio(self, doc: Any, doc_to_audio=None) -> int | str | lis...
    method doc_to_prefix (line 1354) | def doc_to_prefix(self, doc):
    method construct_requests (line 1362) | def construct_requests(
    method process_results (line 1455) | def process_results(self, doc, results):
    method aggregation (line 1666) | def aggregation(self) -> dict:
    method higher_is_better (line 1669) | def higher_is_better(self) -> dict:
    method get_config (line 1672) | def get_config(self, key: str) -> Any:
    method task_name (line 1676) | def task_name(self) -> str:
    method __repr__ (line 1679) | def __repr__(self):
  class MultipleChoiceTask (line 1688) | class MultipleChoiceTask(Task):
    method doc_to_target (line 1691) | def doc_to_target(self, doc: dict) -> str:
    method construct_requests (line 1694) | def construct_requests(self, doc: dict, ctx: str, **kwargs) -> list[In...
    method process_results (line 1707) | def process_results(self, doc: dict, results: Iterable[tuple[float, bo...
    method higher_is_better (line 1722) | def higher_is_better(self) -> dict:
    method aggregation (line 1728) | def aggregation(self) -> dict:
  class PerplexityTask (line 1735) | class PerplexityTask(Task):
    method has_training_docs (line 1738) | def has_training_docs(self) -> bool:
    method fewshot_examples (line 1741) | def fewshot_examples(self, k: int, rnd) -> list:
    method fewshot_context (line 1748) | def fewshot_context(self, doc: dict, num_fewshot: int) -> Literal[""]:
    method higher_is_better (line 1756) | def higher_is_better(self) -> dict:
    method doc_to_decontamination_query (line 1763) | def doc_to_decontamination_query(self, doc):
    method doc_to_text (line 1766) | def doc_to_text(self, doc) -> str:
    method doc_to_target (line 1769) | def doc_to_target(self, doc):
    method construct_requests (line 1772) | def construct_requests(self, doc: dict, ctx: str | None, **kwargs):
    method process_results (line 1784) | def process_results(self, doc: dict, results: tuple[float]) -> dict:
    method aggregation (line 1794) | def aggregation(self) -> dict:
    method count_bytes (line 1802) | def count_bytes(cls, doc) -> int:
    method count_words (line 1806) | def count_words(cls, doc) -> int:

FILE: lm_eval/api/utils.py
  function maybe_delimit (line 7) | def maybe_delimit(prefix: str | None, suffix: str | None, delimiter: str...
  function requires_delimiter (line 20) | def requires_delimiter(prefix: str, suffix: str) -> bool:
  function ends_with_whitespace (line 27) | def ends_with_whitespace(s: str) -> bool:
  class Message (line 33) | class Message:
    method to_dict (line 51) | def to_dict(self) -> dict[str, str]:
    method to_text (line 55) | def to_text(self) -> str:
  function messages_to_text (line 60) | def messages_to_text(messages: list[Message]) -> str:
  function multiturn_to_singleturn (line 65) | def multiturn_to_singleturn(messages: list[Message]) -> list[dict[str, A...
  function format_turn (line 86) | def format_turn(content: str, role: str, type: str | None = None) -> dic...
  function random_task_id (line 95) | def random_task_id():

FILE: lm_eval/caching/cache.py
  function load_from_cache (line 26) | def load_from_cache(file_name: str, cache: bool = False):
  function save_to_cache (line 41) | def save_to_cache(file_name, obj):
  function delete_cache (line 53) | def delete_cache(key: str = ""):

FILE: lm_eval/config/evaluate_config.py
  class EvaluatorConfig (line 29) | class EvaluatorConfig:
    method from_cli (line 196) | def from_cli(cls, namespace: Namespace) -> "EvaluatorConfig":
    method from_config (line 231) | def from_config(cls, config_path: str | Path) -> "EvaluatorConfig":
    method load_yaml_config (line 241) | def load_yaml_config(config_path: str | Path) -> dict[str, Any]:
    method _parse_dict_args (line 261) | def _parse_dict_args(self):
    method _configure (line 268) | def _configure(self):
    method _validate_arguments (line 274) | def _validate_arguments(self):
    method _process_arguments (line 314) | def _process_arguments(self):
    method process_tasks (line 336) | def process_tasks(self, metadata: dict | None = None) -> "TaskManager":
    method _set_trust_remote_code (line 414) | def _set_trust_remote_code(self):

FILE: lm_eval/config/group.py
  class AggMetricConfig (line 7) | class AggMetricConfig:
    method __post_init__ (line 34) | def __post_init__(self):
  class GroupConfig (line 47) | class GroupConfig:
    method __post_init__ (line 93) | def __post_init__(self):
    method to_dict (line 104) | def to_dict(self, keep_callable: bool = False) -> dict[str, str]:
    method serialize_function (line 113) | def serialize_function(

FILE: lm_eval/config/task.py
  class FewshotConfig (line 21) | class FewshotConfig:
    method __post_init__ (line 43) | def __post_init__(self):
    method from_dict (line 50) | def from_dict(
  class TaskConfig (line 82) | class TaskConfig(dict):
    method __post_init__ (line 130) | def __post_init__(self) -> None:
    method __getitem__ (line 170) | def __getitem__(self, item):
    method __setitem__ (line 173) | def __setitem__(self, item, value):
    method to_dict (line 176) | def to_dict(self, keep_callable: bool = False) -> dict:
    method serialize_function (line 204) | def serialize_function(

FILE: lm_eval/decontamination/archiver.py
  function json_serial (line 14) | def json_serial(obj: Any) -> str:
  class Archive (line 23) | class Archive:
    method __init__ (line 24) | def __init__(self, file_path: str, compression_level: int = 3) -> None:
    method add_data (line 33) | def add_data(self, data, meta=None) -> None:
    method commit (line 43) | def commit(self) -> None:
  class Reader (line 50) | class Reader:
    method __init__ (line 51) | def __init__(self) -> None:
    method read (line 54) | def read(
  class TextArchive (line 84) | class TextArchive:
    method __init__ (line 85) | def __init__(self, file_path, mode: str = "rb+") -> None:
    method add_data (line 96) | def add_data(self, data) -> None:
    method commit (line 99) | def commit(self) -> None:
  class TextReader (line 104) | class TextReader:
    method __init__ (line 105) | def __init__(self, file_path) -> None:
    method read_tqdm (line 110) | def read_tqdm(self, update_frequency: int = 10000):
    method read_and_tell (line 134) | def read_and_tell(self):
    method read (line 145) | def read(self):
    method read_slow (line 152) | def read_slow(self):
  class ZStdTextReader (line 164) | class ZStdTextReader:
    method __init__ (line 165) | def __init__(self, file) -> None:
    method read_tqdm (line 168) | def read_tqdm(self):

FILE: lm_eval/decontamination/decontaminate.py
  function get_train_overlap_stub (line 14) | def get_train_overlap_stub(docs: dict, ngrams_path: str, ngrams_n_size: ...
  function get_train_overlap (line 37) | def get_train_overlap(docs_by_task_set: dict, ngrams_path: str, limit: i...

FILE: lm_eval/decontamination/janitor.py
  function form_ngrams (line 25) | def form_ngrams(sequence: Iterator[T], n: int) -> Iterator[Tuple[T, ...]]:
  function word_ngrams (line 42) | def word_ngrams(s: str, n: int) -> Iterator[str]:
  function split_indices (line 74) | def split_indices(s: str) -> Iterator[Tuple[str, Tuple[int, int]]]:
  function word_ngrams_indices (line 81) | def word_ngrams_indices(s: str, n: int) -> Iterator[Tuple[str, Tuple[int...
  class Janitor (line 109) | class Janitor:
    method __init__ (line 111) | def __init__(
    method save_contamination_ngrams (line 140) | def save_contamination_ngrams(self, filename: str) -> None:
    method load_contamination_ngrams (line 144) | def load_contamination_ngrams(self, filename: str) -> None:
    method register_contaminant (line 152) | def register_contaminant(self, dirt_string: str) -> None:
    method clean (line 161) | def clean(self, dirty_string: str) -> List[str]:
    method _split_chunks (line 171) | def _split_chunks(
    method register_contaminant_cpp (line 196) | def register_contaminant_cpp(self, dirt_string) -> None:
    method clean_cpp (line 201) | def clean_cpp(self, dirty_string: str) -> List[str]:
    method normalize_string (line 211) | def normalize_string(self, s: str) -> str:
    method register_contaminant_python (line 214) | def register_contaminant_python(self, dirt_string: str) -> None:
    method clean_python (line 219) | def clean_python(self, dirty_string: str) -> List[str]:

FILE: lm_eval/defaults.py
  function _strtobool (line 13) | def _strtobool(val: str) -> bool:
  function _envbool (line 25) | def _envbool(var: str, default: bool = False) -> bool:
  function default_gen_kwargs (line 38) | def default_gen_kwargs(

FILE: lm_eval/evaluator.py
  function simple_evaluate (line 54) | def simple_evaluate(
  function evaluate (line 414) | def evaluate(

FILE: lm_eval/evaluator_utils.py
  class ResultAcc (line 29) | class ResultAcc(TypedDict):
  function print_writeout (line 37) | def print_writeout(task: Task) -> None:
  function get_sample_size (line 49) | def get_sample_size(task, limit: int | float | None) -> int | None:
  function find_test_root (line 58) | def find_test_root(start_path: pathlib.Path) -> pathlib.Path:
  function run_task_tests (line 76) | def run_task_tests(task_list: list[str]):
  class EvalAcc (line 99) | class EvalAcc:
    method collect (line 120) | def collect(self) -> tuple[dict[str, _TaskMetrics], dict[str, _TaskMet...
    method _to_eval_results (line 134) | def _to_eval_results(
  function _compute_task_aggregations (line 173) | def _compute_task_aggregations(
  function _collect_results (line 222) | def _collect_results(
  function aggregate_groups (line 275) | def aggregate_groups(
  function _get_root_groups (line 302) | def _get_root_groups(groups: dict[str, Group]) -> list[Group]:
  function _collect_groups_bottom_up (line 319) | def _collect_groups_bottom_up(groups: dict[str, Group]) -> list[Group]:
  function _process_results (line 349) | def _process_results(
  function _propagate_num_fewshot (line 395) | def _propagate_num_fewshot(
  function _propagate_higher_is_better (line 404) | def _propagate_higher_is_better(
  function _log_selected_tasks (line 423) | def _log_selected_tasks(
  function _handle_back_comp (line 483) | def _handle_back_comp(

FILE: lm_eval/filters/__init__.py
  function build_filter_ensemble (line 11) | def build_filter_ensemble(

FILE: lm_eval/filters/custom.py
  class CustomFilter (line 6) | class CustomFilter(Filter):
    method __init__ (line 11) | def __init__(self, **kwargs) -> None:
    method apply (line 16) | def apply(self, resps, docs):

FILE: lm_eval/filters/decontamination.py
  class DecontaminationFilter (line 6) | class DecontaminationFilter(Filter):
    method __init__ (line 13) | def __init__(self, path) -> None:
    method apply (line 21) | def apply(self, resps, docs) -> None:

FILE: lm_eval/filters/extraction.py
  class RegexFilter (line 10) | class RegexFilter(Filter):
    method __init__ (line 18) | def __init__(
    method apply (line 33) | def apply(self, resps: list[list[str]], docs: list[dict]) -> list[list...
  class POSFilter (line 63) | class POSFilter(Filter):
    method __init__ (line 66) | def __init__(
    method apply (line 83) | def apply(self, resps, docs):
  class WhitespaceFilter (line 109) | class WhitespaceFilter(Filter):
    method apply (line 112) | def apply(self, resps: list[list[str]], docs: list[dict]) -> list[list...
  class MultiChoiceRegexFilter (line 126) | class MultiChoiceRegexFilter(RegexFilter):
    method __init__ (line 134) | def __init__(
    method apply (line 157) | def apply(self, resps: list[list[str]], docs: list[dict]) -> list[list...

FILE: lm_eval/filters/selection.py
  class TakeFirstFilter (line 13) | class TakeFirstFilter(Filter):
    method __init__ (line 14) | def __init__(self) -> None:
    method apply (line 19) | def apply(self, resps, docs):
  class TakeKFilter (line 27) | class TakeKFilter(Filter):
    method __init__ (line 28) | def __init__(self, **kwargs) -> None:
    method apply (line 33) | def apply(self, resps, docs):
  class MajorityVoteFilter (line 44) | class MajorityVoteFilter(Filter):
    method __init__ (line 45) | def __init__(self) -> None:
    method apply (line 50) | def apply(self, resps, docs):

FILE: lm_eval/filters/transformation.py
  class LowercaseFilter (line 8) | class LowercaseFilter(Filter):
    method __init__ (line 9) | def __init__(self) -> None:
    method apply (line 12) | def apply(self, resps, docs):
  class UppercaseFilter (line 20) | class UppercaseFilter(Filter):
    method __init__ (line 21) | def __init__(self) -> None:
    method apply (line 24) | def apply(self, resps, docs):
  class MapFilter (line 32) | class MapFilter(Filter):
    method __init__ (line 33) | def __init__(self, mapping_dict: dict = None, default_value=None) -> N...
    method apply (line 54) | def apply(self, resps, docs):
  class SPANFilter (line 62) | class SPANFilter(Filter):
    method __init__ (line 63) | def __init__(self) -> None:
    method apply (line 66) | def apply(self, resps, docs):

FILE: lm_eval/loggers/evaluation_tracker.py
  class GeneralConfigTracker (line 38) | class GeneralConfigTracker:
    method __init__ (line 70) | def __init__(self) -> None:
    method _get_model_name (line 75) | def _get_model_name(model_args: str | dict[str, Any] | None) -> str | ...
    method log_experiment_args (line 95) | def log_experiment_args(
    method log_end_time (line 117) | def log_end_time(self) -> None:
  class EvaluationTracker (line 123) | class EvaluationTracker:
    method __init__ (line 130) | def __init__(
    method _api (line 222) | def _api(token: str | None = None) -> "HfApi | None":
    method save_results_aggregated (line 230) | def save_results_aggregated(
    method save_results_samples (line 320) | def save_results_samples(
    method recreate_metadata_card (line 424) | def recreate_metadata_card(self) -> None:

FILE: lm_eval/loggers/utils.py
  function remove_none_pattern (line 15) | def remove_none_pattern(input_string: str) -> tuple[str, bool]:
  function _handle_non_serializable (line 37) | def _handle_non_serializable(o: Any) -> int | str | list:
  function get_commit_from_path (line 56) | def get_commit_from_path(repo_path: Path | str) -> str | None:
  function get_git_commit_hash (line 83) | def get_git_commit_hash():
  function add_env_info (line 97) | def add_env_info(storage: dict[str, Any]):
  function add_tokenizer_info (line 131) | def add_tokenizer_info(storage: dict[str, Any], lm):

FILE: lm_eval/loggers/wandb_logger.py
  function get_wandb_printer (line 16) | def get_wandb_printer() -> Literal["Printer"]:
  class WandbLogger (line 24) | class WandbLogger:
    method __init__ (line 25) | def __init__(self, init_args=None, config_args=None) -> None:
    method post_init (line 66) | def post_init(self, results: Dict[str, Any]) -> None:
    method _get_config (line 71) | def _get_config(self) -> Dict[str, Any]:
    method _sanitize_results_dict (line 82) | def _sanitize_results_dict(self) -> Tuple[Dict[str, str], Dict[str, An...
    method _log_results_as_table (line 118) | def _log_results_as_table(self) -> None:
    method _log_results_as_artifact (line 168) | def _log_results_as_artifact(self) -> None:
    method log_eval_result (line 180) | def log_eval_result(self) -> None:
    method _generate_dataset (line 196) | def _generate_dataset(
    method _log_samples_as_artifact (line 287) | def _log_samples_as_artifact(
    method log_eval_samples (line 307) | def log_eval_samples(self, samples: Dict[str, List[Dict[str, Any]]]) -...

FILE: lm_eval/models/__init__.py
  function _register_all_models (line 60) | def _register_all_models():

FILE: lm_eval/models/anthropic_llms.py
  function anthropic_completion (line 17) | def anthropic_completion(
  function anthropic_chat (line 80) | def anthropic_chat(
  class AnthropicLM (line 145) | class AnthropicLM(LM):
    method __init__ (line 148) | def __init__(
    method eot_token_id (line 186) | def eot_token_id(self):
    method max_length (line 191) | def max_length(self) -> int:
    method max_gen_toks (line 195) | def max_gen_toks(self) -> int:
    method batch_size (line 199) | def batch_size(self):
    method device (line 204) | def device(self):
    method tok_encode (line 208) | def tok_encode(self, string: str) -> List[int]:
    method tok_decode (line 211) | def tok_decode(self, tokens: List[int]) -> str:
    method _loglikelihood_tokens (line 214) | def _loglikelihood_tokens(self, requests, disable_tqdm: bool = False):
    method generate_until (line 217) | def generate_until(self, requests, disable_tqdm: bool = False) -> List...
    method _model_call (line 261) | def _model_call(self, inps):
    method _model_generate (line 265) | def _model_generate(self, context, max_length, eos_token_id):
    method loglikelihood (line 269) | def loglikelihood(self, requests, disable_tqdm: bool = False):
    method loglikelihood_rolling (line 272) | def loglikelihood_rolling(self, requests, disable_tqdm: bool = False):
  class AnthropicChat (line 277) | class AnthropicChat(LocalCompletionsAPI):
    method __init__ (line 278) | def __init__(
    method api_key (line 297) | def api_key(self):
    method header (line 307) | def header(self):
    method _create_payload (line 313) | def _create_payload(
    method parse_generations (line 359) | def parse_generations(
    method tok_encode (line 370) | def tok_encode(
    method loglikelihood (line 379) | def loglikelihood(self, requests, **kwargs):

FILE: lm_eval/models/api_models.py
  class JsonChatStr (line 54) | class JsonChatStr(NamedTuple):
    method encode (line 57) | def encode(self, encoding):
  function create_image_prompt (line 61) | def create_image_prompt(
  class TemplateAPI (line 104) | class TemplateAPI(TemplateLM):
    method __init__ (line 107) | def __init__(
    method _create_payload (line 252) | def _create_payload(
    method create_message (line 265) | def create_message(
    method parse_logprobs (line 297) | def parse_logprobs(
    method parse_generations (line 308) | def parse_generations(outputs: Union[Any, List[Any]], **kwargs) -> Lis...
    method api_key (line 313) | def api_key(self) -> str:
    method header (line 318) | def header(self) -> dict:
    method tokenizer_name (line 323) | def tokenizer_name(self) -> str:
    method apply_chat_template (line 330) | def apply_chat_template(
    method eot_token_id (line 353) | def eot_token_id(self) -> Optional[int]:
    method eos_string (line 365) | def eos_string(self) -> Optional[str]:
    method prefix_token_id (line 382) | def prefix_token_id(self) -> Optional[int]:
    method tok_encode (line 397) | def tok_encode(
    method decode_batch (line 446) | def decode_batch(self, tokens: List[List[int]]) -> List[str]:
    method model_call (line 454) | def model_call(
    method amodel_call (line 490) | async def amodel_call(
    method batch_loglikelihood_requests (line 552) | def batch_loglikelihood_requests(
    method get_batched_requests (line 575) | async def get_batched_requests(
    method _loglikelihood_tokens (line 620) | def _loglikelihood_tokens(self, requests, **kwargs) -> List[Tuple[floa...
    method generate_until (line 683) | def generate_until(
    method loglikelihood_rolling (line 832) | def loglikelihood_rolling(

FILE: lm_eval/models/dummy.py
  class DummyLM (line 11) | class DummyLM(LM):
    method __init__ (line 14) | def __init__(self, *args, write_out: bool = False, **kwargs) -> None:
    method create_from_arg_string (line 19) | def create_from_arg_string(cls, arg_string, additional_config=None):
    method loglikelihood (line 22) | def loglikelihood(self, requests, disable_tqdm: bool = False):
    method generate_until (line 33) | def generate_until(self, requests, disable_tqdm: bool = False):
    method loglikelihood_rolling (line 45) | def loglikelihood_rolling(self, requests, disable_tqdm: bool = False):
    method tokenizer (line 54) | def tokenizer(self):
    method apply_chat_template (line 59) | def apply_chat_template(

FILE: lm_eval/models/gguf.py
  function get_result (line 15) | def get_result(logprobs, context_length):
  class GGUFLM (line 37) | class GGUFLM(LM):
    method __init__ (line 38) | def __init__(self, base_url=None, max_length=2048, **kwargs):
    method gguf_completion (line 46) | def gguf_completion(
    method loglikelihood (line 75) | def loglikelihood(self, requests, disable_tqdm: bool = False):
    method generate_until (line 104) | def generate_until(self, requests, disable_tqdm: bool = False):
    method loglikelihood_rolling (line 129) | def loglikelihood_rolling(self, requests, disable_tqdm: bool = False):

FILE: lm_eval/models/hf_audiolm.py
  class HFAUDIOLMQWEN (line 22) | class HFAUDIOLMQWEN(HFLM):
    method __init__ (line 30) | def __init__(
    method _create_tokenizer (line 42) | def _create_tokenizer(
    method apply_chat_template (line 85) | def apply_chat_template(
    method _model_multimodal_generate (line 98) | def _model_multimodal_generate(self, inputs, max_length, stop, **gener...
    method tok_batch_multimodal_encode (line 124) | def tok_batch_multimodal_encode(
    method generate_until (line 165) | def generate_until(
    method loglikelihood_rolling (line 290) | def loglikelihood_rolling(self, requests: list[Instance]) -> list[float]:
    method loglikelihood (line 296) | def loglikelihood(

FILE: lm_eval/models/hf_steered.py
  function steer (line 23) | def steer(
  class SteeredModel (line 67) | class SteeredModel(HFLM):
    method __init__ (line 70) | def __init__(
    method derive_steer_config (line 147) | def derive_steer_config(cls, steer_path: str):
    method add (line 210) | def add(
    method clamp (line 231) | def clamp(
    method forward (line 270) | def forward(self, *args, **kwargs):
    method _model_call (line 274) | def _model_call(self, *args, **kwargs):
    method _model_generate (line 278) | def _model_generate(self, *args, **kwargs):

FILE: lm_eval/models/hf_vlms.py
  class HFMultimodalLM (line 30) | class HFMultimodalLM(HFLM):
    method __init__ (line 38) | def __init__(
    method _create_tokenizer (line 112) | def _create_tokenizer(
    method tok_multimodal_encode (line 158) | def tok_multimodal_encode(
    method _encode_multimodal_pair (line 188) | def _encode_multimodal_pair(self, context, continuation, images):
    method apply_chat_template (line 218) | def apply_chat_template(
    method chat_template (line 275) | def chat_template(self, chat_template: bool | str = False) -> str | None:
    method tok_batch_multimodal_encode (line 287) | def tok_batch_multimodal_encode(
    method _model_multimodal_call (line 342) | def _model_multimodal_call(self, inps, imgs, attn_mask=None, labels=No...
    method _model_multimodal_generate (line 350) | def _model_multimodal_generate(self, inputs, max_length, stop, **gener...
    method _batch_images (line 376) | def _batch_images(self, image_encs):
    method loglikelihood_rolling (line 394) | def loglikelihood_rolling(self, requests: list[Instance]) -> list[float]:
    method loglikelihood (line 403) | def loglikelihood(
    method _multimodal_loglikelihood_tokens (line 439) | def _multimodal_loglikelihood_tokens(
    method generate_until (line 625) | def generate_until(

FILE: lm_eval/models/huggingface.py
  class HFLM (line 60) | class HFLM(TemplateLM):
    method __init__ (line 70) | def __init__(
    method _get_accelerate_args (line 442) | def _get_accelerate_args(
    method config (line 529) | def config(self):
    method model (line 534) | def model(self):
    method eot_token_id (line 542) | def eot_token_id(self) -> int:
    method prefix_token_id (line 547) | def prefix_token_id(self) -> int:
    method max_length (line 556) | def max_length(self) -> int:
    method max_gen_toks (line 570) | def max_gen_toks(self) -> int:
    method batch_size (line 574) | def batch_size(self):
    method device (line 578) | def device(self):
    method rank (line 582) | def rank(self):
    method world_size (line 586) | def world_size(self):
    method all_gather (line 589) | def all_gather(self, tensor):
    method gather_object (line 594) | def gather_object(self, obj, dst=0):
    method barrier (line 601) | def barrier(self):
    method tokenizer_name (line 606) | def tokenizer_name(self) -> str:
    method _get_backend (line 609) | def _get_backend(
    method _get_config (line 669) | def _get_config(
    method _create_model (line 687) | def _create_model(
    method _create_tokenizer (line 856) | def _create_tokenizer(
    method _detect_batch_size (line 917) | def _detect_batch_size(self, requests: Sequence | None = None, pos: in...
    method tok_encode (line 976) | def tok_encode(
    method tok_batch_encode (line 1001) | def tok_batch_encode(
    method tok_decode (line 1044) | def tok_decode(self, tokens: Iterator[list[str]], skip_special_tokens:...
    method _model_call (line 1047) | def _model_call(
    method _model_generate (line 1089) | def _model_generate(
    method _select_cont_toks (line 1127) | def _select_cont_toks(
    method loglikelihood_rolling (line 1150) | def loglikelihood_rolling(
    method _batch_scheduler (line 1236) | def _batch_scheduler(self, pos, n_reordered_requests):
    method _loglikelihood_tokens (line 1253) | def _loglikelihood_tokens(
    method generate_until (line 1490) | def generate_until(
    method apply_chat_template (line 1634) | def apply_chat_template(
    method get_model_info (line 1661) | def get_model_info(self) -> dict:

FILE: lm_eval/models/ibm_watsonx_ai.py
  class LogLikelihoodResult (line 21) | class LogLikelihoodResult(NamedTuple):
  function _verify_credentials (line 26) | def _verify_credentials(creds: dict) -> None:
  function get_watsonx_credentials (line 73) | def get_watsonx_credentials() -> dict[str, str | None]:
  class WatsonxLLM (line 120) | class WatsonxLLM(LM):
    method create_from_arg_string (line 127) | def create_from_arg_string(
    method __init__ (line 191) | def __init__(
    method _has_stop_token (line 228) | def _has_stop_token(response_tokens: list[str], context_tokens: list[s...
    method _check_model_logprobs_support (line 257) | def _check_model_logprobs_support(self):
    method _get_log_likelihood (line 278) | def _get_log_likelihood(
    method generate_until (line 312) | def generate_until(self, requests: list[Instance]) -> list[str]:
    method loglikelihood (line 349) | def loglikelihood(self, requests: list[Instance]) -> list[tuple[float,...
    method loglikelihood_rolling (line 416) | def loglikelihood_rolling(self, requests) -> list[float]:
    method tokenizer_name (line 470) | def tokenizer_name(self) -> str:
    method apply_chat_template (line 473) | def apply_chat_template(

FILE: lm_eval/models/mamba_lm.py
  class MambaLMWrapper (line 10) | class MambaLMWrapper(HFLM):
    method __init__ (line 11) | def __init__(
    method _get_config (line 66) | def _get_config(
    method _create_model (line 84) | def _create_model(
    method _model_generate (line 114) | def _model_generate(self, context, max_length, stop, **generation_kwar...

FILE: lm_eval/models/megatron_lm.py
  function _add_megatron_to_path (line 74) | def _add_megatron_to_path():
  function _check_dist_ckpt (line 93) | def _check_dist_ckpt(load_path: str) -> bool:
  function _parse_extra_args (line 105) | def _parse_extra_args(extra_args: str | None) -> list[str]:
  class MegatronLMEval (line 130) | class MegatronLMEval(LM):
    method __init__ (line 154) | def __init__(
    method _validate_parallelism_config (line 247) | def _validate_parallelism_config(self, devices: int, tp: int, pp: int,...
    method _initialize_megatron (line 309) | def _initialize_megatron(self, **kwargs):
    method eot_token_id (line 595) | def eot_token_id(self) -> int:
    method prefix_token_id (line 606) | def prefix_token_id(self) -> int:
    method max_length (line 620) | def max_length(self) -> int:
    method max_gen_toks (line 624) | def max_gen_toks(self) -> int:
    method batch_size (line 628) | def batch_size(self) -> int:
    method device (line 632) | def device(self) -> torch.device:
    method rank (line 636) | def rank(self) -> int:
    method world_size (line 640) | def world_size(self) -> int:
    method accelerator (line 644) | def accelerator(self):
    method all_gather (line 648) | def all_gather(self, tensor: torch.Tensor) -> torch.Tensor:
    method gather_object (line 652) | def gather_object(self, obj, dst: int = 0):
    method barrier (line 661) | def barrier(self) -> None:
    class _Accelerator (line 665) | class _Accelerator:
      method __init__ (line 672) | def __init__(self, world_size, device):
      method wait_for_everyone (line 676) | def wait_for_everyone(self):
      method gather (line 681) | def gather(self, local_tensor):
      method gather_object (line 705) | def gather_object(self, local_obj):
    method tok_encode (line 714) | def tok_encode(self, string: str, add_special_tokens: bool = False) ->...
    method tok_decode (line 721) | def tok_decode(self, tokens: list[int]) -> str:
    method _encode_pair (line 728) | def _encode_pair(
    method _model_forward (line 744) | def _model_forward(
    method _distribute_requests (line 823) | def _distribute_requests(self, requests: list) -> tuple[list, list[int]]:
    method _gather_results (line 840) | def _gather_results(self, local_results: list, sizes: list[int]) -> list:
    method loglikelihood (line 860) | def loglikelihood(self, requests: list[Instance]) -> list[tuple[float,...
    method _loglikelihood_tokens (line 891) | def _loglikelihood_tokens(
    method loglikelihood_rolling (line 1010) | def loglikelihood_rolling(
    method generate_until (line 1055) | def generate_until(

FILE: lm_eval/models/mistral3.py
  class Mistral3LM (line 33) | class Mistral3LM(HFLM):
    method __init__ (line 44) | def __init__(self, **kwargs):
    method _get_backend (line 59) | def _get_backend(
    method _model_call (line 74) | def _model_call(
    method max_length (line 99) | def max_length(self) -> int:

FILE: lm_eval/models/nemo_lm.py
  function _patch_pretrained_cfg (line 42) | def _patch_pretrained_cfg(
  function _get_target_from_class (line 72) | def _get_target_from_class(target_class) -> str:
  function load_model (line 76) | def load_model(
  function setup_distributed_environment (line 145) | def setup_distributed_environment(trainer):
  class NeMoLM (line 168) | class NeMoLM(LM):
    method __init__ (line 169) | def __init__(
    method create_from_arg_string (line 275) | def create_from_arg_string(cls, arg_string, additional_config=None):
    method eot_token_id (line 283) | def eot_token_id(self):
    method max_length (line 290) | def max_length(self):
    method max_gen_toks (line 294) | def max_gen_toks(self):
    method batch_size (line 298) | def batch_size(self):
    method device (line 302) | def device(self):
    method rank (line 306) | def rank(self):
    method world_size (line 310) | def world_size(self):
    method all_gather (line 313) | def all_gather(self, tensor):
    method gather_object (line 320) | def gather_object(self, obj, dst=0):
    method barrier (line 327) | def barrier(self):
    method tok_encode (line 331) | def tok_encode(self, string: str):
    method tok_decode (line 334) | def tok_decode(self, tokens):
    method _encode_pair (line 337) | def _encode_pair(self, context, continuation):
    method loglikelihood (line 348) | def loglikelihood(self, requests):
    method loglikelihood_rolling (line 364) | def loglikelihood_rolling(
    method _loglikelihood_tokens (line 398) | def _loglikelihood_tokens(self, requests, disable_tqdm=False):
    method generate_until (line 491) | def generate_until(self, requests):

FILE: lm_eval/models/neuron_optimum.py
  class CustomNeuronModelForCausalLM (line 37) | class CustomNeuronModelForCausalLM(NeuronModelForCausalLM):
    method generate (line 40) | def generate(
  class NEURON_HF (line 126) | class NEURON_HF(TemplateLM):
    method __init__ (line 133) | def __init__(
    method config (line 248) | def config(self):
    method eot_token_id (line 253) | def eot_token_id(self):
    method prefix_token_id (line 258) | def prefix_token_id(self):
    method max_length (line 263) | def max_length(self):
    method max_gen_toks (line 267) | def max_gen_toks(self) -> int:
    method batch_size (line 271) | def batch_size(self):
    method device (line 275) | def device(self):
    method rank (line 280) | def rank(self):
    method world_size (line 284) | def world_size(self):
    method tok_encode (line 287) | def tok_encode(self, string: str, left_truncate_len=None, add_special_...
    method tok_batch_encode (line 300) | def tok_batch_encode(
    method tok_decode (line 329) | def tok_decode(self, tokens):
    method _model_generate (line 332) | def _model_generate(self, context, max_length, stop, **generation_kwar...
    method _select_cont_toks (line 356) | def _select_cont_toks(self, logits, contlen=None, inplen=None):
    method loglikelihood_rolling (line 366) | def loglikelihood_rolling(self, requests, disable_tqdm: bool = False):
    method _loglikelihood_tokens (line 419) | def _loglikelihood_tokens(
    method generate_until (line 568) | def generate_until(self, requests, disable_tqdm: bool = False):

FILE: lm_eval/models/openai_completions.py
  class LocalCompletionsAPI (line 16) | class LocalCompletionsAPI(TemplateAPI):
    method __init__ (line 17) | def __init__(
    method _create_payload (line 61) | def _create_payload(
    method parse_logprobs (line 99) | def parse_logprobs(
    method parse_generations (line 125) | def parse_generations(outputs: Union[Dict, List[Dict]], **kwargs) -> L...
    method api_key (line 137) | def api_key(self):
  class LocalChatCompletion (line 142) | class LocalChatCompletion(LocalCompletionsAPI):
    method __init__ (line 150) | def __init__(
    method _create_payload (line 175) | def _create_payload(
    method parse_generations (line 211) | def parse_generations(outputs: Union[Dict, List[Dict]], **kwargs) -> L...
    method tok_encode (line 229) | def tok_encode(
    method loglikelihood (line 238) | def loglikelihood(self, requests, **kwargs):
  class OpenAICompletionsAPI (line 247) | class OpenAICompletionsAPI(LocalCompletionsAPI):
    method __init__ (line 248) | def __init__(
    method api_key (line 259) | def api_key(self):
    method loglikelihood (line 268) | def loglikelihood(self, requests, **kwargs):
    method chat_template (line 277) | def chat_template(self, chat_template: Union[bool, str] = False) -> Op...
  class OpenAIChatCompletion (line 282) | class OpenAIChatCompletion(LocalChatCompletion):
    method __init__ (line 283) | def __init__(
    method api_key (line 303) | def api_key(self):
    method loglikelihood (line 312) | def loglikelihood(self, requests, **kwargs):
    method _create_payload (line 317) | def _create_payload(
  class AzureOpenaiChatCompletionsLM (line 359) | class AzureOpenaiChatCompletionsLM(OpenAIChatCompletion):
    method __init__ (line 360) | def __init__(
    method api_key (line 384) | def api_key(self):

FILE: lm_eval/models/optimum_habana.py
  class HabanaLM (line 18) | class HabanaLM(HFLM):
    method __init__ (line 30) | def __init__(self, **kwargs) -> None:
    method max_length (line 52) | def max_length(self) -> int:
    method max_length (line 57) | def max_length(self, value: int) -> None:
    method find_bucket (line 60) | def find_bucket(self, length: int, key=lambda b, length: b >= length) ...
    method _model_call (line 75) | def _model_call(self, inps: torch.Tensor) -> torch.Tensor:
    method setup_generation_config_gaudi (line 97) | def setup_generation_config_gaudi(self, **kwargs):
    method _create_model (line 108) | def _create_model(self, *args, **kwargs) -> None:
    method generate_until (line 125) | def generate_until(
    method _model_generate (line 137) | def _model_generate(

FILE: lm_eval/models/optimum_ipex.py
  class IPEXLM (line 13) | class IPEXLM(HFLM):
    method __init__ (line 18) | def __init__(
    method _create_model (line 33) | def _create_model(

FILE: lm_eval/models/optimum_lm.py
  class OptimumLM (line 14) | class OptimumLM(HFLM):
    method __init__ (line 25) | def __init__(
    method _create_model (line 43) | def _create_model(

FILE: lm_eval/models/sglang_causallms.py
  class SGLangLM (line 34) | class SGLangLM(TemplateLM):
    method __init__ (line 37) | def __init__(
    method loglikelihood_rolling (line 124) | def loglikelihood_rolling(
    method generate_until (line 193) | def generate_until(
    method _model_generate (line 288) | def _model_generate(
    method eot_token_id (line 319) | def eot_token_id(self):
    method prefix_token_id (line 324) | def prefix_token_id(self):
    method max_length (line 333) | def max_length(self):
    method max_gen_toks (line 343) | def max_gen_toks(self):
    method tok_encode (line 347) | def tok_encode(
    method tok_decode (line 372) | def tok_decode(self, tokens: List[int]) -> str:
    method tokenizer_name (line 377) | def tokenizer_name(self) -> str:
    method chat_template (line 387) | def chat_template(self, chat_template: Union[bool, str] = False) -> str:
    method apply_chat_template (line 408) | def apply_chat_template(
    method _loglikelihood_tokens (line 423) | def _loglikelihood_tokens(
    method _parse_logprobs (line 483) | def _parse_logprobs(tokens: List, outputs, ctxlen: int) -> Tuple[float...
    method modify_gen_kwargs (line 519) | def modify_gen_kwargs(kwargs: dict) -> dict:

FILE: lm_eval/models/sglang_generate_API.py
  class SGLANGGENERATEAPI (line 9) | class SGLANGGENERATEAPI(LocalCompletionsAPI):
    method __init__ (line 10) | def __init__(
    method _create_payload (line 20) | def _create_payload(
    method parse_logprobs (line 66) | def parse_logprobs(
    method parse_generations (line 90) | def parse_generations(outputs: Union[Dict, List[Dict]], **kwargs) -> L...
    method api_key (line 99) | def api_key(self):

FILE: lm_eval/models/textsynth.py
  function textsynth_completion (line 29) | def textsynth_completion(**kwargs):
  class TextSynthLM (line 51) | class TextSynthLM(LM):
    method __init__ (line 52) | def __init__(self, engine, truncate: bool = False, **kwargs) -> None:
    method eot_token_id (line 68) | def eot_token_id(self):
    method max_length (line 73) | def max_length(self) -> int:
    method max_gen_toks (line 78) | def max_gen_toks(self) -> int:
    method batch_size (line 82) | def batch_size(self):
    method device (line 87) | def device(self):
    method tok_encode (line 91) | def tok_encode(self, string: str):
    method tok_decode (line 95) | def tok_decode(self, tokens):
    method loglikelihood (line 99) | def loglikelihood(self, requests, disable_tqdm: bool = False):
    method loglikelihood_rolling (line 123) | def loglikelihood_rolling(self, requests, disable_tqdm: bool = False):
    method generate_until (line 133) | def generate_until(self, requests, disable_tqdm: bool = False):
    method _model_call (line 166) | def _model_call(self, inps):
    method _model_generate (line 170) | def _model_generate(self, context, max_length, eos_token_id):

FILE: lm_eval/models/utils.py
  class GenKwargs (line 33) | class GenKwargs(TypedDict, total=False):
  function chunks (line 42) | def chunks(iter, n: int = 0, fn=None):
  class MultiChoice (line 80) | class MultiChoice:
    method __init__ (line 81) | def __init__(self, choices) -> None:
    method __contains__ (line 85) | def __contains__(self, values) -> bool:
    method __iter__ (line 94) | def __iter__(self) -> Iterator:
  class Grouper (line 98) | class Grouper:
    method __init__ (line 105) | def __init__(self, arr, fn) -> None:
    method get_grouped (line 123) | def get_grouped(self):
    method get_original (line 134) | def get_original(self, grouped_dict):
  function undistribute (line 156) | def undistribute(iterable):
  function retry_on_specific_exceptions (line 196) | def retry_on_specific_exceptions(
  class Collator (line 236) | class Collator:
    method __init__ (line 249) | def __init__(
    method _group_by_index (line 270) | def _group_by_index(self) -> None:
    method _group_by_context (line 276) | def _group_by_context(self) -> None:
    method get_batched (line 282) | def get_batched(
    method get_cache (line 329) | def get_cache(
    method _reorder (line 390) | def _reorder(self, arr: list | tuple[tuple[int, Any], ...]) -> Iterator:
    method get_original (line 406) | def get_original(self, newarr: list) -> list:
    method __len__ (line 427) | def __len__(self):
    method group (line 431) | def group(
    method get_chunks (line 474) | def get_chunks(
  function configure_pad_token (line 515) | def configure_pad_token(
  function replace_placeholders (line 560) | def replace_placeholders(
  function flatten_image_list (line 594) | def flatten_image_list(images: list[list]):
  function handle_stop_sequences (line 605) | def handle_stop_sequences(until: str | list[str] | None, eos: str | None...
  function normalize_gen_kwargs (line 621) | def normalize_gen_kwargs(
  function resize_image (line 717) | def resize_image(
  function truncate_tokens (line 817) | def truncate_tokens(
  function maybe_truncate (line 836) | def maybe_truncate(
  function postprocess_generated_text (line 910) | def postprocess_generated_text(
  function has_bos_prefix (line 939) | def has_bos_prefix(sequence: str, bos_str: str | Iterable[str] | None = ...
  function _add_special_kwargs (line 948) | def _add_special_kwargs(add_special_tokens: bool | None, add_bos: bool |...

FILE: lm_eval/models/utils_hf.py
  function pad_and_concat (line 8) | def pad_and_concat(
  function clear_torch_cache (line 59) | def clear_torch_cache() -> None:
  function get_dtype (line 64) | def get_dtype(dtype: str | torch.dtype) -> torch.dtype | str:
  class MultiTokenEOSCriteria (line 74) | class MultiTokenEOSCriteria(transformers.StoppingCriteria):
    method __init__ (line 77) | def __init__(
    method __call__ (line 100) | def __call__(self, input_ids, scores, **kwargs) -> bool:
  function stop_sequences_criteria (line 114) | def stop_sequences_criteria(

FILE: lm_eval/models/vllm_causallms.py
  function _vllm_mp_worker (line 68) | def _vllm_mp_worker(
  class VLLM (line 126) | class VLLM(TemplateLM):
    method __init__ (line 130) | def __init__(
    method eot_token_id (line 288) | def eot_token_id(self):
    method prefix_token_id (line 293) | def prefix_token_id(self):
    method max_length (line 302) | def max_length(self) -> int:
    method max_gen_toks (line 319) | def max_gen_toks(self):
    method apply_chat_template (line 322) | def apply_chat_template(
    method tokenizer_name (line 355) | def tokenizer_name(self) -> str:
    method tok_encode (line 359) | def tok_encode(
    method tok_encode (line 363) | def tok_encode(
    method tok_encode (line 367) | def tok_encode(
    method _model_generate (line 428) | def _model_generate(
    method loglikelihood_rolling (line 558) | def loglikelihood_rolling(
    method generate_until (line 627) | def generate_until(
    method _loglikelihood_tokens (line 725) | def _loglikelihood_tokens(
    method _parse_logprobs (line 787) | def _parse_logprobs(tokens: list, outputs, ctxlen: int) -> tuple[float...
    method modify_gen_kwargs (line 850) | def modify_gen_kwargs(

FILE: lm_eval/models/vllm_vlms.py
  class VLLM_VLM (line 33) | class VLLM_VLM(VLLM):
    method __init__ (line 36) | def __init__(
    method tok_batch_multimodal_encode (line 76) | def tok_batch_multimodal_encode(
    method _multimodal_model_generate (line 102) | def _multimodal_model_generate(
    method apply_chat_template (line 157) | def apply_chat_template(
    method generate_until (line 214) | def generate_until(
    method loglikelihood_rolling (line 309) | def loglikelihood_rolling(

FILE: lm_eval/models/winml.py
  class WindowsML (line 32) | class WindowsML(TemplateLM):
    method create_from_arg_obj (line 43) | def create_from_arg_obj(
    method __init__ (line 67) | def __init__(
    method _validate_dependencies (line 120) | def _validate_dependencies(self) -> None:
    method _fix_winrt_runtime (line 150) | def _fix_winrt_runtime(self):
    method _register_winml_providers_to_genai (line 164) | def _register_winml_providers_to_genai(self) -> bool:
    method _setup_winml_devices_and_providers (line 199) | def _setup_winml_devices_and_providers(self) -> None:
    method _load_and_compile_model (line 238) | def _load_and_compile_model(self, model_path: str) -> None:
    method eot_token_id (line 283) | def eot_token_id(self) -> int:
    method prefix_token_id (line 310) | def prefix_token_id(self) -> int | None:
    method max_gen_toks (line 340) | def max_gen_toks(self) -> int:
    method tok_encode (line 349) | def tok_encode(
    method tok_decode (line 375) | def tok_decode(self, tokens: list[int]) -> str:
    method _run_genai_inference_for_full_logits (line 387) | def _run_genai_inference_for_full_logits(self, input_text: str) -> np....
    method _loglikelihood_tokens (line 438) | def _loglikelihood_tokens(
    method loglikelihood (line 461) | def loglikelihood(
    method loglikelihood_rolling (line 574) | def loglikelihood_rolling(
    method generate_until (line 647) | def generate_until(
    method _run_genai_generation (line 694) | def _run_genai_generation(

FILE: lm_eval/prompts/__init__.py
  function get_prompt (line 23) | def get_prompt(prompt_id: str, dataset_name: str = None, subset_name: st...
  function load_prompt_list (line 72) | def load_prompt_list(
  class PromptString (line 115) | class PromptString:
    method __init__ (line 116) | def __init__(self, prompt_string):
    method apply (line 119) | def apply(self, doc):

FILE: lm_eval/result_schema.py
  class _TaskMetrics (line 110) | class _TaskMetrics(TypedDict, Generic[T], extra_items=T):
  class _SampleCount (line 131) | class _SampleCount(TypedDict):
  class _EvalConfig (line 141) | class _EvalConfig(TypedDict, total=False):
  class SampleResult (line 163) | class SampleResult(TypedDict, extra_items=float):

FILE: lm_eval/tasks/__init__.py
  function get_task_name_from_config (line 36) | def get_task_name_from_config(task_config: dict[str, str]) -> str:
  function get_task_name_from_object (line 50) | def get_task_name_from_object(task_object):
  function _check_duplicates (line 63) | def _check_duplicates(task_dict: dict) -> None:
  function _log_task_dict (line 98) | def _log_task_dict(task_dict: dict, task_manager: "TaskManager") -> None:
  function get_task_dict (line 137) | def get_task_dict(

FILE: lm_eval/tasks/_factory.py
  class TaskFactory (line 25) | class TaskFactory:
    method __init__ (line 32) | def __init__(self, *, meta: dict[str, Any] | None = None):
    method build (line 37) | def build(
    method _build_task (line 65) | def _build_task(self, entry: Entry, overrides: dict[str, Any] | None) ...
    method _build_group (line 85) | def _build_group(
    method _build_group_members (line 127) | def _build_group_members(
    method _build_tag (line 234) | def _build_tag(
    method _load_full_config (line 255) | def _load_full_config(
  function _ctor_accepts_config (line 283) | def _ctor_accepts_config(cls) -> bool:

FILE: lm_eval/tasks/_index.py
  class Kind (line 19) | class Kind(Enum):
  class Entry (line 28) | class Entry:
  class TaskIndex (line 36) | class TaskIndex:
    method __init__ (line 41) | def __init__(self, *, meta: dict[str, str] | None = None) -> None:
    method build (line 45) | def build(
    method _iter_yaml_files (line 82) | def _iter_yaml_files(root: Path):
    method process_cfg (line 94) | def process_cfg(
    method _register_tags (line 139) | def _register_tags(
    method _kind_of (line 154) | def _kind_of(cfg: dict) -> Kind:
    method entry_from_path (line 168) | def entry_from_path(path: Path) -> Entry | None:
    method entry_from_config (line 179) | def entry_from_config(cfg: dict[str, Any]) -> Entry | None:
    method _str_to_set (line 192) | def _str_to_set(*args) -> set[str]:

FILE: lm_eval/tasks/_yaml_loader.py
  function _mk_function_ctor (line 17) | def _mk_function_ctor(base_dir: Path, resolve: bool):
  function _make_loader (line 27) | def _make_loader(base_dir: Path, *, resolve_funcs: bool) -> type[yaml.Lo...
  function _load_module_with_cache (line 38) | def _load_module_with_cache(module_path: Path) -> Any:
  function _import_func_in_yml (line 93) | def _import_func_in_yml(qual: str, base_dir: Path):
  function _import_fun_from_str (line 130) | def _import_fun_from_str(path_str: str) -> Any:
  function load_yaml (line 164) | def load_yaml(

FILE: lm_eval/tasks/aclue/_generate_configs.py
  function parse_args (line 35) | def parse_args():

FILE: lm_eval/tasks/acpbench/gen_2shot/acp_utils.py
  class ACPBench_Visitor (line 47) | class ACPBench_Visitor(Visitor):
    method __init__ (line 48) | def __init__(self) -> None:
    method action_list (line 56) | def action_list(self, tree):
    method prog_list (line 59) | def prog_list(self, tree):
    method progression_list (line 64) | def progression_list(self, tree):
    method action_none (line 67) | def action_none(self, tree):
    method action_name (line 70) | def action_name(self, tree):
    method index (line 78) | def index(self, tree):
  class ACPGrammarParser (line 84) | class ACPGrammarParser(object):
    method __init__ (line 85) | def __init__(self, task) -> None:
    method parse (line 91) | def parse(self, input, debug=False):
  function is_on_optimal_plan (line 135) | def is_on_optimal_plan(domain, problem, action, opt):
  function is_plan (line 177) | def is_plan(domain, problem, new_plan):
  function get_action_preconditions (line 196) | def get_action_preconditions(domain, problem, action):
  function generate_optimal_plans_for_problem_state (line 207) | def generate_optimal_plans_for_problem_state(P, state, num_plans, timeout):
  function generate_top_q_plans (line 228) | def generate_top_q_plans(domain, problem, num_plans=10, quality_bound=1....
  function is_unsolvable_new_goal (line 241) | def is_unsolvable_new_goal(domain, problem, new_goal):
  function is_unsolvable (line 247) | def is_unsolvable(domain, problem):
  function extract_goal (line 274) | def extract_goal(prob):
  function entails (line 288) | def entails(state, partialstate):
  function progress (line 292) | def progress(state, act):
  function regress (line 302) | def regress(state, act):
  function get_STRIPS (line 312) | def get_STRIPS(domain, problem):
  function create_tmp_dom_prob_replace_init (line 330) | def create_tmp_dom_prob_replace_init(P, state, result_domain_file, resul...
  function fix_name (line 340) | def fix_name(s):
  function get_atoms_pddl (line 354) | def get_atoms_pddl(d, p, atoms):
  class Action (line 390) | class Action:
    method __init__ (line 391) | def __init__(self, name, pre, add, delete):
    method __str__ (line 397) | def __str__(self):
    method toJSON (line 404) | def toJSON(self):
    method __repr__ (line 416) | def __repr__(self):
    method __eq__ (line 419) | def __eq__(self, action):
    method __hash__ (line 422) | def __hash__(self):
  class STRIPS (line 426) | class STRIPS:
    method __init__ (line 427) | def __init__(self, domain, problem):
    method __str__ (line 453) | def __str__(self):
    method toJSON (line 460) | def toJSON(self):
    method operator_to_action (line 473) | def operator_to_action(self, op, check_fluents=True, check_static=False):
    method fix_pre_name (line 488) | def fix_pre_name(self, precondition):
    method action (line 493) | def action(self, name):
    method get_action_or_none (line 496) | def get_action_or_none(self, name):
    method fluent (line 501) | def fluent(self, name):
    method static_symbols (line 504) | def static_symbols(self):
    method fluent_symbols (line 507) | def fluent_symbols(self):
    method get_grounded_atoms (line 510) | def get_grounded_atoms(self, symbol):
    method get_applicable_actions (line 523) | def get_applicable_actions(self, s):
    method ground_problem (line 526) | def ground_problem(self, problem):
    method get_static (line 551) | def get_static(self):
    method PDDL_replace_init_pddl_parser (line 558) | def PDDL_replace_init_pddl_parser(self, s):
  function parse_ans (line 571) | def parse_ans(response: str, parser: ACPGrammarParser, task: str):
  function remove_garbage (line 582) | def remove_garbage(s):
  function compare_str (line 593) | def compare_str(s1, s2):
  function compare (line 597) | def compare(l1, l2):
  function check_prog_response (line 608) | def check_prog_response(resp):
  function clean_answer (line 618) | def clean_answer(resp, task):
  function get_grammar_task (line 642) | def get_grammar_task(task):
  function fix_action_name (line 666) | def fix_action_name(a):
  function str_remove_before_first_parentheses (line 671) | def str_remove_before_first_parentheses(s):
  function str_remove_after_last_parentheses (line 680) | def str_remove_after_last_parentheses(s):
  function cleanup_answer (line 691) | def cleanup_answer(ans):
  function set_equal (line 710) | def set_equal(ans1, ans2):
  class BaseEvaluator (line 714) | class BaseEvaluator(ABC):
    method __init__ (line 715) | def __init__(self) -> None:
    method get_score (line 719) | def get_score(self, ans, doc):
    method add_scores (line 722) | def add_scores(self, scores):
    method get_avg_score (line 725) | def get_avg_score(self):
  function get_evaluator (line 730) | def get_evaluator(group):
  class ActionReachabilityEvaluator (line 757) | class ActionReachabilityEvaluator(BaseEvaluator):
    method get_score (line 758) | def get_score(self, ans, doc):
  class ApplicabilityEvaluator (line 801) | class ApplicabilityEvaluator(BaseEvaluator):
    method get_score (line 802) | def get_score(self, ans, doc):
  function is_subsequence (line 817) | def is_subsequence(plan, new_plan):
  function is_subsequence_and_plan (line 828) | def is_subsequence_and_plan(domain, problem, plan, new_plan):
  class JustificationEvaluator (line 842) | class JustificationEvaluator(BaseEvaluator):
    method get_score (line 843) | def get_score(self, ans, doc):
  class LandmarksEvaluator (line 883) | class LandmarksEvaluator(BaseEvaluator):
    method get_score (line 884) | def get_score(self, ans, doc):
  class NextActionEvaluator (line 916) | class NextActionEvaluator(BaseEvaluator):
    method get_score (line 917) | def get_score(self, ans, doc):
  class ProgressionEvaluator (line 961) | class ProgressionEvaluator(BaseEvaluator):
    method get_score (line 962) | def get_score(self, ans, doc):
  class ReachabilityEvaluator (line 992) | class ReachabilityEvaluator(BaseEvaluator):
    method get_score (line 993) | def get_score(self, ans, doc):
  class ValidationEvaluator (line 1029) | class ValidationEvaluator(BaseEvaluator):
    method get_score (line 1030) | def get_score(self, ans, doc):
  function dump_item (line 1049) | def dump_item(item, **kwargs):
  function parse_prediction (line 1053) | def parse_prediction(prediction):
  class ACPGrammarFilter (line 1064) | class ACPGrammarFilter(RegexFilter):
    method __init__ (line 1067) | def __init__(self, *args, **kwargs):
    method clean_pos_neg (line 1071) | def clean_pos_neg(self, resp):
    method clean_simplified_plan (line 1082) | def clean_simplified_plan(self, resp):
    method apply (line 1091) | def apply(self, resps, docs):
  function process_acp_results (line 1107) | def process_acp_results(doc, results):
  function get_score (line 1111) | def get_score(references, predictions, **kwargs):

FILE: lm_eval/tasks/acpbench/gen_2shot_with_pddl/acp_utils.py
  class ACPBench_Visitor (line 47) | class ACPBench_Visitor(Visitor):
    method __init__ (line 48) | def __init__(self) -> None:
    method action_list (line 56) | def action_list(self, tree):
    method prog_list (line 59) | def prog_list(self, tree):
    method progression_list (line 64) | def progression_list(self, tree):
    method action_none (line 67) | def action_none(self, tree):
    method action_name (line 70) | def action_name(self, tree):
    method index (line 78) | def index(self, tree):
  class ACPGrammarParser (line 84) | class ACPGrammarParser(object):
    method __init__ (line 85) | def __init__(self, task) -> None:
    method parse (line 91) | def parse(self, input, debug=False):
  function is_on_optimal_plan (line 135) | def is_on_optimal_plan(domain, problem, action, opt):
  function is_plan (line 177) | def is_plan(domain, problem, new_plan):
  function get_action_preconditions (line 196) | def get_action_preconditions(domain, problem, action):
  function generate_optimal_plans_for_problem_state (line 207) | def generate_optimal_plans_for_problem_state(P, state, num_plans, timeout):
  function generate_top_q_plans (line 228) | def generate_top_q_plans(domain, problem, num_plans=10, quality_bound=1....
  function is_unsolvable_new_goal (line 241) | def is_unsolvable_new_goal(domain, problem, new_goal):
  function is_unsolvable (line 247) | def is_unsolvable(domain, problem):
  function extract_goal (line 274) | def extract_goal(prob):
  function entails (line 288) | def entails(state, partialstate):
  function progress (line 292) | def progress(state, act):
  function regress (line 302) | def regress(state, act):
  function get_STRIPS (line 312) | def get_STRIPS(domain, problem):
  function create_tmp_dom_prob_replace_init (line 330) | def create_tmp_dom_prob_replace_init(P, state, result_domain_file, resul...
  function fix_name (line 340) | def fix_name(s):
  function get_atoms_pddl (line 354) | def get_atoms_pddl(d, p, atoms):
  class Action (line 390) | class Action:
    method __init__ (line 391) | def __init__(self, name, pre, add, delete):
    method __str__ (line 397) | def __str__(self):
    method toJSON (line 404) | def toJSON(self):
    method __repr__ (line 416) | def __repr__(self):
    method __eq__ (line 419) | def __eq__(self, action):
    method __hash__ (line 422) | def __hash__(self):
  class STRIPS (line 426) | class STRIPS:
    method __init__ (line 427) | def __init__(self, domain, problem):
    method __str__ (line 453) | def __str__(self):
    method toJSON (line 460) | def toJSON(self):
    method operator_to_action (line 473) | def operator_to_action(self, op, check_fluents=True, check_static=False):
    method fix_pre_name (line 488) | def fix_pre_name(self, precondition):
    method action (line 493) | def action(self, name):
    method get_action_or_none (line 496) | def get_action_or_none(self, name):
    method fluent (line 501) | def fluent(self, name):
    method static_symbols (line 504) | def static_symbols(self):
    method fluent_symbols (line 507) | def fluent_symbols(self):
    method get_grounded_atoms (line 510) | def get_grounded_atoms(self, symbol):
    method get_applicable_actions (line 523) | def get_applicable_actions(self, s):
    method ground_problem (line 526) | def ground_problem(self, problem):
    method get_static (line 551) | def get_static(self):
    method PDDL_replace_init_pddl_parser (line 558) | def PDDL_replace_init_pddl_parser(self, s):
  function parse_ans (line 571) | def parse_ans(response: str, parser: ACPGrammarParser, task: str):
  function remove_garbage (line 582) | def remove_garbage(s):
  function compare_str (line 593) | def compare_str(s1, s2):
  function compare (line 597) | def compare(l1, l2):
  function check_prog_response (line 608) | def check_prog_response(resp):
  function clean_answer (line 618) | def clean_answer(resp, task):
  function get_grammar_task (line 642) | def get_grammar_task(task):
  function fix_action_name (line 666) | def fix_action_name(a):
  function str_remove_before_first_parentheses (line 671) | def str_remove_before_first_parentheses(s):
  function str_remove_after_last_parentheses (line 680) | def str_remove_after_last_parentheses(s):
  function cleanup_answer (line 691) | def cleanup_answer(ans):
  function set_equal (line 710) | def set_equal(ans1, ans2):
  class BaseEvaluator (line 714) | class BaseEvaluator(ABC):
    method __init__ (line 715) | def __init__(self) -> None:
    method get_score (line 719) | def get_score(self, ans, doc):
    method add_scores (line 722) | def add_scores(self, scores):
    method get_avg_score (line 725) | def get_avg_score(self):
  function get_evaluator (line 730) | def get_evaluator(group):
  class ActionReachabilityEvaluator (line 757) | class ActionReachabilityEvaluator(BaseEvaluator):
    method get_score (line 758) | def get_score(self, ans, doc):
  class ApplicabilityEvaluator (line 801) | class ApplicabilityEvaluator(BaseEvaluator):
    method get_score (line 802) | def get_score(self, ans, doc):
  function is_subsequence (line 817) | def is_subsequence(plan, new_plan):
  function is_subsequence_and_plan (line 828) | def is_subsequence_and_plan(domain, problem, plan, new_plan):
  class JustificationEvaluator (line 842) | class JustificationEvaluator(BaseEvaluator):
    method get_score (line 843) | def get_score(self, ans, doc):
  class LandmarksEvaluator (line 883) | class LandmarksEvaluator(BaseEvaluator):
    method get_score (line 884) | def get_score(self, ans, doc):
  class NextActionEvaluator (line 916) | class NextActionEvaluator(BaseEvaluator):
    method get_score (line 917) | def get_score(self, ans, doc):
  class ProgressionEvaluator (line 961) | class ProgressionEvaluator(BaseEvaluator):
    method get_score (line 962) | def get_score(self, ans, doc):
  class ReachabilityEvaluator (line 992) | class ReachabilityEvaluator(BaseEvaluator):
    method get_score (line 993) | def get_score(self, ans, doc):
  class ValidationEvaluator (line 1029) | class ValidationEvaluator(BaseEvaluator):
    method get_score (line 1030) | def get_score(self, ans, doc):
  function dump_item (line 1049) | def dump_item(item, **kwargs):
  function parse_prediction (line 1053) | def parse_prediction(prediction):
  class ACPGrammarFilter (line 1064) | class ACPGrammarFilter(RegexFilter):
    method __init__ (line 1067) | def __init__(self, *args, **kwargs):
    method clean_pos_neg (line 1071) | def clean_pos_neg(self, resp):
    method clean_simplified_plan (line 1082) | def clean_simplified_plan(self, resp):
    method apply (line 1091) | def apply(self, resps, docs):
  function process_acp_results (line 1107) | def process_acp_results(doc, results):
  function get_score (line 1111) | def get_score(references, predictions, **kwargs):

FILE: lm_eval/tasks/afrimgsm/gen_utils.py
  class FunctionTag (line 7) | class FunctionTag:
    method __init__ (line 8) | def __init__(self, value):
  function prompt_func (line 12) | def prompt_func(mode, lang):
  function gen_lang_yamls (line 22) | def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
  function main (line 96) | def main() -> None:

FILE: lm_eval/tasks/afrimgsm/utils.py
  function add_regex_pattern (line 75) | def add_regex_pattern(regex_pattern):
  function gen_lang_yamls (line 109) | def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
  function main (line 195) | def main() -> None:

FILE: lm_eval/tasks/afrimmlu/direct/prompt_1/utils.py
  function doc_to_choice (line 4) | def doc_to_choice(doc):
  function doc_to_text (line 9) | def doc_to_text(doc):

FILE: lm_eval/tasks/afrimmlu/direct/prompt_2/utils.py
  function doc_to_choice (line 4) | def doc_to_choice(doc):
  function doc_to_text (line 9) | def doc_to_text(doc):

FILE: lm_eval/tasks/afrimmlu/direct/prompt_3/utils.py
  function doc_to_choice (line 4) | def doc_to_choice(doc):
  function doc_to_text (line 9) | def doc_to_text(doc):

FILE: lm_eval/tasks/afrimmlu/direct/prompt_4/utils.py
  function doc_to_choice (line 4) | def doc_to_choice(doc):
  function doc_to_text (line 9) | def doc_to_text(doc):

FILE: lm_eval/tasks/afrimmlu/direct/prompt_5/utils.py
  function doc_to_choice (line 4) | def doc_to_choice(doc):
  function doc_to_text (line 9) | def doc_to_text(doc):

FILE: lm_eval/tasks/afrimmlu/gen_utils.py
  class FunctionTag (line 7) | class FunctionTag:
    method __init__ (line 8) | def __init__(self, value):
  function gen_lang_yamls (line 12) | def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
  function main (line 77) | def main() -> None:

FILE: lm_eval/tasks/afrimmlu/translate/prompt_1/utils.py
  function doc_to_choice (line 4) | def doc_to_choice(doc):
  function doc_to_text (line 9) | def doc_to_text(doc):

FILE: lm_eval/tasks/afrimmlu/translate/prompt_2/utils.py
  function doc_to_choice (line 4) | def doc_to_choice(doc):
  function doc_to_text (line 9) | def doc_to_text(doc):

FILE: lm_eval/tasks/afrimmlu/translate/prompt_3/utils.py
  function doc_to_choice (line 4) | def doc_to_choice(doc):
  function doc_to_text (line 9) | def doc_to_text(doc):

FILE: lm_eval/tasks/afrimmlu/translate/prompt_4/utils.py
  function doc_to_choice (line 4) | def doc_to_choice(doc):
  function doc_to_text (line 9) | def doc_to_text(doc):

FILE: lm_eval/tasks/afrimmlu/translate/prompt_5/utils.py
  function doc_to_choice (line 4) | def doc_to_choice(doc):
  function doc_to_text (line 9) | def doc_to_text(doc):

FILE: lm_eval/tasks/afrimmlu/utils.py
  function doc_to_choice (line 4) | def doc_to_choice(doc):
  function doc_to_text (line 9) | def doc_to_text(doc):

FILE: lm_eval/tasks/afrixnli/anli prompt/en-direct/utils.py
  function doc_to_target (line 4) | def doc_to_target(doc):

FILE: lm_eval/tasks/afrixnli/anli prompt/translate/utils.py
  function doc_to_target (line 4) | def doc_to_target(doc):

FILE: lm_eval/tasks/afrixnli/direct/prompt_1/utils.py
  function doc_to_text (line 4) | def doc_to_text(doc):
  function doc_to_target (line 17) | def doc_to_target(doc):

FILE: lm_eval/tasks/afrixnli/direct/prompt_2/utils.py
  function doc_to_target (line 4) | def doc_to_target(doc):

FILE: lm_eval/tasks/afrixnli/direct/prompt_3/utils.py
  function doc_to_target (line 4) | def doc_to_target(doc):

FILE: lm_eval/tasks/afrixnli/direct/prompt_4/utils.py
  function doc_to_text (line 4) | def doc_to_text(doc):
  function doc_to_target (line 17) | def doc_to_target(doc):

FILE: lm_eval/tasks/afrixnli/direct/prompt_5/utils.py
  function doc_to_target (line 4) | def doc_to_target(doc):

FILE: lm_eval/tasks/afrixnli/gen_utils.py
  class FunctionTag (line 7) | class FunctionTag:
    method __init__ (line 8) | def __init__(self, value):
  function prompt_func (line 12) | def prompt_func(mode, lang):
  function gen_lang_yamls (line 30) | def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
  function main (line 103) | def main() -> None:

FILE: lm_eval/tasks/afrixnli/lai prompt/direct/utils.py
  function doc_to_text (line 4) | def doc_to_text(doc):
  function doc_to_target (line 17) | def doc_to_target(doc):

FILE: lm_eval/tasks/afrixnli/lai prompt/translate/utils.py
  function doc_to_text (line 4) | def doc_to_text(doc):
  function doc_to_target (line 17) | def doc_to_target(doc):

FILE: lm_eval/tasks/afrixnli/translate/prompt_1/utils.py
  function doc_to_text (line 4) | def doc_to_text(doc):
  function doc_to_target (line 17) | def doc_to_target(doc):

FILE: lm_eval/tasks/afrixnli/translate/prompt_2/utils.py
  function doc_to_target (line 4) | def doc_to_target(doc):

FILE: lm_eval/tasks/afrixnli/translate/prompt_3/utils.py
  function doc_to_text (line 4) | def doc_to_text(doc):
  function doc_to_target (line 19) | def doc_to_target(doc):

FILE: lm_eval/tasks/afrixnli/translate/prompt_4/utils.py
  function doc_to_text (line 4) | def doc_to_text(doc):
  function doc_to_target (line 17) | def doc_to_target(doc):

FILE: lm_eval/tasks/afrixnli/translate/prompt_5/utils.py
  function doc_to_target (line 4) | def doc_to_target(doc):

FILE: lm_eval/tasks/afrixnli/utils.py
  class FunctionTag (line 6) | class FunctionTag:
    method __init__ (line 7) | def __init__(self, value):
  function gen_lang_yamls (line 123) | def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
  function main (line 211) | def main() -> None:

FILE: lm_eval/tasks/afrobench/adr/gen_utils.py
  class FunctionTag (line 7) | class FunctionTag:
    method __init__ (line 8) | def __init__(self, value):
  function prompt_func (line 12) | def prompt_func(mode, lang):
  function gen_lang_yamls (line 30) | def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
  function main (line 79) | def main() -> None:

FILE: lm_eval/tasks/afrobench/afriqa/prompt_1/utils.py
  function normalize_answer (line 6) | def normalize_answer(s):
  function f1 (line 28) | def f1(items):

FILE: lm_eval/tasks/afrobench/afriqa/prompt_2/utils.py
  function normalize_answer (line 6) | def normalize_answer(s):
  function f1 (line 28) | def f1(items):

FILE: lm_eval/tasks/afrobench/afriqa/prompt_3/utils.py
  function normalize_answer (line 6) | def normalize_answer(s):
  function f1 (line 28) | def f1(items):

FILE: lm_eval/tasks/afrobench/afriqa/prompt_4/utils.py
  function normalize_answer (line 6) | def normalize_answer(s):
  function f1 (line 28) | def f1(items):

FILE: lm_eval/tasks/afrobench/afriqa/prompt_5/utils.py
  function normalize_answer (line 6) | def normalize_answer(s):
  function f1 (line 28) | def f1(items):

FILE: lm_eval/tasks/afrobench/afriqa/utils.py
  class FunctionTag (line 7) | class FunctionTag:
    method __init__ (line 8) | def __init__(self, value):
  function prompt_func (line 12) | def prompt_func(mode, lang):
  function gen_lang_yamls (line 43) | def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
  function main (line 99) | def main() -> None:

FILE: lm_eval/tasks/afrobench/afrisenti/utils.py
  class FunctionTag (line 6) | class FunctionTag:
    method __init__ (line 7) | def __init__(self, value):
  function prompt_func (line 11) | def prompt_func(mode, lang):
  function gen_lang_yamls (line 35) | def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
  function main (line 98) | def main() -> None:

FILE: lm_eval/tasks/afrobench/belebele/utils.py
  function prompt_func (line 7) | def prompt_func(mode, lang):
  function gen_lang_yamls (line 18) | def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
  function main (line 129) | def main() -> None:

FILE: lm_eval/tasks/afrobench/flores/gen_utils.py
  class FunctionTag (line 7) | class FunctionTag:
    method __init__ (line 8) | def __init__(self, value):
  function prompt_func (line 12) | def prompt_func(mode, lang, lang_dict):
  function gen_lang_yamls (line 33) | def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str, reverse:...
  function main (line 165) | def main() -> None:

FILE: lm_eval/tasks/afrobench/injongointent/gen_utils.py
  class FunctionTag (line 7) | class FunctionTag:
    method __init__ (line 8) | def __init__(self, value):
  function prompt_func (line 12) | def prompt_func(mode, lang, intent):
  function gen_lang_yamls (line 29) | def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
  function main (line 133) | def main() -> None:

FILE: lm_eval/tasks/afrobench/mafand/gen_utils.py
  class FunctionTag (line 7) | class FunctionTag:
    method __init__ (line 8) | def __init__(self, value):
  function prompt_func (line 12) | def prompt_func(mode, lang, lang_dict):
  function gen_lang_yamls (line 35) | def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str, reverse:...
  function main (line 110) | def main() -> None:

FILE: lm_eval/tasks/afrobench/mafand/prompt_1/african-english/utils.py
  function get_target (line 26) | def get_target(doc):
  function get_target_reverse (line 35) | def get_target_reverse(doc):
  function create_text_prompt_1 (line 43) | def create_text_prompt_1(doc):
  function create_reverse_prompt_1 (line 57) | def create_reverse_prompt_1(doc):
  function create_text_prompt_2 (line 72) | def create_text_prompt_2(doc):
  function create_reverse_prompt_2 (line 84) | def create_reverse_prompt_2(doc):
  function create_text_prompt_3 (line 97) | def create_text_prompt_3(doc):
  function create_reverse_prompt_3 (line 110) | def create_reverse_prompt_3(doc):

FILE: lm_eval/tasks/afrobench/mafand/prompt_1/english-african/utils.py
  function get_target (line 26) | def get_target(doc):
  function get_target_reverse (line 35) | def get_target_reverse(doc):
  function create_text_prompt_1 (line 43) | def create_text_prompt_1(doc):
  function create_reverse_prompt_1 (line 57) | def create_reverse_prompt_1(doc):
  function create_text_prompt_2 (line 72) | def create_text_prompt_2(doc):
  function create_reverse_prompt_2 (line 84) | def create_reverse_prompt_2(doc):
  function create_text_prompt_3 (line 97) | def create_text_prompt_3(doc):
  function create_reverse_prompt_3 (line 110) | def create_reverse_prompt_3(doc):

FILE: lm_eval/tasks/afrobench/mafand/prompt_2/african-english/utils.py
  function get_target (line 26) | def get_target(doc):
  function get_target_reverse (line 35) | def get_target_reverse(doc):
  function create_text_prompt_1 (line 43) | def create_text_prompt_1(doc):
  function create_reverse_prompt_1 (line 57) | def create_reverse_prompt_1(doc):
  function create_text_prompt_2 (line 72) | def create_text_prompt_2(doc):
  function create_reverse_prompt_2 (line 84) | def create_reverse_prompt_2(doc):
  function create_text_prompt_3 (line 97) | def create_text_prompt_3(doc):
  function create_reverse_prompt_3 (line 110) | def create_reverse_prompt_3(doc):

FILE: lm_eval/tasks/afrobench/mafand/prompt_2/english-african/utils.py
  function get_target (line 26) | def get_target(doc):
  function get_target_reverse (line 35) | def get_target_reverse(doc):
  function create_text_prompt_1 (line 43) | def create_text_prompt_1(doc):
  function create_reverse_prompt_1 (line 57) | def create_reverse_prompt_1(doc):
  function create_text_prompt_2 (line 72) | def create_text_prompt_2(doc):
  function create_reverse_prompt_2 (line 84) | def create_reverse_prompt_2(doc):
  function create_text_prompt_3 (line 97) | def create_text_prompt_3(doc):
  function create_reverse_prompt_3 (line 110) | def create_reverse_prompt_3(doc):

FILE: lm_eval/tasks/afrobench/mafand/prompt_3/african-english/utils.py
  function get_target (line 26) | def get_target(doc):
  function get_target_reverse (line 35) | def get_target_reverse(doc):
  function create_text_prompt_1 (line 43) | def create_text_prompt_1(doc):
  function create_reverse_prompt_1 (line 57) | def create_reverse_prompt_1(doc):
  function create_text_prompt_2 (line 72) | def create_text_prompt_2(doc):
  function create_reverse_prompt_2 (line 84) | def create_reverse_prompt_2(doc):
  function create_text_prompt_3 (line 97) | def create_text_prompt_3(doc):
  function create_reverse_prompt_3 (line 110) | def create_reverse_prompt_3(doc):

FILE: lm_eval/tasks/afrobench/mafand/prompt_3/english-african/utils.py
  function get_target (line 26) | def get_target(doc):
  function get_target_reverse (line 35) | def get_target_reverse(doc):
  function create_text_prompt_1 (line 43) | def create_text_prompt_1(doc):
  function create_reverse_prompt_1 (line 57) | def create_reverse_prompt_1(doc):
  function create_text_prompt_2 (line 72) | def create_text_prompt_2(doc):
  function create_reverse_prompt_2 (line 84) | def create_reverse_prompt_2(doc):
  function create_text_prompt_3 (line 97) | def create_text_prompt_3(doc):
  function create_reverse_prompt_3 (line 110) | def create_reverse_prompt_3(doc):

FILE: lm_eval/tasks/afrobench/masakhaner/gen_utils.py
  class FunctionTag (line 7) | class FunctionTag:
    method __init__ (line 8) | def __init__(self, value):
  function prompt_func (line 12) | def prompt_func(mode, lang):
  function gen_lang_yamls (line 48) | def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
  function main (line 112) | def main() -> None:

FILE: lm_eval/tasks/afrobench/masakhaner/prompt_1/utils.py
  function doc_to_target (line 7) | def doc_to_target(doc):
  function transform_text (line 11) | def transform_text(text):
  function span_f1_agg (line 42) | def span_f1_agg(items):

FILE: lm_eval/tasks/afrobench/masakhaner/prompt_2/utils.py
  function doc_to_target (line 7) | def doc_to_target(doc):
  function transform_text (line 11) | def transform_text(text):
  function span_f1_agg (line 42) | def span_f1_agg(items):

FILE: lm_eval/tasks/afrobench/masakhaner/prompt_3/utils.py
  function doc_to_target (line 7) | def doc_to_target(doc):
  function transform_text (line 11) | def transform_text(text):
  function span_f1_agg (line 42) | def span_f1_agg(items):

FILE: lm_eval/tasks/afrobench/masakhaner/prompt_4/utils.py
  function doc_to_target (line 7) | def doc_to_target(doc):
  function transform_text (line 11) | def transform_text(text):
  function span_f1_agg (line 42) | def span_f1_agg(items):

FILE: lm_eval/tasks/afrobench/masakhaner/prompt_5/utils.py
  function doc_to_target (line 7) | def doc_to_target(doc):
  function transform_text (line 11) | def transform_text(text):
  function span_f1_agg (line 42) | def span_f1_agg(items):

FILE: lm_eval/tasks/afrobench/masakhanews/utils.py
  function prompt_func (line 7) | def prompt_func(mode, lang):
  function gen_lang_yamls (line 35) | def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
  function main (line 97) | def main() -> None:

FILE: lm_eval/tasks/afrobench/masakhapos/gen_utils.py
  class FunctionTag (line 7) | class FunctionTag:
    method __init__ (line 8) | def __init__(self, value):
  function prompt_func (line 12) | def prompt_func(mode, lang):
  function gen_lang_yamls (line 61) | def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
  function main (line 125) | def main() -> None:

FILE: lm_eval/tasks/afrobench/masakhapos/prompt_1/utils.py
  function doc_to_target (line 8) | def doc_to_target(doc):
  function acc_score (line 32) | def acc_score(items):

FILE: lm_eval/tasks/afrobench/masakhapos/prompt_2/utils.py
  function doc_to_target (line 8) | def doc_to_target(doc):
  function acc_score (line 32) | def acc_score(items):

FILE: lm_eval/tasks/afrobench/masakhapos/prompt_3/utils.py
  function doc_to_target (line 8) | def doc_to_target(doc):
  function acc_score (line 32) | def acc_score(items):

FILE: lm_eval/tasks/afrobench/masakhapos/prompt_4/utils.py
  function doc_to_target (line 8) | def doc_to_target(doc):
  function acc_score (line 32) | def acc_score(items):

FILE: lm_eval/tasks/afrobench/masakhapos/prompt_5/utils.py
  function doc_to_target (line 8) | def doc_to_target(doc):
  function acc_score (line 32) | def acc_score(items):

FILE: lm_eval/tasks/afrobench/masakhapos/utils.py
  function doc_to_text (line 4) | def doc_to_text(doc):
  function doc_to_target (line 19) | def doc_to_target(doc):

FILE: lm_eval/tasks/afrobench/naijarc/utils.py
  function prompt_func (line 7) | def prompt_func(mode, lang):
  function gen_lang_yamls (line 18) | def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
  function main (line 67) | def main() -> None:

FILE: lm_eval/tasks/afrobench/ntrex/gen_utils.py
  class FunctionTag (line 7) | class FunctionTag:
    method __init__ (line 8) | def __init__(self, value):
  function prompt_func (line 12) | def prompt_func(mode, lang, lang_dict):
  function gen_lang_yamls (line 33) | def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str, reverse:...
  function main (line 134) | def main() -> None:

FILE: lm_eval/tasks/afrobench/openai_mmlu/utils.py
  function prompt_func (line 7) | def prompt_func(mode, lang):
  function gen_lang_yamls (line 18) | def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
  function main (line 73) | def main() -> None:

FILE: lm_eval/tasks/afrobench/salt/gen_utils.py
  class FunctionTag (line 7) | class FunctionTag:
    method __init__ (line 8) | def __init__(self, value):
  function prompt_func (line 12) | def prompt_func(mode, lang, lang_dict):
  function gen_lang_yamls (line 34) | def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str, reverse:...
  function main (line 112) | def main() -> None:

FILE: lm_eval/tasks/afrobench/sib/utils.py
  class FunctionTag (line 7) | class FunctionTag:
    method __init__ (line 8) | def __init__(self, value):
  function prompt_func (line 12) | def prompt_func(mode, lang):
  function gen_lang_yamls (line 40) | def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
  function main (line 201) | def main() -> None:

FILE: lm_eval/tasks/afrobench/uhura-arc-easy/utils.py
  function get_language_from_code (line 8) | def get_language_from_code(code: str) -> str:
  function prompt_func (line 13) | def prompt_func(mode):
  function gen_lang_yamls (line 51) | def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
  function main (line 99) | def main() -> None:

FILE: lm_eval/tasks/afrobench/xlsum/prompt_1/utils.py
  function rougeL (line 4) | def rougeL(items):
  function rougeL_agg (line 11) | def rougeL_agg(items):

FILE: lm_eval/tasks/afrobench/xlsum/prompt_2/utils.py
  function rougeL (line 4) | def rougeL(items):
  function rougeL_agg (line 11) | def rougeL_agg(items):

FILE: lm_eval/tasks/afrobench/xlsum/prompt_3/utils.py
  function rougeL (line 4) | def rougeL(items):
  function rougeL_agg (line 11) | def rougeL_agg(items):

FILE: lm_eval/tasks/afrobench/xlsum/utils.py
  function prompt_func (line 7) | def prompt_func(mode, lang):
  function gen_lang_yamls (line 29) | def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
  function main (line 88) | def main() -> None:

FILE: lm_eval/tasks/agieval/utils.py
  function parse_math_answer (line 10) | def parse_math_answer(raw_string):
  function _fix_fracs (line 82) | def _fix_fracs(string):
  function _fix_a_slash_b (line 114) | def _fix_a_slash_b(string):
  function _remove_right_units (line 129) | def _remove_right_units(string):
  function _fix_sqrt (line 139) | def _fix_sqrt(string):
  function _strip_string (line 154) | def _strip_string(string):
  function is_equiv (line 224) | def is_equiv(str1, str2, verbose=False):
  function process_results (line 243) | def process_results(doc: dict, results: List[str]) -> Dict[str, int]:
  function process_results_mcqa (line 262) | def process_results_mcqa(doc, results):

FILE: lm_eval/tasks/aime/utils.py
  function process_results (line 5) | def process_results(doc: dict, results: List[str]) -> Dict[str, int]:
  function is_equiv (line 36) | def is_equiv(str1, str2, verbose=False):
  function remove_boxed (line 53) | def remove_boxed(s):
  function last_boxed_only_string (line 67) | def last_boxed_only_string(string):
  function fix_fracs (line 97) | def fix_fracs(string):
  function fix_a_slash_b (line 129) | def fix_a_slash_b(string):
  function remove_right_units (line 144) | def remove_right_units(string):
  function fix_sqrt (line 154) | def fix_sqrt(string):
  function strip_string (line 169) | def strip_string(string):

FILE: lm_eval/tasks/arab_culture/_generate_configs.py
  function parse_args (line 34) | def parse_args():

FILE: lm_eval/tasks/arab_culture/utils_mcq.py
  function doc_to_text (line 49) | def doc_to_text(doc):
  function doc_to_choice (line 101) | def doc_to_choice(doc):
  function doc_to_target (line 105) | def doc_to_target(doc):

FILE: lm_eval/tasks/arab_culture_completion/_generate_configs.py
  function parse_args (line 34) | def parse_args():

FILE: lm_eval/tasks/arab_culture_completion/utils_completion.py
  function doc_to_text (line 52) | def doc_to_text(doc):
  function doc_to_choice (line 91) | def doc_to_choice(doc):
  function doc_to_target (line 96) | def doc_to_target(doc):

FILE: lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_alghafa/utils.py
  function process_docs (line 5) | def process_docs(dataset: datasets.Dataset):

FILE: lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_exams/utils.py
  function process_docs (line 15) | def process_docs(dataset: datasets.Dataset):

FILE: lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/utils.py
  function process_docs (line 15) | def process_docs(dataset: datasets.Dataset):

FILE: lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_arc_challenge/utils.py
  function process_docs (line 5) | def process_docs(dataset: datasets.Dataset):

FILE: lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_arc_easy/utils.py
  function process_docs (line 5) | def process_docs(dataset: datasets.Dataset):

FILE: lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_boolq/utils.py
  function process_docs (line 5) | def process_docs(dataset: datasets.Dataset):

FILE: lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_copa/utils.py
  function process_docs (line 5) | def process_docs(dataset: datasets.Dataset):

FILE: lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_hellaswag/utils.py
  function process_docs (line 7) | def process_docs(dataset: datasets.Dataset):

FILE: lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_mmlu/utils.py
  function process_docs (line 5) | def process_docs(dataset: datasets.Dataset):

FILE: lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_openbook_qa/utils.py
  function process_docs (line 5) | def process_docs(dataset: datasets.Dataset):

FILE: lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_piqa/utils.py
  function process_docs (line 5) | def process_docs(dataset: datasets.Dataset):

FILE: lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_race/utils.py
  function process_docs (line 5) | def process_docs(dataset: datasets.Dataset):

FILE: lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_sciq/utils.py
  function doc_to_text (line 7) | def doc_to_text(doc):
  function process_docs (line 24) | def process_docs(dataset: datasets.Dataset):

FILE: lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_toxigen/utils.py
  function process_docs (line 5) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:

FILE: lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/utils.py
  function process_docs (line 5) | def process_docs(dataset: datasets.Dataset):

FILE: lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_alghafa_light/utils.py
  function process_docs (line 5) | def process_docs(dataset: datasets.Dataset):

FILE: lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_exams_light/utils.py
  function process_docs (line 15) | def process_docs(dataset: datasets.Dataset):

FILE: lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/utils.py
  function process_docs (line 15) | def process_docs(dataset: datasets.Dataset):

FILE: lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_arc_challenge_light/utils.py
  function process_docs (line 5) | def process_docs(dataset: datasets.Dataset):

FILE: lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_arc_easy_light/utils.py
  function process_docs (line 5) | def process_docs(dataset: datasets.Dataset):

FILE: lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_boolq_light/utils.py
  function process_docs (line 5) | def process_docs(dataset: datasets.Dataset):

FILE: lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_copa_light/utils.py
  function process_docs (line 5) | def process_docs(dataset: datasets.Dataset):

FILE: lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_hellaswag_light/utils.py
  function process_docs (line 7) | def process_docs(dataset: datasets.Dataset):

FILE: lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_mmlu_light/utils.py
  function process_docs (line 5) | def process_docs(dataset: datasets.Dataset):

FILE: lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_openbook_qa_light/utils.py
  function process_docs (line 5) | def process_docs(dataset: datasets.Dataset):

FILE: lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_piqa_light/utils.py
  function process_docs (line 5) | def process_docs(dataset: datasets.Dataset):

FILE: lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_race_light/utils.py
  function process_docs (line 5) | def process_docs(dataset: datasets.Dataset):

FILE: lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_sciq_light/utils.py
  function doc_to_text (line 7) | def doc_to_text(doc):
  function process_docs (line 24) | def process_docs(dataset: datasets.Dataset):

FILE: lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_toxigen_light/utils.py
  function process_docs (line 5) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:

FILE: lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/utils.py
  function process_docs (line 5) | def process_docs(dataset: datasets.Dataset):

FILE: lm_eval/tasks/arabicmmlu/_generate_configs.py
  function parse_args (line 60) | def parse_args():

FILE: lm_eval/tasks/arabicmmlu/utils.py
  function doc_to_text (line 14) | def doc_to_text(doc):
  function doc_to_choice (line 43) | def doc_to_choice(doc):

FILE: lm_eval/tasks/aradice/ArabicMMLU/EGY/metrics.py
  function macro_f1_score (line 4) | def macro_f1_score(items):
  function micro_f1_score (line 12) | def micro_f1_score(items):
  function weighted_f1_score (line 20) | def weighted_f1_score(items):

FILE: lm_eval/tasks/aradice/ArabicMMLU/EGY/utils.py
  function process_docs (line 51) | def process_docs(dataset):

FILE: lm_eval/tasks/aradice/ArabicMMLU/LEV/metrics.py
  function macro_f1_score (line 4) | def macro_f1_score(items):
  function micro_f1_score (line 12) | def micro_f1_score(items):
  function weighted_f1_score (line 20) | def weighted_f1_score(items):

FILE: lm_eval/tasks/aradice/ArabicMMLU/LEV/utils.py
  function process_docs (line 50) | def process_docs(dataset):

FILE: lm_eval/tasks/aradice/boolq/EGY/metrics.py
  function macro_f1_score (line 4) | def macro_f1_score(items):
  function micro_f1_score (line 12) | def micro_f1_score(items):
  function weighted_f1_score (line 20) | def weighted_f1_score(items):

FILE: lm_eval/tasks/aradice/boolq/EGY/utils.py
  function process_docs (line 4) | def process_docs(dataset):

FILE: lm_eval/tasks/aradice/boolq/ENG/metrics.py
  function macro_f1_score (line 4) | def macro_f1_score(items):
  function micro_f1_score (line 12) | def micro_f1_score(items):
  function weighted_f1_score (line 20) | def weighted_f1_score(items):

FILE: lm_eval/tasks/aradice/boolq/ENG/utils.py
  function process_docs (line 4) | def process_docs(dataset):

FILE: lm_eval/tasks/aradice/boolq/LEV/metrics.py
  function macro_f1_score (line 4) | def macro_f1_score(items):
  function micro_f1_score (line 12) | def micro_f1_score(items):
  function weighted_f1_score (line 20) | def weighted_f1_score(items):

FILE: lm_eval/tasks/aradice/boolq/LEV/utils.py
  function process_docs (line 4) | def process_docs(dataset):

FILE: lm_eval/tasks/aradice/boolq/MSA/metrics.py
  function macro_f1_score (line 4) | def macro_f1_score(items):
  function micro_f1_score (line 12) | def micro_f1_score(items):
  function weighted_f1_score (line 20) | def weighted_f1_score(items):

FILE: lm_eval/tasks/aradice/boolq/MSA/utils.py
  function process_docs (line 4) | def process_docs(dataset):

FILE: lm_eval/tasks/aradice/cultural-benchmark/metrics.py
  function macro_f1_score (line 4) | def macro_f1_score(items):
  function micro_f1_score (line 12) | def micro_f1_score(items):
  function weighted_f1_score (line 20) | def weighted_f1_score(items):

FILE: lm_eval/tasks/aradice/cultural-benchmark/utils.py
  function process_docs (line 1) | def process_docs(dataset):

FILE: lm_eval/tasks/aradice/openbookqa/metrics.py
  function macro_f1_score (line 4) | def macro_f1_score(items):
  function micro_f1_score (line 12) | def micro_f1_score(items):
  function weighted_f1_score (line 20) | def weighted_f1_score(items):

FILE: lm_eval/tasks/aradice/openbookqa/utils.py
  function doc_to_target (line 1) | def doc_to_target(doc):
  function doc_to_choice (line 12) | def doc_to_choice(doc):
  function doc_to_text (line 17) | def doc_to_text(doc):

FILE: lm_eval/tasks/aradice/piqa/metrics.py
  function macro_f1_score (line 4) | def macro_f1_score(items):
  function micro_f1_score (line 12) | def micro_f1_score(items):
  function weighted_f1_score (line 20) | def weighted_f1_score(items):

FILE: lm_eval/tasks/aradice/truthfulqa_mcq/metrics.py
  function macro_f1_score (line 4) | def macro_f1_score(items):
  function micro_f1_score (line 12) | def micro_f1_score(items):
  function weighted_f1_score (line 20) | def weighted_f1_score(items):

FILE: lm_eval/tasks/aradice/winogrande/metrics.py
  function macro_f1_score (line 4) | def macro_f1_score(items):
  function micro_f1_score (line 12) | def micro_f1_score(items):
  function weighted_f1_score (line 20) | def weighted_f1_score(items):

FILE: lm_eval/tasks/aradice/winogrande/utils.py
  function doc_to_text (line 1) | def doc_to_text(doc):
  function doc_to_target (line 6) | def doc_to_target(doc):
  function doc_to_choice (line 11) | def doc_to_choice(doc):

FILE: lm_eval/tasks/babilong/common_utils.py
  function get_tokenizer (line 18) | def get_tokenizer(
  function postprocess_pred (line 27) | def postprocess_pred(prediction: list[str]) -> list[str]:
  function load_dataset (line 40) | def load_dataset(**kwargs):
  function process_results (line 55) | def process_results(doc: dict, results: list[str]) -> dict[str, float]:

FILE: lm_eval/tasks/basque_bench/flores_eu/create_yamls_flores_eu.py
  function doc_to_text (line 257) | def doc_to_text(src: str, tgt: str) -> str:
  function doc_to_target (line 265) | def doc_to_target(tgt: str) -> str:
  function gen_lang_yamls (line 272) | def gen_lang_yamls(output_dir: str, overwrite: bool) -> None:
  function main (line 316) | def main() -> None:

FILE: lm_eval/tasks/basque_bench/utils.py
  function xcopa_doc_to_text (line 6) | def xcopa_doc_to_text(doc):
  function xcopa_doc_to_choice (line 11) | def xcopa_doc_to_choice(doc):
  function paws_process_docs (line 21) | def paws_process_docs(dataset):

FILE: lm_eval/tasks/basqueglue/utils.py
  function general_detokenize (line 7) | def general_detokenize(string):
  function process_doc (line 16) | def process_doc(string):
  function process_wic_docs (line 22) | def process_wic_docs(dataset):
  function coref_doc_to_text (line 36) | def coref_doc_to_text(x):
  function micro_f1_score (line 62) | def micro_f1_score(items):
  function vaxx_f1_score (line 71) | def vaxx_f1_score(items):

FILE: lm_eval/tasks/bbh/_generate_configs.py
  function parse_args (line 15) | def parse_args():

FILE: lm_eval/tasks/bbh/cot_zeroshot/utils.py
  class ExtendedRegexFilter (line 9) | class ExtendedRegexFilter(RegexFilter):
    method __init__ (line 14) | def __init__(
    method filter_ignores (line 28) | def filter_ignores(self, st):
    method find_match (line 41) | def find_match(self, regex, resp, convert_dict={}):
  class MapRegexFilter (line 53) | class MapRegexFilter(ExtendedRegexFilter):
    method __init__ (line 54) | def __init__(
    method apply (line 82) | def apply(self, resps, docs):
  class NumberParseRegexFilter (line 109) | class NumberParseRegexFilter(ExtendedRegexFilter):
    method apply (line 110) | def apply(self, resps, docs):
  class WordSortFilter (line 140) | class WordSortFilter(Filter):
    method apply (line 143) | def apply(self, resps, docs):
  class MultiChoiceRegexFilter (line 162) | class MultiChoiceRegexFilter(ExtendedRegexFilter):
    method __init__ (line 163) | def __init__(self, *args, **kwargs):
    method apply (line 175) | def apply(self, resps, docs):

FILE: lm_eval/tasks/bbh/zeroshot/utils.py
  class ExtendedRegexFilter (line 9) | class ExtendedRegexFilter(RegexFilter):
    method __init__ (line 14) | def __init__(
    method filter_ignores (line 28) | def filter_ignores(self, st):
    method find_match (line 41) | def find_match(self, regex, resp, convert_dict={}):
  class MapRegexFilter (line 53) | class MapRegexFilter(ExtendedRegexFilter):
    method __init__ (line 54) | def __init__(
    method apply (line 82) | def apply(self, resps, docs):
  class NumberParseRegexFilter (line 109) | class NumberParseRegexFilter(ExtendedRegexFilter):
    method apply (line 110) | def apply(self, resps, docs):
  class WordSortFilter (line 140) | class WordSortFilter(Filter):
    method apply (line 143) | def apply(self, resps, docs):
  class MultiChoiceRegexFilter (line 162) | class MultiChoiceRegexFilter(ExtendedRegexFilter):
    method __init__ (line 163) | def __init__(self, *args, **kwargs):
    method apply (line 175) | def apply(self, resps, docs):

FILE: lm_eval/tasks/bbq/utils.py
  function agg_accuracy_amb (line 33) | def agg_accuracy_amb(arr):
  function agg_accuracy_disamb (line 42) | def agg_accuracy_disamb(arr):
  function agg_disamb_bias_scores (line 51) | def agg_disamb_bias_scores(arr):
  function agg_amb_bias_scores (line 84) | def agg_amb_bias_scores(arr):
  function _process_results (line 110) | def _process_results(doc, answer: int):
  function _clean_answer (line 193) | def _clean_answer(answer: str):
  function _check_unk_answer (line 204) | def _check_unk_answer(answer: str):
  function process_results_generate_until (line 212) | def process_results_generate_until(doc, results):
  function process_results_multiple_choice (line 242) | def process_results_multiple_choice(doc, results):
  function doc_to_biased_answer (line 255) | def doc_to_biased_answer(doc):
  function _process_groups_in_answers (line 265) | def _process_groups_in_answers(string):
  function process_docs (line 300) | def process_docs(dataset: datasets.Dataset):
  function filter_dataset_context (line 362) | def filter_dataset_context(dataset: datasets.Dataset, context: str) -> d...
  function process_docs_ambig (line 368) | def process_docs_ambig(dataset: datasets.Dataset):
  function process_docs_disambig (line 372) | def process_docs_disambig(dataset: datasets.Dataset):
  function doc_to_choice (line 376) | def doc_to_choice(doc):
  function _doc_to_choice_groups (line 385) | def _doc_to_choice_groups(doc):
  function doc_to_targets (line 397) | def doc_to_targets(doc):
  function doc_to_target (line 412) | def doc_to_target(doc):
  function filter_dataset (line 417) | def filter_dataset(dataset: datasets.Dataset, bias_type: str) -> dataset...
  function filter_race_color (line 421) | def filter_race_color(dataset: datasets.Dataset) -> datasets.Dataset:

FILE: lm_eval/tasks/belebele/_generate_configs.py
  function parse_args (line 18) | def parse_args():
  function query (line 41) | def query():

FILE: lm_eval/tasks/bigbench/generate_tasks.py
  function main (line 183) | def main() -> None:

FILE: lm_eval/tasks/blimp/generate_configs.py
  function main (line 75) | def main() -> None:

FILE: lm_eval/tasks/c4/preprocess_c4.py
  function c4_detokenizer (line 4) | def c4_detokenizer(doc):
  function process_results (line 39) | def process_results(doc, results):

FILE: lm_eval/tasks/cabbq/utils.py
  function _model_answer (line 6) | def _model_answer(lls):
  function _model_answer_type (line 25) | def _model_answer_type(doc, model_answer):
  function process_results (line 75) | def process_results(doc, results):
  function acc_ambig_agg (line 137) | def acc_ambig_agg(results):
  function acc_disambig_agg (line 159) | def acc_disambig_agg(results):
  function bias_score_ambig_agg (line 181) | def bias_score_ambig_agg(results):
  function bias_score_disambig_agg (line 212) | def bias_score_disambig_agg(results):

FILE: lm_eval/tasks/careqa/utils.py
  function doc_to_text (line 1) | def doc_to_text(doc) -> str:
  function doc_to_target (line 39) | def doc_to_target(doc) -> int:

FILE: lm_eval/tasks/careqa/utils_open.py
  function doc_eval (line 22) | def doc_eval(pred, refs):
  function doc_to_text (line 65) | def doc_to_text(doc) -> str:
  function doc_to_target (line 69) | def doc_to_target(doc) -> str:
  function process_results_gen (line 73) | def process_results_gen(doc, results):
  function process_results_gen_w_repeats (line 98) | def process_results_gen_w_repeats(doc, results):

FILE: lm_eval/tasks/careqa/utils_perplexity.py
  function doc_to_target (line 5) | def doc_to_target(doc) -> str:
  function process_results (line 9) | def process_results(doc, results):

FILE: lm_eval/tasks/catalan_bench/flores_ca/create_yamls_flores_ca.py
  function code_to_language_name (line 246) | def code_to_language_name(code):
  function code_to_short_name (line 250) | def code_to_short_name(code):
  function jinja_var (line 254) | def jinja_var(s):
  function doc_to_text (line 258) | def doc_to_text(src: str, tgt: str) -> str:
  function doc_to_target (line 266) | def doc_to_target(tgt: str) -> str:
  function gen_lang_yamls (line 273) | def gen_lang_yamls(output_dir: str, overwrite: bool) -> None:
  function main (line 317) | def main() -> None:

FILE: lm_eval/tasks/catalan_bench/truthfulqa_va/utils.py
  function lowercase_first_letter (line 14) | def lowercase_first_letter(text):
  function process_summarization (line 18) | def process_summarization(dataset):
  function process_docs_paraphrases (line 28) | def process_docs_paraphrases(dataset):
  function process_docs_paws (line 56) | def process_docs_paws(dataset):
  function rouge1 (line 84) | def rouge1(items):
  function rouge1_agg (line 91) | def rouge1_agg(items):
  function process_results_mc2 (line 102) | def process_results_mc2(doc, results):
  function process_docs_gen (line 115) | def process_docs_gen(dataset: datasets.Dataset) -> datasets.Dataset:
  function preprocess_function_gen (line 119) | def preprocess_function_gen(examples):
  function process_doc_nli (line 143) | def process_doc_nli(dataset):
  function process_results_gen (line 170) | def process_results_gen(doc, results):
  function bleu (line 241) | def bleu(refs, preds):
  function rouge (line 263) | def rouge(refs, preds):

FILE: lm_eval/tasks/catalan_bench/utils.py
  function lowercase_first_letter (line 10) | def lowercase_first_letter(text):
  function process_doc_nli (line 14) | def process_doc_nli(dataset):
  function process_results_coqcat (line 38) | def process_results_coqcat(doc, results):
  function process_results_qa (line 72) | def process_results_qa(doc, results):
  function process_doc_cabreu (line 81) | def process_doc_cabreu(dataset):
  function process_docs_paraphrases (line 96) | def process_docs_paraphrases(dataset):
  function process_docs_copa_ca (line 119) | def process_docs_copa_ca(dataset):
  function rouge1 (line 128) | def rouge1(items):
  function rouge1_agg (line 135) | def rouge1_agg(items):

FILE: lm_eval/tasks/ceval/_generate_configs.py
  function parse_args (line 72) | def parse_args():

FILE: lm_eval/tasks/chartqa/utils.py
  function _normalize_string (line 6) | def _normalize_string(s):
  function _remove_end_punctuation (line 14) | def _remove_end_punctuation(unnormalized_string: str) -> str:
  class RelaxedCorrectness (line 27) | class RelaxedCorrectness:
    method _relaxed_correctness (line 39) | def _relaxed_correctness(
    method score (line 132) | def score(self, model_answer: str, reference_answer: str | list[str]) ...
  class ExplicitPromptRelaxedCorrectness (line 141) | class ExplicitPromptRelaxedCorrectness(RelaxedCorrectness):
    method name (line 145) | def name(self) -> str:
    method _get_final_answer (line 148) | def _get_final_answer(self, generation: str) -> str:
    method score (line 174) | def score(self, model_answer: str, reference_answer: str | list[str]) ...
  class AnywhereInAnswerRelaxedCorrectness (line 182) | class AnywhereInAnswerRelaxedCorrectness(ExplicitPromptRelaxedCorrectness):
    method name (line 189) | def name(self) -> str:
    method score (line 192) | def score(self, model_answer: str, reference_answer: str | list[str]) ...
  function exact_match (line 242) | def exact_match(references, predictions):
  function relaxed_accuracy (line 257) | def relaxed_accuracy(references, predictions):
  function anywhere_accuracy (line 268) | def anywhere_accuracy(references, predictions):

FILE: lm_eval/tasks/click/click_cul/utils.py
  function get_context (line 6) | def get_context(doc) -> str:
  function get_target (line 18) | def get_target(doc) -> str:
  function get_choices (line 25) | def get_choices(doc) -> List[str]:
  function extract_economy (line 31) | def extract_economy(dataset: Dataset) -> Dataset:
  function extract_geography (line 35) | def extract_geography(dataset: Dataset) -> Dataset:
  function extract_history (line 39) | def extract_history(dataset: Dataset) -> Dataset:
  function extract_law (line 45) | def extract_law(dataset: Dataset) -> Dataset:
  function extract_politics (line 51) | def extract_politics(dataset: Dataset) -> Dataset:
  function extract_kpop (line 55) | def extract_kpop(dataset: Dataset) -> Dataset:
  function extract_society (line 59) | def extract_society(dataset: Dataset) -> Dataset:
  function extract_tradition (line 63) | def extract_tradition(dataset: Dataset) -> Dataset:

FILE: lm_eval/tasks/click/click_lang/utils.py
  function get_context (line 6) | def get_context(doc) -> str:
  function get_target (line 18) | def get_target(doc) -> str:
  function get_choices (line 25) | def get_choices(doc) -> List[str]:
  function extract_text (line 31) | def extract_text(dataset: Dataset) -> Dataset:
  function extract_grammar (line 41) | def extract_grammar(dataset: Dataset) -> Dataset:
  function extract_function (line 65) | def extract_function(dataset: Dataset) -> Dataset:

FILE: lm_eval/tasks/cmmlu/_generate_configs.py
  function parse_args (line 87) | def parse_args():

FILE: lm_eval/tasks/cnn_dailymail/utils.py
  function normalize_text (line 27) | def normalize_text(text: str) -> str:
  function calculate_rouge_scores (line 44) | def calculate_rouge_scores(
  function calculate_bertscore (line 82) | def calculate_bertscore(
  function process_results (line 124) | def process_results(doc: Dict[str, Any], results: List[str]) -> Dict[str...
  function postprocess_generation (line 186) | def postprocess_generation(generation: str) -> str:
  function filter_long_articles (line 208) | def filter_long_articles(doc: Dict[str, Any]) -> bool:
  function doc_to_choice (line 224) | def doc_to_choice(doc: Dict[str, Any]) -> List[str]:
  function process_docs (line 237) | def process_docs(dataset):
  function calculate_summary_length (line 269) | def calculate_summary_length(generated: str) -> int:

FILE: lm_eval/tasks/code_x_glue/code-text/bleu.py
  function normalize (line 58) | def normalize(s):
  function count_ngrams (line 78) | def count_ngrams(words, n=4):
  function cook_refs (line 87) | def cook_refs(refs, n=4):
  function cook_test (line 101) | def cook_test(test, item, n=4):
  function score_cooked (line 132) | def score_cooked(allcomps, n=4, ground=0, smooth=1):
  function bleu (line 174) | def bleu(refs, candidate, ground=0, smooth=1):
  function splitPuncts (line 180) | def splitPuncts(line):
  function computeMaps (line 184) | def computeMaps(predictions, goldfile):
  function bleuFromMaps (line 210) | def bleuFromMaps(m1, m2):
  function smoothed_bleu_4 (line 222) | def smoothed_bleu_4(references, predictions, **kwargs):

FILE: lm_eval/tasks/code_x_glue/code-text/utils.py
  function doc_to_text (line 1) | def doc_to_text(doc):
  function doc_to_target (line 8) | def doc_to_target(doc):

FILE: lm_eval/tasks/common_voice/utils.py
  function doc_to_text (line 10) | def doc_to_text(doc: Dict[str, Any]) -> str:
  function doc_to_audio (line 14) | def doc_to_audio(doc: Dict[str, Any]) -> List[dict]:

FILE: lm_eval/tasks/copal_id/utils.py
  function convert_choice (line 4) | def convert_choice(choice):
  function doc_to_text (line 8) | def doc_to_text(doc, connector):
  function doc_to_choice (line 13) | def doc_to_choice(doc):

FILE: lm_eval/tasks/coqa/utils.py
  function doc_to_text (line 6) | def doc_to_text(doc):
  function doc_to_target (line 19) | def doc_to_target(doc):
  function em (line 37) | def em(gold_list, pred):
  function compute_scores (line 51) | def compute_scores(gold_list, pred):
  function process_results (line 72) | def process_results(doc, results):

FILE: lm_eval/tasks/crows_pairs/utils.py
  function process_results (line 4) | def process_results(doc, results):
  function doc_to_choice (line 19) | def doc_to_choice(doc):
  function filter_dataset (line 23) | def filter_dataset(dataset: datasets.Dataset, bias_type: str) -> dataset...
  function filter_race_color (line 27) | def filter_race_color(dataset: datasets.Dataset) -> datasets.Dataset:
  function filter_socio (line 31) | def filter_socio(dataset: datasets.Dataset) -> datasets.Dataset:
  function filter_gender (line 35) | def filter_gender(dataset: datasets.Dataset) -> datasets.Dataset:
  function filter_age (line 39) | def filter_age(dataset: datasets.Dataset) -> datasets.Dataset:
  function filter_religion (line 43) | def filter_religion(dataset: datasets.Dataset) -> datasets.Dataset:
  function filter_disability (line 47) | def filter_disability(dataset: datasets.Dataset) -> datasets.Dataset:
  function filter_orientation (line 51) | def filter_orientation(dataset: datasets.Dataset) -> datasets.Dataset:
  function filter_nationality (line 55) | def filter_nationality(dataset: datasets.Dataset) -> datasets.Dataset:
  function filter_appearance (line 59) | def filter_appearance(dataset: datasets.Dataset) -> datasets.Dataset:
  function filter_autre (line 63) | def filter_autre(dataset: datasets.Dataset) -> datasets.Dataset:

FILE: lm_eval/tasks/csatqa/_generate_configs.py
  function parse_args (line 19) | def parse_args():

FILE: lm_eval/tasks/csatqa/utils.py
  function process_docs (line 4) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:

FILE: lm_eval/tasks/darija_bench/darija_sentiment/utils.py
  function doc_to_text (line 9) | def doc_to_text(doc):
  function doc_to_choice_3 (line 21) | def doc_to_choice_3(doc):
  function doc_to_choice_2 (line 25) | def doc_to_choice_2(doc):
  function doc_to_target (line 29) | def doc_to_target(doc):

FILE: lm_eval/tasks/darija_bench/darija_summarization/utils.py
  function strip (line 5) | def strip(resps, docs):
  function doc_to_text (line 12) | def doc_to_text(doc):
  function doc_to_target (line 19) | def doc_to_target(doc):
  function bert (line 23) | def bert(items):
  function Average (line 27) | def Average(lst):
  function darijabert (line 31) | def darijabert(items):
  function rouge1 (line 44) | def rouge1(items):
  function rougeL (line 48) | def rougeL(items):
  function rouge2 (line 52) | def rouge2(items):
  function rougeLsum (line 56) | def rougeLsum(items):
  function agg_rougelsum (line 60) | def agg_rougelsum(items):
  function agg_rouge1 (line 66) | def agg_rouge1(items):
  function agg_rouge2 (line 72) | def agg_rouge2(items):
  function agg_rougel (line 78) | def agg_rougel(items):

FILE: lm_eval/tasks/darija_bench/darija_translation/utils.py
  function strip (line 5) | def strip(resps, docs):
  function dr_fr (line 12) | def dr_fr(dataset: datasets.Dataset):
  function dr_en (line 16) | def dr_en(dataset: datasets.Dataset):
  function dr_msa (line 20) | def dr_msa(dataset: datasets.Dataset):
  function fr_dr (line 24) | def fr_dr(dataset: datasets.Dataset):
  function en_dr (line 28) | def en_dr(dataset: datasets.Dataset):
  function msa_dr (line 32) | def msa_dr(dataset: datasets.Dataset):
  function doc_to_text (line 46) | def doc_to_text(doc):
  function doc_to_target (line 51) | def doc_to_target(doc):
  function bert (line 55) | def bert(items):
  function Average (line 59) | def Average(lst):
  function camembert (line 63) | def camembert(items):
  function darijabert (line 76) | def darijabert(items):
  function arabert (line 89) | def arabert(items):
  function bertbase (line 102) | def bertbase(items):
  function mbert (line 115) | def mbert(items):

FILE: lm_eval/tasks/darija_bench/darija_transliteration/utils.py
  function strip (line 5) | def strip(resps, docs):
  function dr_ar (line 12) | def dr_ar(dataset: datasets.Dataset):
  function ar_dr (line 16) | def ar_dr(dataset: datasets.Dataset):
  function doc_to_text (line 20) | def doc_to_text(doc):
  function doc_to_target (line 25) | def doc_to_target(doc):
  function bert (line 29) | def bert(items):
  function Average (line 33) | def Average(lst):
  function arabizibert (line 37) | def arabizibert(items):
  function darijabert (line 50) | def darijabert(items):
  function mbert (line 63) | def mbert(items):

FILE: lm_eval/tasks/darijahellaswag/utils.py
  function process_docs (line 4) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:

FILE: lm_eval/tasks/darijammlu/_generate_configs.py
  function parse_args (line 73) | def parse_args():

FILE: lm_eval/tasks/darijammlu/utils.py
  function doc_to_text (line 7) | def doc_to_text(doc):
  function doc_to_choice (line 24) | def doc_to_choice(doc):

FILE: lm_eval/tasks/discrim_eval/utils.py
  function _logit (line 8) | def _logit(p: float) -> float:
  function process_results (line 30) | def process_results(
  function agg_demographic_bias_regression (line 63) | def agg_demographic_bias_regression(items: List[BiasTuple]) -> float:

FILE: lm_eval/tasks/drop/utils.py
  function process_docs (line 10) | def process_docs(dataset):
  function get_answers (line 22) | def get_answers(doc):
  function parse_answer (line 51) | def parse_answer(answer):
  function process_results (line 64) | def process_results(doc, results):
  function get_metrics (line 76) | def get_metrics(predicted, gold):
  function _answer_to_bags (line 100) | def _answer_to_bags(answer):
  function _align_bags (line 114) | def _align_bags(predicted, gold):
  function _compute_f1 (line 134) | def _compute_f1(predicted_bag, gold_bag):
  function _match_numbers_if_present (line 152) | def _match_numbers_if_present(gold_bag, predicted_bag):
  function _is_number (line 166) | def _is_number(text):
  function _remove_articles (line 174) | def _remove_articles(text):
  function _white_space_fix (line 178) | def _white_space_fix(text):
  function _remove_punc (line 182) | def _remove_punc(text):
  function _fix_number (line 190) | def _fix_number(text):
  function _tokenize (line 194) | def _tokenize(text):
  function _normalize (line 198) | def _normalize(answer):

FILE: lm_eval/tasks/e2lmc/mmlu_early_training/custom_metrics.py
  function loglikelihood_diff (line 4) | def loglikelihood_diff(items):

FILE: lm_eval/tasks/e2lmc/noor/_generate_configs.py
  function parse_args (line 78) | def parse_args():

FILE: lm_eval/tasks/egyhellaswag/utils.py
  function process_docs (line 4) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:

FILE: lm_eval/tasks/egymmlu/_generate_configs.py
  function parse_args (line 74) | def parse_args():

FILE: lm_eval/tasks/egymmlu/utils.py
  function doc_to_text (line 7) | def doc_to_text(doc):
  function doc_to_choice (line 24) | def doc_to_choice(doc):

FILE: lm_eval/tasks/eq_bench/multilingual/utils.py
  function calculate_score_fullscale (line 6) | def calculate_score_fullscale(docs, results):

FILE: lm_eval/tasks/eq_bench/utils.py
  function calculate_score_fullscale (line 6) | def calculate_score_fullscale(docs, results):

FILE: lm_eval/tasks/esbbq/utils.py
  function _model_answer (line 6) | def _model_answer(lls):
  function _model_answer_type (line 25) | def _model_answer_type(doc, model_answer):
  function process_results (line 75) | def process_results(doc, results):
  function acc_ambig_agg (line 137) | def acc_ambig_agg(results):
  function acc_disambig_agg (line 159) | def acc_disambig_agg(results):
  function bias_score_ambig_agg (line 181) | def bias_score_ambig_agg(results):
  function bias_score_disambig_agg (line 212) | def bias_score_disambig_agg(results):

FILE: lm_eval/tasks/eus_exams/configs.py
  function gen_config_yamls (line 16) | def gen_config_yamls(output_dir: str, overwrite: bool) -> None:
  function main (line 49) | def main() -> None:

FILE: lm_eval/tasks/eus_exams/utils.py
  function process_docs (line 4) | def process_docs(dataset: datasets.Dataset):

FILE: lm_eval/tasks/eus_reading/utils.py
  function doc_to_text_context (line 7) | def doc_to_text_context(doc) -> str:
  function doc_to_choice (line 28) | def doc_to_choice(doc) -> List[str]:

FILE: lm_eval/tasks/eus_trivia/utils.py
  function doc_to_text (line 7) | def doc_to_text(doc) -> str:
  function doc_to_choice (line 28) | def doc_to_choice(doc) -> List[str]:

FILE: lm_eval/tasks/evalita_llm/metrics.py
  function _aggreg_ls (line 10) | def _aggreg_ls(predictions):
  function _aggreg_sa_v2 (line 37) | def _aggreg_sa_v2(predictions):
  function _aggreg_sa (line 49) | def _aggreg_sa(predictions):
  function _aggreg_ner (line 124) | def _aggreg_ner(predictions):
  function _aggreg_rel (line 143) | def _aggreg_rel(predictions):
  function _aggreg_dd (line 160) | def _aggreg_dd(items):

FILE: lm_eval/tasks/evalita_llm/sum_utils.py
  function rouge1_score (line 7) | def rouge1_score(references, predictions, **kwargs):
  function process_results_sum (line 16) | def process_results_sum(doc, results):

FILE: lm_eval/tasks/evalita_llm/utils.py
  function sa_doc_to_target (line 11) | def sa_doc_to_target(x):
  function sa_doc_to_target_v2 (line 30) | def sa_doc_to_target_v2(x):
  function sa_doc_to_choice (line 49) | def sa_doc_to_choice(x):
  function _ls_gold_to_target (line 60) | def _ls_gold_to_target(x):
  function ls_doc_to_target (line 77) | def ls_doc_to_target(x):
  function _ls_split_gold (line 91) | def _ls_split_gold(x):
  function ls_process_results (line 112) | def ls_process_results(doc, results):
  function _ner_gold_to_target (line 163) | def _ner_gold_to_target(x: list) -> list:
  function _ner_gold_to_target_v2 (line 171) | def _ner_gold_to_target_v2(x: list) -> list:
  function ner_doc_to_target (line 179) | def ner_doc_to_target(doc):
  function ner_process_results (line 193) | def ner_process_results(doc, results):
  function ner_process_results_v2 (line 246) | def ner_process_results_v2(doc, results):
  function _ner_process_raw_output (line 313) | def _ner_process_raw_output(llm_result: str) -> list[tuple]:
  function _ner_process_raw_output_v2 (line 337) | def _ner_process_raw_output_v2(llm_result: str) -> list[tuple]:
  function _rel_process_raw_output (line 364) | def _rel_process_raw_output(llm_result: str) -> list[str]:
  function re_doc_to_target (line 391) | def re_doc_to_target(doc):
  function _rel_gold_to_target (line 403) | def _rel_gold_to_target(x: list) -> list:
  function rel_doc_to_target (line 410) | def rel_doc_to_target(doc):
  function _extract_relations (line 422) | def _extract_relations(results):
  function rel_process_results_v3 (line 439) | def rel_process_results_v3(doc, results):
  function split_text_with_regex (line 498) | def split_text_with_regex(text, pattern):
  function faq_doc_to_target (line 526) | def faq_doc_to_target(x):
  function ht_doc_to_target (line 541) | def ht_doc_to_target(x):

FILE: lm_eval/tasks/fda/task.py
  class FDA (line 10) | class FDA(ConfigurableTask):
    method __init__ (line 15) | def __init__(self, **kwargs):
    method has_training_docs (line 18) | def has_training_docs(self):
    method has_validation_docs (line 21) | def has_validation_docs(self):
    method has_test_docs (line 24) | def has_test_docs(self):
    method validation_docs (line 27) | def validation_docs(self):
    method doc_to_text (line 30) | def doc_to_text(self, doc):
    method doc_to_target (line 33) | def doc_to_target(self, doc):
    method construct_requests (line 36) | def construct_requests(
    method process_results (line 60) | def process_results(self, doc, results):
    method aggregation (line 75) | def aggregation(self):
    method higher_is_better (line 85) | def higher_is_better(self):
  function contains_score (line 96) | def contains_score(prediction: str, labels: List[str]):

FILE: lm_eval/tasks/french_bench/preprocess_wikitext.py
  function wikitext_detokenizer (line 4) | def wikitext_detokenizer(doc):
  function process_results (line 39) | def process_results(doc, results):

FILE: lm_eval/tasks/french_bench/utils.py
  function normalize_answer (line 9) | def normalize_answer(s):
  function get_tokens (line 29) | def get_tokens(s):
  function exact (line 36) | def exact(predictions, references):
  function f1 (line 41) | def f1(predictions, references):
  function rouge1 (line 57) | def rouge1(items):
  function rouge1_agg (line 64) | def rouge1_agg(items):
  function is_included (line 74) | def is_included(items):
  function preprocess (line 83) | def preprocess(text):
  function process_docs (line 92) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:

FILE: lm_eval/tasks/galician_bench/flores_gl/create_yamls_flores_gl.py
  function doc_to_text (line 257) | def doc_to_text(src: str, tgt: str) -> str:
  function doc_to_target (line 265) | def doc_to_target(tgt: str) -> str:
  function gen_lang_yamls (line 272) | def gen_lang_yamls(output_dir: str, overwrite: bool) -> None:
  function main (line 316) | def main() -> None:

FILE: lm_eval/tasks/galician_bench/utils.py
  function lowercase_first_letter (line 14) | def lowercase_first_letter(text):
  function process_summarization (line 18) | def process_summarization(dataset):
  function process_docs_paraphrases (line 28) | def process_docs_paraphrases(dataset):
  function process_docs_paws (line 56) | def process_docs_paws(dataset):
  function rouge1 (line 84) | def rouge1(items):
  function rouge1_agg (line 91) | def rouge1_agg(items):
  function process_results_mc2 (line 102) | def process_results_mc2(doc, results):
  function process_docs_gen (line 115) | def process_docs_gen(dataset: datasets.Dataset) -> datasets.Dataset:
  function preprocess_function_gen (line 119) | def preprocess_function_gen(examples):
  function process_doc_nli (line 143) | def process_doc_nli(dataset):
  function process_results_gen (line 170) | def process_results_gen(doc, results):
  function bleu (line 241) | def bleu(refs, preds):
  function rouge (line 264) | def rouge(refs, preds):

FILE: lm_eval/tasks/glianorex/preprocess_glianorex.py
  function doc_to_text (line 4) | def doc_to_text(doc) -> str:
  function doc_to_target (line 10) | def doc_to_target(doc) -> str:
  function filter_dataset (line 15) | def filter_dataset(dataset: datasets.Dataset, lang: str) -> datasets.Dat...
  function filter_french (line 19) | def filter_french(dataset: datasets.Dataset) -> datasets.Dataset:
  function filter_english (line 23) | def filter_english(dataset: datasets.Dataset) -> datasets.Dataset:

FILE: lm_eval/tasks/global_mmlu/default/ar/utils.py
  function process_docs (line 7) | def process_docs(dataset, category):

FILE: lm_eval/tasks/global_mmlu/default/bn/utils.py
  function process_docs (line 7) | def process_docs(dataset, category):

FILE: lm_eval/tasks/global_mmlu/default/de/utils.py
  function process_docs (line 7) | def process_docs(dataset, category):

FILE: lm_eval/tasks/global_mmlu/default/en/utils.py
  function process_docs (line 7) | def process_docs(dataset, category):

FILE: lm_eval/tasks/global_mmlu/default/es/utils.py
  function process_docs (line 7) | def process_docs(dataset, category):

FILE: lm_eval/tasks/global_mmlu/default/fr/utils.py
  function process_docs (line 7) | def process_docs(dataset, category):

FILE: lm_eval/tasks/global_mmlu/default/hi/utils.py
  function process_docs (line 7) | def process_docs(dataset, category):

FILE: lm_eval/tasks/global_mmlu/default/id/utils.py
  function process_docs (line 7) | def process_docs(dataset, category):

FILE: lm_eval/tasks/global_mmlu/default/it/utils.py
  function process_docs (line 7) | def process_docs(dataset, category):

FILE: lm_eval/tasks/global_mmlu/default/ja/utils.py
  function process_docs (line 7) | def process_docs(dataset, category):

FILE: lm_eval/tasks/global_mmlu/default/ko/utils.py
  function process_docs (line 7) | def process_docs(dataset, category):

FILE: lm_eval/tasks/global_mmlu/default/pt/utils.py
  function process_docs (line 7) | def process_docs(dataset, category):

FILE: lm_eval/tasks/global_mmlu/default/sw/utils.py
  function process_docs (line 7) | def process_docs(dataset, category):

FILE: lm_eval/tasks/global_mmlu/default/yo/utils.py
  function process_docs (line 7) | def process_docs(dataset, category):

FILE: lm_eval/tasks/global_mmlu/default/zh/utils.py
  function process_docs (line 7) | def process_docs(dataset, category):

FILE: lm_eval/tasks/global_mmlu/full/am/utils.py
  function process_docs (line 65) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/global_mmlu/full/ar/utils.py
  function process_docs (line 65) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/global_mmlu/full/bn/utils.py
  function process_docs (line 65) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/global_mmlu/full/cs/utils.py
  function process_docs (line 65) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/global_mmlu/full/de/utils.py
  function process_docs (line 65) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/global_mmlu/full/el/utils.py
  function process_docs (line 65) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/global_mmlu/full/en/utils.py
  function process_docs (line 65) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/global_mmlu/full/es/utils.py
  function process_docs (line 65) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/global_mmlu/full/fa/utils.py
  function process_docs (line 65) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/global_mmlu/full/fil/utils.py
  function process_docs (line 65) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/global_mmlu/full/fr/utils.py
  function process_docs (line 65) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/global_mmlu/full/ha/utils.py
  function process_docs (line 65) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/global_mmlu/full/he/utils.py
  function process_docs (line 65) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/global_mmlu/full/hi/utils.py
  function process_docs (line 65) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/global_mmlu/full/id/utils.py
  function process_docs (line 65) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/global_mmlu/full/ig/utils.py
  function process_docs (line 65) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/global_mmlu/full/it/utils.py
  function process_docs (line 65) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/global_mmlu/full/ja/utils.py
  function process_docs (line 65) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/global_mmlu/full/ko/utils.py
  function process_docs (line 65) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/global_mmlu/full/ky/utils.py
  function process_docs (line 65) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/global_mmlu/full/lt/utils.py
  function process_docs (line 65) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/global_mmlu/full/mg/utils.py
  function process_docs (line 65) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/global_mmlu/full/ms/utils.py
  function process_docs (line 65) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/global_mmlu/full/ne/utils.py
  function process_docs (line 65) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/global_mmlu/full/nl/utils.py
  function process_docs (line 65) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/global_mmlu/full/ny/utils.py
  function process_docs (line 65) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/global_mmlu/full/pl/utils.py
  function process_docs (line 65) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/global_mmlu/full/pt/utils.py
  function process_docs (line 65) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/global_mmlu/full/ro/utils.py
  function process_docs (line 65) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/global_mmlu/full/ru/utils.py
  function process_docs (line 65) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/global_mmlu/full/si/utils.py
  function process_docs (line 65) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/global_mmlu/full/sn/utils.py
  function process_docs (line 65) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/global_mmlu/full/so/utils.py
  function process_docs (line 65) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/global_mmlu/full/sr/utils.py
  function process_docs (line 65) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/global_mmlu/full/sv/utils.py
  function process_docs (line 65) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/global_mmlu/full/sw/utils.py
  function process_docs (line 65) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/global_mmlu/full/te/utils.py
  function process_docs (line 65) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/global_mmlu/full/tr/utils.py
  function process_docs (line 65) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/global_mmlu/full/uk/utils.py
  function process_docs (line 65) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/global_mmlu/full/vi/utils.py
  function process_docs (line 65) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/global_mmlu/full/yo/utils.py
  function process_docs (line 65) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/global_mmlu/full/zh/utils.py
  function process_docs (line 65) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/global_piqa/completions/_generate_config.py
  class IndentedDumper (line 7) | class IndentedDumper(yaml.Dumper):
    method increase_indent (line 8) | def increase_indent(self, flow=False, indentless=False):
  function format_subset (line 15) | def format_subset(subset: str, preface: str = PREFACE) -> str:

FILE: lm_eval/tasks/global_piqa/prompted/_generate_config.py
  class IndentedDumper (line 7) | class IndentedDumper(yaml.Dumper):
    method increase_indent (line 8) | def increase_indent(self, flow=False, indentless=False):
  function format_subset (line 15) | def format_subset(subset: str, preface: str = PREFACE) -> str:

FILE: lm_eval/tasks/glue/mnli/utils.py
  function doc_to_text (line 1) | def doc_to_text(doc) -> str:

FILE: lm_eval/tasks/gpqa/cot_n_shot/_generate_configs.py
  function main (line 5) | def main() -> None:

FILE: lm_eval/tasks/gpqa/cot_n_shot/utils.py
  function preprocess (line 7) | def preprocess(text):
  function process_docs (line 17) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:

FILE: lm_eval/tasks/gpqa/cot_zeroshot/_generate_configs.py
  function main (line 5) | def main() -> None:

FILE: lm_eval/tasks/gpqa/cot_zeroshot/utils.py
  function preprocess (line 7) | def preprocess(text):
  function process_docs (line 17) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:

FILE: lm_eval/tasks/gpqa/generative/_generate_configs.py
  function main (line 5) | def main() -> None:

FILE: lm_eval/tasks/gpqa/generative/utils.py
  function preprocess (line 7) | def preprocess(text):
  function process_docs (line 17) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:

FILE: lm_eval/tasks/gpqa/n_shot/_generate_configs.py
  function main (line 5) | def main() -> None:

FILE: lm_eval/tasks/gpqa/n_shot/utils.py
  function preprocess (line 7) | def preprocess(text):
  function process_docs (line 20) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:

FILE: lm_eval/tasks/gpqa/zeroshot/_generate_configs.py
  function main (line 5) | def main() -> None:

FILE: lm_eval/tasks/gpqa/zeroshot/utils.py
  function preprocess (line 7) | def preprocess(text):
  function process_docs (line 17) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:

FILE: lm_eval/tasks/graphwalks/utils.py
  function load_dataset (line 7) | def load_dataset(**kwargs):
  function extract_answer_list (line 27) | def extract_answer_list(response: str) -> Tuple[List[str], bool]:
  function extract_answer_list_flexible (line 65) | def extract_answer_list_flexible(response: str) -> Tuple[List[str], bool]:
  function process_results (line 100) | def process_results(doc, results):

FILE: lm_eval/tasks/groundcocoa/utils.py
  function process_docs (line 6) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:

FILE: lm_eval/tasks/hellaswag/utils.py
  function preprocess (line 6) | def preprocess(text):
  function process_docs (line 15) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:

FILE: lm_eval/tasks/hendrycks_ethics/utils.py
  function _preproc_doc (line 5) | def _preproc_doc(doc):
  function doc_to_text (line 18) | def doc_to_text(doc) -> str:
  function doc_to_target (line 23) | def doc_to_target(doc):

FILE: lm_eval/tasks/hendrycks_math/utils.py
  function process_docs (line 6) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:
  function process_results (line 18) | def process_results(doc: dict, results: List[str]) -> Dict[str, int]:
  function is_equiv (line 36) | def is_equiv(str1, str2, verbose=False):
  function remove_boxed (line 53) | def remove_boxed(s):
  function last_boxed_only_string (line 67) | def last_boxed_only_string(string):
  function fix_fracs (line 97) | def fix_fracs(string):
  function fix_a_slash_b (line 129) | def fix_a_slash_b(string):
  function remove_right_units (line 144) | def remove_right_units(string):
  function fix_sqrt (line 154) | def fix_sqrt(string):
  function strip_string (line 169) | def strip_string(string):

FILE: lm_eval/tasks/histoires_morales/utils.py
  function process_docs (line 4) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:

FILE: lm_eval/tasks/hrm8k/default/utils.py
  function doc_to_text (line 5) | def doc_to_text(doc):
  function doc_to_text_mmmlu (line 14) | def doc_to_text_mmmlu(doc):
  function doc_to_target (line 23) | def doc_to_target(doc):
  function postprocess (line 27) | def postprocess(s):
  function process_results (line 36) | def process_results(doc: dict, results: List[str]) -> Dict[str, int]:
  function is_equiv (line 54) | def is_equiv(str1, str2, verbose=False):
  function parse_math_answer (line 74) | def parse_math_answer(raw_string):
  function _fix_fracs (line 146) | def _fix_fracs(string):
  function _fix_a_slash_b (line 178) | def _fix_a_slash_b(string):
  function _remove_right_units (line 193) | def _remove_right_units(string):
  function _fix_sqrt (line 203) | def _fix_sqrt(string):
  function _strip_string (line 218) | def _strip_string(string):

FILE: lm_eval/tasks/hrm8k/en/utils.py
  function doc_to_text (line 5) | def doc_to_text(doc):
  function doc_to_text_mmmlu (line 14) | def doc_to_text_mmmlu(doc):
  function doc_to_target (line 23) | def doc_to_target(doc):
  function postprocess (line 27) | def postprocess(s):
  function process_results (line 36) | def process_results(doc: dict, results: List[str]) -> Dict[str, int]:
  function is_equiv (line 54) | def is_equiv(str1, str2, verbose=False):
  function parse_math_answer (line 74) | def parse_math_answer(raw_string):
  function _fix_fracs (line 146) | def _fix_fracs(string):
  function _fix_a_slash_b (line 178) | def _fix_a_slash_b(string):
  function _remove_right_units (line 193) | def _remove_right_units(string):
  function _fix_sqrt (line 203) | def _fix_sqrt(string):
  function _strip_string (line 218) | def _strip_string(string):

FILE: lm_eval/tasks/humaneval/utils.py
  function pass_at_k (line 13) | def pass_at_k(references: list[str], predictions: list[list[str]], k: li...
  function build_predictions (line 26) | def build_predictions(resps: list[list[str]], docs: list[dict]) -> list[...
  function build_predictions_instruct (line 30) | def build_predictions_instruct(

FILE: lm_eval/tasks/humaneval_infilling/utils.py
  function pass_at_k (line 13) | def pass_at_k(references: list[str], predictions: list[list[str]], k: li...
  function build_predictions (line 26) | def build_predictions(resps: list[list[str]], docs: list[dict]) -> list[...

FILE: lm_eval/tasks/icelandic_winogrande/preprocess_winogrande.py
  function doc_to_text (line 1) | def doc_to_text(doc):
  function doc_to_target (line 6) | def doc_to_target(doc):
  function doc_to_choice (line 14) | def doc_to_choice(doc):

FILE: lm_eval/tasks/ifeval/instructions.py
  class Instruction (line 110) | class Instruction:
    method __init__ (line 113) | def __init__(self, instruction_id):
    method build_description (line 116) | def build_description(self, **kwargs):
    method get_instruction_args (line 119) | def get_instruction_args(self):
    method get_instruction_args_keys (line 122) | def get_instruction_args_keys(self):
    method check_following (line 125) | def check_following(self, value):
  class ResponseLanguageChecker (line 129) | class ResponseLanguageChecker(Instruction):
    method build_description (line 132) | def build_description(self, *, language=None):
    method get_instruction_args (line 155) | def get_instruction_args(self):
    method get_instruction_args_keys (line 159) | def get_instruction_args_keys(self):
    method check_following (line 163) | def check_following(self, value):
  class NumberOfSentences (line 184) | class NumberOfSentences(Instruction):
    method build_description (line 187) | def build_description(self, *, num_sentences=None, relation=None):
    method get_instruction_args (line 225) | def get_instruction_args(self):
    method get_instruction_args_keys (line 232) | def get_instruction_args_keys(self):
    method check_following (line 236) | def check_following(self, value):
  class PlaceholderChecker (line 256) | class PlaceholderChecker(Instruction):
    method build_description (line 259) | def build_description(self, *, num_placeholders=None):
    method get_instruction_args (line 278) | def get_instruction_args(self):
    method get_instruction_args_keys (line 282) | def get_instruction_args_keys(self):
    method check_following (line 286) | def check_following(self, value):
  class BulletListChecker (line 301) | class BulletListChecker(Instruction):
    method build_description (line 304) | def build_description(self, *, num_bullets=None):
    method get_instruction_args (line 325) | def get_instruction_args(self):
    method get_instruction_args_keys (line 329) | def get_instruction_args_keys(self):
    method check_following (line 333) | def check_following(self, value):
  class ConstrainedResponseChecker (line 350) | class ConstrainedResponseChecker(Instruction):
    method build_description (line 353) | def build_description(self):
    method get_instruction_args (line 364) | def get_instruction_args(self):
    method get_instruction_args_keys (line 368) | def get_instruction_args_keys(self):
    method check_following (line 372) | def check_following(self, value):
  class ConstrainedStartChecker (line 389) | class ConstrainedStartChecker(Instruction):
    method build_description (line 392) | def build_description(self, *, starter=None):
    method get_instruction_args (line 411) | def get_instruction_args(self):
    method get_instruction_args_keys (line 415) | def get_instruction_args_keys(self):
    method check_following (line 419) | def check_following(self, value):
  class HighlightSectionChecker (line 436) | class HighlightSectionChecker(Instruction):
    method build_description (line 439) | def build_description(self, *, num_highlights=None):
    method get_instruction_args (line 460) | def get_instruction_args(self):
    method get_instruction_args_keys (line 464) | def get_instruction_args_keys(self):
    method check_following (line 468) | def check_following(self, value):
  class SectionChecker (line 492) | class SectionChecker(Instruction):
    method build_description (line 495) | def build_description(self, *, section_spliter=None, num_sections=None):
    method get_instruction_args (line 531) | def get_instruction_args(self):
    method get_instruction_args_keys (line 538) | def get_instruction_args_keys(self):
    method check_following (line 542) | def check_following(self, value):
  class ParagraphChecker (line 561) | class ParagraphChecker(Instruction):
    method build_description (line 564) | def build_description(self, *, num_paragraphs=None):
    method get_instruction_args (line 584) | def get_instruction_args(self):
    method get_instruction_args_keys (line 588) | def get_instruction_args_keys(self):
    method check_following (line 592) | def check_following(self, value):
  class PostscriptChecker (line 616) | class PostscriptChecker(Instruction):
    method build_description (line 619) | def build_description(self, *, postscript_marker=None):
    method get_instruction_args (line 644) | def get_instruction_args(self):
    method get_instruction_args_keys (line 648) | def get_instruction_args_keys(self):
    method check_following (line 652) | def check_following(self, value):
  class RephraseChecker (line 674) | class RephraseChecker(Instruction):
    method build_description (line 677) | def build_description(self, *, original_message):
    method get_instruction_args (line 703) | def get_instruction_args(self):
    method get_instruction_args_keys (line 707) | def get_instruction_args_keys(self):
    method check_following (line 711) | def check_following(self, value):
    method is_change (line 733) | def is_change(self, response):
    method strip_changes (line 737) | def strip_changes(self, response):
  class KeywordChecker (line 742) | class KeywordChecker(Instruction):
    method build_description (line 745) | def build_description(self, *, keywords=None):
    method get_instruction_args (line 768) | def get_instruction_args(self):
    method get_instruction_args_keys (line 772) | def get_instruction_args_keys(self):
    method check_following (line 776) | def check_following(self, value):
  class KeywordFrequencyChecker (line 784) | class KeywordFrequencyChecker(Instruction):
    method build_description (line 787) | def build_description(self, *, keyword=None, frequency=None, relation=...
    method get_instruction_args (line 833) | def get_instruction_args(self):
    method get_instruction_args_keys (line 841) | def get_instruction_args_keys(self):
    method check_following (line 845) | def check_following(self, value):
  class NumberOfWords (line 855) | class NumberOfWords(Instruction):
    method build_description (line 858) | def build_description(self, *, num_words=None, relation=None):
    method get_instruction_args (line 896) | def get_instruction_args(self):
    method get_instruction_args_keys (line 900) | def get_instruction_args_keys(self):
    method check_following (line 904) | def check_following(self, value):
  class JsonFormat (line 914) | class JsonFormat(Instruction):
    method build_description (line 917) | def build_description(self):
    method get_instruction_args (line 924) | def get_instruction_args(self):
    method get_instruction_args_keys (line 928) | def get_instruction_args_keys(self):
    method check_following (line 932) | def check_following(self, value):
  class ParagraphFirstWordCheck (line 949) | class ParagraphFirstWordCheck(Instruction):
    method build_description (line 952) | def build_description(
    method get_instruction_args (line 998) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1006) | def get_instruction_args_keys(self):
    method check_following (line 1010) | def check_following(self, value):
  class KeySentenceChecker (line 1056) | class KeySentenceChecker(Instruction):
    method build_description (line 1059) | def build_description(self, key_sentences=None, num_sentences=None):
    method get_instruction_args (line 1091) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1098) | def get_instruction_args_keys(self):
    method check_following (line 1102) | def check_following(self, value):
  class ForbiddenWords (line 1113) | class ForbiddenWords(Instruction):
    method build_description (line 1116) | def build_description(self, forbidden_words=None):
    method get_instruction_args (line 1140) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1144) | def get_instruction_args_keys(self):
    method check_following (line 1148) | def check_following(self, value):
  class RephraseParagraph (line 1156) | class RephraseParagraph(Instruction):
    method build_description (line 1159) | def build_description(self, *, original_paragraph, low, high):
    method get_instruction_args (line 1190) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1198) | def get_instruction_args_keys(self):
    method check_following (line 1202) | def check_following(self, value):
  class TwoResponsesChecker (line 1216) | class TwoResponsesChecker(Instruction):
    method build_description (line 1219) | def build_description(self):
    method get_instruction_args (line 1227) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1231) | def get_instruction_args_keys(self):
    method check_following (line 1235) | def check_following(self, value):
  class RepeatPromptThenAnswer (line 1258) | class RepeatPromptThenAnswer(Instruction):
    method build_description (line 1261) | def build_description(self, *, prompt_to_repeat=None):
    method get_instruction_args (line 1282) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1285) | def get_instruction_args_keys(self):
    method check_following (line 1289) | def check_following(self, value):
  class EndChecker (line 1295) | class EndChecker(Instruction):
    method build_description (line 1298) | def build_description(self, *, end_phrase=None):
    method get_instruction_args (line 1318) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1321) | def get_instruction_args_keys(self):
    method check_following (line 1325) | def check_following(self, value):
  class TitleChecker (line 1332) | class TitleChecker(Instruction):
    method build_description (line 1335) | def build_description(self):
    method get_instruction_args (line 1343) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1346) | def get_instruction_args_keys(self):
    method check_following (line 1350) | def check_following(self, value):
  class LetterFrequencyChecker (line 1362) | class LetterFrequencyChecker(Instruction):
    method build_description (line 1365) | def build_description(self, *, letter=None, let_frequency=None, let_re...
    method get_instruction_args (line 1417) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1425) | def get_instruction_args_keys(self):
    method check_following (line 1429) | def check_following(self, value):
  class CapitalLettersEnglishChecker (line 1440) | class CapitalLettersEnglishChecker(Instruction):
    method build_description (line 1443) | def build_description(self):
    method get_instruction_args (line 1450) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1453) | def get_instruction_args_keys(self):
    method check_following (line 1457) | def check_following(self, value):
  class LowercaseLettersEnglishChecker (line 1471) | class LowercaseLettersEnglishChecker(Instruction):
    method build_description (line 1474) | def build_description(self):
    method get_instruction_args (line 1482) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1485) | def get_instruction_args_keys(self):
    method check_following (line 1489) | def check_following(self, value):
  class CommaChecker (line 1503) | class CommaChecker(Instruction):
    method build_description (line 1506) | def build_description(self):
    method get_instruction_args (line 1513) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1516) | def get_instruction_args_keys(self):
    method check_following (line 1520) | def check_following(self, value):
  class CapitalWordFrequencyChecker (line 1525) | class CapitalWordFrequencyChecker(Instruction):
    method build_description (line 1528) | def build_description(
    method get_instruction_args (line 1566) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1573) | def get_instruction_args_keys(self):
    method check_following (line 1577) | def check_following(self, value):
  class QuotationChecker (line 1591) | class QuotationChecker(Instruction):
    method build_description (line 1594) | def build_description(self):
    method get_instruction_args (line 1601) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1605) | def get_instruction_args_keys(self):
    method check_following (line 1609) | def check_following(self, value):

FILE: lm_eval/tasks/ifeval/instructions_registry.py
  function conflict_make (line 153) | def conflict_make(conflicts):

FILE: lm_eval/tasks/ifeval/instructions_util.py
  function download_nltk_resources (line 36) | def download_nltk_resources():
  function split_into_sentences (line 1628) | def split_into_sentences(text):
  function count_words (line 1679) | def count_words(text):
  function _get_sentence_tokenizer (line 1688) | def _get_sentence_tokenizer():
  function count_sentences (line 1692) | def count_sentences(text):
  function generate_keywords (line 1699) | def generate_keywords(num_keywords):

FILE: lm_eval/tasks/ifeval/multilingual/instruction_utils/ca_instructions_util.py
  function lang_code_to_name (line 32) | def lang_code_to_name(lang_code: str):
  function split_into_sentences (line 46) | def split_into_sentences(text):
  function count_words (line 98) | def count_words(text):
  function tokenize_words (line 105) | def tokenize_words(text):
  function count_sentences (line 113) | def count_sentences(text):
  function generate_keywords (line 120) | def generate_keywords(num_keywords):

FILE: lm_eval/tasks/ifeval/multilingual/instruction_utils/es_instructions_util.py
  function lang_code_to_name (line 32) | def lang_code_to_name(lang_code: str):
  function split_into_sentences (line 46) | def split_into_sentences(text):
  function count_words (line 98) | def count_words(text):
  function tokenize_words (line 105) | def tokenize_words(text):
  function count_sentences (line 113) | def count_sentences(text):
  function generate_keywords (line 120) | def generate_keywords(num_keywords):

FILE: lm_eval/tasks/ifeval/multilingual/instructions/ca_instructions.py
  class Instruction (line 90) | class Instruction:
    method __init__ (line 93) | def __init__(self, instruction_id):
    method build_description (line 96) | def build_description(self, **kwargs):
    method get_instruction_args (line 99) | def get_instruction_args(self):
    method get_instruction_args_keys (line 102) | def get_instruction_args_keys(self):
    method check_following (line 105) | def check_following(self, value):
  class ResponseLanguageChecker (line 109) | class ResponseLanguageChecker(Instruction):
    method build_description (line 112) | def build_description(self, *, language = None):
    method get_instruction_args (line 133) | def get_instruction_args(self):
    method get_instruction_args_keys (line 137) | def get_instruction_args_keys(self):
    method check_following (line 141) | def check_following(self, value):
  class NumberOfSentences (line 162) | class NumberOfSentences(Instruction):
    method build_description (line 165) | def build_description(self, *, num_sentences = None,
    method get_instruction_args (line 201) | def get_instruction_args(self):
    method get_instruction_args_keys (line 206) | def get_instruction_args_keys(self):
    method check_following (line 210) | def check_following(self, value):
  class PlaceholderChecker (line 248) | class PlaceholderChecker(Instruction):
    method build_description (line 251) | def build_description(self, *, num_placeholders = None,
    method get_instruction_args (line 284) | def get_instruction_args(self):
    method get_instruction_args_keys (line 289) | def get_instruction_args_keys(self):
    method check_following (line 293) | def check_following(self, value):
  class BulletListChecker (line 312) | class BulletListChecker(Instruction):
    method build_description (line 315) | def build_description(self, *, num_bullets = None):
    method get_instruction_args (line 336) | def get_instruction_args(self):
    method get_instruction_args_keys (line 340) | def get_instruction_args_keys(self):
    method check_following (line 344) | def check_following(self, value):
  class ConstrainedResponseChecker (line 360) | class ConstrainedResponseChecker(Instruction):
    method build_description (line 363) | def build_description(self):
    method get_instruction_args (line 372) | def get_instruction_args(self):
    method get_instruction_args_keys (line 376) | def get_instruction_args_keys(self):
    method check_following (line 380) | def check_following(self, value):
  class ConstrainedStartChecker (line 398) | class ConstrainedStartChecker(Instruction):
    method build_description (line 401) | def build_description(self, *, starter = None):
    method get_instruction_args (line 419) | def get_instruction_args(self):
    method get_instruction_args_keys (line 423) | def get_instruction_args_keys(self):
    method check_following (line 427) | def check_following(self, value):
  class HighlightSectionChecker (line 443) | class HighlightSectionChecker(Instruction):
    method build_description (line 446) | def build_description(self, *, num_highlights = None,
    method get_instruction_args (line 479) | def get_instruction_args(self):
    method get_instruction_args_keys (line 484) | def get_instruction_args_keys(self):
    method check_following (line 488) | def check_following(self, value):
  class SectionChecker (line 516) | class SectionChecker(Instruction):
    method build_description (line 519) | def build_description(self, *, section_spliter = None,
    method get_instruction_args (line 563) | def get_instruction_args(self):
    method get_instruction_args_keys (line 569) | def get_instruction_args_keys(self):
    method check_following (line 573) | def check_following(self, value):
  class ParagraphChecker (line 596) | class ParagraphChecker(Instruction):
    method build_description (line 599) | def build_description(self, *, num_paragraphs = None):
    method get_instruction_args (line 618) | def get_instruction_args(self):
    method get_instruction_args_keys (line 622) | def get_instruction_args_keys(self):
    method check_following (line 626) | def check_following(self, value):
  class PostscriptChecker (line 650) | class PostscriptChecker(Instruction):
    method build_description (line 653) | def build_description(self, *, postscript_marker = None
    method get_instruction_args (line 675) | def get_instruction_args(self):
    method get_instruction_args_keys (line 679) | def get_instruction_args_keys(self):
    method check_following (line 683) | def check_following(self, value):
  class RephraseChecker (line 706) | class RephraseChecker(Instruction):
    method build_description (line 709) | def build_description(self, *, original_message):
    method get_instruction_args (line 731) | def get_instruction_args(self):
    method get_instruction_args_keys (line 735) | def get_instruction_args_keys(self):
    method check_following (line 739) | def check_following(self, value):
    method is_change (line 761) | def is_change(self, response):
    method strip_changes (line 765) | def strip_changes(self, response):
  class KeywordChecker (line 770) | class KeywordChecker(Instruction):
    method build_description (line 773) | def build_description(self, *, keywords = None
    method get_instruction_args (line 796) | def get_instruction_args(self):
    method get_instruction_args_keys (line 800) | def get_instruction_args_keys(self):
    method check_following (line 804) | def check_following(self, value):
  class KeywordFrequencyChecker (line 812) | class KeywordFrequencyChecker(Instruction):
    method build_description (line 815) | def build_description(self, *, keyword = None,
    method get_instruction_args (line 859) | def get_instruction_args(self):
    method get_instruction_args_keys (line 865) | def get_instruction_args_keys(self):
    method check_following (line 869) | def check_following(self, value):
  class NumberOfWords (line 880) | class NumberOfWords(Instruction):
    method build_description (line 883) | def build_description(self, *, num_words = None,
    method get_instruction_args (line 921) | def get_instruction_args(self):
    method get_instruction_args_keys (line 926) | def get_instruction_args_keys(self):
    method check_following (line 930) | def check_following(self, value):
  class JsonFormat (line 946) | class JsonFormat(Instruction):
    method build_description (line 949) | def build_description(self):
    method get_instruction_args (line 955) | def get_instruction_args(self):
    method get_instruction_args_keys (line 959) | def get_instruction_args_keys(self):
    method check_following (line 966) | def check_following(self, value):
  class ParagraphFirstWordCheck (line 983) | class ParagraphFirstWordCheck(Instruction):
    method build_description (line 986) | def build_description(self, num_paragraphs = None,
    method get_instruction_args (line 1030) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1036) | def get_instruction_args_keys(self):
    method check_following (line 1040) | def check_following(self, value):
  class KeySentenceChecker (line 1095) | class KeySentenceChecker(Instruction):
    method build_description (line 1098) | def build_description(self, key_sentences = None,
    method get_instruction_args (line 1131) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1136) | def get_instruction_args_keys(self):
    method check_following (line 1140) | def check_following(self, value):
  class ForbiddenWords (line 1151) | class ForbiddenWords(Instruction):
    method build_description (line 1154) | def build_description(self, forbidden_words = None
    method get_instruction_args (line 1180) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1184) | def get_instruction_args_keys(self):
    method check_following (line 1188) | def check_following(self, value):
  class RephraseParagraph (line 1197) | class RephraseParagraph(Instruction):
    method build_description (line 1200) | def build_description(self, *, original_paragraph, low, high
    method get_instruction_args (line 1229) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1235) | def get_instruction_args_keys(self):
    method check_following (line 1239) | def check_following(self, value):
  class TwoResponsesChecker (line 1253) | class TwoResponsesChecker(Instruction):
    method build_description (line 1256) | def build_description(self):
    method get_instruction_args (line 1264) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1268) | def get_instruction_args_keys(self):
    method check_following (line 1272) | def check_following(self, value):
  class RepeatPromptThenAnswer (line 1295) | class RepeatPromptThenAnswer(Instruction):
    method build_description (line 1298) | def build_description(self, *, prompt_to_repeat = None):
    method get_instruction_args (line 1318) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1321) | def get_instruction_args_keys(self):
    method check_following (line 1325) | def check_following(self, value):
  class EndChecker (line 1331) | class EndChecker(Instruction):
    method build_description (line 1334) | def build_description(self, *, end_phrase = None):
    method get_instruction_args (line 1353) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1356) | def get_instruction_args_keys(self):
    method check_following (line 1360) | def check_following(self, value):
  class TitleChecker (line 1371) | class TitleChecker(Instruction):
    method build_description (line 1374) | def build_description(self):
    method get_instruction_args (line 1382) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1385) | def get_instruction_args_keys(self):
    method check_following (line 1389) | def check_following(self, value):
  class LetterFrequencyChecker (line 1401) | class LetterFrequencyChecker(Instruction):
    method build_description (line 1404) | def build_description(self, *, letter = None,
    method get_instruction_args (line 1458) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1464) | def get_instruction_args_keys(self):
    method check_following (line 1468) | def check_following(self, value):
  class CapitalLettersCatalanChecker (line 1479) | class CapitalLettersCatalanChecker(Instruction):
    method build_description (line 1482) | def build_description(self):
    method get_instruction_args (line 1489) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1492) | def get_instruction_args_keys(self):
    method check_following (line 1496) | def check_following(self, value):
  class LowercaseLettersCatalanChecker (line 1521) | class LowercaseLettersCatalanChecker(Instruction):
    method build_description (line 1524) | def build_description(self):
    method get_instruction_args (line 1532) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1535) | def get_instruction_args_keys(self):
    method check_following (line 1539) | def check_following(self, value):
  class CommaChecker (line 1553) | class CommaChecker(Instruction):
    method build_description (line 1556) | def build_description(self):
    method get_instruction_args (line 1563) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1566) | def get_instruction_args_keys(self):
    method check_following (line 1570) | def check_following(self, value):
  class CapitalWordFrequencyChecker (line 1575) | class CapitalWordFrequencyChecker(Instruction):
    method build_description (line 1578) | def build_description(
    method get_instruction_args (line 1616) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1623) | def get_instruction_args_keys(self):
    method check_following (line 1627) | def check_following(self, value):
  class QuotationChecker (line 1641) | class QuotationChecker(Instruction):
    method build_description (line 1644) | def build_description(self):
    method get_instruction_args (line 1651) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1655) | def get_instruction_args_keys(self):
    method check_following (line 1659) | def check_following(self, value):
  class QuestionMarkChecker (line 1665) | class QuestionMarkChecker(Instruction):
    method build_description (line 1668) | def build_description(self):
    method get_instruction_args (line 1675) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1679) | def get_instruction_args_keys(self):
    method check_following (line 1683) | def check_following(self, value):
  class ExclamationMarkChecker (line 1695) | class ExclamationMarkChecker(Instruction):
    method build_description (line 1698) | def build_description(self):
    method get_instruction_args (line 1705) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1709) | def get_instruction_args_keys(self):
    method check_following (line 1713) | def check_following(self, value):
  class EnieChecker (line 1725) | class EnieChecker(Instruction):
    method build_description (line 1728) | def build_description(
    method get_instruction_args (line 1754) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1760) | def get_instruction_args_keys(self):
    method check_following (line 1764) | def check_following(self, value):
  class DieresisChecker (line 1778) | class DieresisChecker(Instruction):
    method build_description (line 1781) | def build_description(
    method get_instruction_args (line 1807) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1813) | def get_instruction_args_keys(self):
    method check_following (line 1817) | def check_following(self, value):
  class TildesChecker (line 1831) | class TildesChecker(Instruction):
    method build_description (line 1834) | def build_description(self, *, num_words = None,
    method get_instruction_args (line 1872) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1877) | def get_instruction_args_keys(self):
    method check_following (line 1881) | def check_following(self, value):

FILE: lm_eval/tasks/ifeval/multilingual/instructions/es_instructions.py
  class Instruction (line 92) | class Instruction:
    method __init__ (line 95) | def __init__(self, instruction_id):
    method build_description (line 98) | def build_description(self, **kwargs):
    method get_instruction_args (line 101) | def get_instruction_args(self):
    method get_instruction_args_keys (line 104) | def get_instruction_args_keys(self):
    method check_following (line 107) | def check_following(self, value):
  class ResponseLanguageChecker (line 111) | class ResponseLanguageChecker(Instruction):
    method build_description (line 114) | def build_description(self, *, language = None):
    method get_instruction_args (line 135) | def get_instruction_args(self):
    method get_instruction_args_keys (line 139) | def get_instruction_args_keys(self):
    method check_following (line 143) | def check_following(self, value):
  class NumberOfSentences (line 164) | class NumberOfSentences(Instruction):
    method build_description (line 167) | def build_description(self, *, num_sentences = None,
    method get_instruction_args (line 203) | def get_instruction_args(self):
    method get_instruction_args_keys (line 208) | def get_instruction_args_keys(self):
    method check_following (line 212) | def check_following(self, value):
  class PlaceholderChecker (line 250) | class PlaceholderChecker(Instruction):
    method build_description (line 253) | def build_description(self, *, num_placeholders = None,
    method get_instruction_args (line 286) | def get_instruction_args(self):
    method get_instruction_args_keys (line 291) | def get_instruction_args_keys(self):
    method check_following (line 295) | def check_following(self, value):
  class BulletListChecker (line 314) | class BulletListChecker(Instruction):
    method build_description (line 317) | def build_description(self, *, num_bullets = None):
    method get_instruction_args (line 338) | def get_instruction_args(self):
    method get_instruction_args_keys (line 342) | def get_instruction_args_keys(self):
    method check_following (line 346) | def check_following(self, value):
  class ConstrainedResponseChecker (line 362) | class ConstrainedResponseChecker(Instruction):
    method build_description (line 365) | def build_description(self):
    method get_instruction_args (line 374) | def get_instruction_args(self):
    method get_instruction_args_keys (line 378) | def get_instruction_args_keys(self):
    method check_following (line 382) | def check_following(self, value):
  class ConstrainedStartChecker (line 400) | class ConstrainedStartChecker(Instruction):
    method build_description (line 403) | def build_description(self, *, starter = None):
    method get_instruction_args (line 421) | def get_instruction_args(self):
    method get_instruction_args_keys (line 425) | def get_instruction_args_keys(self):
    method check_following (line 429) | def check_following(self, value):
  class HighlightSectionChecker (line 445) | class HighlightSectionChecker(Instruction):
    method build_description (line 448) | def build_description(self, *, num_highlights = None,
    method get_instruction_args (line 481) | def get_instruction_args(self):
    method get_instruction_args_keys (line 486) | def get_instruction_args_keys(self):
    method check_following (line 490) | def check_following(self, value):
  class SectionChecker (line 518) | class SectionChecker(Instruction):
    method build_description (line 521) | def build_description(self, *, section_spliter = None,
    method get_instruction_args (line 565) | def get_instruction_args(self):
    method get_instruction_args_keys (line 571) | def get_instruction_args_keys(self):
    method check_following (line 575) | def check_following(self, value):
  class ParagraphChecker (line 598) | class ParagraphChecker(Instruction):
    method build_description (line 601) | def build_description(self, *, num_paragraphs = None):
    method get_instruction_args (line 620) | def get_instruction_args(self):
    method get_instruction_args_keys (line 624) | def get_instruction_args_keys(self):
    method check_following (line 628) | def check_following(self, value):
  class PostscriptChecker (line 652) | class PostscriptChecker(Instruction):
    method build_description (line 655) | def build_description(self, *, postscript_marker = None
    method get_instruction_args (line 677) | def get_instruction_args(self):
    method get_instruction_args_keys (line 681) | def get_instruction_args_keys(self):
    method check_following (line 685) | def check_following(self, value):
  class RephraseChecker (line 708) | class RephraseChecker(Instruction):
    method build_description (line 711) | def build_description(self, *, original_message):
    method get_instruction_args (line 733) | def get_instruction_args(self):
    method get_instruction_args_keys (line 737) | def get_instruction_args_keys(self):
    method check_following (line 741) | def check_following(self, value):
    method is_change (line 763) | def is_change(self, response):
    method strip_changes (line 767) | def strip_changes(self, response):
  class KeywordChecker (line 772) | class KeywordChecker(Instruction):
    method build_description (line 775) | def build_description(self, *, keywords = None
    method get_instruction_args (line 798) | def get_instruction_args(self):
    method get_instruction_args_keys (line 802) | def get_instruction_args_keys(self):
    method check_following (line 806) | def check_following(self, value):
  class KeywordFrequencyChecker (line 814) | class KeywordFrequencyChecker(Instruction):
    method build_description (line 817) | def build_description(self, *, keyword = None,
    method get_instruction_args (line 861) | def get_instruction_args(self):
    method get_instruction_args_keys (line 867) | def get_instruction_args_keys(self):
    method check_following (line 871) | def check_following(self, value):
  class NumberOfWords (line 882) | class NumberOfWords(Instruction):
    method build_description (line 885) | def build_description(self, *, num_words = None,
    method get_instruction_args (line 923) | def get_instruction_args(self):
    method get_instruction_args_keys (line 928) | def get_instruction_args_keys(self):
    method check_following (line 932) | def check_following(self, value):
  class JsonFormat (line 948) | class JsonFormat(Instruction):
    method build_description (line 951) | def build_description(self):
    method get_instruction_args (line 957) | def get_instruction_args(self):
    method get_instruction_args_keys (line 961) | def get_instruction_args_keys(self):
    method check_following (line 968) | def check_following(self, value):
  class ParagraphFirstWordCheck (line 985) | class ParagraphFirstWordCheck(Instruction):
    method build_description (line 988) | def build_description(self, num_paragraphs = None,
    method get_instruction_args (line 1032) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1038) | def get_instruction_args_keys(self):
    method check_following (line 1042) | def check_following(self, value):
  class KeySentenceChecker (line 1097) | class KeySentenceChecker(Instruction):
    method build_description (line 1100) | def build_description(self, key_sentences = None,
    method get_instruction_args (line 1133) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1138) | def get_instruction_args_keys(self):
    method check_following (line 1142) | def check_following(self, value):
  class ForbiddenWords (line 1153) | class ForbiddenWords(Instruction):
    method build_description (line 1156) | def build_description(self, forbidden_words = None
    method get_instruction_args (line 1182) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1186) | def get_instruction_args_keys(self):
    method check_following (line 1190) | def check_following(self, value):
  class RephraseParagraph (line 1199) | class RephraseParagraph(Instruction):
    method build_description (line 1202) | def build_description(self, *, original_paragraph, low, high
    method get_instruction_args (line 1231) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1237) | def get_instruction_args_keys(self):
    method check_following (line 1241) | def check_following(self, value):
  class TwoResponsesChecker (line 1255) | class TwoResponsesChecker(Instruction):
    method build_description (line 1258) | def build_description(self):
    method get_instruction_args (line 1266) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1270) | def get_instruction_args_keys(self):
    method check_following (line 1274) | def check_following(self, value):
  class RepeatPromptThenAnswer (line 1297) | class RepeatPromptThenAnswer(Instruction):
    method build_description (line 1300) | def build_description(self, *, prompt_to_repeat = None):
    method get_instruction_args (line 1320) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1323) | def get_instruction_args_keys(self):
    method check_following (line 1327) | def check_following(self, value):
  class EndChecker (line 1333) | class EndChecker(Instruction):
    method build_description (line 1336) | def build_description(self, *, end_phrase = None):
    method get_instruction_args (line 1355) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1358) | def get_instruction_args_keys(self):
    method check_following (line 1362) | def check_following(self, value):
  class TitleChecker (line 1373) | class TitleChecker(Instruction):
    method build_description (line 1376) | def build_description(self):
    method get_instruction_args (line 1384) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1387) | def get_instruction_args_keys(self):
    method check_following (line 1391) | def check_following(self, value):
  class LetterFrequencyChecker (line 1403) | class LetterFrequencyChecker(Instruction):
    method build_description (line 1406) | def build_description(self, *, letter = None,
    method get_instruction_args (line 1460) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1466) | def get_instruction_args_keys(self):
    method check_following (line 1470) | def check_following(self, value):
  class CapitalLettersSpanishChecker (line 1481) | class CapitalLettersSpanishChecker(Instruction):
    method build_description (line 1484) | def build_description(self):
    method get_instruction_args (line 1491) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1494) | def get_instruction_args_keys(self):
    method check_following (line 1498) | def check_following(self, value):
  class LowercaseLettersSpanishChecker (line 1523) | class LowercaseLettersSpanishChecker(Instruction):
    method build_description (line 1526) | def build_description(self):
    method get_instruction_args (line 1534) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1537) | def get_instruction_args_keys(self):
    method check_following (line 1541) | def check_following(self, value):
  class CommaChecker (line 1555) | class CommaChecker(Instruction):
    method build_description (line 1558) | def build_description(self):
    method get_instruction_args (line 1565) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1568) | def get_instruction_args_keys(self):
    method check_following (line 1572) | def check_following(self, value):
  class CapitalWordFrequencyChecker (line 1577) | class CapitalWordFrequencyChecker(Instruction):
    method build_description (line 1580) | def build_description(
    method get_instruction_args (line 1618) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1625) | def get_instruction_args_keys(self):
    method check_following (line 1629) | def check_following(self, value):
  class QuotationChecker (line 1643) | class QuotationChecker(Instruction):
    method build_description (line 1646) | def build_description(self):
    method get_instruction_args (line 1653) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1657) | def get_instruction_args_keys(self):
    method check_following (line 1661) | def check_following(self, value):
  class QuestionMarkChecker (line 1667) | class QuestionMarkChecker(Instruction):
    method build_description (line 1670) | def build_description(self):
    method get_instruction_args (line 1677) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1681) | def get_instruction_args_keys(self):
    method check_following (line 1685) | def check_following(self, value):
  class ExclamationMarkChecker (line 1697) | class ExclamationMarkChecker(Instruction):
    method build_description (line 1700) | def build_description(self):
    method get_instruction_args (line 1707) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1711) | def get_instruction_args_keys(self):
    method check_following (line 1715) | def check_following(self, value):
  class EnieChecker (line 1727) | class EnieChecker(Instruction):
    method build_description (line 1730) | def build_description(
    method get_instruction_args (line 1756) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1762) | def get_instruction_args_keys(self):
    method check_following (line 1766) | def check_following(self, value):
  class DieresisChecker (line 1780) | class DieresisChecker(Instruction):
    method build_description (line 1783) | def build_description(
    method get_instruction_args (line 1809) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1815) | def get_instruction_args_keys(self):
    method check_following (line 1819) | def check_following(self, value):
  class TildesChecker (line 1833) | class TildesChecker(Instruction):
    method build_description (line 1836) | def build_description(self, *, num_words = None,
    method get_instruction_args (line 1874) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1879) | def get_instruction_args_keys(self):
    method check_following (line 1883) | def check_following(self, value):

FILE: lm_eval/tasks/ifeval/multilingual/utils.py
  class InputExample (line 7) | class InputExample:
  class OutputExample (line 15) | class OutputExample:
  function test_instruction_following_strict (line 23) | def test_instruction_following_strict(
  function test_instruction_following_loose (line 56) | def test_instruction_following_loose(
  function process_results (line 110) | def process_results(doc, results):
  function agg_inst_level_acc (line 130) | def agg_inst_level_acc(items):

FILE: lm_eval/tasks/ifeval/utils.py
  class InputExample (line 8) | class InputExample:
  class OutputExample (line 16) | class OutputExample:
  function test_instruction_following_strict (line 24) | def test_instruction_following_strict(
  function test_instruction_following_loose (line 57) | def test_instruction_following_loose(
  function process_results (line 111) | def process_results(doc, results):
  function agg_inst_level_acc (line 131) | def agg_inst_level_acc(items):

FILE: lm_eval/tasks/include/default/Albanian/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/default/Arabic/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/default/Armenian/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/default/Azerbaijani/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/default/Basque/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/default/Belarusian/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/default/Bengali/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/default/Bulgarian/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/default/Chinese/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/default/Croatian/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/default/Dutch/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/default/Estonian/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/default/Finnish/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/default/French/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/default/Georgian/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/default/German/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/default/Greek/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/default/Hebrew/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/default/Hindi/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/default/Hungarian/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/default/Indonesian/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/default/Italian/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/default/Japanese/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/default/Kazakh/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/default/Korean/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/default/Lithuanian/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/default/Malay/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/default/Malayalam/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/default/Nepali/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/default/North Macedonian/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/default/Persian/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/default/Polish/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/default/Portuguese/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/default/Russian/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/default/Serbian/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/default/Spanish/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/default/Tagalog/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/default/Tamil/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/default/Telugu/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/default/Turkish/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/default/Ukrainian/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/default/Urdu/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/default/Uzbek/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/default/Vietnamese/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_en/Albanian/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_en/Arabic/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_en/Armenian/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_en/Azerbaijani/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_en/Basque/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_en/Belarusian/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_en/Bengali/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_en/Bulgarian/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_en/Chinese/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_en/Croatian/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_en/Dutch/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_en/Estonian/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_en/Finnish/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_en/French/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_en/Georgian/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_en/German/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_en/Greek/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_en/Hebrew/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_en/Hindi/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_en/Hungarian/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_en/Indonesian/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_en/Italian/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_en/Japanese/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_en/Kazakh/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_en/Korean/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_en/Lithuanian/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_en/Malay/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_en/Malayalam/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_en/Nepali/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_en/North Macedonian/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_en/Persian/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_en/Polish/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_en/Portuguese/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_en/Russian/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_en/Serbian/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_en/Spanish/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_en/Tagalog/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_en/Tamil/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_en/Telugu/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_en/Turkish/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_en/Ukrainian/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_en/Urdu/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_en/Uzbek/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_en/Vietnamese/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_og/Albanian/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_og/Arabic/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_og/Armenian/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_og/Azerbaijani/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_og/Basque/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_og/Belarusian/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_og/Bengali/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_og/Bulgarian/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_og/Chinese/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_og/Croatian/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_og/Dutch/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_og/Estonian/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_og/Finnish/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_og/French/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_og/Georgian/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_og/German/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_og/Greek/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_og/Hebrew/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_og/Hindi/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_og/Hungarian/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_og/Indonesian/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_og/Italian/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_og/Japanese/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_og/Kazakh/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_og/Korean/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_og/Lithuanian/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_og/Malay/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_og/Malayalam/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_og/Nepali/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_og/North Macedonian/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_og/Persian/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_og/Polish/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_og/Portuguese/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_og/Russian/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_og/Serbian/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_og/Spanish/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_og/Tagalog/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_og/Tamil/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_og/Telugu/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_og/Turkish/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_og/Ukrainian/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_og/Urdu/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_og/Uzbek/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/include/few_shot_og/Vietnamese/utils.py
  function process_docs (line 19) | def process_docs(dataset, category):

FILE: lm_eval/tasks/japanese_leaderboard/ja_leaderboard_jcommonsenseqa.py
  function process_docs (line 1) | def process_docs(dataset):

FILE: lm_eval/tasks/japanese_leaderboard/ja_leaderboard_mgsm.py
  function _extract_answer (line 9) | def _extract_answer(completion):
  function process_results (line 25) | def process_results(doc, results):

FILE: lm_eval/tasks/japanese_leaderboard/ja_leaderboard_xlsum.py
  function _missing_module_message (line 4) | def _missing_module_message(name):
  class MecabTokenizer (line 17) | class MecabTokenizer:
    method __init__ (line 18) | def __init__(self) -> None:
    method normalize_answer (line 21) | def normalize_answer(self, text):
    method tokenize (line 47) | def tokenize(self, text):
  function rouge2 (line 51) | def rouge2(items):
  function rouge2_agg (line 55) | def rouge2_agg(items):

FILE: lm_eval/tasks/japanese_leaderboard/ja_leaderboard_xwinograd.py
  function process_docs (line 1) | def process_docs(dataset):

FILE: lm_eval/tasks/jfinqa/test_jfinqa_utils.py
  class TestNormalize (line 16) | class TestNormalize:
    method test_normalize (line 32) | def test_normalize(self, text, expected):
    method test_normalize_comma_only_between_digits (line 35) | def test_normalize_comma_only_between_digits(self):
  class TestExtractAnswer (line 40) | class TestExtractAnswer:
    method test_extract_answer (line 55) | def test_extract_answer(self, text, expected):
    method test_extract_answer_multiline_with_answer (line 58) | def test_extract_answer_multiline_with_answer(self):
  class TestTryParseNumber (line 63) | class TestTryParseNumber:
    method test_parse_number (line 81) | def test_parse_number(self, text, expected):
    method test_parse_unparseable (line 86) | def test_parse_unparseable(self):
    method test_parse_negative (line 90) | def test_parse_negative(self):
  class TestNumericalMatch (line 96) | class TestNumericalMatch:
    method test_tolerance_constant (line 97) | def test_tolerance_constant(self):
    method test_exact_numerical_match (line 100) | def test_exact_numerical_match(self):
    method test_within_tolerance (line 103) | def test_within_tolerance(self):
    method test_outside_tolerance (line 107) | def test_outside_tolerance(self):
    method test_zero_gold (line 111) | def test_zero_gold(self):
    method test_non_numeric_fallback (line 115) | def test_non_numeric_fallback(self):
    method test_unit_match (line 119) | def test_unit_match(self):
    method test_same_unit_different_values (line 123) | def test_same_unit_different_values(self):
  class TestDocToText (line 128) | class TestDocToText:
    method test_complete_document (line 129) | def test_complete_document(self):
    method test_missing_optional_fields (line 145) | def test_missing_optional_fields(self):
    method test_no_table (line 151) | def test_no_table(self):
  class TestProcessResults (line 162) | class TestProcessResults:
    method test_exact_and_numerical_match (line 163) | def test_exact_and_numerical_match(self):
    method test_numerical_match_only (line 169) | def test_numerical_match_only(self):
    method test_no_match (line 175) | def test_no_match(self):
    method test_empty_results (line 181) | def test_empty_results(self):
    method test_japanese_text_match (line 186) | def test_japanese_text_match(self):

FILE: lm_eval/tasks/jfinqa/utils.py
  function doc_to_text (line 25) | def doc_to_text(doc: dict[str, Any]) -> str:
  function process_results (line 56) | def process_results(doc: dict[str, Any], results: list[str]) -> dict[str...
  function _extract_answer (line 68) | def _extract_answer(text: str) -> str:
  function _normalize (line 77) | def _normalize(text: str) -> str:
  function _try_parse_number (line 110) | def _try_parse_number(text: str) -> float | None:
  function _numerical_match (line 136) | def _numerical_match(

FILE: lm_eval/tasks/jsonschema_bench/metrics.py
  function is_json_schema_valid (line 20) | def is_json_schema_valid(schema: dict):
  function ipv4_check (line 41) | def ipv4_check(value):
  function ipv6_check (line 46) | def ipv6_check(value):
  function uuid_check (line 51) | def uuid_check(value):
  function schema_conform_with_format_checker (line 55) | def schema_conform_with_format_checker(
  function schema_compliance (line 76) | def schema_compliance(references: list[str], predictions: list[str]) -> ...
  function json_validity (line 101) | def json_validity(references: list[str], predictions: list[str]) -> bool:

FILE: lm_eval/tasks/kobest/utils.py
  function copa_doc_to_text (line 4) | def copa_doc_to_text(doc: dict) -> str:
  function copa_doc_to_target (line 9) | def copa_doc_to_target(doc: dict) -> str:
  function copa_doc_to_choice (line 14) | def copa_doc_to_choice(doc: dict) -> list:
  function sentineg_doc_to_text (line 18) | def sentineg_doc_to_text(doc: dict):
  function wic_doc_to_text (line 22) | def wic_doc_to_text(doc: dict) -> str:
  function hellaswag_process_doc (line 26) | def hellaswag_process_doc(doc: Dataset) -> Dataset:
  function macro_f1_score (line 42) | def macro_f1_score(items):

FILE: lm_eval/tasks/leaderboard/gpqa/utils.py
  function preprocess (line 7) | def preprocess(text):
  function process_docs (line 17) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:

FILE: lm_eval/tasks/leaderboard/ifeval/instructions.py
  class Instruction (line 110) | class Instruction:
    method __init__ (line 113) | def __init__(self, instruction_id):
    method build_description (line 116) | def build_description(self, **kwargs):
    method get_instruction_args (line 119) | def get_instruction_args(self):
    method get_instruction_args_keys (line 122) | def get_instruction_args_keys(self):
    method check_following (line 125) | def check_following(self, value):
  class ResponseLanguageChecker (line 129) | class ResponseLanguageChecker(Instruction):
    method build_description (line 132) | def build_description(self, *, language=None):
    method get_instruction_args (line 155) | def get_instruction_args(self):
    method get_instruction_args_keys (line 159) | def get_instruction_args_keys(self):
    method check_following (line 163) | def check_following(self, value):
  class NumberOfSentences (line 184) | class NumberOfSentences(Instruction):
    method build_description (line 187) | def build_description(self, *, num_sentences=None, relation=None):
    method get_instruction_args (line 225) | def get_instruction_args(self):
    method get_instruction_args_keys (line 232) | def get_instruction_args_keys(self):
    method check_following (line 236) | def check_following(self, value):
  class PlaceholderChecker (line 256) | class PlaceholderChecker(Instruction):
    method build_description (line 259) | def build_description(self, *, num_placeholders=None):
    method get_instruction_args (line 278) | def get_instruction_args(self):
    method get_instruction_args_keys (line 282) | def get_instruction_args_keys(self):
    method check_following (line 286) | def check_following(self, value):
  class BulletListChecker (line 301) | class BulletListChecker(Instruction):
    method build_description (line 304) | def build_description(self, *, num_bullets=None):
    method get_instruction_args (line 325) | def get_instruction_args(self):
    method get_instruction_args_keys (line 329) | def get_instruction_args_keys(self):
    method check_following (line 333) | def check_following(self, value):
  class ConstrainedResponseChecker (line 350) | class ConstrainedResponseChecker(Instruction):
    method build_description (line 353) | def build_description(self):
    method get_instruction_args (line 364) | def get_instruction_args(self):
    method get_instruction_args_keys (line 368) | def get_instruction_args_keys(self):
    method check_following (line 372) | def check_following(self, value):
  class ConstrainedStartChecker (line 389) | class ConstrainedStartChecker(Instruction):
    method build_description (line 392) | def build_description(self, *, starter=None):
    method get_instruction_args (line 411) | def get_instruction_args(self):
    method get_instruction_args_keys (line 415) | def get_instruction_args_keys(self):
    method check_following (line 419) | def check_following(self, value):
  class HighlightSectionChecker (line 436) | class HighlightSectionChecker(Instruction):
    method build_description (line 439) | def build_description(self, *, num_highlights=None):
    method get_instruction_args (line 460) | def get_instruction_args(self):
    method get_instruction_args_keys (line 464) | def get_instruction_args_keys(self):
    method check_following (line 468) | def check_following(self, value):
  class SectionChecker (line 492) | class SectionChecker(Instruction):
    method build_description (line 495) | def build_description(self, *, section_spliter=None, num_sections=None):
    method get_instruction_args (line 531) | def get_instruction_args(self):
    method get_instruction_args_keys (line 538) | def get_instruction_args_keys(self):
    method check_following (line 542) | def check_following(self, value):
  class ParagraphChecker (line 561) | class ParagraphChecker(Instruction):
    method build_description (line 564) | def build_description(self, *, num_paragraphs=None):
    method get_instruction_args (line 584) | def get_instruction_args(self):
    method get_instruction_args_keys (line 588) | def get_instruction_args_keys(self):
    method check_following (line 592) | def check_following(self, value):
  class PostscriptChecker (line 616) | class PostscriptChecker(Instruction):
    method build_description (line 619) | def build_description(self, *, postscript_marker=None):
    method get_instruction_args (line 644) | def get_instruction_args(self):
    method get_instruction_args_keys (line 648) | def get_instruction_args_keys(self):
    method check_following (line 652) | def check_following(self, value):
  class RephraseChecker (line 674) | class RephraseChecker(Instruction):
    method build_description (line 677) | def build_description(self, *, original_message):
    method get_instruction_args (line 703) | def get_instruction_args(self):
    method get_instruction_args_keys (line 707) | def get_instruction_args_keys(self):
    method check_following (line 711) | def check_following(self, value):
    method is_change (line 733) | def is_change(self, response):
    method strip_changes (line 737) | def strip_changes(self, response):
  class KeywordChecker (line 742) | class KeywordChecker(Instruction):
    method build_description (line 745) | def build_description(self, *, keywords=None):
    method get_instruction_args (line 768) | def get_instruction_args(self):
    method get_instruction_args_keys (line 772) | def get_instruction_args_keys(self):
    method check_following (line 776) | def check_following(self, value):
  class KeywordFrequencyChecker (line 784) | class KeywordFrequencyChecker(Instruction):
    method build_description (line 787) | def build_description(self, *, keyword=None, frequency=None, relation=...
    method get_instruction_args (line 833) | def get_instruction_args(self):
    method get_instruction_args_keys (line 841) | def get_instruction_args_keys(self):
    method check_following (line 845) | def check_following(self, value):
  class NumberOfWords (line 855) | class NumberOfWords(Instruction):
    method build_description (line 858) | def build_description(self, *, num_words=None, relation=None):
    method get_instruction_args (line 896) | def get_instruction_args(self):
    method get_instruction_args_keys (line 900) | def get_instruction_args_keys(self):
    method check_following (line 904) | def check_following(self, value):
  class JsonFormat (line 914) | class JsonFormat(Instruction):
    method build_description (line 917) | def build_description(self):
    method get_instruction_args (line 924) | def get_instruction_args(self):
    method get_instruction_args_keys (line 928) | def get_instruction_args_keys(self):
    method check_following (line 932) | def check_following(self, value):
  class ParagraphFirstWordCheck (line 949) | class ParagraphFirstWordCheck(Instruction):
    method build_description (line 952) | def build_description(
    method get_instruction_args (line 998) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1006) | def get_instruction_args_keys(self):
    method check_following (line 1010) | def check_following(self, value):
  class KeySentenceChecker (line 1056) | class KeySentenceChecker(Instruction):
    method build_description (line 1059) | def build_description(self, key_sentences=None, num_sentences=None):
    method get_instruction_args (line 1091) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1098) | def get_instruction_args_keys(self):
    method check_following (line 1102) | def check_following(self, value):
  class ForbiddenWords (line 1113) | class ForbiddenWords(Instruction):
    method build_description (line 1116) | def build_description(self, forbidden_words=None):
    method get_instruction_args (line 1140) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1144) | def get_instruction_args_keys(self):
    method check_following (line 1148) | def check_following(self, value):
  class RephraseParagraph (line 1156) | class RephraseParagraph(Instruction):
    method build_description (line 1159) | def build_description(self, *, original_paragraph, low, high):
    method get_instruction_args (line 1190) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1198) | def get_instruction_args_keys(self):
    method check_following (line 1202) | def check_following(self, value):
  class TwoResponsesChecker (line 1216) | class TwoResponsesChecker(Instruction):
    method build_description (line 1219) | def build_description(self):
    method get_instruction_args (line 1227) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1231) | def get_instruction_args_keys(self):
    method check_following (line 1235) | def check_following(self, value):
  class RepeatPromptThenAnswer (line 1258) | class RepeatPromptThenAnswer(Instruction):
    method build_description (line 1261) | def build_description(self, *, prompt_to_repeat=None):
    method get_instruction_args (line 1282) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1285) | def get_instruction_args_keys(self):
    method check_following (line 1289) | def check_following(self, value):
  class EndChecker (line 1295) | class EndChecker(Instruction):
    method build_description (line 1298) | def build_description(self, *, end_phrase=None):
    method get_instruction_args (line 1318) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1321) | def get_instruction_args_keys(self):
    method check_following (line 1325) | def check_following(self, value):
  class TitleChecker (line 1332) | class TitleChecker(Instruction):
    method build_description (line 1335) | def build_description(self):
    method get_instruction_args (line 1343) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1346) | def get_instruction_args_keys(self):
    method check_following (line 1350) | def check_following(self, value):
  class LetterFrequencyChecker (line 1362) | class LetterFrequencyChecker(Instruction):
    method build_description (line 1365) | def build_description(self, *, letter=None, let_frequency=None, let_re...
    method get_instruction_args (line 1417) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1425) | def get_instruction_args_keys(self):
    method check_following (line 1429) | def check_following(self, value):
  class CapitalLettersEnglishChecker (line 1440) | class CapitalLettersEnglishChecker(Instruction):
    method build_description (line 1443) | def build_description(self):
    method get_instruction_args (line 1450) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1453) | def get_instruction_args_keys(self):
    method check_following (line 1457) | def check_following(self, value):
  class LowercaseLettersEnglishChecker (line 1471) | class LowercaseLettersEnglishChecker(Instruction):
    method build_description (line 1474) | def build_description(self):
    method get_instruction_args (line 1482) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1485) | def get_instruction_args_keys(self):
    method check_following (line 1489) | def check_following(self, value):
  class CommaChecker (line 1503) | class CommaChecker(Instruction):
    method build_description (line 1506) | def build_description(self):
    method get_instruction_args (line 1513) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1516) | def get_instruction_args_keys(self):
    method check_following (line 1520) | def check_following(self, value):
  class CapitalWordFrequencyChecker (line 1525) | class CapitalWordFrequencyChecker(Instruction):
    method build_description (line 1528) | def build_description(
    method get_instruction_args (line 1566) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1573) | def get_instruction_args_keys(self):
    method check_following (line 1577) | def check_following(self, value):
  class QuotationChecker (line 1591) | class QuotationChecker(Instruction):
    method build_description (line 1594) | def build_description(self):
    method get_instruction_args (line 1601) | def get_instruction_args(self):
    method get_instruction_args_keys (line 1605) | def get_instruction_args_keys(self):
    method check_following (line 1609) | def check_following(self, value):

FILE: lm_eval/tasks/leaderboard/ifeval/instructions_registry.py
  function conflict_make (line 153) | def conflict_make(conflicts):

FILE: lm_eval/tasks/leaderboard/ifeval/instructions_util.py
  function download_nltk_resources (line 34) | def download_nltk_resources():
  function split_into_sentences (line 1623) | def split_into_sentences(text):
  function count_words (line 1674) | def count_words(text):
  function _get_sentence_tokenizer (line 1683) | def _get_sentence_tokenizer():
  function count_sentences (line 1687) | def count_sentences(text):
  function generate_keywords (line 1694) | def generate_keywords(num_keywords):

FILE: lm_eval/tasks/leaderboard/ifeval/utils.py
  class InputExample (line 8) | class InputExample:
  class OutputExample (line 16) | class OutputExample:
  function test_instruction_following_strict (line 24) | def test_instruction_following_strict(
  function test_instruction_following_loose (line 57) | def test_instruction_following_loose(
  function process_results (line 111) | def process_results(doc, results):
  function agg_inst_level_acc (line 131) | def agg_inst_level_acc(items):

FILE: lm_eval/tasks/leaderboard/math/utils.py
  function doc_to_text (line 26) | def doc_to_text(doc: dict) -> str:
  function process_docs (line 30) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:
  function list_fewshot_samples (line 44) | def list_fewshot_samples() -> list[dict]:
  function process_results (line 73) | def process_results(doc: dict, results: List[str]) -> Dict[str, int]:
  function process_result_v1 (line 94) | def process_result_v1(doc: dict, candidates: str) -> int:
  function last_boxed_only_string (line 108) | def last_boxed_only_string(string: str) -> str:
  function remove_boxed (line 138) | def remove_boxed(s: str) -> str:
  class timeout (line 154) | class timeout:
    method __init__ (line 155) | def __init__(self, seconds=1, error_message="Timeout"):
    method handle_timeout (line 159) | def handle_timeout(self, signum, frame):
    method __enter__ (line 162) | def __enter__(self):
    method __exit__ (line 166) | def __exit__(self, type, value, traceback):
  function is_equiv (line 170) | def is_equiv(x1: str, x2: str) -> bool:
  function get_unnormalized_answer (line 214) | def get_unnormalized_answer(text: str) -> str:
  function normalize_final_answer (line 285) | def normalize_final_answer(final_answer: str) -> str:

FILE: lm_eval/tasks/leaderboard/mmlu_pro/utils.py
  function doc_to_text (line 4) | def doc_to_text(doc):
  function doc_to_choice (line 14) | def doc_to_choice(doc):

FILE: lm_eval/tasks/leaderboard/musr/utils.py
  function doc_to_choice (line 4) | def doc_to_choice(doc):
  function doc_to_text (line 14) | def doc_to_text(doc):

FILE: lm_eval/tasks/libra/utils.py
  class PredictionResult (line 20) | class PredictionResult:
  function filter_dataset_by_page_lengths (line 26) | def filter_dataset_by_page_lengths(*args, **kwargs) -> Dict[str, dataset...
  function normalize_answer (line 57) | def normalize_answer(sentence: str) -> str:
  function process_results (line 81) | def process_results(doc: List, results: List[str]) -> Dict:
  function exact_match_score (line 100) | def exact_match_score(prediction: str, ground_truth: str) -> float:
  function f1_score (line 107) | def f1_score(prediction: str, ground_truth: str) -> float:
  function count_score (line 118) | def count_score(prediction: str, ground_truth: str) -> float:
  function aggregate_results (line 128) | def aggregate_results(
  function aggregate_results_em (line 161) | def aggregate_results_em(results: List[PredictionResult]) -> Dict[str, f...
  function aggregate_results_f1 (line 165) | def aggregate_results_f1(results: List[PredictionResult]) -> Dict[str, f...
  function aggregate_results_count_score (line 169) | def aggregate_results_count_score(results: List[PredictionResult]) -> Di...

FILE: lm_eval/tasks/lingoly/script.py
  function clean_answer (line 6) | def clean_answer(answer: str):
  function safe_exact (line 31) | def safe_exact(references: list[str], predictions: list[str]):
  function parse_str_list_score (line 42) | def parse_str_list_score(model, correct, scoring_func):
  function exact_match (line 91) | def exact_match(references: list[str], predictions: list[str]):
  function aggregate_scores (line 124) | def aggregate_scores(input):
  function aggregate_metrics (line 128) | def aggregate_metrics(

FILE: lm_eval/tasks/lingoly/utils.py
  function load_questionsheet (line 6) | def load_questionsheet(qsheet: dict, no_context: bool = False):
  function format_answers (line 31) | def format_answers(questionpart_ns: list[str], answers: list[str]):
  function load_question (line 43) | def load_question(
  function load_all_questions (line 77) | def load_all_questions(

FILE: lm_eval/tasks/llama3/instruct/arc_challenge/utils.py
  function process_docs (line 4) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:

FILE: lm_eval/tasks/llama3/instruct/mmlu_de/utils.py
  function process_docs (line 6) | def process_docs(dataset: datasets.Dataset, subtask) -> datasets.Dataset:

FILE: lm_eval/tasks/llama3/instruct/mmlu_es/utils.py
  function process_docs (line 6) | def process_docs(dataset: datasets.Dataset, subtask) -> datasets.Dataset:

FILE: lm_eval/tasks/llama3/instruct/mmlu_fr/utils.py
  function process_docs (line 6) | def process_docs(dataset: datasets.Dataset, subtask) -> datasets.Dataset:

FILE: lm_eval/tasks/llama3/instruct/mmlu_hi/utils.py
  function process_docs (line 6) | def process_docs(dataset: datasets.Dataset, subtask) -> datasets.Dataset:

FILE: lm_eval/tasks/llama3/instruct/mmlu_it/utils.py
  function process_docs (line 6) | def process_docs(dataset: datasets.Dataset, subtask) -> datasets.Dataset:

FILE: lm_eval/tasks/llama3/instruct/mmlu_pro/utils.py
  function process_docs (line 5) | def process_docs(dataset, subject):
  function fewshot_to_text (line 9) | def fewshot_to_text(example):

FILE: lm_eval/tasks/llama3/instruct/mmlu_pt/utils.py
  function process_docs (line 6) | def process_docs(dataset: datasets.Dataset, subtask) -> datasets.Dataset:

FILE: lm_eval/tasks/llama3/instruct/mmlu_th/utils.py
  function process_docs (line 6) | def process_docs(dataset: datasets.Dataset, subtask) -> datasets.Dataset:

FILE: lm_eval/tasks/logiqa/utils_logiqa.py
  function doc_to_text (line 2) | def doc_to_text(doc) -> str:
  function doc_to_target (line 22) | def doc_to_target(doc) -> int:

FILE: lm_eval/tasks/logiqa2/utils_logiqa2.py
  function doc_to_text (line 2) | def doc_to_text(doc) -> str:

FILE: lm_eval/tasks/longbench/_generate_config.py
  function parse_args (line 139) | def parse_args():

FILE: lm_eval/tasks/longbench/metrics.py
  function normalize_answer (line 40) | def normalize_answer(s: str) -> str:
  function normalize_zh_answer (line 59) | def normalize_zh_answer(s: str) -> str:
  function count_score (line 76) | def count_score(prediction: str, ground_truth: str, **kwargs):
  function get_count_score (line 86) | def get_count_score(doc: dict, results: list[str], **kwargs):
  function retrieval_score (line 95) | def retrieval_score(prediction: str, ground_truth: str, **kwargs):
  function get_retrieval_score (line 108) | def get_retrieval_score(doc: dict, results: list[str], **kwargs):
  function retrieval_zh_score (line 117) | def retrieval_zh_score(prediction: str, ground_truth: str, **kwargs):
  function get_retrieval_zh_score (line 130) | def get_retrieval_zh_score(doc: dict, results: list[str], **kwargs):
  function code_sim_score (line 139) | def code_sim_score(prediction: str, ground_truth: str, **kwargs):
  function get_code_sim_score (line 149) | def get_code_sim_score(doc: dict, results: list[str], **kwargs):
  function classification_score (line 158) | def classification_score(prediction: str, ground_truth: str, **kwargs):
  function get_classification_score (line 174) | def get_classification_score(doc: dict, results: list[str]) -> dict:
  function rouge_score (line 185) | def rouge_score(predictions: str, ground_truth: str, **kwargs) -> float:
  function get_rouge_score (line 197) | def get_rouge_score(doc: dict, results: list[str], **kwargs):
  function rouge_zh_score (line 206) | def rouge_zh_score(prediction: str, ground_truth: str, **kwargs):
  function get_rouge_zh_score (line 213) | def get_rouge_zh_score(doc, results, **kwargs):
  function f1_score (line 222) | def f1_score(prediction: Union[str, list], ground_truth: Union[str, list...
  function get_f1_score (line 233) | def get_f1_score(doc: dict, results: list[str], **kwargs):
  function qa_f1_score (line 242) | def qa_f1_score(prediction: str, ground_truth: str, **kwargs):
  function qa_f1_zh_score (line 251) | def qa_f1_zh_score(prediction: str, ground_truth: str, **kwargs):
  function get_qa_f1_score (line 261) | def get_qa_f1_score(doc: dict, results: list[str], **kwargs):
  function get_qa_f1_zh_score (line 270) | def get_qa_f1_zh_score(doc: dict, results: list[str], **kwargs):
  function get_qa_f1_with_score (line 284) | def get_qa_f1_with_score(doc: dict, results: list[str], **kwargs):
  function get_qa_f1_zh_with_score (line 290) | def get_qa_f1_zh_with_score(doc: dict, results: list[str], **kwargs):
  function get_rouge_with_score (line 296) | def get_rouge_with_score(doc: dict, results: list[str], **kwargs):
  function get_rouge_zh_with_score (line 302) | def get_rouge_zh_with_score(doc: dict, results: list[str], **kwargs):
  function get_classification_with_score (line 308) | def get_classification_with_score(doc: dict, results: list[str], **kwargs):
  function get_count_with_score (line 314) | def get_count_with_score(doc: dict, results: list[str], **kwargs):
  function get_retrieval_with_score (line 320) | def get_retrieval_with_score(doc: dict, results: list[str], **kwargs):
  function get_retrieval_zh_with_score (line 326) | def get_retrieval_zh_with_score(doc: dict, results: list[str], **kwargs):
  function get_code_sim_with_score (line 332) | def get_code_sim_with_score(doc: dict, results: list[str], **kwargs):

FILE: lm_eval/tasks/longbench/utils.py
  function scorer_e (line 50) | def scorer_e(dataset, predictions, answers, lengths, all_classes):
  function scorer (line 74) | def scorer(dataset, predictions, answers, all_classes):

FILE: lm_eval/tasks/manager.py
  class TaskDict (line 23) | class TaskDict(TypedDict):
  class TaskManager (line 37) | class TaskManager:
    method __init__ (line 53) | def __init__(
    method all_tasks (line 109) | def all_tasks(self) -> list[str]:
    method all_groups (line 114) | def all_groups(self) -> list[str]:
    method all_subtasks (line 119) | def all_subtasks(self) -> list[str]:
    method all_tags (line 124) | def all_tags(self) -> list[str]:
    method task_index (line 129) | def task_index(self) -> dict[str, Entry]:
    method _entry (line 134) | def _entry(self, name: str) -> Entry | None:
    method _load_spec (line 138) | def _load_spec(self, spec: str | dict[str, Any]) -> Task | Group | lis...
    method load (line 179) | def load(
    method load_task_or_group (line 242) | def load_task_or_group(self, task_list: str | list[str | dict]) -> dict:
    method _check_duplicates (line 283) | def _check_duplicates(built: list[Task | Group]) -> None:
    method match_tasks (line 309) | def match_tasks(self, task_list: list[str]) -> list[str]:
    method list_all_tasks (line 313) | def list_all_tasks(

FILE: lm_eval/tasks/mathqa/utils.py
  function doc_to_choice (line 4) | def doc_to_choice(doc):

FILE: lm_eval/tasks/mbpp/utils.py
  function pass_at_1 (line 18) | def pass_at_1(
  function extract_code_blocks (line 32) | def extract_code_blocks(text: str) -> str:
  function build_predictions (line 47) | def build_predictions(resps: list[list[str]], docs: list[dict]) -> list[...
  function list_fewshot_samples (line 51) | def list_fewshot_samples():

FILE: lm_eval/tasks/med_concepts_qa/_generate_configs.py
  function generate_yaml_content (line 6) | def generate_yaml_content(vocab_name: str, level: str):
  function generate_yaml_files (line 17) | def generate_yaml_files(

FILE: lm_eval/tasks/med_prescriptions/utils.py
  function get_full_med_list (line 2048) | def get_full_med_list():
  function process_docs (line 2052) | def process_docs(dataset: datasets.Dataset):
  function contains_indian_characters (line 2088) | def contains_indian_characters(text):
  function check_list_for_indian_characters (line 2110) | def check_list_for_indian_characters(string_list):
  function doc_to_text_easy (line 2118) | def doc_to_text_easy(doc) -> str:
  function doc_to_text_hard (line 2134) | def doc_to_text_hard(doc) -> str:
  function get_diagnosis (line 2150) | def get_diagnosis(doc):
  function get_medicines_list (line 2178) | def get_medicines_list(doc):
  function doc_to_target (line 2250) | def doc_to_target(doc):
  function doc_to_target_obtain (line 2254) | def doc_to_target_obtain(doc):
  function doc_to_choice_easy (line 2263) | def doc_to_choice_easy(doc):
  function doc_to_choice_hard (line 2271) | def doc_to_choice_hard(doc):

FILE: lm_eval/tasks/med_text_classification/utils.py
  function process_docs_hard (line 6) | def process_docs_hard(dataset: datasets.Dataset):
  function process_docs (line 10) | def process_docs(dataset: datasets.Dataset):
  function doc_to_choice_easy (line 23) | def doc_to_choice_easy(doc):
  function doc_to_text_easy (line 33) | def doc_to_text_easy(doc) -> str:
  function doc_to_target_easy (line 51) | def doc_to_target_easy(doc):
  function doc_to_text_hard (line 55) | def doc_to_text_hard(doc) -> str:
  function doc_to_choice_hard (line 67) | def doc_to_choice_hard(doc):
  function doc_to_target_hard (line 113) | def doc_to_target_hard(doc):

FILE: lm_eval/tasks/meddialog/utils.py
  function doc_eval (line 24) | def doc_eval(pred, refs):
  function doc_to_text_raw (line 67) | def doc_to_text_raw(doc) -> str:
  function doc_to_target_raw (line 71) | def doc_to_target_raw(doc) -> str:
  function process_results_gen_raw (line 75) | def process_results_gen_raw(doc, results):
  function doc_to_text_qsumm (line 100) | def doc_to_text_qsumm(doc) -> str:
  function doc_to_target_qsumm (line 104) | def doc_to_target_qsumm(doc) -> str:
  function process_results_gen_qsumm (line 108) | def process_results_gen_qsumm(doc, results):

FILE: lm_eval/tasks/meddialog/utils_perplexity.py
  function process_results_qsumm (line 6) | def process_results_qsumm(doc, results):
  function process_results_raw (line 17) | def process_results_raw(doc, results):

FILE: lm_eval/tasks/mediqa_qa2019/utils.py
  function doc_eval (line 24) | def doc_eval(pred, refs):
  function doc_to_text (line 67) | def doc_to_text(doc) -> str:
  function doc_to_target (line 71) | def doc_to_target(doc) -> str:
  function process_results_gen (line 75) | def process_results_gen(doc, results):

FILE: lm_eval/tasks/mediqa_qa2019/utils_perplexity.py
  function doc_to_target (line 5) | def doc_to_target(doc) -> str:
  function process_results (line 9) | def process_results(doc, results):

FILE: lm_eval/tasks/medmcqa/utils_medmcqa.py
  function doc_to_text (line 2) | def doc_to_text(doc) -> str:

FILE: lm_eval/tasks/medqa/preprocess_medqa.py
  function doc_to_text (line 1) | def doc_to_text(doc) -> str:
  function doc_to_target (line 12) | def doc_to_target(doc) -> int:

FILE: lm_eval/tasks/medtext/utils.py
  function doc_eval (line 24) | def doc_eval(pred, refs):
  function doc_to_text (line 67) | def doc_to_text(doc) -> str:
  function doc_to_target (line 71) | def doc_to_target(doc) -> str:
  function process_results (line 75) | def process_results(doc, results):

FILE: lm_eval/tasks/medtext/utils_perplexity.py
  function process_results (line 6) | def process_results(doc, results):

FILE: lm_eval/tasks/meqsum/utils.py
  function doc_to_text (line 24) | def doc_to_text(doc) -> str:
  function doc_to_target (line 33) | def doc_to_target(doc) -> str:
  function process_results_gen (line 37) | def process_results_gen(doc, results):

FILE: lm_eval/tasks/metabench/process_docs.py
  function hash_string (line 7) | def hash_string(string: str) -> str:
  function process_arc (line 11) | def process_arc(dataset: datasets.Dataset) -> datasets.Dataset:
  function process_gsm8k (line 33) | def process_gsm8k(dataset: datasets.Dataset) -> datasets.Dataset:
  function process_hellaswag (line 52) | def process_hellaswag(dataset: datasets.Dataset) -> datasets.Dataset:
  function process_mmlu (line 109) | def process_mmlu(dataset: datasets.Dataset) -> datasets.Dataset:
  function process_truthfulqa (line 136) | def process_truthfulqa(dataset: datasets.Dataset) -> datasets.Dataset:
  function process_winogrande (line 144) | def process_winogrande(dataset: datasets.Dataset) -> datasets.Dataset:
  function winogrande_doc_to_text (line 173) | def winogrande_doc_to_text(doc):  # Mirrored from the winogrande task
  function winogrande_doc_to_target (line 178) | def winogrande_doc_to_target(doc):  # Mirrored from the winogrande task
  function winogrande_doc_to_choice (line 183) | def winogrande_doc_to_choice(doc):  # Mirrored from the winogrande task

FILE: lm_eval/tasks/metabench/process_docs_permute.py
  function hash_string (line 8) | def hash_string(string: str) -> str:
  function process_arc (line 12) | def process_arc(dataset: datasets.Dataset) -> datasets.Dataset:
  function process_hellaswag (line 44) | def process_hellaswag(dataset: datasets.Dataset) -> datasets.Dataset:
  function process_mmlu (line 109) | def process_mmlu(dataset: datasets.Dataset) -> datasets.Dataset:
  function process_truthfulqa (line 147) | def process_truthfulqa(dataset: datasets.Dataset) -> datasets.Dataset:
  function process_winogrande (line 170) | def process_winogrande(dataset: datasets.Dataset) -> datasets.Dataset:
  function winogrande_doc_to_text (line 213) | def winogrande_doc_to_text(doc):  # Mirrored from the winogrande task
  function winogrande_doc_to_target (line 218) | def winogrande_doc_to_target(doc):  # Mirrored from the winogrande task
  function winogrande_doc_to_choice (line 223) | def winogrande_doc_to_choice(doc):  # Mirrored from the winogrande task

FILE: lm_eval/tasks/mgsm/utils.py
  function add_regex_pattern (line 97) | def add_regex_pattern(regex_pattern):
  function gen_lang_yamls (line 131) | def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
  function main (line 204) | def main() -> None:

FILE: lm_eval/tasks/mimic_repsum/utils.py
  function doc_eval (line 28) | def doc_eval(pred, refs):
  function doc_to_text (line 74) | def doc_to_text(doc) -> str:
  function doc_to_target (line 101) | def doc_to_target(doc) -> str:
  function is_non_str_iterable (line 123) | def is_non_str_iterable(obj):
  function process_results (line 127) | def process_results(doc, results):

FILE: lm_eval/tasks/mimic_repsum/utils_perplexity.py
  function process_results (line 6) | def process_results(doc, results):

FILE: lm_eval/tasks/minerva_math/utils.py
  function doc_to_text (line 29) | def doc_to_text(doc: dict) -> str:
  function process_docs (line 33) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:
  function list_fewshot_samples (line 49) | def list_fewshot_samples() -> list[dict]:
  function process_results (line 74) | def process_results(doc: dict, results: list[str]) -> dict[str, int]:
  function last_boxed_only_string (line 99) | def last_boxed_only_string(string: str) -> Optional[str]:
  function remove_boxed (line 129) | def remove_boxed(s: str) -> str:
  class timeout (line 143) | class timeout:
    method __init__ (line 144) | def __init__(self, seconds=1, error_message="Timeout"):
    method handle_timeout (line 148) | def handle_timeout(self, signum, frame):
    method __enter__ (line 151) | def __enter__(self):
    method __exit__ (line 155) | def __exit__(self, type, value, traceback):
  function is_equiv (line 159) | def is_equiv(x1: str, x2: str) -> bool:
  function get_unnormalized_answer (line 202) | def get_unnormalized_answer(text: str) -> str:
  function normalize_final_answer (line 274) | def normalize_final_answer(final_answer: str) -> str:

FILE: lm_eval/tasks/mlqa/generate_tasks.py
  function main (line 23) | def main() -> None:

FILE: lm_eval/tasks/mlqa/utils.py
  function whitespace_tokenize (line 24) | def whitespace_tokenize(text):
  function mixed_segmentation (line 28) | def mixed_segmentation(text):
  function normalize_answer (line 48) | def normalize_answer(s, lang):
  function f1_score (line 91) | def f1_score(prediction, ground_truth, lang):
  function exact_match_score (line 104) | def exact_match_score(prediction, ground_truth, lang):
  function metric_max_over_ground_truths (line 108) | def metric_max_over_ground_truths(metric_fn, prediction, ground_truths, ...
  function process_docs (line 116) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:
  function process_results_lang (line 129) | def process_results_lang(doc, results, lang):
  function process_results_en (line 140) | def process_results_en(doc, results):
  function process_results_es (line 144) | def process_results_es(doc, results):
  function process_results_hi (line 148) | def process_results_hi(doc, results):
  function process_results_vi (line 152) | def process_results_vi(doc, results):
  function process_results_de (line 156) | def process_results_de(doc, results):
  function process_results_ar (line 160) | def process_results_ar(doc, results):
  function process_results_zh (line 164) | def process_results_zh(doc, results):

FILE: lm_eval/tasks/mmlu-pro-plus/utils.py
  function format_cot_example (line 24) | def format_cot_example(example, including_answer=True):
  function process_docs (line 46) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/mmlu/_generate_configs.py
  function parse_args (line 78) | def parse_args():

FILE: lm_eval/tasks/mmlu/flan_cot_zeroshot/utils.py
  class MultiChoiceRegexFilter (line 8) | class MultiChoiceRegexFilter(RegexFilter):
    method __init__ (line 11) | def __init__(
    method apply (line 34) | def apply(self, resps, docs):

FILE: lm_eval/tasks/mmlu/flan_n_shot/generative/utils.py
  class MultiChoiceRegexFilter (line 8) | class MultiChoiceRegexFilter(RegexFilter):
    method __init__ (line 11) | def __init__(
    method apply (line 34) | def apply(self, resps, docs):

FILE: lm_eval/tasks/mmlu_pro/utils.py
  function format_cot_example (line 7) | def format_cot_example(example, including_answer=True):
  function process_docs (line 34) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/mmlu_prox/af/utils.py
  function format_cot_example (line 32) | def format_cot_example(example, including_answer=True):
  function process_docs (line 53) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/mmlu_prox/ar/utils.py
  function format_cot_example (line 32) | def format_cot_example(example, including_answer=True):
  function process_docs (line 53) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/mmlu_prox/bn/utils.py
  function format_cot_example (line 32) | def format_cot_example(example, including_answer=True):
  function process_docs (line 53) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/mmlu_prox/cs/utils.py
  function format_cot_example (line 32) | def format_cot_example(example, including_answer=True):
  function process_docs (line 53) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/mmlu_prox/de/utils.py
  function format_cot_example (line 32) | def format_cot_example(example, including_answer=True):
  function process_docs (line 53) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/mmlu_prox/en/utils.py
  function format_cot_example (line 32) | def format_cot_example(example, including_answer=True):
  function process_docs (line 53) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/mmlu_prox/es/utils.py
  function format_cot_example (line 32) | def format_cot_example(example, including_answer=True):
  function process_docs (line 53) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/mmlu_prox/fr/utils.py
  function format_cot_example (line 32) | def format_cot_example(example, including_answer=True):
  function process_docs (line 53) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/mmlu_prox/hi/utils.py
  function format_cot_example (line 32) | def format_cot_example(example, including_answer=True):
  function process_docs (line 53) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/mmlu_prox/hu/utils.py
  function format_cot_example (line 32) | def format_cot_example(example, including_answer=True):
  function process_docs (line 53) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/mmlu_prox/id/utils.py
  function format_cot_example (line 32) | def format_cot_example(example, including_answer=True):
  function process_docs (line 53) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/mmlu_prox/it/utils.py
  function format_cot_example (line 32) | def format_cot_example(example, including_answer=True):
  function process_docs (line 53) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/mmlu_prox/ja/utils.py
  function format_cot_example (line 32) | def format_cot_example(example, including_answer=True):
  function process_docs (line 53) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/mmlu_prox/ko/utils.py
  function format_cot_example (line 32) | def format_cot_example(example, including_answer=True):
  function process_docs (line 53) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/mmlu_prox/mr/utils.py
  function format_cot_example (line 32) | def format_cot_example(example, including_answer=True):
  function process_docs (line 53) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/mmlu_prox/ne/utils.py
  function format_cot_example (line 32) | def format_cot_example(example, including_answer=True):
  function process_docs (line 53) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/mmlu_prox/pt/utils.py
  function format_cot_example (line 32) | def format_cot_example(example, including_answer=True):
  function process_docs (line 53) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/mmlu_prox/ru/utils.py
  function format_cot_example (line 32) | def format_cot_example(example, including_answer=True):
  function process_docs (line 53) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/mmlu_prox/sr/utils.py
  function format_cot_example (line 32) | def format_cot_example(example, including_answer=True):
  function process_docs (line 53) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/mmlu_prox/sw/utils.py
  function format_cot_example (line 32) | def format_cot_example(example, including_answer=True):
  function process_docs (line 53) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/mmlu_prox/te/utils.py
  function format_cot_example (line 32) | def format_cot_example(example, including_answer=True):
  function process_docs (line 53) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/mmlu_prox/template/utils.py
  function format_cot_example (line 32) | def format_cot_example(example, including_answer=True):
  function process_docs (line 53) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/mmlu_prox/th/utils.py
  function format_cot_example (line 32) | def format_cot_example(example, including_answer=True):
  function process_docs (line 53) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/mmlu_prox/uk/utils.py
  function format_cot_example (line 32) | def format_cot_example(example, including_answer=True):
  function process_docs (line 53) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/mmlu_prox/ur/utils.py
  function format_cot_example (line 32) | def format_cot_example(example, including_answer=True):
  function process_docs (line 53) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/mmlu_prox/vi/utils.py
  function format_cot_example (line 32) | def format_cot_example(example, including_answer=True):
  function process_docs (line 53) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/mmlu_prox/wo/utils.py
  function format_cot_example (line 32) | def format_cot_example(example, including_answer=True):
  function process_docs (line 53) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/mmlu_prox/yo/utils.py
  function format_cot_example (line 32) | def format_cot_example(example, including_answer=True):
  function process_docs (line 53) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/mmlu_prox/zh/utils.py
  function format_cot_example (line 32) | def format_cot_example(example, including_answer=True):
  function process_docs (line 53) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/mmlu_prox/zu/utils.py
  function format_cot_example (line 32) | def format_cot_example(example, including_answer=True):
  function process_docs (line 53) | def process_docs(dataset, subject):

FILE: lm_eval/tasks/mmlusr/answer_only/utils.py
  function process_docs (line 4) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:

FILE: lm_eval/tasks/mmlusr/config.py
  function parse_args (line 79) | def parse_args():

FILE: lm_eval/tasks/mmlusr/question_and_answer/utils.py
  function process_docs (line 4) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:

FILE: lm_eval/tasks/mmlusr/question_only/utils.py
  function process_docs (line 4) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:

FILE: lm_eval/tasks/mmmu/utils.py
  function doc_to_image (line 26) | def doc_to_image(doc):
  function doc_to_text (line 42) | def doc_to_text(doc):
  function _doc_to_text (line 54) | def _doc_to_text(doc):
  function process_results (line 76) | def process_results(doc, results):
  function parse_multi_choice_response (line 105) | def parse_multi_choice_response(response, all_choices, index2ans):
  function check_is_number (line 163) | def check_is_number(string):
  function normalize_str (line 175) | def normalize_str(string):
  function extract_numbers (line 200) | def extract_numbers(string):
  function parse_open_response (line 223) | def parse_open_response(response):
  function eval_multi_choice (line 299) | def eval_multi_choice(gold_i, pred_i):
  function eval_open (line 316) | def eval_open(gold_i, pred_i):

FILE: lm_eval/tasks/model_written_evals/advanced_ai_risk/_generate_configs.py
  function main (line 6) | def main() -> None:

FILE: lm_eval/tasks/model_written_evals/persona/_generate_configs.py
  function main (line 6) | def main() -> None:

FILE: lm_eval/tasks/moral_stories/utils.py
  function process_docs (line 4) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:

FILE: lm_eval/tasks/mts_dialog/utils.py
  function doc_eval (line 24) | def doc_eval(pred, refs):
  function doc_to_text (line 67) | def doc_to_text(doc) -> str:
  function doc_to_target (line 71) | def doc_to_target(doc) -> str:
  function process_results (line 75) | def process_results(doc, results):

FILE: lm_eval/tasks/mts_dialog/utils_perplexity.py
  function process_results (line 6) | def process_results(doc, results):

FILE: lm_eval/tasks/mutual/utils.py
  function process_docs (line 4) | def process_docs(dataset):
  function process_results (line 30) | def process_results(doc, results):

FILE: lm_eval/tasks/noreval/ask_gec/errant.py
  function parse_args (line 9) | def parse_args():
  function read_examples (line 30) | def read_examples(fpath: str):
  function save_results (line 47) | def save_results(fpath: str, obj: dict):
  function evaluate (line 58) | def evaluate(fpath: str, out_fpath: str):
  function main (line 93) | def main():

FILE: lm_eval/tasks/noreval/norec/utils.py
  function multi_f1 (line 5) | def multi_f1(items):

FILE: lm_eval/tasks/noreval/noridiom/utils.py
  function normalize (line 7) | def normalize(text):
  function f1 (line 12) | def f1(prediction, completion):
  function process_results (line 27) | def process_results(doc, results):
  function filter_dataset_nb (line 39) | def filter_dataset_nb(dataset):
  function filter_dataset_nn (line 43) | def filter_dataset_nn(dataset):

FILE: lm_eval/tasks/noreval/noropenbookqa/utils.py
  function filter_dataset (line 4) | def filter_dataset(dataset: datasets.Dataset) -> datasets.Dataset:

FILE: lm_eval/tasks/noreval/norquad/utils.py
  function process_results (line 5) | def process_results(doc, results):
  function process_docs (line 13) | def process_docs(dataset: datasets.Dataset):
  function p0 (line 23) | def p0(doc):
  function p1 (line 31) | def p1(doc):
  function p2 (line 39) | def p2(doc):
  function p3 (line 49) | def p3(doc):
  function p4 (line 57) | def p4(doc):

FILE: lm_eval/tasks/noreval/norsumm/utils.py
  function process_results (line 20) | def process_results(doc, results):
  function bleu (line 50) | def bleu(refs, preds):
  function rouge (line 73) | def rouge(refs, preds):
  function bertscore_f1 (line 107) | def bertscore_f1(references, predictions):

FILE: lm_eval/tasks/noreval/nortruthfulqa/generation/utils.py
  function preprocess_function (line 19) | def preprocess_function(examples):
  function process_docs (line 41) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:
  function process_results (line 45) | def process_results(doc, results):
  function bleu (line 98) | def bleu(refs, preds):
  function rouge (line 121) | def rouge(refs, preds):

FILE: lm_eval/tasks/noreval/nortruthfulqa/multiple_choice/nno/utils.py
  function p0_nn (line 1) | def p0_nn(doc):
  function p1_nn (line 6) | def p1_nn(doc):
  function p2_nn (line 14) | def p2_nn(doc):
  function p3_nn (line 22) | def p3_nn(doc):
  function p4_nn (line 30) | def p4_nn(doc):

FILE: lm_eval/tasks/noreval/nortruthfulqa/multiple_choice/nob/utils.py
  function p0_nb (line 1) | def p0_nb(doc):
  function p1_nb (line 6) | def p1_nb(doc):
  function p2_nb (line 14) | def p2_nb(doc):
  function p3_nb (line 22) | def p3_nb(doc):
  function p4_nb (line 30) | def p4_nb(doc):

FILE: lm_eval/tasks/noreval/nrk_quiz_qa/nno/utils.py
  function p0_nn (line 1) | def p0_nn(doc):
  function p1_nn (line 6) | def p1_nn(doc):
  function p2_nn (line 12) | def p2_nn(doc):
  function p3_nn (line 30) | def p3_nn(doc):
  function p4_nn (line 41) | def p4_nn(doc):

FILE: lm_eval/tasks/noreval/nrk_quiz_qa/nob/utils.py
  function p0_nb (line 1) | def p0_nb(doc):
  function p1_nb (line 6) | def p1_nb(doc):
  function p2_nb (line 12) | def p2_nb(doc):
  function p3_nb (line 32) | def p3_nb(doc):
  function p4_nb (line 43) | def p4_nb(doc):

FILE: lm_eval/tasks/noticia/utils.py
  function clean_text (line 6) | def clean_text(text: str) -> str:
  function rouge1 (line 20) | def rouge1(items):
  function average_len (line 27) | def average_len(items):
  function rouge1_agg (line 34) | def rouge1_agg(items):
  function average_len_agg (line 48) | def average_len_agg(items):

FILE: lm_eval/tasks/okapi/arc_multilingual/utils.py
  function preprocess (line 6) | def preprocess(text):
  function process_docs (line 14) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:

FILE: lm_eval/tasks/okapi/hellaswag_multilingual/utils.py
  function preprocess (line 6) | def preprocess(text):
  function process_docs (line 15) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:

FILE: lm_eval/tasks/okapi/mmlu_multilingual/_generate_configs.py
  function main (line 6) | def main() -> None:

FILE: lm_eval/tasks/okapi/truthfulqa_multilingual/utils.py
  function preprocess (line 23) | def preprocess(text):
  function process_docs (line 33) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:
  function process_results_mc2 (line 48) | def process_results_mc2(doc, results):

FILE: lm_eval/tasks/olaph/utils.py
  function doc_eval (line 25) | def doc_eval(pred, refs):
  function doc_to_text (line 68) | def doc_to_text(doc) -> str:
  function doc_to_target (line 72) | def doc_to_target(doc) -> str:
  function process_docs (line 76) | def process_docs(dataset: datasets.Dataset):
  function process_results (line 89) | def process_results(doc, results):

FILE: lm_eval/tasks/olaph/utils_perplexity.py
  function process_results (line 6) | def process_results(doc, results):

FILE: lm_eval/tasks/openai-mmmlu/_generate_configs.py
  function load_json (line 31) | def load_json(path: Path):
  function description_for (line 36) | def description_for(subject: str, display_name: str) -> str:
  function subject_alias (line 44) | def subject_alias(subject: str, display_name: str) -> str:
  function quote (line 48) | def quote(value: str) -> str:
  function write_file (line 57) | def write_file(path: Path, content: str) -> None:
  function subject_yaml (line 62) | def subject_yaml(base_yaml: str, language: dict, subject: str, category:...
  function category_yaml (line 78) | def category_yaml(language: dict, category: str) -> str:
  function language_group_yaml (line 91) | def language_group_yaml(language: dict, categories: Iterable[str]) -> str:
  function master_group_yaml (line 102) | def master_group_yaml(language_groups: Iterable[str]) -> str:
  function parse_args (line 112) | def parse_args():

FILE: lm_eval/tasks/openai-mmmlu/default/utils.py
  function _normalize_subject_name (line 10) | def _normalize_subject_name(name: str) -> str:
  function _filter_subject (line 25) | def _filter_subject(dataset, subject):
  function _register_subject_filters (line 35) | def _register_subject_filters():

FILE: lm_eval/tasks/paloma/paloma_utils.py
  function doc_to_target (line 1) | def doc_to_target(doc):

FILE: lm_eval/tasks/paws-x/_generate_config.py
  function gen_lang_yamls (line 49) | def gen_lang_yamls(output_dir: str, overwrite: bool) -> None:
  function main (line 91) | def main() -> None:

FILE: lm_eval/tasks/paws-x/utils.py
  function general_detokenize (line 4) | def general_detokenize(string):
  function lowercase_first_letter (line 14) | def lowercase_first_letter(text):
  function process_docs_paraphrases (line 18) | def process_docs_paraphrases(dataset):

FILE: lm_eval/tasks/pisa/utils.py
  function replace_images_tokens (line 31) | def replace_images_tokens(input_string):
  function parse_options (line 40) | def parse_options(options):
  function construct_prompt (line 51) | def construct_prompt(doc, mc_prompt=""):
  function pisa_doc_to_text (line 58) | def pisa_doc_to_text(doc):
  function pisa_doc_to_visual (line 63) | def pisa_doc_to_visual(doc):
  function pisa_process_results (line 70) | def pisa_process_results(doc, results, **kwargs):
  function pisa_process_results_llm_judged (line 85) | def pisa_process_results_llm_judged(doc, results, **kwargs):
  function eval_multi_choice (line 114) | def eval_multi_choice(gold_i, pred_i):
  function eval_open (line 129) | def eval_open(gold_i, pred_i):
  function parse_multi_choice_response (line 158) | def parse_multi_choice_response(response, all_choices, index2ans):
  function extract_numbers (line 219) | def extract_numbers(string):
  function check_is_number (line 242) | def check_is_number(string):
  function normalize_str (line 251) | def normalize_str(string):
  function get_multi_choice_info (line 274) | def get_multi_choice_info(options):
  function build_user_prompt (line 291) | def build_user_prompt(student_answer: str, options: List[str], correct: ...
  function judge_mcq (line 312) | def judge_mcq(pred: str, options: List[str], correct: str) -> int:

FILE: lm_eval/tasks/portuguese_bench/flores_pt/create_yamls_flores_pt.py
  function doc_to_text (line 257) | def doc_to_text(src: str, tgt: str) -> str:
  function doc_to_target (line 265) | def doc_to_target(tgt: str) -> str:
  function gen_lang_yamls (line 272) | def gen_lang_yamls(output_dir: str, overwrite: bool) -> None:
  function main (line 315) | def main() -> None:

FILE: lm_eval/tasks/pubmedqa/preprocess_pubmedqa.py
  function doc_to_text (line 1) | def doc_to_text(doc) -> str:

FILE: lm_eval/tasks/qa4mre/preprocess_qa4mre.py
  function qa4mre_process (line 1) | def qa4mre_process(doc):
  function doc_to_target (line 5) | def doc_to_target(doc):

FILE: lm_eval/tasks/qasper/metrics.py
  function normalize_answer (line 6) | def normalize_answer(s):
  function f1_abstractive (line 28) | def f1_abstractive(predictions, references):

FILE: lm_eval/tasks/qasper/utils.py
  function process_docs (line 6) | def process_docs(dataset, set_answer_type="bool"):

FILE: lm_eval/tasks/race/preprocess_race.py
  function process_ast (line 4) | def process_ast(string):
  function last_problem (line 8) | def last_problem(doc):
  function get_answer_option (line 12) | def get_answer_option(problem):
  function doc_to_choice (line 18) | def doc_to_choice(doc):
  function doc_to_text (line 24) | def doc_to_text(doc):
  function doc_to_target (line 37) | def doc_to_target(doc):

FILE: lm_eval/tasks/realtoxicityprompts/metric.py
  function toxicity_perspective_api (line 12) | def toxicity_perspective_api(

FILE: lm_eval/tasks/ruler/common_utils.py
  function get_tokenizer (line 21) | def get_tokenizer(
  function postprocess_pred (line 30) | def postprocess_pred(prediction: list[str]) -> list[str]:
  function string_match_all (line 43) | def string_match_all(preds: list[str], refs: list[list[str]]) -> float:
  function string_match_part (line 53) | def string_match_part(preds: list[str], refs: list[list[str]]) -> float:
  function process_results (line 63) | def process_results(doc: dict, results: list[str]) -> dict[str, float]:
  function process_results_part (line 73) | def process_results_part(doc: dict, results: list[str]) -> dict[str, flo...
  function aggregate_metrics (line 83) | def aggregate_metrics(metrics: list[float]) -> float:

FILE: lm_eval/tasks/ruler/cwe_utils.py
  function get_example (line 43) | def get_example(num_words, common_repeats=30, uncommon_repeats=3, common...
  function generate_input_output (line 55) | def generate_input_output(
  function sys_word_pair_random (line 84) | def sys_word_pair_random(
  function get_dataset (line 168) | def get_dataset(pretrained, seq=None, **kwargs):
  function get_cw_dataset (line 176) | def get_cw_dataset(**kwargs):

FILE: lm_eval/tasks/ruler/essays.py
  function fetch_url (line 27) | async def fetch_url(client: httpx.AsyncClient, url: str) -> str:
  function process_html_essay (line 34) | async def process_html_essay(
  function process_text_essay (line 56) | async def process_text_essay(
  function get_essays (line 71) | async def get_essays() -> Dict[str, str]:
  function get_all_essays (line 121) | def get_all_essays() -> Dict[str, str]:

FILE: lm_eval/tasks/ruler/fwe_utils.py
  function generate_input_output (line 38) | def generate_input_output(
  function sys_kwext (line 85) | def sys_kwext(
  function get_dataset (line 145) | def get_dataset(pretrained, max_seq_length=None, **kwargs):
  function fwe_download (line 154) | def fwe_download(**kwargs):

FILE: lm_eval/tasks/ruler/niah_utils.py
  function download_dataset (line 15) | def download_dataset(df: Generator) -> dict[str, datasets.Dataset]:
  function niah_single_1 (line 23) | def niah_single_1(**kwargs):
  function niah_single_2 (line 40) | def niah_single_2(**kwargs):
  function niah_single_3 (line 57) | def niah_single_3(**kwargs):
  function niah_multikey_1 (line 74) | def niah_multikey_1(**kwargs):
  function niah_multikey_2 (line 92) | def niah_multikey_2(**kwargs):
  function niah_multikey_3 (line 109) | def niah_multikey_3(**kwargs):
  function niah_multivalue (line 126) | def niah_multivalue(**kwargs):
  function niah_multiquery (line 144) | def niah_multiquery(**kwargs):

FILE: lm_eval/tasks/ruler/prepare_niah.py
  function cached_sent_tokenize (line 65) | def cached_sent_tokenize(text: str) -> List[str]:
  function download_nltk_resources (line 69) | def download_nltk_resources():
  function generate_random_number (line 88) | def generate_random_number(num_digits=7) -> str:
  function generate_random_word (line 94) | def generate_random_word() -> str:
  function generate_random_uuid (line 99) | def generate_random_uuid() -> str:
  function generate_random (line 103) | def generate_random(type_needle: str) -> str:
  function generate_input_output (line 114) | def generate_input_output(
  function generate_samples (line 213) | def generate_samples(
  function get_haystack (line 327) | def get_haystack(

FILE: lm_eval/tasks/ruler/qa_utils.py
  function download_json (line 37) | def download_json(url) -> dict:
  function read_squad (line 45) | def read_squad(
  function read_hotpotqa (line 76) | def read_hotpotqa(
  function generate_input_output (line 99) | def generate_input_output(
  function generate_samples (line 134) | def generate_samples(
  function get_dataset (line 203) | def get_dataset(pretrained, docs, qas, max_seq_length=None, **kwargs) ->...
  function get_qa_dataset (line 216) | def get_qa_dataset(ds, **kwargs) -> dict[str, datasets.Dataset]:
  function get_squad (line 234) | def get_squad(**kwargs):
  function get_hotpotqa (line 238) | def get_hotpotqa(**kwargs):

FILE: lm_eval/tasks/ruler/vt_utils.py
  function generate_chains (line 45) | def generate_chains(
  function generate_input_output (line 70) | def generate_input_output(num_noises, num_chains, num_hops, is_icl=False):
  function randomize_icl (line 120) | def randomize_icl(icl_example: str) -> str:
  function sys_vartrack_w_noise_random (line 129) | def sys_vartrack_w_noise_random(
  function get_dataset (line 224) | def get_dataset(
  function get_vt_dataset (line 244) | def get_vt_dataset(**kwargs) -> dict[str, datasets.Dataset]:

FILE: lm_eval/tasks/score/agi_eval/utils_agieval.py
  function initial_process_docs (line 46) | def initial_process_docs(doc: Dataset) -> Dataset:
  function prompt_robustness_process_results (line 107) | def prompt_robustness_process_results(doc, results) -> Dict[str, float]:
  function option_order_robustness_process_results (line 121) | def option_order_robustness_process_results(doc, results) -> Dict[str, f...
  function non_greedy_robustness_process_results (line 148) | def non_greedy_robustness_process_results(doc, results) -> Dict[str, flo...
  function per_prompt_accuracy (line 159) | def per_prompt_accuracy(results: List[Dict[str, Any]], p_id=0) -> float:
  function per_option_accuracy (line 185) | def per_option_accuracy(results: List[Dict[str, Any]], always_opt="a") -...
  function non_greedy_accuracy (line 207) | def non_greedy_accuracy(results: List[Dict[str, Any]]) -> float:

FILE: lm_eval/tasks/score/math/math_grader.py
  function _check_antlr_version (line 91) | def _check_antlr_version():
  function _fix_fracs (line 109) | def _fix_fracs(string):
  function _str_is_int (line 144) | def _str_is_int(x: str) -> bool:
  function _str_to_int (line 153) | def _str_to_int(x: str) -> bool:
  function _inject_implicit_mixed_number (line 162) | def _inject_implicit_mixed_number(step: str):
  function _strip_properly_formatted_commas (line 172) | def _strip_properly_formatted_commas(expr: str):
  function _remove_right_units (line 183) | def _remove_right_units(expr):
  function _process_and_or_inside_text (line 204) | def _process_and_or_inside_text(string):
  function _remove_left_and_right (line 210) | def _remove_left_and_right(expr):
  function _fix_sqrt (line 217) | def _fix_sqrt(string):
  function _fix_interval (line 222) | def _fix_interval(expr):
  function _inject_implicit_mixed_fraction (line 230) | def _inject_implicit_mixed_fraction(step: str):
  function normalize_answer_string (line 251) | def normalize_answer_string(expr: str) -> str:
  function is_digit (line 351) | def is_digit(s):
  function normalize (line 363) | def normalize(answer) -> str:
  function math_equal (line 378) | def math_equal(
  function symbolic_equal (line 531) | def symbolic_equal(a, b, tolerance, timeout=10.0):
  function extract_answer (line 564) | def extract_answer(
  class TimeoutException (line 615) | class TimeoutException(Exception):
  function time_limit (line 620) | def time_limit(seconds: float):
  function format_intervals (line 632) | def format_intervals(prediction):

FILE: lm_eval/tasks/score/math/utils_math.py
  function find_boxed_entries (line 44) | def find_boxed_entries(answer_str):
  function extract_answer_dataset (line 92) | def extract_answer_dataset(solution: str, problem: str, corrected_answer...
  function process_docs (line 119) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:
  function prompt_robustness_process_docs (line 143) | def prompt_robustness_process_docs(doc: datasets.Dataset) -> datasets.Da...
  function non_greedy_robustness_process_docs (line 152) | def non_greedy_robustness_process_docs(doc: datasets.Dataset) -> dataset...
  function process_results (line 161) | def process_results(doc: dict, results: List[str]) -> Dict[str, int]:
  function non_greedy_robustness_process_results (line 178) | def non_greedy_robustness_process_results(
  function per_prompt_accuracy (line 185) | def per_prompt_accuracy(results: List[Dict[str, Any]], p_id=0) -> float:
  function calculate_consistency_rate (line 211) | def calculate_consistency_rate(responses: List[List[str]]) -> float:
  function math_prompt_consistency_rate (line 234) | def math_prompt_consistency_rate(results: List[Dict[str, Any]]) -> float:
  function non_greedy_accuracy (line 257) | def non_greedy_accuracy(results: List[Dict[str, Any]]) -> float:

FILE: lm_eval/tasks/score/mmlu_pro/utils_mmlu_pro.py
  function non_greedy_robustness_process_results (line 61) | def non_greedy_robustness_process_results(doc, results) -> Dict[str, flo...
  function prompt_robustness_process_results (line 73) | def prompt_robustness_process_results(doc, results) -> Dict[str, float]:
  function option_order_robustness_process_results (line 94) | def option_order_robustness_process_results(doc, results) -> Dict[str, f...
  function per_prompt_macro_accuracy (line 123) | def per_prompt_macro_accuracy(results: List[Dict[str, Any]], p_id=0) -> ...
  function per_option_macro_accuracy (line 154) | def per_option_macro_accuracy(results: List[Dict[str, Any]], always_opt=...
  function non_greedy_macro_accuracy (line 187) | def non_greedy_macro_accuracy(results: List[Dict[str, Any]]) -> float:

FILE: lm_eval/tasks/score/non_greedy_summarizer.py
  function load_json_logs (line 33) | def load_json_logs(file_paths, subtasks):
  function calculate_consistency_rate (line 71) | def calculate_consistency_rate(responses: List[List[str]]) -> float:
  function calculate_math_consistency_rate (line 94) | def calculate_math_consistency_rate(responses: List[List[str]]) -> float:
  function main (line 117) | def main():

FILE: lm_eval/tasks/score/utils.py
  function __repeat_elements (line 36) | def __repeat_elements(lst, n):
  function process_docs_add_prompts (line 43) | def process_docs_add_prompts(
  function option_order_robustness_process_docs (line 74) | def option_order_robustness_process_docs(
  function non_greedy_robustness_process_docs (line 135) | def non_greedy_robustness_process_docs(
  function robustness_doc_to_text (line 165) | def robustness_doc_to_text(doc: Dataset) -> str:
  function __postprocess_pred (line 189) | def __postprocess_pred(pred):
  function translate_model_answer_to_labels (line 199) | def translate_model_answer_to_labels(answer, labels, option_format=None):
  function calculate_consistency_rate (line 220) | def calculate_consistency_rate(responses: List[List[str]]) -> float:
  function prompt_consistency_rate (line 243) | def prompt_consistency_rate(results: List[Dict[str, Any]]) -> float:
  function options_consistency_rate (line 266) | def options_consistency_rate(results: List[Dict[str, Any]], labels) -> f...

FILE: lm_eval/tasks/scrolls/task.py
  function _download_metric (line 45) | def _download_metric():
  function _process_doc_prepended_question (line 66) | def _process_doc_prepended_question(doc):
  function _drop_duplicates_in_input (line 82) | def _drop_duplicates_in_input(untokenized_dataset):
  function _num_cpu_cores (line 103) | def _num_cpu_cores():
  class _SCROLLSTask (line 115) | class _SCROLLSTask(ConfigurableTask):
    method __init__ (line 123) | def __init__(self, config=None):
    method has_training_docs (line 128) | def has_training_docs(self):
    method has_validation_docs (line 131) | def has_validation_docs(self):
    method has_test_docs (line 134) | def has_test_docs(self):
    method training_docs (line 137) | def training_docs(self):
    method validation_docs (line 148) | def validation_docs(self):
    method should_decontaminate (line 159) | def should_decontaminate(self):
    method doc_to_decontamination_query (line 162) | def doc_to_decontamination_query(self, doc):
    method download (line 165) | def download(self, *args, **kwargs):
    method _get_prune_text (line 173) | def _get_prune_text(self, sample):
    method prune (line 176) | def prune(self):
    method doc_to_target (line 202) | def doc_to_target(self, doc):
    method doc_to_text (line 205) | def doc_to_text(self, doc):
    method higher_is_better (line 208) | def higher_is_better(self):
    method _scrolls_metrics (line 212) | def _scrolls_metrics(self):
    method _make_compute_metrics (line 215) | def _make_compute_metrics(self, value):
    method aggregation (line 225) | def aggregation(self):
  class _SCROLLSMultipleChoiceTask (line 232) | class _SCROLLSMultipleChoiceTask(_SCROLLSTask):
    method __post_init__ (line 233) | def __post_init__(self):
    method _scrolls_metrics (line 236) | def _scrolls_metrics(self):
    method aggregation (line 239) | def aggregation(self):
    method higher_is_better (line 242) | def higher_is_better(self):
    method process_results (line 245) | def process_results(self, doc, results):
    method construct_requests (line 259) | def construct_requests(
  class _SCROLLSSummaryTask (line 277) | class _SCROLLSSummaryTask(_SCROLLSTask):
    method _process_doc (line 278) | def _process_doc(self, doc):
    method _scrolls_metrics (line 281) | def _scrolls_metrics(self):
    method process_results (line 288) | def process_results(self, doc, results):
    method construct_requests (line 295) | def construct_requests(
    method doc_to_text (line 306) | def doc_to_text(self, doc):
  class Qasper (line 310) | class Qasper(_SCROLLSTask):
    method _process_doc (line 317) | def _process_doc(self, doc):
    method _scrolls_metrics (line 327) | def _scrolls_metrics(self):
    method process_results (line 330) | def process_results(self, doc, results):
    method construct_requests (line 339) | def construct_requests(
  class QuALITY (line 371) | class QuALITY(_SCROLLSMultipleChoiceTask):
    method _normalize_answer (line 380) | def _normalize_answer(text):
    method _process_doc (line 383) | def _process_doc(self, doc):
  class NarrativeQA (line 399) | class NarrativeQA(_SCROLLSTask):
    method _process_doc (line 406) | def _process_doc(self, doc):
    method _scrolls_metrics (line 409) | def _scrolls_metrics(self):
    method _get_prune_text (line 412) | def _get_prune_text(self, doc):
    method process_results (line 419) | def process_results(self, doc, results):
    method construct_requests (line 422) | def construct_requests(
  class ContractNLI (line 434) | class ContractNLI(_SCROLLSMultipleChoiceTask):
    method _process_doc (line 442) | def _process_doc(self, doc):
    method doc_to_text (line 448) | def doc_to_text(self, doc):
  class GovReport (line 452) | class GovReport(_SCROLLSSummaryTask):
  class SummScreenFD (line 465) | class SummScreenFD(_SCROLLSSummaryTask):
  class QMSum (line 473) | class QMSum(_SCROLLSSummaryTask):
    method _process_doc (line 482) | def _process_doc(self, doc):
    method doc_to_text (line 485) | def doc_to_text(self, doc):

FILE: lm_eval/tasks/simple_cooccurrence_bias/utils.py
  function process_results (line 6) | def process_results(doc, results):
  function process_results_gen (line 29) | def process_results_gen(doc, results):

FILE: lm_eval/tasks/slr_bench/lm_eval_slr_bench.py
  function process_results (line 19) | def process_results(doc, results):

FILE: lm_eval/tasks/spanish_bench/flores_es/create_yamls_flores_es.py
  function doc_to_text (line 257) | def doc_to_text(src: str, tgt: str) -> str:
  function doc_to_target (line 265) | def doc_to_target(tgt: str) -> str:
  function gen_lang_yamls (line 272) | def gen_lang_yamls(output_dir: str, overwrite: bool) -> None:
  function main (line 315) | def main() -> None:

FILE: lm_eval/tasks/spanish_bench/utils.py
  function lowercase_first_letter (line 10) | def lowercase_first_letter(text):
  function process_doc_nli (line 14) | def process_doc_nli(dataset):
  function process_xlsum (line 38) | def process_xlsum(dataset):
  function process_docs_paraphrases (line 48) | def process_docs_paraphrases(dataset):
  function process_docs_copa_es (line 76) | def process_docs_copa_es(dataset):
  function rouge1 (line 85) | def rouge1(items):
  function rouge1_agg (line 92) | def rouge1_agg(items):

FILE: lm_eval/tasks/squad_completion/task.py
  class SQUADCompletion (line 11) | class SQUADCompletion(ConfigurableTask):
    method __init__ (line 16) | def __init__(self, **kwargs):
    method has_training_docs (line 19) | def has_training_docs(self):
    method has_validation_docs (line 22) | def has_validation_docs(self):
    method has_test_docs (line 25) | def has_test_docs(self):
    method validation_docs (line 28) | def validation_docs(self):
    method doc_to_text (line 31) | def doc_to_text(self, doc):
    method doc_to_target (line 34) | def doc_to_target(self, doc):
    method construct_requests (line 37) | def construct_requests(
    method process_results (line 63) | def process_results(self, doc, results):
    method aggregation (line 78) | def aggregation(self):
    method higher_is_better (line 88) | def higher_is_better(self):
  function contains_score (line 99) | def contains_score(prediction: str, labels: List[str]):

FILE: lm_eval/tasks/squadv2/task.py
  function _squad_metric (line 39) | def _squad_metric(predictions, references):
  function _squad_agg (line 46) | def _squad_agg(key, items):
  class SQuAD2 (line 52) | class SQuAD2(ConfigurableTask):
    method __init__ (line 57) | def __init__(self, config=None):
    method has_training_docs (line 65) | def has_training_docs(self):
    method has_validation_docs (line 68) | def has_validation_docs(self):
    method has_test_docs (line 71) | def has_test_docs(self):
    method training_docs (line 74) | def training_docs(self):
    method validation_docs (line 77) | def validation_docs(self):
    method doc_to_text (line 80) | def doc_to_text(self, doc):
    method should_decontaminate (line 94) | def should_decontaminate(self):
    method doc_to_decontamination_query (line 97) | def doc_to_decontamination_query(self, doc):
    method doc_to_target (line 100) | def doc_to_target(self, doc):
    method construct_requests (line 108) | def construct_requests(
    method process_results (line 139) | def process_results(self, doc, results):
    method aggregation (line 197) | def aggregation(self):
    method higher_is_better (line 230) | def higher_is_better(self):

FILE: lm_eval/tasks/super_glue/cb/aggregate.py
  function cb_multi_fi (line 4) | def cb_multi_fi(items):

FILE: lm_eval/tasks/super_glue/cb/t5_utils.py
  function mean_3class_f1 (line 1) | def mean_3class_f1(predictions, references):  # This is a passthrough fu...
  function agg_mean_3class_f1 (line 11) | def agg_mean_3class_f1(items):

FILE: lm_eval/tasks/super_glue/copa/utils.py
  function convert_choice (line 1) | def convert_choice(choice):
  function doc_to_text (line 5) | def doc_to_text(doc):
  function doc_to_target (line 14) | def doc_to_target(doc):
  function doc_to_choice (line 20) | def doc_to_choice(doc):

FILE: lm_eval/tasks/super_glue/multirc/t5_utils.py
  function f1 (line 6) | def f1(predictions, references):  # This is a passthrough function
  function agg_f1 (line 20) | def agg_f1(items):
  function em (line 29) | def em(predictions, references):  # This is a passthrough function
  function agg_em (line 43) | def agg_em(items):

FILE: lm_eval/tasks/super_glue/record/t5_utils.py
  function doc_to_text (line 11) | def doc_to_text(doc):
  function process_docs (line 28) | def process_docs(dataset):
  function normalize_squad (line 50) | def normalize_squad(answer):
  function em (line 76) | def em(predictions, references):  # This is a passthrough function
  function f1 (line 80) | def f1(predictions, references):  # This is a passthrough function
  function squad_em_agg (line 84) | def squad_em_agg(items):
  function squad_f1_agg (line 104) | def squad_f1_agg(items):

FILE: lm_eval/tasks/super_glue/record/util.py
  function doc_to_text (line 8) | def doc_to_text(doc):
  function format_answer (line 16) | def format_answer(query, entity):
  function doc_to_target (line 20) | def doc_to_target(doc):
  function doc_to_choice (line 25) | def doc_to_choice(doc):
  function process_docs (line 29) | def process_docs(dataset: datasets.Dataset):
  function process_results (line 41) | def process_results(doc, results):

FILE: lm_eval/tasks/super_glue/wsc/preprocess_wsc.py
  function default_doc_to_text (line 4) | def default_doc_to_text(x):

FILE: lm_eval/tasks/super_glue/wsc/t5_utils.py
  function doc_to_text (line 5) | def doc_to_text(x):
  function _wsc_inputs (line 10) | def _wsc_inputs(x):
  function clean (line 80) | def clean(s: str) -> str:
  function process_results (line 86) | def process_results(docs: dict, resps: List):

FILE: lm_eval/tasks/swde/task.py
  class SWDE (line 10) | class SWDE(ConfigurableTask):
    method __init__ (line 15) | def __init__(self, **kwargs):
    method has_training_docs (line 18) | def has_training_docs(self):
    method has_validation_docs (line 21) | def has_validation_docs(self):
    method has_test_docs (line 24) | def has_test_docs(self):
    method validation_docs (line 27) | def validation_docs(self):
    method doc_to_text (line 30) | def doc_to_text(self, doc):
    method doc_to_target (line 33) | def doc_to_target(self, doc):
    method construct_requests (line 36) | def construct_requests(
    method process_results (line 60) | def process_results(self, doc, results):
    method aggregation (line 75) | def aggregation(self):
    method higher_is_better (line 85) | def higher_is_better(self):
  function contains_score (line 96) | def contains_score(prediction: str, labels: List[str]):

FILE: lm_eval/tasks/tinyBenchmarks/agg_functions.py
  function agg_pirt (line 15) | def agg_pirt(items: List[float], benchmark: str) -> float:
  function agg_gpirt_arc (line 21) | def agg_gpirt_arc(items: List[float], benchmark: str = "arc") -> float:
  function agg_gpirt_gsm8k (line 27) | def agg_gpirt_gsm8k(items: List[float], benchmark: str = "gsm8k") -> float:
  function agg_gpirt_hellaswag (line 33) | def agg_gpirt_hellaswag(items: List[float], benchmark: str = "hellaswag"...
  function agg_gpirt_mmlu (line 39) | def agg_gpirt_mmlu(items: List[float], benchmark: str = "mmlu") -> float:
  function agg_gpirt_truthfulqa (line 45) | def agg_gpirt_truthfulqa(items: List[float], benchmark: str = "truthfulq...
  function agg_gpirt_winogrande (line 51) | def agg_gpirt_winogrande(items: List[float], benchmark: str = "winogrand...

FILE: lm_eval/tasks/tinyBenchmarks/utils_hellaswag.py
  function preprocess (line 9) | def preprocess(text):
  function process_docs (line 18) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:

FILE: lm_eval/tasks/tinyBenchmarks/utils_truthfulqa.py
  function process_results_mc2 (line 12) | def process_results_mc2(doc, results):
  function process_docs_gen (line 25) | def process_docs_gen(dataset: datasets.Dataset) -> datasets.Dataset:
  function preprocess_function (line 29) | def preprocess_function(examples):
  function process_results_gen (line 53) | def process_results_gen(doc, results):
  function bleu (line 124) | def bleu(refs, preds):
  function rouge (line 147) | def rouge(refs, preds):

FILE: lm_eval/tasks/tinyBenchmarks/utils_winogrande.py
  function doc_to_text (line 4) | def doc_to_text(doc):
  function doc_to_target (line 9) | def doc_to_target(doc):
  function doc_to_choice (line 14) | def doc_to_choice(doc):

FILE: lm_eval/tasks/tmlu/default/_generate_configs.py
  function parse_args (line 79) | def parse_args():

FILE: lm_eval/tasks/tmlu/default/utils.py
  function process_docs (line 4) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:

FILE: lm_eval/tasks/tmmluplus/default/_generate_configs.py
  function parse_args (line 109) | def parse_args():

FILE: lm_eval/tasks/tmmluplus/default/utils.py
  function process_docs (line 4) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:

FILE: lm_eval/tasks/toxigen/utils.py
  function doc_to_target (line 4) | def doc_to_target(doc):

FILE: lm_eval/tasks/translation/utils.py
  function code_to_language (line 35) | def code_to_language(code):
  function gen_lang_yamls (line 41) | def gen_lang_yamls(output_dir: str, overwrite: bool) -> None:
  function main (line 100) | def main() -> None:

FILE: lm_eval/tasks/truthfulqa-multi/utils.py
  function process_results_mc2 (line 18) | def process_results_mc2(doc, results):
  function process_docs_gen (line 34) | def process_docs_gen(dataset: datasets.Dataset) -> datasets.Dataset:
  function preprocess_function (line 38) | def preprocess_function(examples):
  function process_results_gen (line 80) | def process_results_gen(doc, results):
  function bleu (line 151) | def bleu(refs, preds):
  function rouge (line 174) | def rouge(refs, preds):

FILE: lm_eval/tasks/truthfulqa/utils.py
  function process_results_mc2 (line 10) | def process_results_mc2(doc, results):
  function process_docs_gen (line 27) | def process_docs_gen(dataset: datasets.Dataset) -> datasets.Dataset:
  function preprocess_function (line 31) | def preprocess_function(examples):
  function process_results_gen (line 55) | def process_results_gen(doc, results):
  function bleu (line 126) | def bleu(refs, preds):
  function rouge (line 149) | def rouge(refs, preds):

FILE: lm_eval/tasks/unitxt/task.py
  function assert_unitxt_installed (line 30) | def assert_unitxt_installed():
  function score (line 46) | def score(items, metric):
  class Unitxt (line 57) | class Unitxt(ConfigurableTask):
    method __init__ (line 60) | def __init__(
    method download (line 76) | def download(self, dataset_kwargs: Optional[Dict[str, Any]] = None) ->...
    method has_training_docs (line 82) | def has_training_docs(self):
    method has_validation_docs (line 85) | def has_validation_docs(self):
    method has_test_docs (line 88) | def has_test_docs(self):
    method training_docs (line 91) | def training_docs(self):
    method validation_docs (line 94) | def validation_docs(self):
    method test_docs (line 97) | def test_docs(self):
    method doc_to_text (line 100) | def doc_to_text(self, doc):
    method should_decontaminate (line 103) | def should_decontaminate(self):
    method doc_to_target (line 106) | def doc_to_target(self, doc):
    method get_arguments (line 109) | def get_arguments(self, doc, ctx):
    method fewshot_context (line 112) | def fewshot_context(self, doc, **kwargs) -> str:
    method construct_requests (line 125) | def construct_requests(self, doc, ctx, **kwargs):
    method process_results (line 148) | def process_results(self, doc, results):
    method aggregation (line 169) | def aggregation(self):
    method higher_is_better (line 180) | def higher_is_better(self):
  function extract_images (line 193) | def extract_images(text, instance):
  class UnitxtMultiModal (line 206) | class UnitxtMultiModal(Unitxt):
    method doc_to_text (line 209) | def doc_to_text(self, doc):
    method doc_to_image (line 212) | def doc_to_image(self, doc):
    method get_arguments (line 216) | def get_arguments(self, doc, ctx):

FILE: lm_eval/tasks/webqs/utils.py
  function doc_to_choice (line 4) | def doc_to_choice(doc: Dict) -> List[str]:
  function doc_to_target (line 9) | def doc_to_target(doc: Dict) -> List[int]:
  function _remove_prefixes (line 15) | def _remove_prefixes(aliases):

FILE: lm_eval/tasks/wikitext/preprocess_wikitext.py
  function wikitext_detokenizer (line 4) | def wikitext_detokenizer(doc):
  function process_results (line 39) | def process_results(doc, results):

FILE: lm_eval/tasks/winogender/utils.py
  function filter_dataset (line 4) | def filter_dataset(dataset: datasets.Dataset, gender: str) -> datasets.D...
  function filter_male (line 8) | def filter_male(dataset: datasets.Dataset) -> datasets.Dataset:
  function filter_female (line 12) | def filter_female(dataset: datasets.Dataset) -> datasets.Dataset:
  function filter_neutral (line 16) | def filter_neutral(dataset: datasets.Dataset) -> datasets.Dataset:

FILE: lm_eval/tasks/winogrande/preprocess_winogrande.py
  function doc_to_text (line 1) | def doc_to_text(doc):
  function doc_to_target (line 6) | def doc_to_target(doc):
  function doc_to_choice (line 11) | def doc_to_choice(doc):

FILE: lm_eval/tasks/wmt2016/metrics.py
  function bleu (line 4) | def bleu(predictions, references):
  function agg_bleu (line 8) | def agg_bleu(items):

FILE: lm_eval/tasks/wsc273/utils.py
  function process_doc (line 16) | def process_doc(dataset):
  function __normalize_option (line 27) | def __normalize_option(doc, option):

FILE: lm_eval/tasks/xcopa/utils.py
  function convert_choice (line 4) | def convert_choice(choice):
  function doc_to_text (line 8) | def doc_to_text(doc, connector):
  function doc_to_choice (line 14) | def doc_to_choice(doc):

FILE: lm_eval/tasks/xnli/utils.py
  function gen_lang_yamls (line 104) | def gen_lang_yamls(output_dir: str, overwrite: bool) -> None:
  function main (line 148) | def main() -> None:

FILE: lm_eval/tasks/xquad/utils.py
  function process_results_qa (line 10) | def process_results_qa(doc, results):

FILE: lm_eval/tasks/xwinograd/utils.py
  function doc_to_text (line 13) | def doc_to_text(doc: Dict) -> int:
  function doc_to_target (line 25) | def doc_to_target(doc: Dict) -> str:
  function doc_to_choice (line 36) | def doc_to_choice(doc: Dict) -> List[str]:
  function gen_lang_yamls (line 43) | def gen_lang_yamls(output_dir: str, overwrite: bool) -> None:
  function main (line 76) | def main() -> None:

FILE: lm_eval/utils.py
  class _LMEvalFormatter (line 35) | class _LMEvalFormatter(logging.Formatter):
    method format (line 38) | def format(self, record):
  function is_torch_available (line 43) | def is_torch_available() -> bool:
  function is_transformers_available (line 47) | def is_transformers_available() -> bool:
  function wrap_text (line 51) | def wrap_text(string: str, width: int = 140, **kwargs) -> str | None:
  function setup_logging (line 68) | def setup_logging(verbosity=logging.INFO):
  function warning_once (line 111) | def warning_once(logger: logging.Logger, msg: str, *args):
  function info_once (line 117) | def info_once(logger: logging.Logger, msg: str, *args):
  function maybe_warn (line 122) | def maybe_warn(msg: str, verbose: bool = True):
  function hash_string (line 129) | def hash_string(string: str) -> str:
  function escaped_split (line 133) | def escaped_split(text, sep_char, maxsplit=-1):
  function handle_arg_string (line 156) | def handle_arg_string(arg):
  function handle_non_serializable (line 199) | def handle_non_serializable(o):
  function sanitize_list (line 208) | def sanitize_list(sub):
  function simple_parse_args_string (line 220) | def simple_parse_args_string(args_string: str | None) -> dict:
  function join_iters (line 239) | def join_iters(iters):
  function group (line 244) | def group(arr, fn):
  function pattern_match (line 255) | def pattern_match(patterns, source_list):
  function softmax (line 266) | def softmax(x) -> np.ndarray:
  function general_detokenize (line 272) | def general_detokenize(string) -> str:
  function get_file_task_name (line 282) | def get_file_task_name(filename: str) -> str:
  function get_file_datetime (line 289) | def get_file_datetime(filename: str) -> str:
  function sanitize_model_name (line 296) | def sanitize_model_name(model_name: str) -> str:
  function sanitize_task_name (line 303) | def sanitize_task_name(task_name: str) -> str:
  function get_latest_filename (line 310) | def get_latest_filename(filenames: list[str]) -> str:
  function get_results_filenames (line 317) | def get_results_filenames(filenames: list[str]) -> list[str]:
  function get_sample_results_filenames (line 324) | def get_sample_results_filenames(filenames: list[str]) -> list[str]:
  function get_rolling_token_windows (line 331) | def get_rolling_token_windows(
  function make_disjoint_window (line 374) | def make_disjoint_window(
  class EnhancedJSONEncoder (line 382) | class EnhancedJSONEncoder(json.JSONEncoder):
    method default (line 388) | def default(self, o):
  class Reorderer (line 394) | class Reorderer:
    method __init__ (line 395) | def __init__(self, arr: list[Any], fn: Callable) -> None:
    method get_reordered (line 412) | def get_reordered(self):
    method get_original (line 420) | def get_original(self, newarr):
  function _build_hierarchy_info (line 442) | def _build_hierarchy_info(
  function make_table (line 475) | def make_table(result_dict, column: str = "results", sort_results: bool ...
  function positional_deprecated (line 561) | def positional_deprecated(fn):
  function ignore_constructor (line 580) | def ignore_constructor(loader, node):
  function import_function (line 584) | def import_function(loader: yaml.Loader, node, yaml_path: Path):
  function regex_replace (line 606) | def regex_replace(string, pattern, repl, count: int = 0):
  function apply_template (line 617) | def apply_template(template: str, doc: dict) -> str:
  function create_iterator (line 622) | def create_iterator(raw_iterator, *, rank=0, world_size=1, limit=None):
  function weighted_f1_score (line 631) | def weighted_f1_score(items):
  function convert_pil_to_hash (line 641) | def convert_pil_to_hash(value):
  function convert_bytes_to_hash (line 649) | def convert_bytes_to_hash(value):
  function hash_dict_images (line 653) | def hash_dict_images(data_dict):
  class RemoteTokenizer (line 697) | class RemoteTokenizer:
    method __init__ (line 702) | def __init__(
    method _request_with_retries (line 741) | def _request_with_retries(self, method, url, **kwargs):
    method _validate_server (line 760) | def _validate_server(self):
    method tokenizer_info (line 769) | def tokenizer_info(self) -> dict:
    method eos_token (line 778) | def eos_token(self) -> str | None:
    method bos_token (line 782) | def bos_token(self) -> str | None:
    method pad_token (line 786) | def pad_token(self) -> str | None:
    method eos_token_id (line 790) | def eos_token_id(self) -> int | None:
    method bos_token_id (line 796) | def bos_token_id(self) -> int | None:
    method eot_token (line 802) | def eot_token(self) -> int | None:
    method encode (line 805) | def encode(self, text: str) -> list[int]:
    method decode (line 814) | def decode(self, tokens: list[int]) -> str:
    method batch_decode (line 823) | def batch_decode(self, tokens_list: list[list[int]]) -> list[str]:
    method apply_chat_template (line 826) | def apply_chat_template(
    method __call__ (line 839) | def __call__(self, text: str, add_special_tokens: bool = False, **kwar...
  function check_remote_tokenizer_support (line 844) | def check_remote_tokenizer_support(
  function set_torch_seed (line 910) | def set_torch_seed(seed: int):
  function random_name_id (line 917) | def random_name_id() -> str:

FILE: scripts/build_benchmark.py
  function parse_args (line 17) | def parse_args():

FILE: scripts/clean_training_data/compress_and_package.py
  function process_task (line 16) | def process_task(
  function compress_and_move (line 31) | def compress_and_move(working_directory, output_directory, process_count):

FILE: scripts/clean_training_data/generate_13_grams.py
  function handler (line 46) | def handler(signal_received, frame):
  function yield_pile (line 51) | def yield_pile(start_offsets=None, checkpoint_offset=None):
  class Buckets (line 86) | class Buckets:
    method __init__ (line 87) | def __init__(self, directory, num_buckets):
    method add_data (line 104) | def add_data(self, key, value):
    method save_checkpoint (line 109) | def save_checkpoint(self):
    method close_buckets (line 116) | def close_buckets(self):
  function do_ngrams_in_buckets (line 121) | def do_ngrams_in_buckets(n_value, working_directory, bucket_count):

FILE: scripts/clean_training_data/investigate_pile.py
  function get_file_stats (line 12) | def get_file_stats(file_path, tqdm_func, global_tqdm):
  function get_files (line 36) | def get_files():
  function get_stats (line 43) | def get_stats():

FILE: scripts/clean_training_data/janitor_util.cpp
  function is_whitespace (line 9) | bool is_whitespace(char ch) noexcept {
  function is_punctuation (line 15) | bool is_punctuation(char c) noexcept {
  function clean_ngram (line 24) | std::vector<std::string> clean_ngram(std::string const &input,
  function clean_ngram_with_indices (line 109) | std::vector<std::tuple<std::string, size_t, size_t>>
  function PYBIND11_MODULE (line 194) | PYBIND11_MODULE(janitor_util, m) {

FILE: scripts/clean_training_data/process_sorted_buckets.py
  function process_bucket (line 35) | def process_bucket(
  function process_sorted_buckets (line 97) | def process_sorted_buckets(working_directory, move_dir, process_count):

FILE: scripts/clean_training_data/sort_13_gram_buckets.py
  function handler (line 28) | def handler(signal_received, frame):
  function sort_13_gram_buckets (line 33) | def sort_13_gram_buckets(working_directory):

FILE: scripts/make_table_results.py
  function make_table (line 16) | def make_table(result_dict):

FILE: scripts/make_table_tasks.py
  function check (line 17) | def check(tf):

FILE: scripts/model_comparator.py
  function memory_stats (line 19) | def memory_stats():
  function calculate_z_value (line 25) | def calculate_z_value(res1: dict, res2: dict) -> tuple[float, float]:
  function print_results (line 36) | def print_results(
  function parse_args (line 64) | def parse_args():

FILE: scripts/regression.py
  function parse_args (line 29) | def parse_args():
  function eval_models (line 47) | def eval_models(args, branch=None):
  function extract_value (line 108) | def extract_value(args, results, model, task, err=False):
  function format_value (line 126) | def format_value(args, results, model, task):
  function format_diff (line 132) | def format_diff(args, results1, results2, model, task):
  function main (line 139) | def main():

FILE: scripts/requests_caching.py
  function run_model_for_task_caching (line 35) | def run_model_for_task_caching(tasks: list[str], cache_requests: str):
  function request_caching_arg_to_dict (line 96) | def request_caching_arg_to_dict(cache_requests: str) -> dict:

FILE: scripts/write_out.py
  function parse_args (line 19) | def parse_args():
  function main (line 42) | def main():

FILE: scripts/zeno_visualize.py
  function parse_args (line 22) | def parse_args():
  function sanitize_string (line 39) | def sanitize_string(model_args_raw: Union[str, dict]) -> str:
  function main (line 55) | def main():
  function tasks_for_model (line 163) | def tasks_for_model(model: str, data_path: str):
  function generate_dataset (line 182) | def generate_dataset(
  function generate_system_df (line 230) | def generate_system_df(data, config):

FILE: tests/conftest.py
  function on_ci (line 13) | def on_ci():
  function fewshot_config (line 19) | def fewshot_config():
  function task_config (line 30) | def task_config():
  function mock_configurable_task (line 48) | def mock_configurable_task(task_config):

FILE: tests/models/test_api.py
  function api (line 10) | def api():
  function api_tokenized (line 17) | def api_tokenized():
  function api_batch_ssl_tokenized (line 26) | def api_batch_ssl_tokenized():
  function test_create_payload_generate (line 36) | def test_create_payload_generate(api):
  function test_create_payload_loglikelihood (line 57) | def test_create_payload_loglikelihood(api):
  function test_model_generate_call_usage (line 103) | def test_model_generate_call_usage(
  function test_model_tokenized_call_usage (line 143) | def test_model_tokenized_call_usage(
  class DummyAsyncContextManager (line 164) | class DummyAsyncContextManager:
    method __init__ (line 165) | def __init__(self, result):
    method __aenter__ (line 168) | async def __aenter__(self):
    method __aexit__ (line 171) | async def __aexit__(self, exc_type, exc, tb):
  function test_get_batched_requests_with_no_ssl (line 190) | def test_get_batched_requests_with_no_ssl(
  function test_local_completionsapi_remote_tokenizer_authenticated (line 231) | def test_local_completionsapi_remote_tokenizer_authenticated(monkeypatch):
  function test_local_completionsapi_remote_tokenizer_unauthenticated (line 254) | def test_local_completionsapi_remote_tokenizer_unauthenticated(monkeypat...
  function test_localchatcompletion_remote_tokenizer_authenticated (line 277) | def test_localchatcompletion_remote_tokenizer_authenticated(monkeypatch):
  function test_localchatcompletion_remote_tokenizer_unauthenticated (line 302) | def test_localchatcompletion_remote_tokenizer_unauthenticated(monkeypatch):

FILE: tests/models/test_bos_handling.py
  function _mock_version (line 27) | def _mock_version(name):
  class MockModuleFinder (line 36) | class MockModuleFinder:
    method __init__ (line 39) | def __init__(self, modules):
    method find_spec (line 42) | def find_spec(self, fullname, path, target=None):
    method create_module (line 51) | def create_module(self, spec):
    method exec_module (line 60) | def exec_module(self, module):
  function pythia_tokenizer (line 80) | def pythia_tokenizer():
  function olmo_tokenizer (line 97) | def olmo_tokenizer():
  function create_hf_mock (line 118) | def create_hf_mock(tokenizer, add_bos_token, backend="causal"):
  function create_vllm_mock (line 135) | def create_vllm_mock(tokenizer, add_bos_token):
  class TestHasBosPrefix (line 152) | class TestHasBosPrefix:
    method test_none_bos_returns_false (line 155) | def test_none_bos_returns_false(self):
    method test_detects_single_bos_string (line 160) | def test_detects_single_bos_string(self):
    method test_detects_multiple_bos_variants (line 166) | def test_detects_multiple_bos_variants(self):
  class TestAddSpecialKwargs (line 175) | class TestAddSpecialKwargs:
    method test_explicit_add_special_tokens_takes_precedence (line 178) | def test_explicit_add_special_tokens_takes_precedence(self):
    method test_falls_back_to_add_bos (line 183) | def test_falls_back_to_add_bos(self):
    method test_both_none_returns_empty (line 188) | def test_both_none_returns_empty(self):
  class TestDefaultsToNone (line 198) | class TestDefaultsToNone:
    method test_huggingface_none_uses_tokenizer_default (line 202) | def test_huggingface_none_uses_tokenizer_default(self, tokenizer_name,...
    method test_vllm_none_uses_tokenizer_default (line 218) | def test_vllm_none_uses_tokenizer_default(self, tokenizer_name, request):
  class TestNoDuplicateBos (line 239) | class TestNoDuplicateBos:
    method test_huggingface_detects_bos_in_single_string (line 243) | def test_huggingface_detects_bos_in_single_string(self, tokenizer_name...
    method test_huggingface_adds_bos_when_missing (line 267) | def test_huggingface_adds_bos_when_missing(self, tokenizer_name, reque...
    method test_huggingface_follows_tokenizer_default (line 280) | def test_huggingface_follows_tokenizer_default(self, tokenizer_name, r...
    method test_vllm_handles_mixed_batch (line 297) | def test_vllm_handles_mixed_batch(self, tokenizer_name, add_bos_token,...
    method test_vllm_preserves_order_in_mixed_batch (line 337) | def test_vllm_preserves_order_in_mixed_batch(
  class TestChatTemplateCompatibility (line 376) | class TestChatTemplateCompatibility:
    method test_huggingface_chat_template_no_duplicate_bos (line 380) | def test_huggingface_chat_template_no_duplicate_bos(self, tokenizer_na...
    method test_vllm_mixed_chat_batch (line 405) | def test_vllm_mixed_chat_batch(self, tokenizer_name, add_bos_token, re...
    method test_huggingface_seq2seq_skips_causal_bos_logic (line 453) | def test_huggingface_seq2seq_skips_causal_bos_logic(self, pythia_token...
  class TestLoglikelihoodBosHandling (line 472) | class TestLoglikelihoodBosHandling:
    method test_empty_context_continuation_with_bos (line 477) | def test_empty_context_continuation_with_bos(
    method test_empty_context_continuation_without_bos (line 532) | def test_empty_context_continuation_without_bos(
    method test_context_with_bos_prefix (line 576) | def test_context_with_bos_prefix(self, tokenizer_name, add_bos_token, ...
  class TestEdgeCases (line 616) | class TestEdgeCases:
    method test_explicit_override_takes_precedence (line 619) | def test_explicit_override_takes_precedence(self, pythia_tokenizer):
    method test_vllm_empty_input (line 630) | def test_vllm_empty_input(self):

FILE: tests/models/test_gguf.py
  function gguf_completion_mock (line 15) | def gguf_completion_mock(base_url=None, **kwargs):
  class GGUFLMTest (line 92) | class GGUFLMTest(unittest.TestCase):
    method test_loglikelihood (line 96) | def test_loglikelihood(self, gguf_completion_mock):
    method test_generate_until (line 118) | def test_generate_until(self, gguf_completion_mock):

FILE: tests/models/test_gptqmodel.py
  function assert_less_than (line 8) | def assert_less_than(value, threshold, desc):
  class Test_GPTQModel (line 14) | class Test_GPTQModel:
    method test_gptqmodel (line 18) | def test_gptqmodel(self) -> None:

FILE: tests/models/test_hf_steered.py
  class Test_SteeredModel (line 23) | class Test_SteeredModel:
    method test_load_with_sae_lens (line 114) | def test_load_with_sae_lens(self) -> None:
    method test_loglikelihood (line 126) | def test_loglikelihood(self) -> None:
    method test_generate_until (line 145) | def test_generate_until(self) -> None:
    method test_loglikelihood_rolling (line 149) | def test_loglikelihood_rolling(self) -> None:
    method test_toc_encode (line 153) | def test_toc_encode(self) -> None:
    method test_toc_decode (line 157) | def test_toc_decode(self) -> None:
    method test_batch_encode (line 161) | def test_batch_encode(self) -> None:
    method test_model_generate (line 165) | def test_model_generate(self) -> None:

FILE: tests/models/test_huggingface.py
  class Test_HFLM (line 26) | class Test_HFLM:
    method test_logliklihood (line 110) | def test_logliklihood(self) -> None:
    method test_generate_until (line 129) | def test_generate_until(self) -> None:
    method test_logliklihood_rolling (line 133) | def test_logliklihood_rolling(self) -> None:
    method test_toc_encode (line 137) | def test_toc_encode(self) -> None:
    method test_toc_decode (line 141) | def test_toc_decode(self) -> None:
    method test_batch_encode (line 145) | def test_batch_encode(self) -> None:
    method test_model_generate (line 149) | def test_model_generate(self) -> None:

FILE: tests/models/test_model_utils.py
  class TestTruncateTokens (line 6) | class TestTruncateTokens:
    method test_left (line 7) | def test_left(self):
    method test_right (line 11) | def test_right(self):
    method test_middle (line 15) | def test_middle(self):
    method test_middle_even (line 20) | def test_middle_even(self):
    method test_no_truncation_needed (line 25) | def test_no_truncation_needed(self):
    method test_unknown_strategy (line 29) | def test_unknown_strategy(self):
  class TestMaybeTruncate (line 35) | class TestMaybeTruncate:
    method test_case1_no_truncation (line 39) | def test_case1_no_truncation(self):
    method test_case1_no_truncation_with_adjust (line 47) | def test_case1_no_truncation_with_adjust(self):
    method test_case2_truncate_prompt_no_adjust (line 56) | def test_case2_truncate_prompt_no_adjust(self):
    method test_case2_no_adjust_is_default (line 65) | def test_case2_no_adjust_is_default(self):
    method test_case2_prompt_fits_but_gen_too_large_no_adjust (line 73) | def test_case2_prompt_fits_but_gen_too_large_no_adjust(self):
    method test_case3_reduce_gen_toks (line 83) | def test_case3_reduce_gen_toks(self):
    method test_case4_truncate_left (line 92) | def test_case4_truncate_left(self):
    method test_case4_truncate_right (line 105) | def test_case4_truncate_right(self):
    method test_case4_truncate_middle (line 118) | def test_case4_truncate_middle(self):
    method test_case4_default_strategy_is_left (line 132) | def test_case4_default_strategy_is_left(self):
    method test_min_gen_toks_zero_reduces_to_zero (line 144) | def test_min_gen_toks_zero_reduces_to_zero(self):
    method test_min_gen_toks_zero_truncates_prompt (line 157) | def test_min_gen_toks_zero_truncates_prompt(self):
    method test_raises_when_max_len_too_small (line 171) | def test_raises_when_max_len_too_small(self):
  class TestNormalizeGenKwargs (line 183) | class TestNormalizeGenKwargs:
    method test_until_string_converted_to_list (line 188) | def test_until_string_converted_to_list(self):
    method test_until_list_passed_through (line 192) | def test_until_list_passed_through(self):
    method test_until_missing_defaults_to_empty_list (line 196) | def test_until_missing_defaults_to_empty_list(self):
    method test_max_gen_toks_used_directly (line 202) | def test_max_gen_toks_used_directly(self):
    method test_max_new_tokens_converted (line 206) | def test_max_new_tokens_converted(self):
    method test_max_tokens_converted (line 210) | def test_max_tokens_converted(self):
    method test_max_completion_tokens_converted (line 214) | def test_max_completion_tokens_converted(self):
    method test_default_max_gen_toks_when_none_provided (line 218) | def test_default_max_gen_toks_when_none_provided(self):
    method test_custom_default_max_gen_toks (line 222) | def test_custom_default_max_gen_toks(self):
    method test_max_token_priority_max_gen_toks_first (line 226) | def test_max_token_priority_max_gen_toks_first(self):
    method test_max_token_priority_max_new_tokens_second (line 236) | def test_max_token_priority_max_new_tokens_second(self):
    method test_max_token_priority_max_tokens_third (line 246) | def test_max_token_priority_max_tokens_third(self):
    method test_do_sample_none_temperature_zero_sets_do_sample_false (line 257) | def test_do_sample_none_temperature_zero_sets_do_sample_false(self):
    method test_do_sample_none_temperature_positive_sets_do_sample_true (line 261) | def test_do_sample_none_temperature_positive_sets_do_sample_true(self):
    method test_do_sample_false_sets_temperature_zero (line 265) | def test_do_sample_false_sets_temperature_zero(self):
    method test_do_sample_false_temperature_positive_forces_temperature_zero (line 269) | def test_do_sample_false_temperature_positive_forces_temperature_zero(...
    method test_do_sample_true_temperature_positive_preserved (line 273) | def test_do_sample_true_temperature_positive_preserved(self):
    method test_do_sample_true_temperature_zero_preserved (line 278) | def test_do_sample_true_temperature_zero_preserved(self):
    method test_extra_kwargs_passed_through (line 285) | def test_extra_kwargs_passed_through(self):
    method test_original_dict_not_mutated (line 297) | def test_original_dict_not_mutated(self):

FILE: tests/models/test_openvino.py
  function test_evaluator (line 33) | def test_evaluator(backend, model_id, task):
  function test_ov_config (line 89) | def test_ov_config():

FILE: tests/models/test_sglang.py
  class Test_SGlang (line 15) | class Test_SGlang:
    method setup_class (line 31) | def setup_class(cls):
    method test_logliklihood (line 45) | def test_logliklihood(self) -> None:
    method test_generate_until (line 51) | def test_generate_until(self) -> None:
    method test_logliklihood_rolling (line 58) | def test_logliklihood_rolling(self) -> None:
    method test_evaluator (line 88) | def test_evaluator(self) -> None:

FILE: tests/models/test_vllm.py
  class Test_VLLM (line 11) | class Test_VLLM:
    method test_logliklihood (line 33) | def test_logliklihood(self) -> None:
    method test_generate_until (line 39) | def test_generate_until(self) -> None:
    method test_logliklihood_rolling (line 45) | def test_logliklihood_rolling(self) -> None:

FILE: tests/models/test_vllm_context_length.py
  class TestVLLMContextLength (line 19) | class TestVLLMContextLength:
    method test_loglikelihood_tokens_truncates_to_max_length_minus_one (line 22) | def test_loglikelihood_tokens_truncates_to_max_length_minus_one(self) ...
    method test_loglikelihood_tokens_no_truncation_when_within_limit (line 65) | def test_loglikelihood_tokens_no_truncation_when_within_limit(self) ->...
    method test_loglikelihood_tokens_truncates_at_exactly_max_length (line 102) | def test_loglikelihood_tokens_truncates_at_exactly_max_length(self) ->...
    method test_loglikelihood_tokens_boundary_at_max_length_minus_one (line 139) | def test_loglikelihood_tokens_boundary_at_max_length_minus_one(self) -...
    method test_loglikelihood_rolling_uses_max_length_minus_two (line 176) | def test_loglikelihood_rolling_uses_max_length_minus_two(self) -> None:

FILE: tests/scripts/test_zeno_visualize.py
  function test_zeno_sanitize_string (line 11) | def test_zeno_sanitize_string():

FILE: tests/test_aggregation_pipeline.py
  function _m (line 31) | def _m(d: dict[str, Any]) -> _TaskMetrics:
  class MockTask (line 36) | class MockTask(Task):
    method __init__ (line 41) | def __init__(
    method task_name (line 54) | def task_name(self):
    method dump_config (line 57) | def dump_config(self) -> dict:
    method aggregation (line 60) | def aggregation(self):
    method higher_is_better (line 63) | def higher_is_better(self):
    method eval_docs (line 67) | def eval_docs(self):
    method has_training_docs (line 71) | def has_training_docs(self):
    method has_validation_docs (line 74) | def has_validation_docs(self):
    method has_test_docs (line 77) | def has_test_docs(self):
    method test_docs (line 80) | def test_docs(self):
    method doc_to_text (line 83) | def doc_to_text(self, doc):
    method doc_to_target (line 86) | def doc_to_target(self, doc):
    method construct_requests (line 89) | def construct_requests(self, doc, ctx, **kwargs):
    method process_results (line 92) | def process_results(self, doc, results):
  function _make_acc (line 96) | def _make_acc(
  class TestTaskToGroupPipeline (line 112) | class TestTaskToGroupPipeline:
    method test_single_task_single_group (line 115) | def test_single_task_single_group(self):
    method test_two_tasks_weighted_group (line 132) | def test_two_tasks_weighted_group(self):
    method test_two_tasks_unweighted_group (line 156) | def test_two_tasks_unweighted_group(self):
    method test_sample_len_is_total_not_per_filter (line 177) | def test_sample_len_is_total_not_per_filter(self):
    method test_multiple_metrics_sample_count (line 211) | def test_multiple_metrics_sample_count(self):
  class TestNestedGroupPipeline (line 248) | class TestNestedGroupPipeline:
    method test_two_level_hierarchy (line 251) | def test_two_level_hierarchy(self):
    method test_parent_with_mixed_children (line 285) | def test_parent_with_mixed_children(self):
  class TestGroupStderrPipeline (line 319) | class TestGroupStderrPipeline:
    method test_group_stderr_aggregated (line 322) | def test_group_stderr_aggregated(self):
    method test_group_stderr_na_when_task_has_single_sample (line 346) | def test_group_stderr_na_when_task_has_single_sample(self):
  class TestGroupAggregationWarnings (line 360) | class TestGroupAggregationWarnings:
    method test_warns_when_metric_missing_in_some_tasks (line 363) | def test_warns_when_metric_missing_in_some_tasks(self, caplog):
    method test_warns_when_metric_missing_in_all_tasks (line 400) | def test_warns_when_metric_missing_in_all_tasks(self, caplog):
    method test_no_warning_when_all_tasks_have_metric (line 423) | def test_no_warning_when_all_tasks_have_metric(self, caplog):

FILE: tests/test_cli_subcommands.py
  class TestHarnessCLI (line 20) | class TestHarnessCLI:
    method test_harness_cli_init (line 23) | def test_harness_cli_init(self):
    method test_harness_cli_has_subcommands (line 29) | def test_harness_cli_has_subcommands(self):
    method test_harness_cli_backward_compatibility (line 37) | def test_harness_cli_backward_compatibility(self):
    method test_harness_cli_help_default (line 47) | def test_harness_cli_help_default(self):
    method test_harness_cli_run_help_only (line 58) | def test_harness_cli_run_help_only(self):
  class TestListCommand (line 65) | class TestListCommand:
    method test_list_command_creation (line 68) | def test_list_command_creation(self):
    method test_list_command_arguments (line 75) | def test_list_command_arguments(self):
    method test_list_command_choices (line 90) | def test_list_command_choices(self):
    method test_list_command_execute_tasks (line 106) | def test_list_command_execute_tasks(self, mock_task_manager):
    method test_list_command_execute_groups (line 123) | def test_list_command_execute_groups(self, mock_task_manager):
  class TestRunCommand (line 142) | class TestRunCommand:
    method test_run_command_creation (line 145) | def test_run_command_creation(self):
    method test_run_command_basic_arguments (line 152) | def test_run_command_basic_arguments(self):
    method test_run_command_tasks_comma_separated (line 164) | def test_run_command_tasks_comma_separated(self):
    method test_run_command_tasks_mixed_format (line 175) | def test_run_command_tasks_mixed_format(self):
    method test_run_command_tasks_None (line 185) | def test_run_command_tasks_None(self):
    method test_run_command_model_args (line 194) | def test_run_command_model_args(self):
    method test_run_command_batch_size (line 210) | def test_run_command_batch_size(self):
    method test_run_command_seed_parsing (line 228) | def test_run_command_seed_parsing(self):
    method test_run_command_execute_basic (line 250) | def test_run_command_execute_basic(
  class TestValidateCommand (line 299) | class TestValidateCommand:
    method test_validate_command_creation (line 302) | def test_validate_command_creation(self):
    method test_validate_command_arguments (line 309) | def test_validate_command_arguments(self):
    method test_validate_command_requires_tasks (line 325) | def test_validate_command_requires_tasks(self):
    method test_validate_command_execute_success (line 335) | def test_validate_command_execute_success(self, mock_task_manager):
    method test_validate_command_execute_missing_tasks (line 354) | def test_validate_command_execute_missing_tasks(self, mock_task_manager):
  class TestEvaluatorConfigTaskLoading (line 376) | class TestEvaluatorConfigTaskLoading:
    method test_process_tasks_comma_separated_in_list (line 380) | def test_process_tasks_comma_separated_in_list(self, mock_task_manager):
    method test_process_tasks_mixed_comma_and_space_separated (line 400) | def test_process_tasks_mixed_comma_and_space_separated(self, mock_task...
    method test_process_tasks_string_comma_separated (line 421) | def test_process_tasks_string_comma_separated(self, mock_task_manager):
    method test_custom_yaml_file_relative_path (line 439) | def test_custom_yaml_file_relative_path(self, tmp_path):
    method test_missing_yaml_file_raises_error (line 466) | def test_missing_yaml_file_raises_error(self, tmp_path):
  class TestEvaluatorConfigFromCLI (line 480) | class TestEvaluatorConfigFromCLI:
    method test_defaults_applied (line 483) | def test_defaults_applied(self, tmp_path):
    method test_cli_args_override_defaults (line 508) | def test_cli_args_override_defaults(self, tmp_path):
    method test_model_args_dict_passed_through (line 531) | def test_model_args_dict_passed_through(self, tmp_path):
    method test_gen_kwargs_passed_through (line 549) | def test_gen_kwargs_passed_through(self, tmp_path):
    method test_none_args_use_defaults (line 567) | def test_none_args_use_defaults(self, tmp_path):
    method test_fewshot_as_multiturn_defaults_with_chat_template (line 586) | def test_fewshot_as_multiturn_defaults_with_chat_template(self, tmp_pa...
    method test_empty_tasks_allowed_at_config_level (line 603) | def test_empty_tasks_allowed_at_config_level(self):
    method test_validation_error_log_samples_without_output (line 615) | def test_validation_error_log_samples_without_output(self):
  class TestCLIUtils (line 633) | class TestCLIUtils:
    method test_try_parse_json_with_json_string (line 636) | def test_try_parse_json_with_json_string(self):
    method test_try_parse_json_with_dict (line 641) | def test_try_parse_json_with_dict(self):
    method test_try_parse_json_with_none (line 647) | def test_try_parse_json_with_none(self):
    method test_try_parse_json_with_plain_string (line 652) | def test_try_parse_json_with_plain_string(self):
    method test_try_parse_json_with_invalid_json (line 657) | def test_try_parse_json_with_invalid_json(self):
    method test_int_or_none_list_single_value (line 664) | def test_int_or_none_list_single_value(self):
    method test_int_or_none_list_multiple_values (line 669) | def test_int_or_none_list_multiple_values(self):
    method test_int_or_none_list_with_none (line 674) | def test_int_or_none_list_with_none(self):
    method test_int_or_none_list_invalid_value (line 679) | def test_int_or_none_list_invalid_value(self):
    method test_int_or_none_list_too_few_values (line 684) | def test_int_or_none_list_too_few_values(self):
    method test_int_or_none_list_too_many_values (line 689) | def test_int_or_none_list_too_many_values(self):
    method test_request_caching_arg_to_dict_none (line 694) | def test_request_caching_arg_to_dict_none(self):
    method test_request_caching_arg_to_dict_true (line 699) | def test_request_caching_arg_to_dict_true(self):
    method test_request_caching_arg_to_dict_refresh (line 708) | def test_request_caching_arg_to_dict_refresh(self):
    method test_request_caching_arg_to_dict_delete (line 717) | def test_request_caching_arg_to_dict_delete(self):
    method test_request_caching_arg_to_dict_invalid (line 726) | def test_request_caching_arg_to_dict_invalid(self):
    method test_cache_requests_argparse_integration (line 731) | def test_cache_requests_argparse_integration(self):
    method test_check_argument_types_raises_on_untyped (line 748) | def test_check_argument_types_raises_on_untyped(self):
    method test_check_argument_types_passes_on_typed (line 758) | def test_check_argument_types_passes_on_typed(self):
    method test_check_argument_types_skips_const_actions (line 766) | def test_check_argument_types_skips_const_actions(self):
  class TestMergeDictAction (line 775) | class TestMergeDictAction:
    method test_comma_separated_key_value (line 778) | def test_comma_separated_key_value(self):
    method test_space_separated_key_value (line 786) | def test_space_separated_key_value(self):
    method test_json_dict_input (line 794) | def test_json_dict_input(self):
    method test_json_nested_dict (line 802) | def test_json_nested_dict(self):
    method test_empty_values (line 810) | def test_empty_values(self):
    method test_type_coercion (line 818) | def test_type_coercion(self):
    method test_multiple_invocations_merge (line 828) | def test_multiple_invocations_merge(self):
    method test_key_overwrite (line 836) | def test_key_overwrite(self):
  class TestEvaluatorConfigPrecedence (line 845) | class TestEvaluatorConfigPrecedence:
    method test_cli_overrides_yaml_overrides_defaults (line 848) | def test_cli_overrides_yaml_overrides_defaults(self, tmp_path):
    method test_yaml_overrides_defaults (line 895) | def test_yaml_overrides_defaults(self, tmp_path):
    method test_cli_overrides_yaml_with_explicit_zero (line 923) | def test_cli_overrides_yaml_with_explicit_zero(self, tmp_path):

FILE: tests/test_evaluator.py
  function test_evaluator (line 37) | def test_evaluator(
  function test_printed_results (line 114) | def test_printed_results(

FILE: tests/test_evaluator_utils.py
  function _m (line 32) | def _m(d: dict[str, Any]) -> _TaskMetrics:
  class MockEvalTask (line 37) | class MockEvalTask(Task):
    method __init__ (line 42) | def __init__(
    method task_name (line 58) | def task_name(self):
    method dump_config (line 62) | def dump_config(self) -> dict:
    method aggregation (line 65) | def aggregation(self):
    method higher_is_better (line 68) | def higher_is_better(self):
    method eval_docs (line 73) | def eval_docs(self):
    method has_training_docs (line 77) | def has_training_docs(self):
    method has_validation_docs (line 80) | def has_validation_docs(self):
    method has_test_docs (line 83) | def has_test_docs(self):
    method test_docs (line 86) | def test_docs(self):
    method doc_to_text (line 89) | def doc_to_text(self, doc):
    method doc_to_target (line 92) | def doc_to_target(self, doc):
    method construct_requests (line 95) | def construct_requests(self, doc, ctx, **kwargs):
    method process_results (line 98) | def process_results(self, doc, results):
  function make_result_acc (line 102) | def make_result_acc(
  class TestEvalResults (line 120) | class TestEvalResults:
    method test_default_fields_are_empty (line 121) | def test_default_fields_are_empty(self):
    method test_fields_are_independent_instances (line 132) | def test_fields_are_independent_instances(self):
  class TestGetSampleSize (line 144) | class TestGetSampleSize:
    method _task (line 145) | def _task(self, n: int = 100):
    method test_limit_none_returns_none (line 148) | def test_limit_none_returns_none(self):
    method test_limit_integer_returns_int (line 151) | def test_limit_integer_returns_int(self):
    method test_limit_fractional_rounds_up (line 154) | def test_limit_fractional_rounds_up(self):
    method test_limit_fractional_small (line 158) | def test_limit_fractional_small(self):
    method test_limit_one_is_treated_as_integer (line 162) | def test_limit_one_is_treated_as_integer(self):
    method test_limit_float_exactly_one_is_integer (line 166) | def test_limit_float_exactly_one_is_integer(self):
  class TestComputeTaskAggregations (line 178) | class TestComputeTaskAggregations:
    method _task (line 179) | def _task(self, agg=None):
    method test_single_metric_mean_aggregation (line 182) | def test_single_metric_mean_aggregation(self):
    method test_stderr_with_bootstrap_iters_zero (line 189) | def test_stderr_with_bootstrap_iters_zero(self):
    method test_stderr_with_bootstrap_iters_none (line 195) | def test_stderr_with_bootstrap_iters_none(self):
    method test_stderr_with_positive_bootstrap_iters (line 201) | def test_stderr_with_positive_bootstrap_iters(self):
    method test_stderr_na_for_single_sample (line 207) | def test_stderr_na_for_single_sample(self):
    method test_fallback_to_mean_for_unknown_metric (line 214) | def test_fallback_to_mean_for_unknown_metric(self):
    method test_multiple_metrics_and_filters (line 221) | def test_multiple_metrics_and_filters(self):
    method test_bleu_metric_bootstrap_cap (line 233) | def test_bleu_metric_bootstrap_cap(self):
  class TestCollectResults (line 246) | class TestCollectResults:
    method _simple_acc (line 247) | def _simple_acc(self):
    method test_single_task_basic_collection (line 257) | def test_single_task_basic_collection(self):
    method test_alias_from_task_config (line 266) | def test_alias_from_task_config(self):
    method test_alias_defaults_to_task_name (line 271) | def test_alias_defaults_to_task_name(self):
    method test_configs_populated (line 278) | def test_configs_populated(self):
    method test_versions_populated (line 283) | def test_versions_populated(self):
    method test_num_fewshot_populated (line 288) | def test_num_fewshot_populated(self):
    method test_higher_is_better_populated (line 293) | def test_higher_is_better_populated(self):
    method test_samples_populated (line 298) | def test_samples_populated(self):
    method test_groups_stored (line 303) | def test_groups_stored(self):
    method test_groups_default_to_empty (line 311) | def test_groups_default_to_empty(self):
    method test_multiple_tasks (line 316) | def test_multiple_tasks(self):
  class TestGetRootGroups (line 335) | class TestGetRootGroups:
    method test_single_root_group (line 336) | def test_single_root_group(self):
    method test_root_excludes_children (line 341) | def test_root_excludes_children(self):
    method test_multiple_independent_roots (line 348) | def test_multiple_independent_roots(self):
    method test_empty_groups (line 355) | def test_empty_groups(self):
    method test_deep_hierarchy (line 358) | def test_deep_hierarchy(self):
    method test_deep_hierarchy_multiple_roots (line 367) | def test_deep_hierarchy_multiple_roots(self):
  class TestCollectGroupsBottomUp (line 399) | class TestCollectGroupsBottomUp:
    method test_single_group_no_children (line 400) | def test_single_group_no_children(self):
    method test_parent_child_order (line 405) | def test_parent_child_order(self):
    method test_deep_hierarchy_order (line 413) | def test_deep_hierarchy_order(self):
    method test_no_duplicates (line 423) | def test_no_duplicates(self):
    method test_empty_groups (line 434) | def test_empty_groups(self):
  class TestAggregateGroups (line 443) | class TestAggregateGroups:
    method test_group_metrics_added_to_results (line 444) | def test_group_metrics_added_to_results(self):
    method test_no_groups_noop (line 467) | def test_no_groups_noop(self):
    method test_bottom_up_aggregation (line 475) | def test_bottom_up_aggregation(self):
  class TestProcessResults (line 512) | class TestProcessResults:
    method _basic_acc (line 513) | def _basic_acc(self):
    method test_returns_eval_results (line 517) | def test_returns_eval_results(self):
    method test_with_groups (line 521) | def test_with_groups(self):
    method test_without_groups (line 529) | def test_without_groups(self):
  class TestGetResultsData (line 540) | class TestGetResultsData:
    method test_preserves_sample_len (line 541) | def test_preserves_sample_len(self):
    method test_alias_not_indented (line 547) | def test_alias_not_indented(self):
    method test_group_with_aggregation_in_group_results (line 571) | def test_group_with_aggregation_in_group_results(self):
    method test_group_without_aggregation_not_in_group_results (line 585) | def test_group_without_aggregation_not_in_group_results(self):
    method test_task_only_in_task_results (line 598) | def test_task_only_in_task_results(self):
  class TestPropagateHigherIsBetter (line 611) | class TestPropagateHigherIsBetter:
    method test_propagation_from_children (line 612) | def test_propagation_from_children(self):
    method test_conflicting_values_set_to_none (line 620) | def test_conflicting_values_set_to_none(self):
    method test_conflicting_values_log_warning (line 630) | def test_conflicting_values_log_warning(self, caplog):
    method test_no_children_in_higher_is_better (line 641) | def test_no_children_in_higher_is_better(self):
    method test_multiple_metrics_mixed (line 650) | def test_multiple_metrics_mixed(self):
    method test_empty_groups_list (line 664) | def test_empty_groups_list(self):
  class TestToEvalResults (line 676) | class TestToEvalResults:
    method _make_eval_acc (line 679) | def _make_eval_acc(self, *, with_group: bool = False, has_aggregation:...
    method test_output_has_required_keys (line 704) | def test_output_has_required_keys(self):
    method test_results_contain_task_metrics (line 718) | def test_results_contain_task_metrics(self):
    method test_n_samples_effective_from_sample_len (line 724) | def test_n_samples_effective_from_sample_len(self):
    method test_groups_key_present_when_group_has_aggregation (line 732) | def test_groups_key_present_when_group_has_aggregation(self):
    method test_groups_key_absent_when_no_group_has_aggregation (line 738) | def test_groups_key_absent_when_no_group_has_aggregation(self):
    method test_groups_key_absent_when_no_groups (line 743) | def test_groups_key_absent_when_no_groups(self):
    method test_samples_included_when_provided (line 748) | def test_samples_included_when_provided(self):
    method test_samples_absent_when_not_provided (line 754) | def test_samples_absent_when_not_provided(self):
    method test_higher_is_better_propagated_to_groups (line 759) | def test_higher_is_better_propagated_to_groups(self):
    method test_configs_sorted (line 765) | def test_configs_sorted(self):
    method test_versions_sorted (line 770) | def test_versions_sorted(self):
  class TestCollectResultsNSamples (line 781) | class TestCollectResultsNSamples:
    method test_n_samples_effective_equals_sample_len (line 784) | def test_n_samples_effective_equals_sample_len(self):
    method test_n_samples_original_from_eval_docs (line 795) | def test_n_samples_original_from_eval_docs(self):

FILE: tests/test_fewshot_context.py
  function default_delimiters (line 19) | def default_delimiters():
  class TestMessage (line 29) | class TestMessage:
    method test_to_dict_excludes_private_fields (line 32) | def test_to_dict_excludes_private_fields(self):
    method test_to_text_appends_delimiter (line 41) | def test_to_text_appends_delimiter(self):
    method test_to_text_empty_delimiter (line 49) | def test_to_text_empty_delimiter(self):
  class TestMaybeDelimit (line 63) | class TestMaybeDelimit:
    method test_both_present_no_whitespace (line 66) | def test_both_present_no_whitespace(self):
    method test_prefix_ends_with_space (line 72) | def test_prefix_ends_with_space(self):
    method test_suffix_starts_with_space (line 78) | def test_suffix_starts_with_space(self):
    method test_both_have_whitespace (line 84) | def test_both_have_whitespace(self):
    method test_prefix_only (line 90) | def test_prefix_only(self):
    method test_suffix_only (line 95) | def test_suffix_only(self):
    method test_both_empty (line 100) | def test_both_empty(self):
    method test_custom_delimiter (line 105) | def test_custom_delimiter(self):
  class TestMultiturnToSingleturn (line 117) | class TestMultiturnToSingleturn:
    method test_collapses_user_messages (line 120) | def test_collapses_user_messages(self):
    method test_preserves_final_assistant (line 134) | def test_preserves_final_assistant(self):
    method test_preserves_system_message (line 150) | def test_preserves_system_message(self):
    method test_system_with_assistant_ending (line 164) | def test_system_with_assistant_ending(self):
  function messages_to_text (line 185) | def messages_to_text(msgs: list[Message]) -> str:
  class TestBuildQaTurn (line 190) | class TestBuildQaTurn:
    method task (line 194) | def task(self):
    method test_basic_qa_format (line 198) | def test_basic_qa_format(self, task):
    method test_no_answer_format (line 211) | def test_no_answer_format(self, task):
    method test_choice_with_int_answer (line 220) | def test_choice_with_int_answer(self, task):
    method test_answer_as_string_directly (line 235) | def test_answer_as_string_directly(self, task):
    method test_answer_as_list (line 244) | def test_answer_as_list(self, task):
    method test_gen_prefix_without_answer (line 252) | def test_gen_prefix_without_answer(self, task):
    method test_gen_prefix_with_answer (line 264) | def test_gen_prefix_with_answer(self, task):
    method test_gen_prefix_spacing_added_when_needed (line 282) | def test_gen_prefix_spacing_added_when_needed(self, task):
    method test_gen_prefix_no_extra_space_when_prefix_has_trailing (line 290) | def test_gen_prefix_no_extra_space_when_prefix_has_trailing(self, task):
    method test_gen_prefix_no_extra_space_when_answer_has_leading (line 298) | def test_gen_prefix_no_extra_space_when_answer_has_leading(self, task):
    method test_gen_prefix_without_answer_preserves_content (line 306) | def test_gen_prefix_without_answer_preserves_content(self, task):
    method test_gen_prefix_with_trailing_space_without_answer (line 319) | def test_gen_prefix_with_trailing_space_without_answer(self, task):
    method test_custom_delimiters (line 327) | def test_custom_delimiters(self, task):
    method test_empty_delimiters (line 335) | def test_empty_delimiters(self, task):
    method test_whitespace_delimiter_matrix (line 343) | def test_whitespace_delimiter_matrix(self, task):
    method test_raises_on_non_string_question (line 419) | def test_raises_on_non_string_question(self, task):
    method test_answer_index_zero_uses_delimiter (line 424) | def test_answer_index_zero_uses_delimiter(self, task):
    method test_answer_index_nonzero_uses_delimiter (line 443) | def test_answer_index_nonzero_uses_delimiter(self, task):
  class TestFewshotContext (line 460) | class TestFewshotContext:
    method test_zero_shot_format (line 463) | def test_zero_shot_format(self, mock_configurable_task):
    method test_one_shot_format (line 476) | def test_one_shot_format(self, mock_configurable_task):
    method test_two_shot_format (line 491) | def test_two_shot_format(self, mock_configurable_task):
    method test_with_system_instruction (line 509) | def test_with_system_instruction(self, mock_configurable_task):
    method test_with_description (line 525) | def test_with_description(self, mock_configurable_task):
    method test_system_instruction_and_description (line 539) | def test_system_instruction_and_description(self, mock_configurable_ta...
    method test_with_choices (line 557) | def test_with_choices(self, mock_configurable_task):
    method test_custom_delimiters (line 582) | def test_custom_delimiters(self, mock_configurable_task):
    method test_gen_prefix_in_fewshot (line 601) | def test_gen_prefix_in_fewshot(self, mock_configurable_task):
    method test_sampler_excludes_eval_doc_when_same_split (line 622) | def test_sampler_excludes_eval_doc_when_same_split(self, mock_configur...
    method test_sampler_no_exclusion_when_different_split (line 640) | def test_sampler_no_exclusion_when_different_split(self, mock_configur...
    method test_chat_template_multiturn (line 658) | def test_chat_template_multiturn(self, mock_configurable_task):
    method test_chat_template_singleturn (line 688) | def test_chat_template_singleturn(self, mock_configurable_task):
  class TestChatTemplateFormat (line 722) | class TestChatTemplateFormat:
    method test_messages_to_dict_list (line 725) | def test_messages_to_dict_list(self):
    method test_singleturn_collapse_for_chat (line 741) | def test_singleturn_collapse_for_chat(self):

FILE: tests/test_group.py
  class MockTask (line 18) | class MockTask(Task):
    method __init__ (line 23) | def __init__(self, task_name: str):
    method task_name (line 27) | def task_name(self):
    method has_training_docs (line 30) | def has_training_docs(self):
    method has_validation_docs (line 33) | def has_validation_docs(self):
    method has_test_docs (line 36) | def has_test_docs(self):
    method test_docs (line 39) | def test_docs(self):
    method doc_to_text (line 42) | def doc_to_text(self, doc):
    method doc_to_target (line 45) | def doc_to_target(self, doc):
    method construct_requests (line 48) | def construct_requests(self, doc, ctx, **kwargs):
    method process_results (line 51) | def process_results(self, doc, results):
    method aggregation (line 54) | def aggregation(self):
    method higher_is_better (line 57) | def higher_is_better(self):
  class TestAggMetricConfig (line 61) | class TestAggMetricConfig:
    method test_default_filter_list_is_none (line 64) | def test_default_filter_list_is_none(self):
    method test_explicit_filter_list (line 69) | def test_explicit_filter_list(self):
    method test_string_filter_normalized_to_list (line 74) | def test_string_filter_normalized_to_list(self):
    method test_empty_filter_list (line 80) | def test_empty_filter_list(self):
    method test_multiple_filters (line 85) | def test_multiple_filters(self):
    method test_default_aggregation_is_mean (line 90) | def test_default_aggregation_is_mean(self):
    method test_default_weight_by_size_is_true (line 95) | def test_default_weight_by_size_is_true(self):
  class TestGroupFilterDiscovery (line 101) | class TestGroupFilterDiscovery:
    method setup_method (line 104) | def setup_method(self):
    method test_discover_filters_single_filter (line 132) | def test_discover_filters_single_filter(self):
    method test_discover_filters_multiple_filters (line 147) | def test_discover_filters_multiple_filters(self):
    method test_discover_filters_no_matches (line 156) | def test_discover_filters_no_matches(self):
    method test_discover_filters_excludes_stderr (line 166) | def test_discover_filters_excludes_stderr(self):
    method test_discover_filters_partial_availability (line 185) | def test_discover_filters_partial_availability(self):
  class TestGroupAggregation (line 196) | class TestGroupAggregation:
    method setup_method (line 199) | def setup_method(self):
    method test_auto_discovery_aggregates_all_filters (line 225) | def test_auto_discovery_aggregates_all_filters(self):
    method test_explicit_filter_list_backward_compatibility (line 253) | def test_explicit_filter_list_backward_compatibility(self):
    method test_multiple_explicit_filters (line 271) | def test_multiple_explicit_filters(self):
    method test_empty_filter_list_no_aggregation (line 289) | def test_empty_filter_list_no_aggregation(self):
    method test_multiple_metrics_auto_discovery (line 308) | def test_multiple_metrics_auto_discovery(self):
    method test_mixed_auto_and_explicit_filters (line 344) | def test_mixed_auto_and_explicit_filters(self):
    method test_stderr_aggregation_with_auto_discovery (line 379) | def test_stderr_aggregation_with_auto_discovery(self):
    method test_sample_len_count_with_auto_discovery (line 404) | def test_sample_len_count_with_auto_discovery(self):
    method test_sample_count_per_metric_with_asymmetric_filters (line 426) | def test_sample_count_per_metric_with_asymmetric_filters(self):
  class TestGroupWeightedAggregation (line 441) | class TestGroupWeightedAggregation:
    method test_weighted_aggregation_auto_discovery (line 444) | def test_weighted_aggregation_auto_discovery(self):
  class TestGroupEdgeCases (line 490) | class TestGroupEdgeCases:
    method test_no_aggregation_config (line 493) | def test_no_aggregation_config(self):
    method test_task_not_in_metrics (line 504) | def test_task_not_in_metrics(self):
    method test_metric_missing_in_some_tasks (line 528) | def test_metric_missing_in_some_tasks(self, caplog):
  class TestGroup (line 573) | class TestGroup:
    method setup_method (line 576) | def setup_method(self):
    method test_add_task_uses_task_name (line 582) | def test_add_task_uses_task_name(self):
    method test_add_group_uses_name (line 588) | def test_add_group_uses_name(self):
    method test_pop_existing_child (line 595) | def test_pop_existing_child(self):
    method test_pop_nonexistent_child_no_error (line 602) | def test_pop_nonexistent_child_no_error(self):
    method test_get_existing (line 607) | def test_get_existing(self):
    method test_get_missing_returns_none (line 612) | def test_get_missing_returns_none(self):
    method test_contains_present (line 618) | def test_contains_present(self):
    method test_contains_absent (line 623) | def test_contains_absent(self):
    method test_iter_yields_child_values (line 629) | def test_iter_yields_child_values(self):
    method test_len (line 640) | def test_len(self):
    method test_get_all_tasks_recursive (line 650) | def test_get_all_tasks_recursive(self):
    method test_get_all_tasks_non_recursive (line 662) | def test_get_all_tasks_non_recursive(self):
    method test_get_all_groups_recursive (line 674) | def test_get_all_groups_recursive(self):
    method test_get_all_groups_non_recursive (line 686) | def test_get_all_groups_non_recursive(self):
    method test_child_names_returns_keys (line 698) | def test_child_names_returns_keys(self):
    method test_has_aggregation_true (line 706) | def test_has_aggregation_true(self):
    method test_has_aggregation_false_none (line 713) | def test_has_aggregation_false_none(self):
    method test_has_aggregation_false_empty (line 717) | def test_has_aggregation_false_empty(self):
    method test_repr (line 723) | def test_repr(self):
  class TestGroupSerialization (line 732) | class TestGroupSerialization:
    method test_to_dict_round_trip (line 735) | def test_to_dict_round_trip(self):
    method test_from_config_basic (line 753) | def test_from_config_basic(self):
    method test_from_config_single_dict_agg_metric (line 775) | def test_from_config_single_dict_agg_metric(self):
    method test_from_config_missing_group_key (line 786) | def test_from_config_missing_group_key(self):
    method test_to_dict_no_optional_fields (line 792) | def test_to_dict_no_optional_fields(self):
  class TestAggMetricConfigValidation (line 803) | class TestAggMetricConfigValidation:
    method test_invalid_aggregation_raises (line 806) | def test_invalid_aggregation_raises(self):
    method test_callable_aggregation_allowed (line 810) | def test_callable_aggregation_allowed(self):

FILE: tests/test_janitor.py
  function simple_ngram (line 39) | def simple_ngram(sequence, n):
  function test_form_ngrams (line 51) | def test_form_ngrams():
  function test_word_ngrams (line 62) | def test_word_ngrams():
  function test_split_indices (line 76) | def test_split_indices():
  function test_word_ngrams_indices (line 106) | def test_word_ngrams_indices():
  function test_janitor1 (line 142) | def test_janitor1():
  function test_janitor2 (line 185) | def test_janitor2():
  function test_janitor3 (line 222) | def test_janitor3():
  function test_janitor4 (line 256) | def test_janitor4():
  function test_janitor5 (line 298) | def test_janitor5():
  function test_janitor6 (line 342) | def test_janitor6():
  function test_janitor7 (line 394) | def test_janitor7():
  function test_janitor8 (line 449) | def test_janitor8():

FILE: tests/test_metrics.py
  class MockConfigurableTask (line 8) | class MockConfigurableTask(ConfigurableTask):
    method __init__ (line 11) | def __init__(self):
    method doc_to_choice (line 36) | def doc_to_choice(self, doc):
    method doc_to_target (line 39) | def doc_to_target(self, doc):
    method has_training_docs (line 43) | def has_training_docs(self):
    method has_validation_docs (line 46) | def has_validation_docs(self):
    method has_test_docs (line 49) | def has_test_docs(self):
    method download (line 52) | def download(self, **kwargs):
  function test_acc_mutual_info_slicing (line 56) | def test_acc_mutual_info_slicing():
  function test_acc_mutual_info_different_predictions (line 93) | def test_acc_mutual_info_different_predictions():
  function test_acc_mutual_info_without_metric (line 127) | def test_acc_mutual_info_without_metric():
  function test_bootstrap_internal_no_mp (line 156) | def test_bootstrap_internal_no_mp():
  function test_dict_metric_uses_custom_aggregation (line 181) | def test_dict_metric_uses_custom_aggregation():

FILE: tests/test_misc.py
  function test_bootstrapping (line 8) | def test_bootstrapping():

FILE: tests/test_prompt.py
  function test_mmlu_prompt_rendering (line 196) | def test_mmlu_prompt_rendering(

FILE: tests/test_registry.py
  class TestRegistryBasics (line 27) | class TestRegistryBasics:
    method test_create_registry (line 30) | def test_create_registry(self):
    method test_decorator_registration (line 36) | def test_decorator_registration(self):
    method test_decorator_multiple_aliases (line 48) | def test_decorator_multiple_aliases(self):
    method test_decorator_auto_name (line 60) | def test_decorator_auto_name(self):
    method test_lazy_registration (line 70) | def test_lazy_registration(self):
    method test_unknown_key_error (line 87) | def test_unknown_key_error(self):
    method test_default_value (line 96) | def test_default_value(self):
    method test_iteration (line 104) | def test_iteration(self):
    method test_contains (line 115) | def test_contains(self):
    method test_keys_values_items (line 123) | def test_keys_values_items(self):
  class TestRegistryCollisions (line 134) | class TestRegistryCollisions:
    method test_duplicate_raises_error (line 137) | def test_duplicate_raises_error(self):
    method test_placeholder_upgrade (line 153) | def test_placeholder_upgrade(self):
    method test_same_object_no_error (line 170) | def test_same_object_no_error(self):
  class TestRegistryFreeze (line 183) | class TestRegistryFreeze:
    method test_freeze (line 186) | def test_freeze(self):
    method test_freeze_all (line 199) | def test_freeze_all(self):
  class TestRegistryThreadSafety (line 207) | class TestRegistryThreadSafety:
    method test_concurrent_registration (line 210) | def test_concurrent_registration(self):
    method test_concurrent_access (line 232) | def test_concurrent_access(self):
  class TestModelRegistry (line 255) | class TestModelRegistry:
    method test_model_registry_exists (line 258) | def test_model_registry_exists(self):
    method test_lazy_model_loading (line 262) | def test_lazy_model_loading(self):
    method test_get_model_error (line 277) | def test_get_model_error(self):
  class TestFilterRegistry (line 285) | class TestFilterRegistry:
    method test_filter_registry_exists (line 288) | def test_filter_registry_exists(self):
    method test_register_filter (line 292) | def test_register_filter(self):
    method test_get_filter_callable (line 304) | def test_get_filter_callable(self):
  class TestMetricRegistry (line 313) | class TestMetricRegistry:
    method test_metric_registry_exists (line 316) | def test_metric_registry_exists(self):
    method test_aggregation_registry_exists (line 320) | def test_aggregation_registry_exists(self):
    method test_register_aggregation (line 324) | def test_register_aggregation(self):
    method test_register_metric (line 334) | def test_register_metric(self):
    method test_builtin_metrics_loaded (line 355) | def test_builtin_metrics_loaded(self):
  class TestBackwardCompatibility (line 365) | class TestBackwardCompatibility:
    method test_registry_aliases (line 368) | def test_registry_aliases(self):
  class TestRegistryClear (line 387) | class TestRegistryClear:
    method test_clear (line 390) | def test_clear(self):

FILE: tests/test_requests_caching.py
  function setup_and_teardown (line 26) | def setup_and_teardown():
  function clear_cache (line 35) | def clear_cache():
  function get_cache_files (line 44) | def get_cache_files(tasks: Optional[List[str]] = None) -> Tuple[List[str...
  function assert_created (line 57) | def assert_created(tasks: List[str], file_task_names: List[str]):
  function requests_caching_true (line 65) | def requests_caching_true(tasks: List[str]):
  function requests_caching_refresh (line 74) | def requests_caching_refresh(tasks: List[str]):
  function requests_caching_delete (line 94) | def requests_caching_delete(tasks: List[str]):
  function run_tests (line 108) | def run_tests():

FILE: tests/test_samplers.py
  function sample_docs (line 21) | def sample_docs() -> list[dict]:
  function large_docs (line 33) | def large_docs() -> list[dict]:
  class TestContextSampler (line 43) | class TestContextSampler:
    method test_sample_returns_exactly_n_documents (line 47) | def test_sample_returns_exactly_n_documents(self, sample_docs, n):
    method test_sample_with_seed_is_reproducible (line 55) | def test_sample_with_seed_is_reproducible(self, sample_docs):
    method test_different_seeds_produce_different_samples (line 65) | def test_different_seeds_produce_different_samples(self, large_docs):
    method test_sample_zero_returns_empty (line 75) | def test_sample_zero_returns_empty(self, sample_docs):
    method test_sample_negative_raises (line 83) | def test_sample_negative_raises(self, sample_docs):
    method test_sample_excludes_eval_doc (line 90) | def test_sample_excludes_eval_doc(self, sample_docs):
    method test_sample_all_docs_with_exclusion (line 100) | def test_sample_all_docs_with_exclusion(self, sample_docs):
    method test_fewshot_indices_filters_documents (line 110) | def test_fewshot_indices_filters_documents(self, sample_docs):
    method test_set_rnd_changes_random_state (line 123) | def test_set_rnd_changes_random_state(self, large_docs):
    method test_replace_df_updates_documents (line 133) | def test_replace_df_updates_documents(self, sample_docs):
    method test_replace_df_resets_loaded_state (line 143) | def test_replace_df_resets_loaded_state(self, sample_docs):
    method test_empty_df_raises_on_sample (line 156) | def test_empty_df_raises_on_sample(self):
    method test_none_df_defaults_to_empty (line 163) | def test_none_df_defaults_to_empty(self):
    method test_sample_with_df_override (line 169) | def test_sample_with_df_override(self, sample_docs, large_docs):
  class TestRmEvalDoc (line 180) | class TestRmEvalDoc:
    method test_removes_matching_doc (line 183) | def test_removes_matching_doc(self):
    method test_limits_to_n_results (line 193) | def test_limits_to_n_results(self):
    method test_no_match_returns_all (line 203) | def test_no_match_returns_all(self):
  class TestFirstNSampler (line 218) | class TestFirstNSampler:
    method test_returns_first_n_in_order (line 221) | def test_returns_first_n_in_order(self, sample_docs):
    method test_is_deterministic (line 232) | def test_is_deterministic(self, sample_docs):
    method test_sample_all (line 242) | def test_sample_all(self, sample_docs):
    method test_exceeding_available_raises (line 250) | def test_exceeding_available_raises(self, sample_docs):
    method test_ignores_eval_doc (line 257) | def test_ignores_eval_doc(self, sample_docs):
  class TestSamplerRegistry (line 273) | class TestSamplerRegistry:
    method test_registry_contains_default (line 276) | def test_registry_contains_default(self):
    method test_registry_contains_first_n (line 281) | def test_registry_contains_first_n(self):
    method test_get_sampler_returns_class (line 286) | def test_get_sampler_returns_class(self):
    method test_get_sampler_unknown_raises_keyerror (line 293) | def test_get_sampler_unknown_raises_keyerror(self):
    method test_get_sampler_error_lists_available (line 298) | def test_get_sampler_error_lists_available(self):
  class TestSamplerIntegration (line 309) | class TestSamplerIntegration:
    method test_method_chaining (line 312) | def test_method_chaining(self, sample_docs):
    method test_sampler_from_registry (line 320) | def test_sampler_from_registry(self, sample_docs):
    method test_first_n_from_registry (line 329) | def test_first_n_from_registry(self, sample_docs):

FILE: tests/test_task_manager.py
  function custom_task_name (line 18) | def custom_task_name():
  function custom_task_tag (line 23) | def custom_task_tag():
  function task_yaml (line 28) | def task_yaml(pytestconfig, custom_task_name, custom_task_tag):
  function task_code (line 38) | def task_code():
  function custom_task_files_dir (line 57) | def custom_task_files_dir(task_yaml, task_code, custom_task_name):
  function test_python_task_inclusion (line 68) | def test_python_task_inclusion(
  class TestConfigLoader (line 89) | class TestConfigLoader:
    method test_load_simple_yaml (line 90) | def test_load_simple_yaml(self, tmp_path):
    method test_load_yaml_with_include (line 106) | def test_load_yaml_with_include(self, tmp_path):
    method test_load_yaml_with_function_tag_resolved (line 130) | def test_load_yaml_with_function_tag_resolved(self, tmp_path):
    method test_load_yaml_without_function_resolution (line 148) | def test_load_yaml_without_function_resolution(self, tmp_path):
    method test_load_yaml_recursive_includes (line 163) | def test_load_yaml_recursive_includes(self, tmp_path):
    method test_load_yaml_cycle_detection (line 188) | def test_load_yaml_cycle_detection(self, tmp_path):
  class TestKind (line 210) | class TestKind:
    method test_kind_enum_values (line 211) | def test_kind_enum_values(self):
  class TestEntry (line 219) | class TestEntry:
    method test_entry_dataclass_fields (line 220) | def test_entry_dataclass_fields(self):
  class TestTaskIndex (line 236) | class TestTaskIndex:
    method test_build_from_directory (line 237) | def test_build_from_directory(self, tmp_path):
    method test_deterministic_traversal (line 252) | def test_deterministic_traversal(self, tmp_path):
    method test_duplicate_task_detection (line 267) | def test_duplicate_task_detection(self, tmp_path, caplog):
    method test_duplicate_group_detection (line 287) | def test_duplicate_group_detection(self, tmp_path, caplog):
    method test_kind_detection_task (line 312) | def test_kind_detection_task(self):
    method test_kind_detection_group (line 318) | def test_kind_detection_group(self):
    method test_kind_detection_py_task (line 324) | def test_kind_detection_py_task(self):
    method test_tag_registration (line 330) | def test_tag_registration(self, tmp_path):
    method test_ignore_pycache (line 347) | def test_ignore_pycache(self, tmp_path):
  function shared_task_manager (line 366) | def shared_task_manager():
  function test_configs_task_manager (line 372) | def test_configs_task_manager():
  class TestTaskManagerIntegration (line 378) | class TestTaskManagerIntegration:
    method test_initialization (line 379) | def test_initialization(self, shared_task_manager):
    method test_all_tasks_sorted (line 383) | def test_all_tasks_sorted(self, shared_task_manager):
    method test_all_groups_property (line 388) | def test_all_groups_property(self, shared_task_manager):
    method test_all_subtasks_property (line 398) | def test_all_subtasks_property(self, shared_task_manager):
    method test_all_tags_property (line 406) | def test_all_tags_property(self, shared_task_manager):
    method test_load_task_by_name (line 416) | def test_load_task_by_name(self, test_configs_task_manager):
    method test_load_group_by_name (line 421) | def test_load_group_by_name(self, test_configs_task_manager):
    method test_load_tag_by_name (line 431) | def test_load_tag_by_name(self, shared_task_manager):
    method test_include_path (line 438) | def test_include_path(self):
    method test_include_defaults_false (line 445) | def test_include_defaults_false(self):
    method test_include_resolution (line 454) | def test_include_resolution(self):
    method test_include_inheritance_override (line 461) | def test_include_inheritance_override(self):
    method test_include_custom_metrics (line 476) | def test_include_custom_metrics(self):
    method test_group_loading (line 489) | def test_group_loading(self):
    method test_include_group (line 496) | def test_include_group(self):
    method test_match_tasks_glob (line 507) | def test_match_tasks_glob(self, shared_task_manager):
    method test_name_is_registered (line 513) | def test_name_is_registered(self, shared_task_manager):
    method test_name_is_task_tag (line 518) | def test_name_is_task_tag(self, shared_task_manager):
    method test_include_path_precedence (line 526) | def test_include_path_precedence(self, shared_task_manager):
    method test_include_defaults_false_with_custom_path (line 585) | def test_include_defaults_false_with_custom_path(self):
    method test_include_defaults_true_with_new_tasks (line 637) | def test_include_defaults_true_with_new_tasks(self, shared_task_manager):
    method test_tag_expansion_in_group (line 691) | def test_tag_expansion_in_group(self, test_configs_task_manager):
    method test_nested_group_with_tag (line 715) | def test_nested_group_with_tag(self, test_configs_task_manager):
    method test_inline_subgroup_syntax (line 746) | def test_inline_subgroup_syntax(self, test_configs_task_manager):
  class TestTaskManagerLoad (line 782) | class TestTaskManagerLoad:
    method test_load_task_by_name (line 789) | def test_load_task_by_name(self, test_configs_task_manager):
    method test_load_group_by_name (line 794) | def test_load_group_by_name(self, test_configs_task_manager):
    method test_load_group_map (line 802) | def test_load_group_map(self, test_configs_task_manager):
    method test_load_tag_by_name (line 810) | def test_load_tag_by_name(self, shared_task_manager):
    method test_include_inheritance_override (line 818) | def test_include_inheritance_override(self):
    method test_include_custom_metrics (line 829) | def test_include_custom_metrics(self):
    method test_tag_expansion_in_group (line 842) | def test_tag_expansion_in_group(self, test_configs_task_manager):
    method test_nested_group_with_tag (line 852) | def test_nested_group_with_tag(self, test_configs_task_manager):
    method test_include_path_precedence (line 865) | def test_include_path_precedence(self, shared_task_manager):
    method test_load_returns_same_tasks_as_legacy (line 902) | def test_load_returns_same_tasks_as_legacy(self, test_configs_task_man...
  class TestGroupBuilding (line 926) | class TestGroupBuilding:
    method tm (line 934) | def tm(self):
    method test_existing_group_ref_has_children (line 940) | def test_existing_group_ref_has_children(self, tm):
    method test_existing_group_ref_overrides_propagate (line 961) | def test_existing_group_ref_overrides_propagate(self, tm):
    method test_group_level_config_propagates_to_children (line 978) | def test_group_level_config_propagates_to_children(self, tm):
    method test_caller_overrides_beat_group_defaults (line 993) | def test_caller_overrides_beat_group_defaults(self, tm):
    method test_mixed_members_string_ref (line 1009) | def test_mixed_members_string_ref(self, tm):
    method test_mixed_members_dict_with_overrides (line 1018) | def test_mixed_members_dict_with_overrides(self, tm):
    method test_mixed_members_inline_subgroup (line 1024) | def test_mixed_members_inline_subgroup(self, tm):
    method test_empty_group_has_no_children (line 1041) | def test_empty_group_has_no_children(self, tm):
    method test_parse_aggregation_with_list (line 1055) | def test_parse_aggregation_with_list(self):
    method test_parse_aggregation_single_dict_normalized (line 1076) | def test_parse_aggregation_single_dict_normalized(self):
    method test_parse_aggregation_missing_returns_none (line 1093) | def test_parse_aggregation_missing_returns_none(self):
    method test_group_alias_preserved (line 1105) | def test_group_alias_preserved(self, tm):
    method test_group_metadata_includes_factory_meta (line 1114) | def test_group_metadata_includes_factory_meta(self):
    method test_deeply_nested_get_all_tasks_recursive (line 1129) | def test_deeply_nested_get_all_tasks_recursive(self, tm):
    method test_deeply_nested_get_all_tasks_non_recursive (line 1141) | def test_deeply_nested_get_all_tasks_non_recursive(self, tm):

FILE: tests/test_tasks.py
  function get_new_tasks_else_default (line 20) | def get_new_tasks_else_default():
  function task_class (line 32) | def task_class(
  function limit (line 50) | def limit() -> int:
  class BaseTasks (line 54) | class BaseTasks:
    method test_download (line 59) | def test_download(self, task_class: ConfigurableTask):
    method test_has_training_docs (line 63) | def test_has_training_docs(self, task_class: ConfigurableTask):
    method test_check_training_docs (line 66) | def test_check_training_docs(self, task_class: ConfigurableTask):
    method test_has_validation_docs (line 70) | def test_has_validation_docs(self, task_class):
    method test_check_validation_docs (line 73) | def test_check_validation_docs(self, task_class):
    method test_has_test_docs (line 77) | def test_has_test_docs(self, task_class):
    method test_check_test_docs (line 80) | def test_check_test_docs(self, task_class):
    method test_should_decontaminate (line 85) | def test_should_decontaminate(self, task_class):
    method test_doc_to_text (line 91) | def test_doc_to_text(self, task_class, limit):
    method test_create_choices (line 114) | def test_create_choices(self, task_class, limit):
    method test_doc_to_target (line 126) | def test_doc_to_target(self, task_class, limit):
    method test_build_all_requests (line 138) | def test_build_all_requests(self, task_class, limit):
    method test_construct_requests (line 143) | def test_construct_requests(self, task_class, limit):
  class TestNewTasksElseDefault (line 165) | class TestNewTasksElseDefault(BaseTasks):

FILE: tests/test_unitxt_tasks.py
  function limit (line 17) | def limit() -> int:
  class TestUnitxtTasks (line 28) | class TestUnitxtTasks(BaseTasks):
    method test_check_training_docs (line 35) | def test_check_training_docs(self, task_class: ConfigurableTask):
    method test_check_validation_docs (line 39) | def test_check_validation_docs(self, task_class):
    method test_check_test_docs (line 43) | def test_check_test_docs(self, task_class):
    method test_doc_to_text (line 48) | def test_doc_to_text(self, task_class, limit: int):

FILE: tests/test_utils.py
  function test_get_rolling_token_windows_v1 (line 25) | def test_get_rolling_token_windows_v1():
  function test_get_rolling_token_windows_v2 (line 55) | def test_get_rolling_token_windows_v2():
  function test_get_rolling_token_windows_v3 (line 84) | def test_get_rolling_token_windows_v3():
  function test_get_rolling_token_windows_v4 (line 129) | def test_get_rolling_token_windows_v4():
  function test_get_rolling_token_windows_v5 (line 170) | def test_get_rolling_token_windows_v5():
  function test_get_rolling_token_windows_v6 (line 199) | def test_get_rolling_token_windows_v6():
  function test_get_rolling_token_windows_empty (line 223) | def test_get_rolling_token_windows_empty():
  function test_make_disjoint_window (line 236) | def test_make_disjoint_window():
  class TestCollator (line 245) | class TestCollator:
    method make_generate_sample (line 246) | def make_generate_sample(self, end=10):
    method make_loglikelihood_sample (line 259) | def make_loglikelihood_sample(self, end=11):
    method make_loglikelihood_sample_group (line 266) | def make_loglikelihood_sample_group(self, end=11):
    method test_generations (line 275) | def test_generations(self, batch_size, end):
    method test_loglikelihood (line 305) | def test_loglikelihood(self, batch_size, end):
    method test_context_grouping (line 328) | def test_context_grouping(self, batch_size):
  function test_aggregate_mean (line 369) | def test_aggregate_mean():
  function test_aggregate_stderrs (line 388) | def test_aggregate_stderrs(samples):
  function test_remote_tokenizer_custom_cert_and_token (line 405) | def test_remote_tokenizer_custom_cert_and_token(monkeypatch):
  function test_remote_tokenizer_no_cert (line 433) | def test_remote_tokenizer_no_cert(monkeypatch):
  function test_remote_tokenizer_http_url (line 458) | def test_remote_tokenizer_http_url(monkeypatch):
  function test_check_remote_tokenizer_support (line 482) | def test_check_remote_tokenizer_support(monkeypatch):
  function test_apply_chat_template (line 520) | def test_apply_chat_template(monkeypatch):
  class TestRequiresDelimiter (line 552) | class TestRequiresDelimiter:
    method test_no_whitespace_requires_delimiter (line 555) | def test_no_whitespace_requires_delimiter(self):
    method test_prefix_ends_with_space (line 559) | def test_prefix_ends_with_space(self):
    method test_suffix_starts_with_space (line 563) | def test_suffix_starts_with_space(self):
    method test_both_have_whitespace (line 567) | def test_both_have_whitespace(self):
    method test_prefix_ends_with_newline (line 571) | def test_prefix_ends_with_newline(self):
    method test_suffix_starts_with_tab (line 575) | def test_suffix_starts_with_tab(self):
  class TestMaybeDelimit (line 580) | class TestMaybeDelimit:
    method test_both_present_no_whitespace (line 583) | def test_both_present_no_whitespace(self):
    method test_both_present_prefix_has_space (line 587) | def test_both_present_prefix_has_space(self):
    method test_both_present_suffix_has_space (line 591) | def test_both_present_suffix_has_space(self):
    method test_custom_delimiter (line 595) | def test_custom_delimiter(self):
    method test_prefix_is_none (line 599) | def test_prefix_is_none(self):
    method test_prefix_is_empty (line 603) | def test_prefix_is_empty(self):
    method test_suffix_is_none (line 607) | def test_suffix_is_none(self):
    method test_suffix_is_empty (line 611) | def test_suffix_is_empty(self):
    method test_both_none (line 615) | def test_both_none(self):
    method test_both_empty (line 619) | def test_both_empty(self):
    method test_newline_delimiter (line 623) | def test_newline_delimiter(self):
    method test_prefix_ends_with_newline_no_extra_delimiter (line 627) | def test_prefix_ends_with_newline_no_extra_delimiter(self):
  class TestHandleArgString (line 632) | class TestHandleArgString:
    method test_bool_true (line 635) | def test_bool_true(self):
    method test_bool_false (line 640) | def test_bool_false(self):
    method test_none (line 644) | def test_none(self):
    method test_positive_int (line 648) | def test_positive_int(self):
    method test_negative_int (line 652) | def test_negative_int(self):
    method test_float (line 656) | def test_float(self):
    method test_negative_float (line 660) | def test_negative_float(self):
    method test_scientific_notation (line 663) | def test_scientific_notation(self):
    method test_plain_string (line 667) | def test_plain_string(self):
    method test_explicit_quoted_string_preserves_numeric (line 670) | def test_explicit_quoted_string_preserves_numeric(self):
    method test_explicit_single_quoted_string (line 675) | def test_explicit_single_quoted_string(self):
    method test_empty_string (line 679) | def test_empty_string(self):
    method test_whitespace_stripped (line 682) | def test_whitespace_stripped(self):
  class TestSimpleParseArgsString (line 686) | class TestSimpleParseArgsString:
    method test_basic_parsing (line 689) | def test_basic_parsing(self):
    method test_numeric_revision_stays_int_by_default (line 693) | def test_numeric_revision_stays_int_by_default(self):
    method test_quoted_revision_stays_string (line 698) | def test_quoted_revision_stays_string(self):
    method test_none_input (line 704) | def test_none_input(self):
    method test_empty_input (line 707) | def test_empty_input(self):
    method test_bool_and_float_coercion (line 710) | def test_bool_and_float_coercion(self):

FILE: tests/utils.py
  function load_changed_files (line 13) | def load_changed_files(file_path: str) -> list[str]:
  function parser (line 24) | def parser(full_path: list[str]) -> list[str]:
  function new_tasks (line 36) | def new_tasks() -> list[str] | None: