SYMBOL INDEX (4474 symbols across 720 files) FILE: examples/transformer-lens.py function evaluate_lm_eval (line 12) | def evaluate_lm_eval(lens_model: HookedTransformer, tasks: list[str], **... FILE: lm_eval/__init__.py function __getattr__ (line 17) | def __getattr__(name): FILE: lm_eval/__main__.py function cli_evaluate (line 5) | def cli_evaluate() -> None: FILE: lm_eval/_cli/harness.py class HarnessCLI (line 10) | class HarnessCLI: method __init__ (line 13) | def __init__(self): method parse_args (line 46) | def parse_args(self) -> argparse.Namespace: method execute (line 58) | def execute(self, args: argparse.Namespace) -> None: FILE: lm_eval/_cli/ls.py class List (line 7) | class List(SubCommand): method __init__ (line 10) | def __init__(self, subparsers: argparse._SubParsersAction, *args, **kw... method _add_args (line 51) | def _add_args(self) -> None: method _execute (line 66) | def _execute(self, args: argparse.Namespace) -> None: FILE: lm_eval/_cli/run.py class Run (line 18) | class Run(SubCommand): method __init__ (line 21) | def __init__(self, subparsers: argparse._SubParsersAction, *args, **kw... method _add_args (line 49) | def _add_args(self) -> None: method _execute (line 338) | def _execute(args: argparse.Namespace) -> None: FILE: lm_eval/_cli/subcommand.py class SubCommand (line 5) | class SubCommand(ABC): method __init__ (line 8) | def __init__(self, *args, **kwargs): method create (line 12) | def create(cls, subparsers: argparse._SubParsersAction): method _add_args (line 17) | def _add_args(self) -> None: FILE: lm_eval/_cli/utils.py function try_parse_json (line 12) | def try_parse_json(value: str | dict[str, Any] | None) -> str | dict[str... function _int_or_none_list_arg_type (line 28) | def _int_or_none_list_arg_type( function request_caching_arg_to_dict (line 66) | def request_caching_arg_to_dict(cache_requests: str | None) -> dict[str,... function check_argument_types (line 81) | def check_argument_types(parser: argparse.ArgumentParser) -> None: function handle_cli_value_string (line 95) | def handle_cli_value_string(arg: str) -> bool | int | float | str: function key_val_to_dict (line 111) | def key_val_to_dict(args: str) -> dict[str, Any]: class MergeDictAction (line 125) | class MergeDictAction(argparse.Action): method __call__ (line 128) | def __call__( class SplitArgs (line 159) | class SplitArgs(argparse.Action): method __call__ (line 160) | def __call__(self, parser, namespace, values, option_string=None): FILE: lm_eval/_cli/validate.py class Validate (line 8) | class Validate(SubCommand): method __init__ (line 11) | def __init__(self, subparsers: argparse._SubParsersAction, *args, **kw... method _add_args (line 78) | def _add_args(self) -> None: method _execute (line 95) | def _execute(self, args: argparse.Namespace) -> None: FILE: lm_eval/api/filter.py class Filter (line 8) | class Filter(ABC): method __init__ (line 17) | def __init__(self, **kwargs) -> None: method apply (line 23) | def apply(self, resps: Union[List, Iterable], docs: List[dict]) -> Ite... class FilterEnsemble (line 34) | class FilterEnsemble: method apply (line 45) | def apply(self, instances: List[Instance]) -> None: FILE: lm_eval/api/group.py class Group (line 34) | class Group: method add (line 61) | def add(self, item: Task | Group) -> None: method pop (line 69) | def pop(self, name: str) -> Group | Task | None: method get (line 73) | def get(self, name: str) -> Task | Group | None: method __contains__ (line 77) | def __contains__(self, name: str) -> bool: method __iter__ (line 81) | def __iter__(self): method __len__ (line 85) | def __len__(self) -> int: method get_all_tasks (line 91) | def get_all_tasks(self, recursive: bool = True) -> list[Task]: method get_all_groups (line 112) | def get_all_groups(self, recursive: bool = True) -> list[Group]: method child_names (line 132) | def child_names(self) -> list[str]: method version (line 137) | def version(self) -> str: method has_aggregation (line 142) | def has_aggregation(self) -> bool: method _discover_filters_for_metric (line 149) | def _discover_filters_for_metric( method aggregate (line 183) | def aggregate(self, task_metrics: dict[str, _TaskMetrics]) -> _TaskMet... method to_dict (line 285) | def to_dict(self) -> dict[str, Any] | None: method from_config (line 303) | def from_config(cls, config: GroupConfig | dict[str, Any]) -> Group: method __repr__ (line 323) | def __repr__(self): class ConfigurableGroup (line 333) | class ConfigurableGroup(Group): method __init__ (line 336) | def __init__(self, config: dict | GroupConfig | None = None) -> None: method group (line 350) | def group(self): method group_alias (line 354) | def group_alias(self): method version (line 358) | def version(self) -> str: method config (line 364) | def config(self): method group_name (line 368) | def group_name(self): method from_group (line 372) | def from_group(cls, group: Group) -> ConfigurableGroup: method __eq__ (line 385) | def __eq__(self, other): method __hash__ (line 390) | def __hash__(self): method __repr__ (line 393) | def __repr__(self): FILE: lm_eval/api/instance.py class Instance (line 11) | class Instance: method __post_init__ (line 27) | def __post_init__(self) -> None: method args (line 32) | def args(self): FILE: lm_eval/api/metrics.py function bypass_agg (line 23) | def bypass_agg(arr): function nanmean (line 28) | def nanmean(arr): function mean (line 35) | def mean(arr): function median (line 40) | def median(arr): function perplexity (line 47) | def perplexity(items): function weighted_perplexity (line 52) | def weighted_perplexity(items): function bits_per_byte (line 57) | def bits_per_byte(items): function f1_score (line 62) | def f1_score(items): function matthews_corrcoef (line 74) | def matthews_corrcoef(items): function bleu (line 84) | def bleu(items): function chrf (line 102) | def chrf(items): function ter (line 117) | def ter(items): function brier_score (line 133) | def brier_score(items): # This is a passthrough function function brier_score_fn (line 148) | def brier_score_fn(items): # This is a passthrough function function acc_fn (line 158) | def acc_fn(items): # This is a passthrough function function acc_norm_fn (line 168) | def acc_norm_fn(items): # This is a passthrough function function acc_mutual_info_fn (line 178) | def acc_mutual_info_fn(items): # This is a passthrough function function acc_bytes_fn (line 188) | def acc_bytes_fn(items): # This is a passthrough function function exact_match_hf_evaluate (line 210) | def exact_match_hf_evaluate( function exact_match_fn (line 254) | def exact_match_fn(**kwargs): function perplexity_fn (line 264) | def perplexity_fn(items): # This is a passthrough function function likelihood_fn (line 274) | def likelihood_fn(items): # This is a passthrough function function word_perplexity_fn (line 284) | def word_perplexity_fn(items): # This is a passthrough function function byte_perplexity_fn (line 294) | def byte_perplexity_fn(items): # This is a passthrough function function bits_per_byte_fn (line 304) | def bits_per_byte_fn(items): # This is a passthrough function function pop_stddev (line 308) | def pop_stddev(arr): function sample_stddev (line 313) | def sample_stddev(arr: Sequence[T]) -> float: function mean_stderr (line 318) | def mean_stderr(arr): function bypass (line 328) | def bypass(items): function mcc_fn (line 338) | def mcc_fn(items): # This is a passthrough function function f1_fn (line 348) | def f1_fn(items): # This is a passthrough function function bleu_fn (line 358) | def bleu_fn(items): # This is a passthrough function function chrf_fn (line 368) | def chrf_fn(items): # This is a passthrough function function ter_fn (line 378) | def ter_fn(items): # This is a passthrough function function acc_all (line 388) | def acc_all(items): function acc_all_stderr (line 407) | def acc_all_stderr(items): function metric_max_over_ground_truths (line 425) | def metric_max_over_ground_truths(metric_fn, prediction, ground_truths): function weighted_mean (line 434) | def weighted_mean(items): function is_non_str_iterable (line 439) | def is_non_str_iterable(obj): function _sacreformat (line 443) | def _sacreformat(refs, preds): class _bootstrap_internal (line 474) | class _bootstrap_internal: method __init__ (line 480) | def __init__(self, f: Callable[[Sequence[T]], float], n: int) -> None: method __call__ (line 484) | def __call__(self, v: tuple[int, Sequence[T]]) -> list[float]: function _bootstrap_internal_no_mp (line 494) | def _bootstrap_internal_no_mp( function bootstrap_stderr (line 516) | def bootstrap_stderr( function stderr_for_metric (line 555) | def stderr_for_metric( function pooled_sample_stderr (line 590) | def pooled_sample_stderr(stderrs: List[float], sizes: List[int]): function combined_sample_stderr (line 608) | def combined_sample_stderr(stderrs: List[float], sizes: List[int], metri... function aggregate_subtask_metrics (line 640) | def aggregate_subtask_metrics(metrics, sizes, weight_by_size=True): FILE: lm_eval/api/model.py class LM (line 25) | class LM(abc.ABC): method __init__ (line 32) | def __init__(self) -> None: method loglikelihood (line 40) | def loglikelihood(self, requests: list["Instance"]) -> list[tuple[floa... method loglikelihood_rolling (line 58) | def loglikelihood_rolling(self, requests: list["Instance"]) -> list[fl... method generate_until (line 100) | def generate_until(self, requests: list["Instance"]) -> list[str]: method apply_chat_template (line 113) | def apply_chat_template( method create_from_arg_string (line 131) | def create_from_arg_string( method create_from_arg_obj (line 149) | def create_from_arg_obj( method device (line 176) | def device(self): method rank (line 180) | def rank(self) -> int: method world_size (line 185) | def world_size(self) -> int: method all_gather (line 189) | def all_gather(self, tensor): method gather_object (line 196) | def gather_object(self, obj, dst=0): method barrier (line 203) | def barrier(self) -> None: method tokenizer_name (line 208) | def tokenizer_name(self) -> str: method chat_template (line 217) | def chat_template(self, chat_template: bool | str = False) -> str | None: method set_cache_hook (line 225) | def set_cache_hook(self, cache_hook: "CacheHook") -> None: function hash_args (line 230) | def hash_args(attr: str, args: Iterable[Any]) -> str: class CacheHook (line 235) | class CacheHook: method __init__ (line 236) | def __init__(self, cachinglm: Optional["CachingLM"]) -> None: method add_partial (line 243) | def add_partial(self, attr: str, req: Iterable[Any], res: Any) -> None: class CachingLM (line 250) | class CachingLM: method __init__ (line 251) | def __init__(self, lm: LM, cache_db: str) -> None: method __getattr__ (line 269) | def __getattr__(self, attr: str) -> Any: method get_cache_hook (line 327) | def get_cache_hook(self) -> "CacheHook": class TemplateLM (line 331) | class TemplateLM(LM): method eot_token_id (line 343) | def eot_token_id(self) -> int: method prefix_token_id (line 347) | def prefix_token_id(self): method tok_encode (line 352) | def tok_encode( method _loglikelihood_tokens (line 363) | def _loglikelihood_tokens( method _encode_pair (line 368) | def _encode_pair( method loglikelihood (line 408) | def loglikelihood( method loglikelihood_rolling (line 449) | def loglikelihood_rolling( method generate_until (line 455) | def generate_until(self, requests, disable_tqdm: bool = False) -> list... method chat_template (line 458) | def chat_template(self, chat_template: bool | str = False) -> str | None: FILE: lm_eval/api/registry.py function _materialise_placeholder (line 101) | def _materialise_placeholder(ph: Placeholder) -> Any: function _suggest_similar (line 125) | def _suggest_similar( function _build_key_error_msg (line 142) | def _build_key_error_msg(name: str, alias: str, keys: Iterable[str]) -> ... class Registry (line 156) | class Registry(Generic[T]): method __init__ (line 164) | def __init__( method register (line 183) | def register( method _materialise (line 261) | def _materialise(self, ph: Placeholder) -> T: method get (line 273) | def get(self, alias: str) -> T: ... method get (line 276) | def get(self, alias: str, default: D) -> T | D: ... method get (line 278) | def get(self, alias: str, default: D | Any = _MISSING) -> T | D: method __getitem__ (line 329) | def __getitem__(self, alias: str) -> T: method __contains__ (line 333) | def __contains__(self, alias: str) -> bool: method __iter__ (line 337) | def __iter__(self): method __len__ (line 341) | def __len__(self): method __repr__ (line 345) | def __repr__(self) -> str: method keys (line 352) | def keys(self): method values (line 356) | def values(self): method items (line 363) | def items(self): method origin (line 372) | def origin(self, alias: str) -> str | None: method freeze (line 391) | def freeze(self): method _clear (line 402) | def _clear(self): # pragma: no cover function freeze_all (line 426) | def freeze_all(): function register_model (line 465) | def register_model(*names): function get_model (line 491) | def get_model(model_name: str): function register_filter (line 525) | def register_filter(name: str): function get_filter (line 545) | def get_filter(filter_name: str | Callable) -> Callable: function register_metric (line 575) | def register_metric(**args): function get_metric (line 609) | def get_metric(name: str, hf_evaluate_metric: bool = False) -> Callable ... function register_aggregation (line 643) | def register_aggregation(name: str): function get_aggregation (line 660) | def get_aggregation(name: str) -> Callable[..., float] | None: function get_metric_aggregation (line 680) | def get_metric_aggregation(name: str) -> Callable[..., float] | None: function is_higher_better (line 700) | def is_higher_better(metric_name: str) -> bool | None: FILE: lm_eval/api/samplers.py class ContextSampler (line 17) | class ContextSampler: method __init__ (line 18) | def __init__( method sample (line 31) | def sample( method set_rnd (line 69) | def set_rnd(self, rnd: int | None): method replace_df (line 73) | def replace_df(self, df: Sequence[dict[str, Any]]): method fewshot_docs (line 78) | def fewshot_docs(self): method rm_eval_doc (line 88) | def rm_eval_doc(doc: _T, _iter: Iterable[_T], n=None) -> Sequence[_T]: class FirstNSampler (line 96) | class FirstNSampler(ContextSampler): method sample (line 97) | def sample(self, n: int, eval_doc=None, df=None, **kwargs): class BalancedSampler (line 108) | class BalancedSampler(ContextSampler): method sample (line 109) | def sample(self, n: int, eval_doc=None, df=None, **kwargs): class ManualSampler (line 118) | class ManualSampler(ContextSampler): method sample (line 119) | def sample(self, n: int, eval_doc=None, df=None, **kwargs): function get_sampler (line 130) | def get_sampler(name: str): FILE: lm_eval/api/task.py class Task (line 64) | class Task(abc.ABC): method __init__ (line 85) | def __init__( method download (line 125) | def download( method config (line 164) | def config(self) -> TaskConfig: method has_training_docs (line 169) | def has_training_docs(self): method has_validation_docs (line 174) | def has_validation_docs(self): method has_test_docs (line 179) | def has_test_docs(self): method training_docs (line 183) | def training_docs(self) -> Iterable: method validation_docs (line 190) | def validation_docs(self) -> Iterable: method test_docs (line 197) | def test_docs(self) -> Iterable: method fewshot_docs (line 204) | def fewshot_docs(self) -> Iterable: method _process_doc (line 221) | def _process_doc(self, doc: dict) -> dict: method instances (line 233) | def instances(self) -> list[Instance]: method fewshot_examples (line 239) | def fewshot_examples(self, k, rnd): method doc_to_decontamination_query (line 245) | def doc_to_decontamination_query(self, doc): method doc_to_text (line 251) | def doc_to_text(self, doc): method doc_to_target (line 255) | def doc_to_target(self, doc): method doc_to_image (line 259) | def doc_to_image(self, doc): method doc_to_audio (line 262) | def doc_to_audio(self, doc): method doc_to_prefix (line 265) | def doc_to_prefix(self, doc): method build_all_requests (line 268) | def build_all_requests( method construct_requests (line 382) | def construct_requests(self, doc, ctx, **kwargs): method process_results (line 403) | def process_results(self, doc, results): method aggregation (line 416) | def aggregation(self): method higher_is_better (line 425) | def higher_is_better(self): method get_config (line 433) | def get_config(self, key: str) -> Any: method count_bytes (line 437) | def count_bytes(cls, doc): method count_words (line 442) | def count_words(cls, doc): method fewshot_context (line 447) | def fewshot_context(self, doc, num_fewshot, rnd=None, description=None... method apply_filters (line 505) | def apply_filters(self) -> list[Instance] | None: method dump_config (line 514) | def dump_config(self) -> dict: method set_config (line 520) | def set_config(self, key: str, value: Any, update: bool = False) -> None: method override_metric (line 535) | def override_metric(self, metric_name: str) -> None: method set_fewshot_seed (line 560) | def set_fewshot_seed(self, seed: int | None = None) -> None: method eval_docs (line 566) | def eval_docs(self) -> datasets.Dataset | list[dict]: method doc_iterator (line 576) | def doc_iterator( method resolve_field (line 609) | def resolve_field(doc: dict[str, Any], field: str | None = None): method task_name (line 614) | def task_name(self) -> str: class ConfigurableTask (line 618) | class ConfigurableTask(Task): method __init__ (line 623) | def __init__( method download (line 855) | def download(self, dataset_kwargs: dict[str, Any] | None = None, **kwa... method has_training_docs (line 875) | def has_training_docs(self) -> bool: method has_validation_docs (line 878) | def has_validation_docs(self) -> bool: method has_test_docs (line 881) | def has_test_docs(self) -> bool: method training_docs (line 884) | def training_docs(self) -> datasets.Dataset: method validation_docs (line 892) | def validation_docs(self) -> datasets.Dataset: method test_docs (line 900) | def test_docs(self) -> datasets.Dataset: method fewshot_docs (line 906) | def fewshot_docs(self): method fewshot_context (line 933) | def fewshot_context( method build_qa_turn (line 1044) | def build_qa_turn( method multiple_input_context (line 1109) | def multiple_input_context( method apply_filters (line 1160) | def apply_filters(self) -> list[Instance] | None: method should_decontaminate (line 1169) | def should_decontaminate(self): method doc_to_decontamination_query (line 1172) | def doc_to_decontamination_query(self, doc: dict): method _process_doc (line 1189) | def _process_doc(self, doc: dict) -> dict: method doc_to_text (line 1200) | def doc_to_text(self, doc, doc_to_text=None): method doc_to_target (line 1236) | def doc_to_target(self, doc: Mapping, doc_to_target=None) -> int | str... method doc_to_choice (line 1282) | def doc_to_choice(self, doc: Any, doc_to_choice=None) -> list[str]: method doc_to_image (line 1308) | def doc_to_image(self, doc: Any, doc_to_image=None) -> int | str | lis... method doc_to_audio (line 1331) | def doc_to_audio(self, doc: Any, doc_to_audio=None) -> int | str | lis... method doc_to_prefix (line 1354) | def doc_to_prefix(self, doc): method construct_requests (line 1362) | def construct_requests( method process_results (line 1455) | def process_results(self, doc, results): method aggregation (line 1666) | def aggregation(self) -> dict: method higher_is_better (line 1669) | def higher_is_better(self) -> dict: method get_config (line 1672) | def get_config(self, key: str) -> Any: method task_name (line 1676) | def task_name(self) -> str: method __repr__ (line 1679) | def __repr__(self): class MultipleChoiceTask (line 1688) | class MultipleChoiceTask(Task): method doc_to_target (line 1691) | def doc_to_target(self, doc: dict) -> str: method construct_requests (line 1694) | def construct_requests(self, doc: dict, ctx: str, **kwargs) -> list[In... method process_results (line 1707) | def process_results(self, doc: dict, results: Iterable[tuple[float, bo... method higher_is_better (line 1722) | def higher_is_better(self) -> dict: method aggregation (line 1728) | def aggregation(self) -> dict: class PerplexityTask (line 1735) | class PerplexityTask(Task): method has_training_docs (line 1738) | def has_training_docs(self) -> bool: method fewshot_examples (line 1741) | def fewshot_examples(self, k: int, rnd) -> list: method fewshot_context (line 1748) | def fewshot_context(self, doc: dict, num_fewshot: int) -> Literal[""]: method higher_is_better (line 1756) | def higher_is_better(self) -> dict: method doc_to_decontamination_query (line 1763) | def doc_to_decontamination_query(self, doc): method doc_to_text (line 1766) | def doc_to_text(self, doc) -> str: method doc_to_target (line 1769) | def doc_to_target(self, doc): method construct_requests (line 1772) | def construct_requests(self, doc: dict, ctx: str | None, **kwargs): method process_results (line 1784) | def process_results(self, doc: dict, results: tuple[float]) -> dict: method aggregation (line 1794) | def aggregation(self) -> dict: method count_bytes (line 1802) | def count_bytes(cls, doc) -> int: method count_words (line 1806) | def count_words(cls, doc) -> int: FILE: lm_eval/api/utils.py function maybe_delimit (line 7) | def maybe_delimit(prefix: str | None, suffix: str | None, delimiter: str... function requires_delimiter (line 20) | def requires_delimiter(prefix: str, suffix: str) -> bool: function ends_with_whitespace (line 27) | def ends_with_whitespace(s: str) -> bool: class Message (line 33) | class Message: method to_dict (line 51) | def to_dict(self) -> dict[str, str]: method to_text (line 55) | def to_text(self) -> str: function messages_to_text (line 60) | def messages_to_text(messages: list[Message]) -> str: function multiturn_to_singleturn (line 65) | def multiturn_to_singleturn(messages: list[Message]) -> list[dict[str, A... function format_turn (line 86) | def format_turn(content: str, role: str, type: str | None = None) -> dic... function random_task_id (line 95) | def random_task_id(): FILE: lm_eval/caching/cache.py function load_from_cache (line 26) | def load_from_cache(file_name: str, cache: bool = False): function save_to_cache (line 41) | def save_to_cache(file_name, obj): function delete_cache (line 53) | def delete_cache(key: str = ""): FILE: lm_eval/config/evaluate_config.py class EvaluatorConfig (line 29) | class EvaluatorConfig: method from_cli (line 196) | def from_cli(cls, namespace: Namespace) -> "EvaluatorConfig": method from_config (line 231) | def from_config(cls, config_path: str | Path) -> "EvaluatorConfig": method load_yaml_config (line 241) | def load_yaml_config(config_path: str | Path) -> dict[str, Any]: method _parse_dict_args (line 261) | def _parse_dict_args(self): method _configure (line 268) | def _configure(self): method _validate_arguments (line 274) | def _validate_arguments(self): method _process_arguments (line 314) | def _process_arguments(self): method process_tasks (line 336) | def process_tasks(self, metadata: dict | None = None) -> "TaskManager": method _set_trust_remote_code (line 414) | def _set_trust_remote_code(self): FILE: lm_eval/config/group.py class AggMetricConfig (line 7) | class AggMetricConfig: method __post_init__ (line 34) | def __post_init__(self): class GroupConfig (line 47) | class GroupConfig: method __post_init__ (line 93) | def __post_init__(self): method to_dict (line 104) | def to_dict(self, keep_callable: bool = False) -> dict[str, str]: method serialize_function (line 113) | def serialize_function( FILE: lm_eval/config/task.py class FewshotConfig (line 21) | class FewshotConfig: method __post_init__ (line 43) | def __post_init__(self): method from_dict (line 50) | def from_dict( class TaskConfig (line 82) | class TaskConfig(dict): method __post_init__ (line 130) | def __post_init__(self) -> None: method __getitem__ (line 170) | def __getitem__(self, item): method __setitem__ (line 173) | def __setitem__(self, item, value): method to_dict (line 176) | def to_dict(self, keep_callable: bool = False) -> dict: method serialize_function (line 204) | def serialize_function( FILE: lm_eval/decontamination/archiver.py function json_serial (line 14) | def json_serial(obj: Any) -> str: class Archive (line 23) | class Archive: method __init__ (line 24) | def __init__(self, file_path: str, compression_level: int = 3) -> None: method add_data (line 33) | def add_data(self, data, meta=None) -> None: method commit (line 43) | def commit(self) -> None: class Reader (line 50) | class Reader: method __init__ (line 51) | def __init__(self) -> None: method read (line 54) | def read( class TextArchive (line 84) | class TextArchive: method __init__ (line 85) | def __init__(self, file_path, mode: str = "rb+") -> None: method add_data (line 96) | def add_data(self, data) -> None: method commit (line 99) | def commit(self) -> None: class TextReader (line 104) | class TextReader: method __init__ (line 105) | def __init__(self, file_path) -> None: method read_tqdm (line 110) | def read_tqdm(self, update_frequency: int = 10000): method read_and_tell (line 134) | def read_and_tell(self): method read (line 145) | def read(self): method read_slow (line 152) | def read_slow(self): class ZStdTextReader (line 164) | class ZStdTextReader: method __init__ (line 165) | def __init__(self, file) -> None: method read_tqdm (line 168) | def read_tqdm(self): FILE: lm_eval/decontamination/decontaminate.py function get_train_overlap_stub (line 14) | def get_train_overlap_stub(docs: dict, ngrams_path: str, ngrams_n_size: ... function get_train_overlap (line 37) | def get_train_overlap(docs_by_task_set: dict, ngrams_path: str, limit: i... FILE: lm_eval/decontamination/janitor.py function form_ngrams (line 25) | def form_ngrams(sequence: Iterator[T], n: int) -> Iterator[Tuple[T, ...]]: function word_ngrams (line 42) | def word_ngrams(s: str, n: int) -> Iterator[str]: function split_indices (line 74) | def split_indices(s: str) -> Iterator[Tuple[str, Tuple[int, int]]]: function word_ngrams_indices (line 81) | def word_ngrams_indices(s: str, n: int) -> Iterator[Tuple[str, Tuple[int... class Janitor (line 109) | class Janitor: method __init__ (line 111) | def __init__( method save_contamination_ngrams (line 140) | def save_contamination_ngrams(self, filename: str) -> None: method load_contamination_ngrams (line 144) | def load_contamination_ngrams(self, filename: str) -> None: method register_contaminant (line 152) | def register_contaminant(self, dirt_string: str) -> None: method clean (line 161) | def clean(self, dirty_string: str) -> List[str]: method _split_chunks (line 171) | def _split_chunks( method register_contaminant_cpp (line 196) | def register_contaminant_cpp(self, dirt_string) -> None: method clean_cpp (line 201) | def clean_cpp(self, dirty_string: str) -> List[str]: method normalize_string (line 211) | def normalize_string(self, s: str) -> str: method register_contaminant_python (line 214) | def register_contaminant_python(self, dirt_string: str) -> None: method clean_python (line 219) | def clean_python(self, dirty_string: str) -> List[str]: FILE: lm_eval/defaults.py function _strtobool (line 13) | def _strtobool(val: str) -> bool: function _envbool (line 25) | def _envbool(var: str, default: bool = False) -> bool: function default_gen_kwargs (line 38) | def default_gen_kwargs( FILE: lm_eval/evaluator.py function simple_evaluate (line 54) | def simple_evaluate( function evaluate (line 414) | def evaluate( FILE: lm_eval/evaluator_utils.py class ResultAcc (line 29) | class ResultAcc(TypedDict): function print_writeout (line 37) | def print_writeout(task: Task) -> None: function get_sample_size (line 49) | def get_sample_size(task, limit: int | float | None) -> int | None: function find_test_root (line 58) | def find_test_root(start_path: pathlib.Path) -> pathlib.Path: function run_task_tests (line 76) | def run_task_tests(task_list: list[str]): class EvalAcc (line 99) | class EvalAcc: method collect (line 120) | def collect(self) -> tuple[dict[str, _TaskMetrics], dict[str, _TaskMet... method _to_eval_results (line 134) | def _to_eval_results( function _compute_task_aggregations (line 173) | def _compute_task_aggregations( function _collect_results (line 222) | def _collect_results( function aggregate_groups (line 275) | def aggregate_groups( function _get_root_groups (line 302) | def _get_root_groups(groups: dict[str, Group]) -> list[Group]: function _collect_groups_bottom_up (line 319) | def _collect_groups_bottom_up(groups: dict[str, Group]) -> list[Group]: function _process_results (line 349) | def _process_results( function _propagate_num_fewshot (line 395) | def _propagate_num_fewshot( function _propagate_higher_is_better (line 404) | def _propagate_higher_is_better( function _log_selected_tasks (line 423) | def _log_selected_tasks( function _handle_back_comp (line 483) | def _handle_back_comp( FILE: lm_eval/filters/__init__.py function build_filter_ensemble (line 11) | def build_filter_ensemble( FILE: lm_eval/filters/custom.py class CustomFilter (line 6) | class CustomFilter(Filter): method __init__ (line 11) | def __init__(self, **kwargs) -> None: method apply (line 16) | def apply(self, resps, docs): FILE: lm_eval/filters/decontamination.py class DecontaminationFilter (line 6) | class DecontaminationFilter(Filter): method __init__ (line 13) | def __init__(self, path) -> None: method apply (line 21) | def apply(self, resps, docs) -> None: FILE: lm_eval/filters/extraction.py class RegexFilter (line 10) | class RegexFilter(Filter): method __init__ (line 18) | def __init__( method apply (line 33) | def apply(self, resps: list[list[str]], docs: list[dict]) -> list[list... class POSFilter (line 63) | class POSFilter(Filter): method __init__ (line 66) | def __init__( method apply (line 83) | def apply(self, resps, docs): class WhitespaceFilter (line 109) | class WhitespaceFilter(Filter): method apply (line 112) | def apply(self, resps: list[list[str]], docs: list[dict]) -> list[list... class MultiChoiceRegexFilter (line 126) | class MultiChoiceRegexFilter(RegexFilter): method __init__ (line 134) | def __init__( method apply (line 157) | def apply(self, resps: list[list[str]], docs: list[dict]) -> list[list... FILE: lm_eval/filters/selection.py class TakeFirstFilter (line 13) | class TakeFirstFilter(Filter): method __init__ (line 14) | def __init__(self) -> None: method apply (line 19) | def apply(self, resps, docs): class TakeKFilter (line 27) | class TakeKFilter(Filter): method __init__ (line 28) | def __init__(self, **kwargs) -> None: method apply (line 33) | def apply(self, resps, docs): class MajorityVoteFilter (line 44) | class MajorityVoteFilter(Filter): method __init__ (line 45) | def __init__(self) -> None: method apply (line 50) | def apply(self, resps, docs): FILE: lm_eval/filters/transformation.py class LowercaseFilter (line 8) | class LowercaseFilter(Filter): method __init__ (line 9) | def __init__(self) -> None: method apply (line 12) | def apply(self, resps, docs): class UppercaseFilter (line 20) | class UppercaseFilter(Filter): method __init__ (line 21) | def __init__(self) -> None: method apply (line 24) | def apply(self, resps, docs): class MapFilter (line 32) | class MapFilter(Filter): method __init__ (line 33) | def __init__(self, mapping_dict: dict = None, default_value=None) -> N... method apply (line 54) | def apply(self, resps, docs): class SPANFilter (line 62) | class SPANFilter(Filter): method __init__ (line 63) | def __init__(self) -> None: method apply (line 66) | def apply(self, resps, docs): FILE: lm_eval/loggers/evaluation_tracker.py class GeneralConfigTracker (line 38) | class GeneralConfigTracker: method __init__ (line 70) | def __init__(self) -> None: method _get_model_name (line 75) | def _get_model_name(model_args: str | dict[str, Any] | None) -> str | ... method log_experiment_args (line 95) | def log_experiment_args( method log_end_time (line 117) | def log_end_time(self) -> None: class EvaluationTracker (line 123) | class EvaluationTracker: method __init__ (line 130) | def __init__( method _api (line 222) | def _api(token: str | None = None) -> "HfApi | None": method save_results_aggregated (line 230) | def save_results_aggregated( method save_results_samples (line 320) | def save_results_samples( method recreate_metadata_card (line 424) | def recreate_metadata_card(self) -> None: FILE: lm_eval/loggers/utils.py function remove_none_pattern (line 15) | def remove_none_pattern(input_string: str) -> tuple[str, bool]: function _handle_non_serializable (line 37) | def _handle_non_serializable(o: Any) -> int | str | list: function get_commit_from_path (line 56) | def get_commit_from_path(repo_path: Path | str) -> str | None: function get_git_commit_hash (line 83) | def get_git_commit_hash(): function add_env_info (line 97) | def add_env_info(storage: dict[str, Any]): function add_tokenizer_info (line 131) | def add_tokenizer_info(storage: dict[str, Any], lm): FILE: lm_eval/loggers/wandb_logger.py function get_wandb_printer (line 16) | def get_wandb_printer() -> Literal["Printer"]: class WandbLogger (line 24) | class WandbLogger: method __init__ (line 25) | def __init__(self, init_args=None, config_args=None) -> None: method post_init (line 66) | def post_init(self, results: Dict[str, Any]) -> None: method _get_config (line 71) | def _get_config(self) -> Dict[str, Any]: method _sanitize_results_dict (line 82) | def _sanitize_results_dict(self) -> Tuple[Dict[str, str], Dict[str, An... method _log_results_as_table (line 118) | def _log_results_as_table(self) -> None: method _log_results_as_artifact (line 168) | def _log_results_as_artifact(self) -> None: method log_eval_result (line 180) | def log_eval_result(self) -> None: method _generate_dataset (line 196) | def _generate_dataset( method _log_samples_as_artifact (line 287) | def _log_samples_as_artifact( method log_eval_samples (line 307) | def log_eval_samples(self, samples: Dict[str, List[Dict[str, Any]]]) -... FILE: lm_eval/models/__init__.py function _register_all_models (line 60) | def _register_all_models(): FILE: lm_eval/models/anthropic_llms.py function anthropic_completion (line 17) | def anthropic_completion( function anthropic_chat (line 80) | def anthropic_chat( class AnthropicLM (line 145) | class AnthropicLM(LM): method __init__ (line 148) | def __init__( method eot_token_id (line 186) | def eot_token_id(self): method max_length (line 191) | def max_length(self) -> int: method max_gen_toks (line 195) | def max_gen_toks(self) -> int: method batch_size (line 199) | def batch_size(self): method device (line 204) | def device(self): method tok_encode (line 208) | def tok_encode(self, string: str) -> List[int]: method tok_decode (line 211) | def tok_decode(self, tokens: List[int]) -> str: method _loglikelihood_tokens (line 214) | def _loglikelihood_tokens(self, requests, disable_tqdm: bool = False): method generate_until (line 217) | def generate_until(self, requests, disable_tqdm: bool = False) -> List... method _model_call (line 261) | def _model_call(self, inps): method _model_generate (line 265) | def _model_generate(self, context, max_length, eos_token_id): method loglikelihood (line 269) | def loglikelihood(self, requests, disable_tqdm: bool = False): method loglikelihood_rolling (line 272) | def loglikelihood_rolling(self, requests, disable_tqdm: bool = False): class AnthropicChat (line 277) | class AnthropicChat(LocalCompletionsAPI): method __init__ (line 278) | def __init__( method api_key (line 297) | def api_key(self): method header (line 307) | def header(self): method _create_payload (line 313) | def _create_payload( method parse_generations (line 359) | def parse_generations( method tok_encode (line 370) | def tok_encode( method loglikelihood (line 379) | def loglikelihood(self, requests, **kwargs): FILE: lm_eval/models/api_models.py class JsonChatStr (line 54) | class JsonChatStr(NamedTuple): method encode (line 57) | def encode(self, encoding): function create_image_prompt (line 61) | def create_image_prompt( class TemplateAPI (line 104) | class TemplateAPI(TemplateLM): method __init__ (line 107) | def __init__( method _create_payload (line 252) | def _create_payload( method create_message (line 265) | def create_message( method parse_logprobs (line 297) | def parse_logprobs( method parse_generations (line 308) | def parse_generations(outputs: Union[Any, List[Any]], **kwargs) -> Lis... method api_key (line 313) | def api_key(self) -> str: method header (line 318) | def header(self) -> dict: method tokenizer_name (line 323) | def tokenizer_name(self) -> str: method apply_chat_template (line 330) | def apply_chat_template( method eot_token_id (line 353) | def eot_token_id(self) -> Optional[int]: method eos_string (line 365) | def eos_string(self) -> Optional[str]: method prefix_token_id (line 382) | def prefix_token_id(self) -> Optional[int]: method tok_encode (line 397) | def tok_encode( method decode_batch (line 446) | def decode_batch(self, tokens: List[List[int]]) -> List[str]: method model_call (line 454) | def model_call( method amodel_call (line 490) | async def amodel_call( method batch_loglikelihood_requests (line 552) | def batch_loglikelihood_requests( method get_batched_requests (line 575) | async def get_batched_requests( method _loglikelihood_tokens (line 620) | def _loglikelihood_tokens(self, requests, **kwargs) -> List[Tuple[floa... method generate_until (line 683) | def generate_until( method loglikelihood_rolling (line 832) | def loglikelihood_rolling( FILE: lm_eval/models/dummy.py class DummyLM (line 11) | class DummyLM(LM): method __init__ (line 14) | def __init__(self, *args, write_out: bool = False, **kwargs) -> None: method create_from_arg_string (line 19) | def create_from_arg_string(cls, arg_string, additional_config=None): method loglikelihood (line 22) | def loglikelihood(self, requests, disable_tqdm: bool = False): method generate_until (line 33) | def generate_until(self, requests, disable_tqdm: bool = False): method loglikelihood_rolling (line 45) | def loglikelihood_rolling(self, requests, disable_tqdm: bool = False): method tokenizer (line 54) | def tokenizer(self): method apply_chat_template (line 59) | def apply_chat_template( FILE: lm_eval/models/gguf.py function get_result (line 15) | def get_result(logprobs, context_length): class GGUFLM (line 37) | class GGUFLM(LM): method __init__ (line 38) | def __init__(self, base_url=None, max_length=2048, **kwargs): method gguf_completion (line 46) | def gguf_completion( method loglikelihood (line 75) | def loglikelihood(self, requests, disable_tqdm: bool = False): method generate_until (line 104) | def generate_until(self, requests, disable_tqdm: bool = False): method loglikelihood_rolling (line 129) | def loglikelihood_rolling(self, requests, disable_tqdm: bool = False): FILE: lm_eval/models/hf_audiolm.py class HFAUDIOLMQWEN (line 22) | class HFAUDIOLMQWEN(HFLM): method __init__ (line 30) | def __init__( method _create_tokenizer (line 42) | def _create_tokenizer( method apply_chat_template (line 85) | def apply_chat_template( method _model_multimodal_generate (line 98) | def _model_multimodal_generate(self, inputs, max_length, stop, **gener... method tok_batch_multimodal_encode (line 124) | def tok_batch_multimodal_encode( method generate_until (line 165) | def generate_until( method loglikelihood_rolling (line 290) | def loglikelihood_rolling(self, requests: list[Instance]) -> list[float]: method loglikelihood (line 296) | def loglikelihood( FILE: lm_eval/models/hf_steered.py function steer (line 23) | def steer( class SteeredModel (line 67) | class SteeredModel(HFLM): method __init__ (line 70) | def __init__( method derive_steer_config (line 147) | def derive_steer_config(cls, steer_path: str): method add (line 210) | def add( method clamp (line 231) | def clamp( method forward (line 270) | def forward(self, *args, **kwargs): method _model_call (line 274) | def _model_call(self, *args, **kwargs): method _model_generate (line 278) | def _model_generate(self, *args, **kwargs): FILE: lm_eval/models/hf_vlms.py class HFMultimodalLM (line 30) | class HFMultimodalLM(HFLM): method __init__ (line 38) | def __init__( method _create_tokenizer (line 112) | def _create_tokenizer( method tok_multimodal_encode (line 158) | def tok_multimodal_encode( method _encode_multimodal_pair (line 188) | def _encode_multimodal_pair(self, context, continuation, images): method apply_chat_template (line 218) | def apply_chat_template( method chat_template (line 275) | def chat_template(self, chat_template: bool | str = False) -> str | None: method tok_batch_multimodal_encode (line 287) | def tok_batch_multimodal_encode( method _model_multimodal_call (line 342) | def _model_multimodal_call(self, inps, imgs, attn_mask=None, labels=No... method _model_multimodal_generate (line 350) | def _model_multimodal_generate(self, inputs, max_length, stop, **gener... method _batch_images (line 376) | def _batch_images(self, image_encs): method loglikelihood_rolling (line 394) | def loglikelihood_rolling(self, requests: list[Instance]) -> list[float]: method loglikelihood (line 403) | def loglikelihood( method _multimodal_loglikelihood_tokens (line 439) | def _multimodal_loglikelihood_tokens( method generate_until (line 625) | def generate_until( FILE: lm_eval/models/huggingface.py class HFLM (line 60) | class HFLM(TemplateLM): method __init__ (line 70) | def __init__( method _get_accelerate_args (line 442) | def _get_accelerate_args( method config (line 529) | def config(self): method model (line 534) | def model(self): method eot_token_id (line 542) | def eot_token_id(self) -> int: method prefix_token_id (line 547) | def prefix_token_id(self) -> int: method max_length (line 556) | def max_length(self) -> int: method max_gen_toks (line 570) | def max_gen_toks(self) -> int: method batch_size (line 574) | def batch_size(self): method device (line 578) | def device(self): method rank (line 582) | def rank(self): method world_size (line 586) | def world_size(self): method all_gather (line 589) | def all_gather(self, tensor): method gather_object (line 594) | def gather_object(self, obj, dst=0): method barrier (line 601) | def barrier(self): method tokenizer_name (line 606) | def tokenizer_name(self) -> str: method _get_backend (line 609) | def _get_backend( method _get_config (line 669) | def _get_config( method _create_model (line 687) | def _create_model( method _create_tokenizer (line 856) | def _create_tokenizer( method _detect_batch_size (line 917) | def _detect_batch_size(self, requests: Sequence | None = None, pos: in... method tok_encode (line 976) | def tok_encode( method tok_batch_encode (line 1001) | def tok_batch_encode( method tok_decode (line 1044) | def tok_decode(self, tokens: Iterator[list[str]], skip_special_tokens:... method _model_call (line 1047) | def _model_call( method _model_generate (line 1089) | def _model_generate( method _select_cont_toks (line 1127) | def _select_cont_toks( method loglikelihood_rolling (line 1150) | def loglikelihood_rolling( method _batch_scheduler (line 1236) | def _batch_scheduler(self, pos, n_reordered_requests): method _loglikelihood_tokens (line 1253) | def _loglikelihood_tokens( method generate_until (line 1490) | def generate_until( method apply_chat_template (line 1634) | def apply_chat_template( method get_model_info (line 1661) | def get_model_info(self) -> dict: FILE: lm_eval/models/ibm_watsonx_ai.py class LogLikelihoodResult (line 21) | class LogLikelihoodResult(NamedTuple): function _verify_credentials (line 26) | def _verify_credentials(creds: dict) -> None: function get_watsonx_credentials (line 73) | def get_watsonx_credentials() -> dict[str, str | None]: class WatsonxLLM (line 120) | class WatsonxLLM(LM): method create_from_arg_string (line 127) | def create_from_arg_string( method __init__ (line 191) | def __init__( method _has_stop_token (line 228) | def _has_stop_token(response_tokens: list[str], context_tokens: list[s... method _check_model_logprobs_support (line 257) | def _check_model_logprobs_support(self): method _get_log_likelihood (line 278) | def _get_log_likelihood( method generate_until (line 312) | def generate_until(self, requests: list[Instance]) -> list[str]: method loglikelihood (line 349) | def loglikelihood(self, requests: list[Instance]) -> list[tuple[float,... method loglikelihood_rolling (line 416) | def loglikelihood_rolling(self, requests) -> list[float]: method tokenizer_name (line 470) | def tokenizer_name(self) -> str: method apply_chat_template (line 473) | def apply_chat_template( FILE: lm_eval/models/mamba_lm.py class MambaLMWrapper (line 10) | class MambaLMWrapper(HFLM): method __init__ (line 11) | def __init__( method _get_config (line 66) | def _get_config( method _create_model (line 84) | def _create_model( method _model_generate (line 114) | def _model_generate(self, context, max_length, stop, **generation_kwar... FILE: lm_eval/models/megatron_lm.py function _add_megatron_to_path (line 74) | def _add_megatron_to_path(): function _check_dist_ckpt (line 93) | def _check_dist_ckpt(load_path: str) -> bool: function _parse_extra_args (line 105) | def _parse_extra_args(extra_args: str | None) -> list[str]: class MegatronLMEval (line 130) | class MegatronLMEval(LM): method __init__ (line 154) | def __init__( method _validate_parallelism_config (line 247) | def _validate_parallelism_config(self, devices: int, tp: int, pp: int,... method _initialize_megatron (line 309) | def _initialize_megatron(self, **kwargs): method eot_token_id (line 595) | def eot_token_id(self) -> int: method prefix_token_id (line 606) | def prefix_token_id(self) -> int: method max_length (line 620) | def max_length(self) -> int: method max_gen_toks (line 624) | def max_gen_toks(self) -> int: method batch_size (line 628) | def batch_size(self) -> int: method device (line 632) | def device(self) -> torch.device: method rank (line 636) | def rank(self) -> int: method world_size (line 640) | def world_size(self) -> int: method accelerator (line 644) | def accelerator(self): method all_gather (line 648) | def all_gather(self, tensor: torch.Tensor) -> torch.Tensor: method gather_object (line 652) | def gather_object(self, obj, dst: int = 0): method barrier (line 661) | def barrier(self) -> None: class _Accelerator (line 665) | class _Accelerator: method __init__ (line 672) | def __init__(self, world_size, device): method wait_for_everyone (line 676) | def wait_for_everyone(self): method gather (line 681) | def gather(self, local_tensor): method gather_object (line 705) | def gather_object(self, local_obj): method tok_encode (line 714) | def tok_encode(self, string: str, add_special_tokens: bool = False) ->... method tok_decode (line 721) | def tok_decode(self, tokens: list[int]) -> str: method _encode_pair (line 728) | def _encode_pair( method _model_forward (line 744) | def _model_forward( method _distribute_requests (line 823) | def _distribute_requests(self, requests: list) -> tuple[list, list[int]]: method _gather_results (line 840) | def _gather_results(self, local_results: list, sizes: list[int]) -> list: method loglikelihood (line 860) | def loglikelihood(self, requests: list[Instance]) -> list[tuple[float,... method _loglikelihood_tokens (line 891) | def _loglikelihood_tokens( method loglikelihood_rolling (line 1010) | def loglikelihood_rolling( method generate_until (line 1055) | def generate_until( FILE: lm_eval/models/mistral3.py class Mistral3LM (line 33) | class Mistral3LM(HFLM): method __init__ (line 44) | def __init__(self, **kwargs): method _get_backend (line 59) | def _get_backend( method _model_call (line 74) | def _model_call( method max_length (line 99) | def max_length(self) -> int: FILE: lm_eval/models/nemo_lm.py function _patch_pretrained_cfg (line 42) | def _patch_pretrained_cfg( function _get_target_from_class (line 72) | def _get_target_from_class(target_class) -> str: function load_model (line 76) | def load_model( function setup_distributed_environment (line 145) | def setup_distributed_environment(trainer): class NeMoLM (line 168) | class NeMoLM(LM): method __init__ (line 169) | def __init__( method create_from_arg_string (line 275) | def create_from_arg_string(cls, arg_string, additional_config=None): method eot_token_id (line 283) | def eot_token_id(self): method max_length (line 290) | def max_length(self): method max_gen_toks (line 294) | def max_gen_toks(self): method batch_size (line 298) | def batch_size(self): method device (line 302) | def device(self): method rank (line 306) | def rank(self): method world_size (line 310) | def world_size(self): method all_gather (line 313) | def all_gather(self, tensor): method gather_object (line 320) | def gather_object(self, obj, dst=0): method barrier (line 327) | def barrier(self): method tok_encode (line 331) | def tok_encode(self, string: str): method tok_decode (line 334) | def tok_decode(self, tokens): method _encode_pair (line 337) | def _encode_pair(self, context, continuation): method loglikelihood (line 348) | def loglikelihood(self, requests): method loglikelihood_rolling (line 364) | def loglikelihood_rolling( method _loglikelihood_tokens (line 398) | def _loglikelihood_tokens(self, requests, disable_tqdm=False): method generate_until (line 491) | def generate_until(self, requests): FILE: lm_eval/models/neuron_optimum.py class CustomNeuronModelForCausalLM (line 37) | class CustomNeuronModelForCausalLM(NeuronModelForCausalLM): method generate (line 40) | def generate( class NEURON_HF (line 126) | class NEURON_HF(TemplateLM): method __init__ (line 133) | def __init__( method config (line 248) | def config(self): method eot_token_id (line 253) | def eot_token_id(self): method prefix_token_id (line 258) | def prefix_token_id(self): method max_length (line 263) | def max_length(self): method max_gen_toks (line 267) | def max_gen_toks(self) -> int: method batch_size (line 271) | def batch_size(self): method device (line 275) | def device(self): method rank (line 280) | def rank(self): method world_size (line 284) | def world_size(self): method tok_encode (line 287) | def tok_encode(self, string: str, left_truncate_len=None, add_special_... method tok_batch_encode (line 300) | def tok_batch_encode( method tok_decode (line 329) | def tok_decode(self, tokens): method _model_generate (line 332) | def _model_generate(self, context, max_length, stop, **generation_kwar... method _select_cont_toks (line 356) | def _select_cont_toks(self, logits, contlen=None, inplen=None): method loglikelihood_rolling (line 366) | def loglikelihood_rolling(self, requests, disable_tqdm: bool = False): method _loglikelihood_tokens (line 419) | def _loglikelihood_tokens( method generate_until (line 568) | def generate_until(self, requests, disable_tqdm: bool = False): FILE: lm_eval/models/openai_completions.py class LocalCompletionsAPI (line 16) | class LocalCompletionsAPI(TemplateAPI): method __init__ (line 17) | def __init__( method _create_payload (line 61) | def _create_payload( method parse_logprobs (line 99) | def parse_logprobs( method parse_generations (line 125) | def parse_generations(outputs: Union[Dict, List[Dict]], **kwargs) -> L... method api_key (line 137) | def api_key(self): class LocalChatCompletion (line 142) | class LocalChatCompletion(LocalCompletionsAPI): method __init__ (line 150) | def __init__( method _create_payload (line 175) | def _create_payload( method parse_generations (line 211) | def parse_generations(outputs: Union[Dict, List[Dict]], **kwargs) -> L... method tok_encode (line 229) | def tok_encode( method loglikelihood (line 238) | def loglikelihood(self, requests, **kwargs): class OpenAICompletionsAPI (line 247) | class OpenAICompletionsAPI(LocalCompletionsAPI): method __init__ (line 248) | def __init__( method api_key (line 259) | def api_key(self): method loglikelihood (line 268) | def loglikelihood(self, requests, **kwargs): method chat_template (line 277) | def chat_template(self, chat_template: Union[bool, str] = False) -> Op... class OpenAIChatCompletion (line 282) | class OpenAIChatCompletion(LocalChatCompletion): method __init__ (line 283) | def __init__( method api_key (line 303) | def api_key(self): method loglikelihood (line 312) | def loglikelihood(self, requests, **kwargs): method _create_payload (line 317) | def _create_payload( class AzureOpenaiChatCompletionsLM (line 359) | class AzureOpenaiChatCompletionsLM(OpenAIChatCompletion): method __init__ (line 360) | def __init__( method api_key (line 384) | def api_key(self): FILE: lm_eval/models/optimum_habana.py class HabanaLM (line 18) | class HabanaLM(HFLM): method __init__ (line 30) | def __init__(self, **kwargs) -> None: method max_length (line 52) | def max_length(self) -> int: method max_length (line 57) | def max_length(self, value: int) -> None: method find_bucket (line 60) | def find_bucket(self, length: int, key=lambda b, length: b >= length) ... method _model_call (line 75) | def _model_call(self, inps: torch.Tensor) -> torch.Tensor: method setup_generation_config_gaudi (line 97) | def setup_generation_config_gaudi(self, **kwargs): method _create_model (line 108) | def _create_model(self, *args, **kwargs) -> None: method generate_until (line 125) | def generate_until( method _model_generate (line 137) | def _model_generate( FILE: lm_eval/models/optimum_ipex.py class IPEXLM (line 13) | class IPEXLM(HFLM): method __init__ (line 18) | def __init__( method _create_model (line 33) | def _create_model( FILE: lm_eval/models/optimum_lm.py class OptimumLM (line 14) | class OptimumLM(HFLM): method __init__ (line 25) | def __init__( method _create_model (line 43) | def _create_model( FILE: lm_eval/models/sglang_causallms.py class SGLangLM (line 34) | class SGLangLM(TemplateLM): method __init__ (line 37) | def __init__( method loglikelihood_rolling (line 124) | def loglikelihood_rolling( method generate_until (line 193) | def generate_until( method _model_generate (line 288) | def _model_generate( method eot_token_id (line 319) | def eot_token_id(self): method prefix_token_id (line 324) | def prefix_token_id(self): method max_length (line 333) | def max_length(self): method max_gen_toks (line 343) | def max_gen_toks(self): method tok_encode (line 347) | def tok_encode( method tok_decode (line 372) | def tok_decode(self, tokens: List[int]) -> str: method tokenizer_name (line 377) | def tokenizer_name(self) -> str: method chat_template (line 387) | def chat_template(self, chat_template: Union[bool, str] = False) -> str: method apply_chat_template (line 408) | def apply_chat_template( method _loglikelihood_tokens (line 423) | def _loglikelihood_tokens( method _parse_logprobs (line 483) | def _parse_logprobs(tokens: List, outputs, ctxlen: int) -> Tuple[float... method modify_gen_kwargs (line 519) | def modify_gen_kwargs(kwargs: dict) -> dict: FILE: lm_eval/models/sglang_generate_API.py class SGLANGGENERATEAPI (line 9) | class SGLANGGENERATEAPI(LocalCompletionsAPI): method __init__ (line 10) | def __init__( method _create_payload (line 20) | def _create_payload( method parse_logprobs (line 66) | def parse_logprobs( method parse_generations (line 90) | def parse_generations(outputs: Union[Dict, List[Dict]], **kwargs) -> L... method api_key (line 99) | def api_key(self): FILE: lm_eval/models/textsynth.py function textsynth_completion (line 29) | def textsynth_completion(**kwargs): class TextSynthLM (line 51) | class TextSynthLM(LM): method __init__ (line 52) | def __init__(self, engine, truncate: bool = False, **kwargs) -> None: method eot_token_id (line 68) | def eot_token_id(self): method max_length (line 73) | def max_length(self) -> int: method max_gen_toks (line 78) | def max_gen_toks(self) -> int: method batch_size (line 82) | def batch_size(self): method device (line 87) | def device(self): method tok_encode (line 91) | def tok_encode(self, string: str): method tok_decode (line 95) | def tok_decode(self, tokens): method loglikelihood (line 99) | def loglikelihood(self, requests, disable_tqdm: bool = False): method loglikelihood_rolling (line 123) | def loglikelihood_rolling(self, requests, disable_tqdm: bool = False): method generate_until (line 133) | def generate_until(self, requests, disable_tqdm: bool = False): method _model_call (line 166) | def _model_call(self, inps): method _model_generate (line 170) | def _model_generate(self, context, max_length, eos_token_id): FILE: lm_eval/models/utils.py class GenKwargs (line 33) | class GenKwargs(TypedDict, total=False): function chunks (line 42) | def chunks(iter, n: int = 0, fn=None): class MultiChoice (line 80) | class MultiChoice: method __init__ (line 81) | def __init__(self, choices) -> None: method __contains__ (line 85) | def __contains__(self, values) -> bool: method __iter__ (line 94) | def __iter__(self) -> Iterator: class Grouper (line 98) | class Grouper: method __init__ (line 105) | def __init__(self, arr, fn) -> None: method get_grouped (line 123) | def get_grouped(self): method get_original (line 134) | def get_original(self, grouped_dict): function undistribute (line 156) | def undistribute(iterable): function retry_on_specific_exceptions (line 196) | def retry_on_specific_exceptions( class Collator (line 236) | class Collator: method __init__ (line 249) | def __init__( method _group_by_index (line 270) | def _group_by_index(self) -> None: method _group_by_context (line 276) | def _group_by_context(self) -> None: method get_batched (line 282) | def get_batched( method get_cache (line 329) | def get_cache( method _reorder (line 390) | def _reorder(self, arr: list | tuple[tuple[int, Any], ...]) -> Iterator: method get_original (line 406) | def get_original(self, newarr: list) -> list: method __len__ (line 427) | def __len__(self): method group (line 431) | def group( method get_chunks (line 474) | def get_chunks( function configure_pad_token (line 515) | def configure_pad_token( function replace_placeholders (line 560) | def replace_placeholders( function flatten_image_list (line 594) | def flatten_image_list(images: list[list]): function handle_stop_sequences (line 605) | def handle_stop_sequences(until: str | list[str] | None, eos: str | None... function normalize_gen_kwargs (line 621) | def normalize_gen_kwargs( function resize_image (line 717) | def resize_image( function truncate_tokens (line 817) | def truncate_tokens( function maybe_truncate (line 836) | def maybe_truncate( function postprocess_generated_text (line 910) | def postprocess_generated_text( function has_bos_prefix (line 939) | def has_bos_prefix(sequence: str, bos_str: str | Iterable[str] | None = ... function _add_special_kwargs (line 948) | def _add_special_kwargs(add_special_tokens: bool | None, add_bos: bool |... FILE: lm_eval/models/utils_hf.py function pad_and_concat (line 8) | def pad_and_concat( function clear_torch_cache (line 59) | def clear_torch_cache() -> None: function get_dtype (line 64) | def get_dtype(dtype: str | torch.dtype) -> torch.dtype | str: class MultiTokenEOSCriteria (line 74) | class MultiTokenEOSCriteria(transformers.StoppingCriteria): method __init__ (line 77) | def __init__( method __call__ (line 100) | def __call__(self, input_ids, scores, **kwargs) -> bool: function stop_sequences_criteria (line 114) | def stop_sequences_criteria( FILE: lm_eval/models/vllm_causallms.py function _vllm_mp_worker (line 68) | def _vllm_mp_worker( class VLLM (line 126) | class VLLM(TemplateLM): method __init__ (line 130) | def __init__( method eot_token_id (line 288) | def eot_token_id(self): method prefix_token_id (line 293) | def prefix_token_id(self): method max_length (line 302) | def max_length(self) -> int: method max_gen_toks (line 319) | def max_gen_toks(self): method apply_chat_template (line 322) | def apply_chat_template( method tokenizer_name (line 355) | def tokenizer_name(self) -> str: method tok_encode (line 359) | def tok_encode( method tok_encode (line 363) | def tok_encode( method tok_encode (line 367) | def tok_encode( method _model_generate (line 428) | def _model_generate( method loglikelihood_rolling (line 558) | def loglikelihood_rolling( method generate_until (line 627) | def generate_until( method _loglikelihood_tokens (line 725) | def _loglikelihood_tokens( method _parse_logprobs (line 787) | def _parse_logprobs(tokens: list, outputs, ctxlen: int) -> tuple[float... method modify_gen_kwargs (line 850) | def modify_gen_kwargs( FILE: lm_eval/models/vllm_vlms.py class VLLM_VLM (line 33) | class VLLM_VLM(VLLM): method __init__ (line 36) | def __init__( method tok_batch_multimodal_encode (line 76) | def tok_batch_multimodal_encode( method _multimodal_model_generate (line 102) | def _multimodal_model_generate( method apply_chat_template (line 157) | def apply_chat_template( method generate_until (line 214) | def generate_until( method loglikelihood_rolling (line 309) | def loglikelihood_rolling( FILE: lm_eval/models/winml.py class WindowsML (line 32) | class WindowsML(TemplateLM): method create_from_arg_obj (line 43) | def create_from_arg_obj( method __init__ (line 67) | def __init__( method _validate_dependencies (line 120) | def _validate_dependencies(self) -> None: method _fix_winrt_runtime (line 150) | def _fix_winrt_runtime(self): method _register_winml_providers_to_genai (line 164) | def _register_winml_providers_to_genai(self) -> bool: method _setup_winml_devices_and_providers (line 199) | def _setup_winml_devices_and_providers(self) -> None: method _load_and_compile_model (line 238) | def _load_and_compile_model(self, model_path: str) -> None: method eot_token_id (line 283) | def eot_token_id(self) -> int: method prefix_token_id (line 310) | def prefix_token_id(self) -> int | None: method max_gen_toks (line 340) | def max_gen_toks(self) -> int: method tok_encode (line 349) | def tok_encode( method tok_decode (line 375) | def tok_decode(self, tokens: list[int]) -> str: method _run_genai_inference_for_full_logits (line 387) | def _run_genai_inference_for_full_logits(self, input_text: str) -> np.... method _loglikelihood_tokens (line 438) | def _loglikelihood_tokens( method loglikelihood (line 461) | def loglikelihood( method loglikelihood_rolling (line 574) | def loglikelihood_rolling( method generate_until (line 647) | def generate_until( method _run_genai_generation (line 694) | def _run_genai_generation( FILE: lm_eval/prompts/__init__.py function get_prompt (line 23) | def get_prompt(prompt_id: str, dataset_name: str = None, subset_name: st... function load_prompt_list (line 72) | def load_prompt_list( class PromptString (line 115) | class PromptString: method __init__ (line 116) | def __init__(self, prompt_string): method apply (line 119) | def apply(self, doc): FILE: lm_eval/result_schema.py class _TaskMetrics (line 110) | class _TaskMetrics(TypedDict, Generic[T], extra_items=T): class _SampleCount (line 131) | class _SampleCount(TypedDict): class _EvalConfig (line 141) | class _EvalConfig(TypedDict, total=False): class SampleResult (line 163) | class SampleResult(TypedDict, extra_items=float): FILE: lm_eval/tasks/__init__.py function get_task_name_from_config (line 36) | def get_task_name_from_config(task_config: dict[str, str]) -> str: function get_task_name_from_object (line 50) | def get_task_name_from_object(task_object): function _check_duplicates (line 63) | def _check_duplicates(task_dict: dict) -> None: function _log_task_dict (line 98) | def _log_task_dict(task_dict: dict, task_manager: "TaskManager") -> None: function get_task_dict (line 137) | def get_task_dict( FILE: lm_eval/tasks/_factory.py class TaskFactory (line 25) | class TaskFactory: method __init__ (line 32) | def __init__(self, *, meta: dict[str, Any] | None = None): method build (line 37) | def build( method _build_task (line 65) | def _build_task(self, entry: Entry, overrides: dict[str, Any] | None) ... method _build_group (line 85) | def _build_group( method _build_group_members (line 127) | def _build_group_members( method _build_tag (line 234) | def _build_tag( method _load_full_config (line 255) | def _load_full_config( function _ctor_accepts_config (line 283) | def _ctor_accepts_config(cls) -> bool: FILE: lm_eval/tasks/_index.py class Kind (line 19) | class Kind(Enum): class Entry (line 28) | class Entry: class TaskIndex (line 36) | class TaskIndex: method __init__ (line 41) | def __init__(self, *, meta: dict[str, str] | None = None) -> None: method build (line 45) | def build( method _iter_yaml_files (line 82) | def _iter_yaml_files(root: Path): method process_cfg (line 94) | def process_cfg( method _register_tags (line 139) | def _register_tags( method _kind_of (line 154) | def _kind_of(cfg: dict) -> Kind: method entry_from_path (line 168) | def entry_from_path(path: Path) -> Entry | None: method entry_from_config (line 179) | def entry_from_config(cfg: dict[str, Any]) -> Entry | None: method _str_to_set (line 192) | def _str_to_set(*args) -> set[str]: FILE: lm_eval/tasks/_yaml_loader.py function _mk_function_ctor (line 17) | def _mk_function_ctor(base_dir: Path, resolve: bool): function _make_loader (line 27) | def _make_loader(base_dir: Path, *, resolve_funcs: bool) -> type[yaml.Lo... function _load_module_with_cache (line 38) | def _load_module_with_cache(module_path: Path) -> Any: function _import_func_in_yml (line 93) | def _import_func_in_yml(qual: str, base_dir: Path): function _import_fun_from_str (line 130) | def _import_fun_from_str(path_str: str) -> Any: function load_yaml (line 164) | def load_yaml( FILE: lm_eval/tasks/aclue/_generate_configs.py function parse_args (line 35) | def parse_args(): FILE: lm_eval/tasks/acpbench/gen_2shot/acp_utils.py class ACPBench_Visitor (line 47) | class ACPBench_Visitor(Visitor): method __init__ (line 48) | def __init__(self) -> None: method action_list (line 56) | def action_list(self, tree): method prog_list (line 59) | def prog_list(self, tree): method progression_list (line 64) | def progression_list(self, tree): method action_none (line 67) | def action_none(self, tree): method action_name (line 70) | def action_name(self, tree): method index (line 78) | def index(self, tree): class ACPGrammarParser (line 84) | class ACPGrammarParser(object): method __init__ (line 85) | def __init__(self, task) -> None: method parse (line 91) | def parse(self, input, debug=False): function is_on_optimal_plan (line 135) | def is_on_optimal_plan(domain, problem, action, opt): function is_plan (line 177) | def is_plan(domain, problem, new_plan): function get_action_preconditions (line 196) | def get_action_preconditions(domain, problem, action): function generate_optimal_plans_for_problem_state (line 207) | def generate_optimal_plans_for_problem_state(P, state, num_plans, timeout): function generate_top_q_plans (line 228) | def generate_top_q_plans(domain, problem, num_plans=10, quality_bound=1.... function is_unsolvable_new_goal (line 241) | def is_unsolvable_new_goal(domain, problem, new_goal): function is_unsolvable (line 247) | def is_unsolvable(domain, problem): function extract_goal (line 274) | def extract_goal(prob): function entails (line 288) | def entails(state, partialstate): function progress (line 292) | def progress(state, act): function regress (line 302) | def regress(state, act): function get_STRIPS (line 312) | def get_STRIPS(domain, problem): function create_tmp_dom_prob_replace_init (line 330) | def create_tmp_dom_prob_replace_init(P, state, result_domain_file, resul... function fix_name (line 340) | def fix_name(s): function get_atoms_pddl (line 354) | def get_atoms_pddl(d, p, atoms): class Action (line 390) | class Action: method __init__ (line 391) | def __init__(self, name, pre, add, delete): method __str__ (line 397) | def __str__(self): method toJSON (line 404) | def toJSON(self): method __repr__ (line 416) | def __repr__(self): method __eq__ (line 419) | def __eq__(self, action): method __hash__ (line 422) | def __hash__(self): class STRIPS (line 426) | class STRIPS: method __init__ (line 427) | def __init__(self, domain, problem): method __str__ (line 453) | def __str__(self): method toJSON (line 460) | def toJSON(self): method operator_to_action (line 473) | def operator_to_action(self, op, check_fluents=True, check_static=False): method fix_pre_name (line 488) | def fix_pre_name(self, precondition): method action (line 493) | def action(self, name): method get_action_or_none (line 496) | def get_action_or_none(self, name): method fluent (line 501) | def fluent(self, name): method static_symbols (line 504) | def static_symbols(self): method fluent_symbols (line 507) | def fluent_symbols(self): method get_grounded_atoms (line 510) | def get_grounded_atoms(self, symbol): method get_applicable_actions (line 523) | def get_applicable_actions(self, s): method ground_problem (line 526) | def ground_problem(self, problem): method get_static (line 551) | def get_static(self): method PDDL_replace_init_pddl_parser (line 558) | def PDDL_replace_init_pddl_parser(self, s): function parse_ans (line 571) | def parse_ans(response: str, parser: ACPGrammarParser, task: str): function remove_garbage (line 582) | def remove_garbage(s): function compare_str (line 593) | def compare_str(s1, s2): function compare (line 597) | def compare(l1, l2): function check_prog_response (line 608) | def check_prog_response(resp): function clean_answer (line 618) | def clean_answer(resp, task): function get_grammar_task (line 642) | def get_grammar_task(task): function fix_action_name (line 666) | def fix_action_name(a): function str_remove_before_first_parentheses (line 671) | def str_remove_before_first_parentheses(s): function str_remove_after_last_parentheses (line 680) | def str_remove_after_last_parentheses(s): function cleanup_answer (line 691) | def cleanup_answer(ans): function set_equal (line 710) | def set_equal(ans1, ans2): class BaseEvaluator (line 714) | class BaseEvaluator(ABC): method __init__ (line 715) | def __init__(self) -> None: method get_score (line 719) | def get_score(self, ans, doc): method add_scores (line 722) | def add_scores(self, scores): method get_avg_score (line 725) | def get_avg_score(self): function get_evaluator (line 730) | def get_evaluator(group): class ActionReachabilityEvaluator (line 757) | class ActionReachabilityEvaluator(BaseEvaluator): method get_score (line 758) | def get_score(self, ans, doc): class ApplicabilityEvaluator (line 801) | class ApplicabilityEvaluator(BaseEvaluator): method get_score (line 802) | def get_score(self, ans, doc): function is_subsequence (line 817) | def is_subsequence(plan, new_plan): function is_subsequence_and_plan (line 828) | def is_subsequence_and_plan(domain, problem, plan, new_plan): class JustificationEvaluator (line 842) | class JustificationEvaluator(BaseEvaluator): method get_score (line 843) | def get_score(self, ans, doc): class LandmarksEvaluator (line 883) | class LandmarksEvaluator(BaseEvaluator): method get_score (line 884) | def get_score(self, ans, doc): class NextActionEvaluator (line 916) | class NextActionEvaluator(BaseEvaluator): method get_score (line 917) | def get_score(self, ans, doc): class ProgressionEvaluator (line 961) | class ProgressionEvaluator(BaseEvaluator): method get_score (line 962) | def get_score(self, ans, doc): class ReachabilityEvaluator (line 992) | class ReachabilityEvaluator(BaseEvaluator): method get_score (line 993) | def get_score(self, ans, doc): class ValidationEvaluator (line 1029) | class ValidationEvaluator(BaseEvaluator): method get_score (line 1030) | def get_score(self, ans, doc): function dump_item (line 1049) | def dump_item(item, **kwargs): function parse_prediction (line 1053) | def parse_prediction(prediction): class ACPGrammarFilter (line 1064) | class ACPGrammarFilter(RegexFilter): method __init__ (line 1067) | def __init__(self, *args, **kwargs): method clean_pos_neg (line 1071) | def clean_pos_neg(self, resp): method clean_simplified_plan (line 1082) | def clean_simplified_plan(self, resp): method apply (line 1091) | def apply(self, resps, docs): function process_acp_results (line 1107) | def process_acp_results(doc, results): function get_score (line 1111) | def get_score(references, predictions, **kwargs): FILE: lm_eval/tasks/acpbench/gen_2shot_with_pddl/acp_utils.py class ACPBench_Visitor (line 47) | class ACPBench_Visitor(Visitor): method __init__ (line 48) | def __init__(self) -> None: method action_list (line 56) | def action_list(self, tree): method prog_list (line 59) | def prog_list(self, tree): method progression_list (line 64) | def progression_list(self, tree): method action_none (line 67) | def action_none(self, tree): method action_name (line 70) | def action_name(self, tree): method index (line 78) | def index(self, tree): class ACPGrammarParser (line 84) | class ACPGrammarParser(object): method __init__ (line 85) | def __init__(self, task) -> None: method parse (line 91) | def parse(self, input, debug=False): function is_on_optimal_plan (line 135) | def is_on_optimal_plan(domain, problem, action, opt): function is_plan (line 177) | def is_plan(domain, problem, new_plan): function get_action_preconditions (line 196) | def get_action_preconditions(domain, problem, action): function generate_optimal_plans_for_problem_state (line 207) | def generate_optimal_plans_for_problem_state(P, state, num_plans, timeout): function generate_top_q_plans (line 228) | def generate_top_q_plans(domain, problem, num_plans=10, quality_bound=1.... function is_unsolvable_new_goal (line 241) | def is_unsolvable_new_goal(domain, problem, new_goal): function is_unsolvable (line 247) | def is_unsolvable(domain, problem): function extract_goal (line 274) | def extract_goal(prob): function entails (line 288) | def entails(state, partialstate): function progress (line 292) | def progress(state, act): function regress (line 302) | def regress(state, act): function get_STRIPS (line 312) | def get_STRIPS(domain, problem): function create_tmp_dom_prob_replace_init (line 330) | def create_tmp_dom_prob_replace_init(P, state, result_domain_file, resul... function fix_name (line 340) | def fix_name(s): function get_atoms_pddl (line 354) | def get_atoms_pddl(d, p, atoms): class Action (line 390) | class Action: method __init__ (line 391) | def __init__(self, name, pre, add, delete): method __str__ (line 397) | def __str__(self): method toJSON (line 404) | def toJSON(self): method __repr__ (line 416) | def __repr__(self): method __eq__ (line 419) | def __eq__(self, action): method __hash__ (line 422) | def __hash__(self): class STRIPS (line 426) | class STRIPS: method __init__ (line 427) | def __init__(self, domain, problem): method __str__ (line 453) | def __str__(self): method toJSON (line 460) | def toJSON(self): method operator_to_action (line 473) | def operator_to_action(self, op, check_fluents=True, check_static=False): method fix_pre_name (line 488) | def fix_pre_name(self, precondition): method action (line 493) | def action(self, name): method get_action_or_none (line 496) | def get_action_or_none(self, name): method fluent (line 501) | def fluent(self, name): method static_symbols (line 504) | def static_symbols(self): method fluent_symbols (line 507) | def fluent_symbols(self): method get_grounded_atoms (line 510) | def get_grounded_atoms(self, symbol): method get_applicable_actions (line 523) | def get_applicable_actions(self, s): method ground_problem (line 526) | def ground_problem(self, problem): method get_static (line 551) | def get_static(self): method PDDL_replace_init_pddl_parser (line 558) | def PDDL_replace_init_pddl_parser(self, s): function parse_ans (line 571) | def parse_ans(response: str, parser: ACPGrammarParser, task: str): function remove_garbage (line 582) | def remove_garbage(s): function compare_str (line 593) | def compare_str(s1, s2): function compare (line 597) | def compare(l1, l2): function check_prog_response (line 608) | def check_prog_response(resp): function clean_answer (line 618) | def clean_answer(resp, task): function get_grammar_task (line 642) | def get_grammar_task(task): function fix_action_name (line 666) | def fix_action_name(a): function str_remove_before_first_parentheses (line 671) | def str_remove_before_first_parentheses(s): function str_remove_after_last_parentheses (line 680) | def str_remove_after_last_parentheses(s): function cleanup_answer (line 691) | def cleanup_answer(ans): function set_equal (line 710) | def set_equal(ans1, ans2): class BaseEvaluator (line 714) | class BaseEvaluator(ABC): method __init__ (line 715) | def __init__(self) -> None: method get_score (line 719) | def get_score(self, ans, doc): method add_scores (line 722) | def add_scores(self, scores): method get_avg_score (line 725) | def get_avg_score(self): function get_evaluator (line 730) | def get_evaluator(group): class ActionReachabilityEvaluator (line 757) | class ActionReachabilityEvaluator(BaseEvaluator): method get_score (line 758) | def get_score(self, ans, doc): class ApplicabilityEvaluator (line 801) | class ApplicabilityEvaluator(BaseEvaluator): method get_score (line 802) | def get_score(self, ans, doc): function is_subsequence (line 817) | def is_subsequence(plan, new_plan): function is_subsequence_and_plan (line 828) | def is_subsequence_and_plan(domain, problem, plan, new_plan): class JustificationEvaluator (line 842) | class JustificationEvaluator(BaseEvaluator): method get_score (line 843) | def get_score(self, ans, doc): class LandmarksEvaluator (line 883) | class LandmarksEvaluator(BaseEvaluator): method get_score (line 884) | def get_score(self, ans, doc): class NextActionEvaluator (line 916) | class NextActionEvaluator(BaseEvaluator): method get_score (line 917) | def get_score(self, ans, doc): class ProgressionEvaluator (line 961) | class ProgressionEvaluator(BaseEvaluator): method get_score (line 962) | def get_score(self, ans, doc): class ReachabilityEvaluator (line 992) | class ReachabilityEvaluator(BaseEvaluator): method get_score (line 993) | def get_score(self, ans, doc): class ValidationEvaluator (line 1029) | class ValidationEvaluator(BaseEvaluator): method get_score (line 1030) | def get_score(self, ans, doc): function dump_item (line 1049) | def dump_item(item, **kwargs): function parse_prediction (line 1053) | def parse_prediction(prediction): class ACPGrammarFilter (line 1064) | class ACPGrammarFilter(RegexFilter): method __init__ (line 1067) | def __init__(self, *args, **kwargs): method clean_pos_neg (line 1071) | def clean_pos_neg(self, resp): method clean_simplified_plan (line 1082) | def clean_simplified_plan(self, resp): method apply (line 1091) | def apply(self, resps, docs): function process_acp_results (line 1107) | def process_acp_results(doc, results): function get_score (line 1111) | def get_score(references, predictions, **kwargs): FILE: lm_eval/tasks/afrimgsm/gen_utils.py class FunctionTag (line 7) | class FunctionTag: method __init__ (line 8) | def __init__(self, value): function prompt_func (line 12) | def prompt_func(mode, lang): function gen_lang_yamls (line 22) | def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None: function main (line 96) | def main() -> None: FILE: lm_eval/tasks/afrimgsm/utils.py function add_regex_pattern (line 75) | def add_regex_pattern(regex_pattern): function gen_lang_yamls (line 109) | def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None: function main (line 195) | def main() -> None: FILE: lm_eval/tasks/afrimmlu/direct/prompt_1/utils.py function doc_to_choice (line 4) | def doc_to_choice(doc): function doc_to_text (line 9) | def doc_to_text(doc): FILE: lm_eval/tasks/afrimmlu/direct/prompt_2/utils.py function doc_to_choice (line 4) | def doc_to_choice(doc): function doc_to_text (line 9) | def doc_to_text(doc): FILE: lm_eval/tasks/afrimmlu/direct/prompt_3/utils.py function doc_to_choice (line 4) | def doc_to_choice(doc): function doc_to_text (line 9) | def doc_to_text(doc): FILE: lm_eval/tasks/afrimmlu/direct/prompt_4/utils.py function doc_to_choice (line 4) | def doc_to_choice(doc): function doc_to_text (line 9) | def doc_to_text(doc): FILE: lm_eval/tasks/afrimmlu/direct/prompt_5/utils.py function doc_to_choice (line 4) | def doc_to_choice(doc): function doc_to_text (line 9) | def doc_to_text(doc): FILE: lm_eval/tasks/afrimmlu/gen_utils.py class FunctionTag (line 7) | class FunctionTag: method __init__ (line 8) | def __init__(self, value): function gen_lang_yamls (line 12) | def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None: function main (line 77) | def main() -> None: FILE: lm_eval/tasks/afrimmlu/translate/prompt_1/utils.py function doc_to_choice (line 4) | def doc_to_choice(doc): function doc_to_text (line 9) | def doc_to_text(doc): FILE: lm_eval/tasks/afrimmlu/translate/prompt_2/utils.py function doc_to_choice (line 4) | def doc_to_choice(doc): function doc_to_text (line 9) | def doc_to_text(doc): FILE: lm_eval/tasks/afrimmlu/translate/prompt_3/utils.py function doc_to_choice (line 4) | def doc_to_choice(doc): function doc_to_text (line 9) | def doc_to_text(doc): FILE: lm_eval/tasks/afrimmlu/translate/prompt_4/utils.py function doc_to_choice (line 4) | def doc_to_choice(doc): function doc_to_text (line 9) | def doc_to_text(doc): FILE: lm_eval/tasks/afrimmlu/translate/prompt_5/utils.py function doc_to_choice (line 4) | def doc_to_choice(doc): function doc_to_text (line 9) | def doc_to_text(doc): FILE: lm_eval/tasks/afrimmlu/utils.py function doc_to_choice (line 4) | def doc_to_choice(doc): function doc_to_text (line 9) | def doc_to_text(doc): FILE: lm_eval/tasks/afrixnli/anli prompt/en-direct/utils.py function doc_to_target (line 4) | def doc_to_target(doc): FILE: lm_eval/tasks/afrixnli/anli prompt/translate/utils.py function doc_to_target (line 4) | def doc_to_target(doc): FILE: lm_eval/tasks/afrixnli/direct/prompt_1/utils.py function doc_to_text (line 4) | def doc_to_text(doc): function doc_to_target (line 17) | def doc_to_target(doc): FILE: lm_eval/tasks/afrixnli/direct/prompt_2/utils.py function doc_to_target (line 4) | def doc_to_target(doc): FILE: lm_eval/tasks/afrixnli/direct/prompt_3/utils.py function doc_to_target (line 4) | def doc_to_target(doc): FILE: lm_eval/tasks/afrixnli/direct/prompt_4/utils.py function doc_to_text (line 4) | def doc_to_text(doc): function doc_to_target (line 17) | def doc_to_target(doc): FILE: lm_eval/tasks/afrixnli/direct/prompt_5/utils.py function doc_to_target (line 4) | def doc_to_target(doc): FILE: lm_eval/tasks/afrixnli/gen_utils.py class FunctionTag (line 7) | class FunctionTag: method __init__ (line 8) | def __init__(self, value): function prompt_func (line 12) | def prompt_func(mode, lang): function gen_lang_yamls (line 30) | def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None: function main (line 103) | def main() -> None: FILE: lm_eval/tasks/afrixnli/lai prompt/direct/utils.py function doc_to_text (line 4) | def doc_to_text(doc): function doc_to_target (line 17) | def doc_to_target(doc): FILE: lm_eval/tasks/afrixnli/lai prompt/translate/utils.py function doc_to_text (line 4) | def doc_to_text(doc): function doc_to_target (line 17) | def doc_to_target(doc): FILE: lm_eval/tasks/afrixnli/translate/prompt_1/utils.py function doc_to_text (line 4) | def doc_to_text(doc): function doc_to_target (line 17) | def doc_to_target(doc): FILE: lm_eval/tasks/afrixnli/translate/prompt_2/utils.py function doc_to_target (line 4) | def doc_to_target(doc): FILE: lm_eval/tasks/afrixnli/translate/prompt_3/utils.py function doc_to_text (line 4) | def doc_to_text(doc): function doc_to_target (line 19) | def doc_to_target(doc): FILE: lm_eval/tasks/afrixnli/translate/prompt_4/utils.py function doc_to_text (line 4) | def doc_to_text(doc): function doc_to_target (line 17) | def doc_to_target(doc): FILE: lm_eval/tasks/afrixnli/translate/prompt_5/utils.py function doc_to_target (line 4) | def doc_to_target(doc): FILE: lm_eval/tasks/afrixnli/utils.py class FunctionTag (line 6) | class FunctionTag: method __init__ (line 7) | def __init__(self, value): function gen_lang_yamls (line 123) | def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None: function main (line 211) | def main() -> None: FILE: lm_eval/tasks/afrobench/adr/gen_utils.py class FunctionTag (line 7) | class FunctionTag: method __init__ (line 8) | def __init__(self, value): function prompt_func (line 12) | def prompt_func(mode, lang): function gen_lang_yamls (line 30) | def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None: function main (line 79) | def main() -> None: FILE: lm_eval/tasks/afrobench/afriqa/prompt_1/utils.py function normalize_answer (line 6) | def normalize_answer(s): function f1 (line 28) | def f1(items): FILE: lm_eval/tasks/afrobench/afriqa/prompt_2/utils.py function normalize_answer (line 6) | def normalize_answer(s): function f1 (line 28) | def f1(items): FILE: lm_eval/tasks/afrobench/afriqa/prompt_3/utils.py function normalize_answer (line 6) | def normalize_answer(s): function f1 (line 28) | def f1(items): FILE: lm_eval/tasks/afrobench/afriqa/prompt_4/utils.py function normalize_answer (line 6) | def normalize_answer(s): function f1 (line 28) | def f1(items): FILE: lm_eval/tasks/afrobench/afriqa/prompt_5/utils.py function normalize_answer (line 6) | def normalize_answer(s): function f1 (line 28) | def f1(items): FILE: lm_eval/tasks/afrobench/afriqa/utils.py class FunctionTag (line 7) | class FunctionTag: method __init__ (line 8) | def __init__(self, value): function prompt_func (line 12) | def prompt_func(mode, lang): function gen_lang_yamls (line 43) | def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None: function main (line 99) | def main() -> None: FILE: lm_eval/tasks/afrobench/afrisenti/utils.py class FunctionTag (line 6) | class FunctionTag: method __init__ (line 7) | def __init__(self, value): function prompt_func (line 11) | def prompt_func(mode, lang): function gen_lang_yamls (line 35) | def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None: function main (line 98) | def main() -> None: FILE: lm_eval/tasks/afrobench/belebele/utils.py function prompt_func (line 7) | def prompt_func(mode, lang): function gen_lang_yamls (line 18) | def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None: function main (line 129) | def main() -> None: FILE: lm_eval/tasks/afrobench/flores/gen_utils.py class FunctionTag (line 7) | class FunctionTag: method __init__ (line 8) | def __init__(self, value): function prompt_func (line 12) | def prompt_func(mode, lang, lang_dict): function gen_lang_yamls (line 33) | def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str, reverse:... function main (line 165) | def main() -> None: FILE: lm_eval/tasks/afrobench/injongointent/gen_utils.py class FunctionTag (line 7) | class FunctionTag: method __init__ (line 8) | def __init__(self, value): function prompt_func (line 12) | def prompt_func(mode, lang, intent): function gen_lang_yamls (line 29) | def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None: function main (line 133) | def main() -> None: FILE: lm_eval/tasks/afrobench/mafand/gen_utils.py class FunctionTag (line 7) | class FunctionTag: method __init__ (line 8) | def __init__(self, value): function prompt_func (line 12) | def prompt_func(mode, lang, lang_dict): function gen_lang_yamls (line 35) | def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str, reverse:... function main (line 110) | def main() -> None: FILE: lm_eval/tasks/afrobench/mafand/prompt_1/african-english/utils.py function get_target (line 26) | def get_target(doc): function get_target_reverse (line 35) | def get_target_reverse(doc): function create_text_prompt_1 (line 43) | def create_text_prompt_1(doc): function create_reverse_prompt_1 (line 57) | def create_reverse_prompt_1(doc): function create_text_prompt_2 (line 72) | def create_text_prompt_2(doc): function create_reverse_prompt_2 (line 84) | def create_reverse_prompt_2(doc): function create_text_prompt_3 (line 97) | def create_text_prompt_3(doc): function create_reverse_prompt_3 (line 110) | def create_reverse_prompt_3(doc): FILE: lm_eval/tasks/afrobench/mafand/prompt_1/english-african/utils.py function get_target (line 26) | def get_target(doc): function get_target_reverse (line 35) | def get_target_reverse(doc): function create_text_prompt_1 (line 43) | def create_text_prompt_1(doc): function create_reverse_prompt_1 (line 57) | def create_reverse_prompt_1(doc): function create_text_prompt_2 (line 72) | def create_text_prompt_2(doc): function create_reverse_prompt_2 (line 84) | def create_reverse_prompt_2(doc): function create_text_prompt_3 (line 97) | def create_text_prompt_3(doc): function create_reverse_prompt_3 (line 110) | def create_reverse_prompt_3(doc): FILE: lm_eval/tasks/afrobench/mafand/prompt_2/african-english/utils.py function get_target (line 26) | def get_target(doc): function get_target_reverse (line 35) | def get_target_reverse(doc): function create_text_prompt_1 (line 43) | def create_text_prompt_1(doc): function create_reverse_prompt_1 (line 57) | def create_reverse_prompt_1(doc): function create_text_prompt_2 (line 72) | def create_text_prompt_2(doc): function create_reverse_prompt_2 (line 84) | def create_reverse_prompt_2(doc): function create_text_prompt_3 (line 97) | def create_text_prompt_3(doc): function create_reverse_prompt_3 (line 110) | def create_reverse_prompt_3(doc): FILE: lm_eval/tasks/afrobench/mafand/prompt_2/english-african/utils.py function get_target (line 26) | def get_target(doc): function get_target_reverse (line 35) | def get_target_reverse(doc): function create_text_prompt_1 (line 43) | def create_text_prompt_1(doc): function create_reverse_prompt_1 (line 57) | def create_reverse_prompt_1(doc): function create_text_prompt_2 (line 72) | def create_text_prompt_2(doc): function create_reverse_prompt_2 (line 84) | def create_reverse_prompt_2(doc): function create_text_prompt_3 (line 97) | def create_text_prompt_3(doc): function create_reverse_prompt_3 (line 110) | def create_reverse_prompt_3(doc): FILE: lm_eval/tasks/afrobench/mafand/prompt_3/african-english/utils.py function get_target (line 26) | def get_target(doc): function get_target_reverse (line 35) | def get_target_reverse(doc): function create_text_prompt_1 (line 43) | def create_text_prompt_1(doc): function create_reverse_prompt_1 (line 57) | def create_reverse_prompt_1(doc): function create_text_prompt_2 (line 72) | def create_text_prompt_2(doc): function create_reverse_prompt_2 (line 84) | def create_reverse_prompt_2(doc): function create_text_prompt_3 (line 97) | def create_text_prompt_3(doc): function create_reverse_prompt_3 (line 110) | def create_reverse_prompt_3(doc): FILE: lm_eval/tasks/afrobench/mafand/prompt_3/english-african/utils.py function get_target (line 26) | def get_target(doc): function get_target_reverse (line 35) | def get_target_reverse(doc): function create_text_prompt_1 (line 43) | def create_text_prompt_1(doc): function create_reverse_prompt_1 (line 57) | def create_reverse_prompt_1(doc): function create_text_prompt_2 (line 72) | def create_text_prompt_2(doc): function create_reverse_prompt_2 (line 84) | def create_reverse_prompt_2(doc): function create_text_prompt_3 (line 97) | def create_text_prompt_3(doc): function create_reverse_prompt_3 (line 110) | def create_reverse_prompt_3(doc): FILE: lm_eval/tasks/afrobench/masakhaner/gen_utils.py class FunctionTag (line 7) | class FunctionTag: method __init__ (line 8) | def __init__(self, value): function prompt_func (line 12) | def prompt_func(mode, lang): function gen_lang_yamls (line 48) | def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None: function main (line 112) | def main() -> None: FILE: lm_eval/tasks/afrobench/masakhaner/prompt_1/utils.py function doc_to_target (line 7) | def doc_to_target(doc): function transform_text (line 11) | def transform_text(text): function span_f1_agg (line 42) | def span_f1_agg(items): FILE: lm_eval/tasks/afrobench/masakhaner/prompt_2/utils.py function doc_to_target (line 7) | def doc_to_target(doc): function transform_text (line 11) | def transform_text(text): function span_f1_agg (line 42) | def span_f1_agg(items): FILE: lm_eval/tasks/afrobench/masakhaner/prompt_3/utils.py function doc_to_target (line 7) | def doc_to_target(doc): function transform_text (line 11) | def transform_text(text): function span_f1_agg (line 42) | def span_f1_agg(items): FILE: lm_eval/tasks/afrobench/masakhaner/prompt_4/utils.py function doc_to_target (line 7) | def doc_to_target(doc): function transform_text (line 11) | def transform_text(text): function span_f1_agg (line 42) | def span_f1_agg(items): FILE: lm_eval/tasks/afrobench/masakhaner/prompt_5/utils.py function doc_to_target (line 7) | def doc_to_target(doc): function transform_text (line 11) | def transform_text(text): function span_f1_agg (line 42) | def span_f1_agg(items): FILE: lm_eval/tasks/afrobench/masakhanews/utils.py function prompt_func (line 7) | def prompt_func(mode, lang): function gen_lang_yamls (line 35) | def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None: function main (line 97) | def main() -> None: FILE: lm_eval/tasks/afrobench/masakhapos/gen_utils.py class FunctionTag (line 7) | class FunctionTag: method __init__ (line 8) | def __init__(self, value): function prompt_func (line 12) | def prompt_func(mode, lang): function gen_lang_yamls (line 61) | def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None: function main (line 125) | def main() -> None: FILE: lm_eval/tasks/afrobench/masakhapos/prompt_1/utils.py function doc_to_target (line 8) | def doc_to_target(doc): function acc_score (line 32) | def acc_score(items): FILE: lm_eval/tasks/afrobench/masakhapos/prompt_2/utils.py function doc_to_target (line 8) | def doc_to_target(doc): function acc_score (line 32) | def acc_score(items): FILE: lm_eval/tasks/afrobench/masakhapos/prompt_3/utils.py function doc_to_target (line 8) | def doc_to_target(doc): function acc_score (line 32) | def acc_score(items): FILE: lm_eval/tasks/afrobench/masakhapos/prompt_4/utils.py function doc_to_target (line 8) | def doc_to_target(doc): function acc_score (line 32) | def acc_score(items): FILE: lm_eval/tasks/afrobench/masakhapos/prompt_5/utils.py function doc_to_target (line 8) | def doc_to_target(doc): function acc_score (line 32) | def acc_score(items): FILE: lm_eval/tasks/afrobench/masakhapos/utils.py function doc_to_text (line 4) | def doc_to_text(doc): function doc_to_target (line 19) | def doc_to_target(doc): FILE: lm_eval/tasks/afrobench/naijarc/utils.py function prompt_func (line 7) | def prompt_func(mode, lang): function gen_lang_yamls (line 18) | def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None: function main (line 67) | def main() -> None: FILE: lm_eval/tasks/afrobench/ntrex/gen_utils.py class FunctionTag (line 7) | class FunctionTag: method __init__ (line 8) | def __init__(self, value): function prompt_func (line 12) | def prompt_func(mode, lang, lang_dict): function gen_lang_yamls (line 33) | def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str, reverse:... function main (line 134) | def main() -> None: FILE: lm_eval/tasks/afrobench/openai_mmlu/utils.py function prompt_func (line 7) | def prompt_func(mode, lang): function gen_lang_yamls (line 18) | def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None: function main (line 73) | def main() -> None: FILE: lm_eval/tasks/afrobench/salt/gen_utils.py class FunctionTag (line 7) | class FunctionTag: method __init__ (line 8) | def __init__(self, value): function prompt_func (line 12) | def prompt_func(mode, lang, lang_dict): function gen_lang_yamls (line 34) | def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str, reverse:... function main (line 112) | def main() -> None: FILE: lm_eval/tasks/afrobench/sib/utils.py class FunctionTag (line 7) | class FunctionTag: method __init__ (line 8) | def __init__(self, value): function prompt_func (line 12) | def prompt_func(mode, lang): function gen_lang_yamls (line 40) | def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None: function main (line 201) | def main() -> None: FILE: lm_eval/tasks/afrobench/uhura-arc-easy/utils.py function get_language_from_code (line 8) | def get_language_from_code(code: str) -> str: function prompt_func (line 13) | def prompt_func(mode): function gen_lang_yamls (line 51) | def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None: function main (line 99) | def main() -> None: FILE: lm_eval/tasks/afrobench/xlsum/prompt_1/utils.py function rougeL (line 4) | def rougeL(items): function rougeL_agg (line 11) | def rougeL_agg(items): FILE: lm_eval/tasks/afrobench/xlsum/prompt_2/utils.py function rougeL (line 4) | def rougeL(items): function rougeL_agg (line 11) | def rougeL_agg(items): FILE: lm_eval/tasks/afrobench/xlsum/prompt_3/utils.py function rougeL (line 4) | def rougeL(items): function rougeL_agg (line 11) | def rougeL_agg(items): FILE: lm_eval/tasks/afrobench/xlsum/utils.py function prompt_func (line 7) | def prompt_func(mode, lang): function gen_lang_yamls (line 29) | def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None: function main (line 88) | def main() -> None: FILE: lm_eval/tasks/agieval/utils.py function parse_math_answer (line 10) | def parse_math_answer(raw_string): function _fix_fracs (line 82) | def _fix_fracs(string): function _fix_a_slash_b (line 114) | def _fix_a_slash_b(string): function _remove_right_units (line 129) | def _remove_right_units(string): function _fix_sqrt (line 139) | def _fix_sqrt(string): function _strip_string (line 154) | def _strip_string(string): function is_equiv (line 224) | def is_equiv(str1, str2, verbose=False): function process_results (line 243) | def process_results(doc: dict, results: List[str]) -> Dict[str, int]: function process_results_mcqa (line 262) | def process_results_mcqa(doc, results): FILE: lm_eval/tasks/aime/utils.py function process_results (line 5) | def process_results(doc: dict, results: List[str]) -> Dict[str, int]: function is_equiv (line 36) | def is_equiv(str1, str2, verbose=False): function remove_boxed (line 53) | def remove_boxed(s): function last_boxed_only_string (line 67) | def last_boxed_only_string(string): function fix_fracs (line 97) | def fix_fracs(string): function fix_a_slash_b (line 129) | def fix_a_slash_b(string): function remove_right_units (line 144) | def remove_right_units(string): function fix_sqrt (line 154) | def fix_sqrt(string): function strip_string (line 169) | def strip_string(string): FILE: lm_eval/tasks/arab_culture/_generate_configs.py function parse_args (line 34) | def parse_args(): FILE: lm_eval/tasks/arab_culture/utils_mcq.py function doc_to_text (line 49) | def doc_to_text(doc): function doc_to_choice (line 101) | def doc_to_choice(doc): function doc_to_target (line 105) | def doc_to_target(doc): FILE: lm_eval/tasks/arab_culture_completion/_generate_configs.py function parse_args (line 34) | def parse_args(): FILE: lm_eval/tasks/arab_culture_completion/utils_completion.py function doc_to_text (line 52) | def doc_to_text(doc): function doc_to_choice (line 91) | def doc_to_choice(doc): function doc_to_target (line 96) | def doc_to_target(doc): FILE: lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_alghafa/utils.py function process_docs (line 5) | def process_docs(dataset: datasets.Dataset): FILE: lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_exams/utils.py function process_docs (line 15) | def process_docs(dataset: datasets.Dataset): FILE: lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mmlu/utils.py function process_docs (line 15) | def process_docs(dataset: datasets.Dataset): FILE: lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_arc_challenge/utils.py function process_docs (line 5) | def process_docs(dataset: datasets.Dataset): FILE: lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_arc_easy/utils.py function process_docs (line 5) | def process_docs(dataset: datasets.Dataset): FILE: lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_boolq/utils.py function process_docs (line 5) | def process_docs(dataset: datasets.Dataset): FILE: lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_copa/utils.py function process_docs (line 5) | def process_docs(dataset: datasets.Dataset): FILE: lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_hellaswag/utils.py function process_docs (line 7) | def process_docs(dataset: datasets.Dataset): FILE: lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_mmlu/utils.py function process_docs (line 5) | def process_docs(dataset: datasets.Dataset): FILE: lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_openbook_qa/utils.py function process_docs (line 5) | def process_docs(dataset: datasets.Dataset): FILE: lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_piqa/utils.py function process_docs (line 5) | def process_docs(dataset: datasets.Dataset): FILE: lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_race/utils.py function process_docs (line 5) | def process_docs(dataset: datasets.Dataset): FILE: lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_sciq/utils.py function doc_to_text (line 7) | def doc_to_text(doc): function process_docs (line 24) | def process_docs(dataset: datasets.Dataset): FILE: lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_arabic_mt_toxigen/utils.py function process_docs (line 5) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset: FILE: lm_eval/tasks/arabic_leaderboard_complete/arabic_leaderboard_avca/utils.py function process_docs (line 5) | def process_docs(dataset: datasets.Dataset): FILE: lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_alghafa_light/utils.py function process_docs (line 5) | def process_docs(dataset: datasets.Dataset): FILE: lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_exams_light/utils.py function process_docs (line 15) | def process_docs(dataset: datasets.Dataset): FILE: lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mmlu_light/utils.py function process_docs (line 15) | def process_docs(dataset: datasets.Dataset): FILE: lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_arc_challenge_light/utils.py function process_docs (line 5) | def process_docs(dataset: datasets.Dataset): FILE: lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_arc_easy_light/utils.py function process_docs (line 5) | def process_docs(dataset: datasets.Dataset): FILE: lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_boolq_light/utils.py function process_docs (line 5) | def process_docs(dataset: datasets.Dataset): FILE: lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_copa_light/utils.py function process_docs (line 5) | def process_docs(dataset: datasets.Dataset): FILE: lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_hellaswag_light/utils.py function process_docs (line 7) | def process_docs(dataset: datasets.Dataset): FILE: lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_mmlu_light/utils.py function process_docs (line 5) | def process_docs(dataset: datasets.Dataset): FILE: lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_openbook_qa_light/utils.py function process_docs (line 5) | def process_docs(dataset: datasets.Dataset): FILE: lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_piqa_light/utils.py function process_docs (line 5) | def process_docs(dataset: datasets.Dataset): FILE: lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_race_light/utils.py function process_docs (line 5) | def process_docs(dataset: datasets.Dataset): FILE: lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_sciq_light/utils.py function doc_to_text (line 7) | def doc_to_text(doc): function process_docs (line 24) | def process_docs(dataset: datasets.Dataset): FILE: lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_arabic_mt_toxigen_light/utils.py function process_docs (line 5) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset: FILE: lm_eval/tasks/arabic_leaderboard_light/arabic_leaderboard_avca_light/utils.py function process_docs (line 5) | def process_docs(dataset: datasets.Dataset): FILE: lm_eval/tasks/arabicmmlu/_generate_configs.py function parse_args (line 60) | def parse_args(): FILE: lm_eval/tasks/arabicmmlu/utils.py function doc_to_text (line 14) | def doc_to_text(doc): function doc_to_choice (line 43) | def doc_to_choice(doc): FILE: lm_eval/tasks/aradice/ArabicMMLU/EGY/metrics.py function macro_f1_score (line 4) | def macro_f1_score(items): function micro_f1_score (line 12) | def micro_f1_score(items): function weighted_f1_score (line 20) | def weighted_f1_score(items): FILE: lm_eval/tasks/aradice/ArabicMMLU/EGY/utils.py function process_docs (line 51) | def process_docs(dataset): FILE: lm_eval/tasks/aradice/ArabicMMLU/LEV/metrics.py function macro_f1_score (line 4) | def macro_f1_score(items): function micro_f1_score (line 12) | def micro_f1_score(items): function weighted_f1_score (line 20) | def weighted_f1_score(items): FILE: lm_eval/tasks/aradice/ArabicMMLU/LEV/utils.py function process_docs (line 50) | def process_docs(dataset): FILE: lm_eval/tasks/aradice/boolq/EGY/metrics.py function macro_f1_score (line 4) | def macro_f1_score(items): function micro_f1_score (line 12) | def micro_f1_score(items): function weighted_f1_score (line 20) | def weighted_f1_score(items): FILE: lm_eval/tasks/aradice/boolq/EGY/utils.py function process_docs (line 4) | def process_docs(dataset): FILE: lm_eval/tasks/aradice/boolq/ENG/metrics.py function macro_f1_score (line 4) | def macro_f1_score(items): function micro_f1_score (line 12) | def micro_f1_score(items): function weighted_f1_score (line 20) | def weighted_f1_score(items): FILE: lm_eval/tasks/aradice/boolq/ENG/utils.py function process_docs (line 4) | def process_docs(dataset): FILE: lm_eval/tasks/aradice/boolq/LEV/metrics.py function macro_f1_score (line 4) | def macro_f1_score(items): function micro_f1_score (line 12) | def micro_f1_score(items): function weighted_f1_score (line 20) | def weighted_f1_score(items): FILE: lm_eval/tasks/aradice/boolq/LEV/utils.py function process_docs (line 4) | def process_docs(dataset): FILE: lm_eval/tasks/aradice/boolq/MSA/metrics.py function macro_f1_score (line 4) | def macro_f1_score(items): function micro_f1_score (line 12) | def micro_f1_score(items): function weighted_f1_score (line 20) | def weighted_f1_score(items): FILE: lm_eval/tasks/aradice/boolq/MSA/utils.py function process_docs (line 4) | def process_docs(dataset): FILE: lm_eval/tasks/aradice/cultural-benchmark/metrics.py function macro_f1_score (line 4) | def macro_f1_score(items): function micro_f1_score (line 12) | def micro_f1_score(items): function weighted_f1_score (line 20) | def weighted_f1_score(items): FILE: lm_eval/tasks/aradice/cultural-benchmark/utils.py function process_docs (line 1) | def process_docs(dataset): FILE: lm_eval/tasks/aradice/openbookqa/metrics.py function macro_f1_score (line 4) | def macro_f1_score(items): function micro_f1_score (line 12) | def micro_f1_score(items): function weighted_f1_score (line 20) | def weighted_f1_score(items): FILE: lm_eval/tasks/aradice/openbookqa/utils.py function doc_to_target (line 1) | def doc_to_target(doc): function doc_to_choice (line 12) | def doc_to_choice(doc): function doc_to_text (line 17) | def doc_to_text(doc): FILE: lm_eval/tasks/aradice/piqa/metrics.py function macro_f1_score (line 4) | def macro_f1_score(items): function micro_f1_score (line 12) | def micro_f1_score(items): function weighted_f1_score (line 20) | def weighted_f1_score(items): FILE: lm_eval/tasks/aradice/truthfulqa_mcq/metrics.py function macro_f1_score (line 4) | def macro_f1_score(items): function micro_f1_score (line 12) | def micro_f1_score(items): function weighted_f1_score (line 20) | def weighted_f1_score(items): FILE: lm_eval/tasks/aradice/winogrande/metrics.py function macro_f1_score (line 4) | def macro_f1_score(items): function micro_f1_score (line 12) | def micro_f1_score(items): function weighted_f1_score (line 20) | def weighted_f1_score(items): FILE: lm_eval/tasks/aradice/winogrande/utils.py function doc_to_text (line 1) | def doc_to_text(doc): function doc_to_target (line 6) | def doc_to_target(doc): function doc_to_choice (line 11) | def doc_to_choice(doc): FILE: lm_eval/tasks/babilong/common_utils.py function get_tokenizer (line 18) | def get_tokenizer( function postprocess_pred (line 27) | def postprocess_pred(prediction: list[str]) -> list[str]: function load_dataset (line 40) | def load_dataset(**kwargs): function process_results (line 55) | def process_results(doc: dict, results: list[str]) -> dict[str, float]: FILE: lm_eval/tasks/basque_bench/flores_eu/create_yamls_flores_eu.py function doc_to_text (line 257) | def doc_to_text(src: str, tgt: str) -> str: function doc_to_target (line 265) | def doc_to_target(tgt: str) -> str: function gen_lang_yamls (line 272) | def gen_lang_yamls(output_dir: str, overwrite: bool) -> None: function main (line 316) | def main() -> None: FILE: lm_eval/tasks/basque_bench/utils.py function xcopa_doc_to_text (line 6) | def xcopa_doc_to_text(doc): function xcopa_doc_to_choice (line 11) | def xcopa_doc_to_choice(doc): function paws_process_docs (line 21) | def paws_process_docs(dataset): FILE: lm_eval/tasks/basqueglue/utils.py function general_detokenize (line 7) | def general_detokenize(string): function process_doc (line 16) | def process_doc(string): function process_wic_docs (line 22) | def process_wic_docs(dataset): function coref_doc_to_text (line 36) | def coref_doc_to_text(x): function micro_f1_score (line 62) | def micro_f1_score(items): function vaxx_f1_score (line 71) | def vaxx_f1_score(items): FILE: lm_eval/tasks/bbh/_generate_configs.py function parse_args (line 15) | def parse_args(): FILE: lm_eval/tasks/bbh/cot_zeroshot/utils.py class ExtendedRegexFilter (line 9) | class ExtendedRegexFilter(RegexFilter): method __init__ (line 14) | def __init__( method filter_ignores (line 28) | def filter_ignores(self, st): method find_match (line 41) | def find_match(self, regex, resp, convert_dict={}): class MapRegexFilter (line 53) | class MapRegexFilter(ExtendedRegexFilter): method __init__ (line 54) | def __init__( method apply (line 82) | def apply(self, resps, docs): class NumberParseRegexFilter (line 109) | class NumberParseRegexFilter(ExtendedRegexFilter): method apply (line 110) | def apply(self, resps, docs): class WordSortFilter (line 140) | class WordSortFilter(Filter): method apply (line 143) | def apply(self, resps, docs): class MultiChoiceRegexFilter (line 162) | class MultiChoiceRegexFilter(ExtendedRegexFilter): method __init__ (line 163) | def __init__(self, *args, **kwargs): method apply (line 175) | def apply(self, resps, docs): FILE: lm_eval/tasks/bbh/zeroshot/utils.py class ExtendedRegexFilter (line 9) | class ExtendedRegexFilter(RegexFilter): method __init__ (line 14) | def __init__( method filter_ignores (line 28) | def filter_ignores(self, st): method find_match (line 41) | def find_match(self, regex, resp, convert_dict={}): class MapRegexFilter (line 53) | class MapRegexFilter(ExtendedRegexFilter): method __init__ (line 54) | def __init__( method apply (line 82) | def apply(self, resps, docs): class NumberParseRegexFilter (line 109) | class NumberParseRegexFilter(ExtendedRegexFilter): method apply (line 110) | def apply(self, resps, docs): class WordSortFilter (line 140) | class WordSortFilter(Filter): method apply (line 143) | def apply(self, resps, docs): class MultiChoiceRegexFilter (line 162) | class MultiChoiceRegexFilter(ExtendedRegexFilter): method __init__ (line 163) | def __init__(self, *args, **kwargs): method apply (line 175) | def apply(self, resps, docs): FILE: lm_eval/tasks/bbq/utils.py function agg_accuracy_amb (line 33) | def agg_accuracy_amb(arr): function agg_accuracy_disamb (line 42) | def agg_accuracy_disamb(arr): function agg_disamb_bias_scores (line 51) | def agg_disamb_bias_scores(arr): function agg_amb_bias_scores (line 84) | def agg_amb_bias_scores(arr): function _process_results (line 110) | def _process_results(doc, answer: int): function _clean_answer (line 193) | def _clean_answer(answer: str): function _check_unk_answer (line 204) | def _check_unk_answer(answer: str): function process_results_generate_until (line 212) | def process_results_generate_until(doc, results): function process_results_multiple_choice (line 242) | def process_results_multiple_choice(doc, results): function doc_to_biased_answer (line 255) | def doc_to_biased_answer(doc): function _process_groups_in_answers (line 265) | def _process_groups_in_answers(string): function process_docs (line 300) | def process_docs(dataset: datasets.Dataset): function filter_dataset_context (line 362) | def filter_dataset_context(dataset: datasets.Dataset, context: str) -> d... function process_docs_ambig (line 368) | def process_docs_ambig(dataset: datasets.Dataset): function process_docs_disambig (line 372) | def process_docs_disambig(dataset: datasets.Dataset): function doc_to_choice (line 376) | def doc_to_choice(doc): function _doc_to_choice_groups (line 385) | def _doc_to_choice_groups(doc): function doc_to_targets (line 397) | def doc_to_targets(doc): function doc_to_target (line 412) | def doc_to_target(doc): function filter_dataset (line 417) | def filter_dataset(dataset: datasets.Dataset, bias_type: str) -> dataset... function filter_race_color (line 421) | def filter_race_color(dataset: datasets.Dataset) -> datasets.Dataset: FILE: lm_eval/tasks/belebele/_generate_configs.py function parse_args (line 18) | def parse_args(): function query (line 41) | def query(): FILE: lm_eval/tasks/bigbench/generate_tasks.py function main (line 183) | def main() -> None: FILE: lm_eval/tasks/blimp/generate_configs.py function main (line 75) | def main() -> None: FILE: lm_eval/tasks/c4/preprocess_c4.py function c4_detokenizer (line 4) | def c4_detokenizer(doc): function process_results (line 39) | def process_results(doc, results): FILE: lm_eval/tasks/cabbq/utils.py function _model_answer (line 6) | def _model_answer(lls): function _model_answer_type (line 25) | def _model_answer_type(doc, model_answer): function process_results (line 75) | def process_results(doc, results): function acc_ambig_agg (line 137) | def acc_ambig_agg(results): function acc_disambig_agg (line 159) | def acc_disambig_agg(results): function bias_score_ambig_agg (line 181) | def bias_score_ambig_agg(results): function bias_score_disambig_agg (line 212) | def bias_score_disambig_agg(results): FILE: lm_eval/tasks/careqa/utils.py function doc_to_text (line 1) | def doc_to_text(doc) -> str: function doc_to_target (line 39) | def doc_to_target(doc) -> int: FILE: lm_eval/tasks/careqa/utils_open.py function doc_eval (line 22) | def doc_eval(pred, refs): function doc_to_text (line 65) | def doc_to_text(doc) -> str: function doc_to_target (line 69) | def doc_to_target(doc) -> str: function process_results_gen (line 73) | def process_results_gen(doc, results): function process_results_gen_w_repeats (line 98) | def process_results_gen_w_repeats(doc, results): FILE: lm_eval/tasks/careqa/utils_perplexity.py function doc_to_target (line 5) | def doc_to_target(doc) -> str: function process_results (line 9) | def process_results(doc, results): FILE: lm_eval/tasks/catalan_bench/flores_ca/create_yamls_flores_ca.py function code_to_language_name (line 246) | def code_to_language_name(code): function code_to_short_name (line 250) | def code_to_short_name(code): function jinja_var (line 254) | def jinja_var(s): function doc_to_text (line 258) | def doc_to_text(src: str, tgt: str) -> str: function doc_to_target (line 266) | def doc_to_target(tgt: str) -> str: function gen_lang_yamls (line 273) | def gen_lang_yamls(output_dir: str, overwrite: bool) -> None: function main (line 317) | def main() -> None: FILE: lm_eval/tasks/catalan_bench/truthfulqa_va/utils.py function lowercase_first_letter (line 14) | def lowercase_first_letter(text): function process_summarization (line 18) | def process_summarization(dataset): function process_docs_paraphrases (line 28) | def process_docs_paraphrases(dataset): function process_docs_paws (line 56) | def process_docs_paws(dataset): function rouge1 (line 84) | def rouge1(items): function rouge1_agg (line 91) | def rouge1_agg(items): function process_results_mc2 (line 102) | def process_results_mc2(doc, results): function process_docs_gen (line 115) | def process_docs_gen(dataset: datasets.Dataset) -> datasets.Dataset: function preprocess_function_gen (line 119) | def preprocess_function_gen(examples): function process_doc_nli (line 143) | def process_doc_nli(dataset): function process_results_gen (line 170) | def process_results_gen(doc, results): function bleu (line 241) | def bleu(refs, preds): function rouge (line 263) | def rouge(refs, preds): FILE: lm_eval/tasks/catalan_bench/utils.py function lowercase_first_letter (line 10) | def lowercase_first_letter(text): function process_doc_nli (line 14) | def process_doc_nli(dataset): function process_results_coqcat (line 38) | def process_results_coqcat(doc, results): function process_results_qa (line 72) | def process_results_qa(doc, results): function process_doc_cabreu (line 81) | def process_doc_cabreu(dataset): function process_docs_paraphrases (line 96) | def process_docs_paraphrases(dataset): function process_docs_copa_ca (line 119) | def process_docs_copa_ca(dataset): function rouge1 (line 128) | def rouge1(items): function rouge1_agg (line 135) | def rouge1_agg(items): FILE: lm_eval/tasks/ceval/_generate_configs.py function parse_args (line 72) | def parse_args(): FILE: lm_eval/tasks/chartqa/utils.py function _normalize_string (line 6) | def _normalize_string(s): function _remove_end_punctuation (line 14) | def _remove_end_punctuation(unnormalized_string: str) -> str: class RelaxedCorrectness (line 27) | class RelaxedCorrectness: method _relaxed_correctness (line 39) | def _relaxed_correctness( method score (line 132) | def score(self, model_answer: str, reference_answer: str | list[str]) ... class ExplicitPromptRelaxedCorrectness (line 141) | class ExplicitPromptRelaxedCorrectness(RelaxedCorrectness): method name (line 145) | def name(self) -> str: method _get_final_answer (line 148) | def _get_final_answer(self, generation: str) -> str: method score (line 174) | def score(self, model_answer: str, reference_answer: str | list[str]) ... class AnywhereInAnswerRelaxedCorrectness (line 182) | class AnywhereInAnswerRelaxedCorrectness(ExplicitPromptRelaxedCorrectness): method name (line 189) | def name(self) -> str: method score (line 192) | def score(self, model_answer: str, reference_answer: str | list[str]) ... function exact_match (line 242) | def exact_match(references, predictions): function relaxed_accuracy (line 257) | def relaxed_accuracy(references, predictions): function anywhere_accuracy (line 268) | def anywhere_accuracy(references, predictions): FILE: lm_eval/tasks/click/click_cul/utils.py function get_context (line 6) | def get_context(doc) -> str: function get_target (line 18) | def get_target(doc) -> str: function get_choices (line 25) | def get_choices(doc) -> List[str]: function extract_economy (line 31) | def extract_economy(dataset: Dataset) -> Dataset: function extract_geography (line 35) | def extract_geography(dataset: Dataset) -> Dataset: function extract_history (line 39) | def extract_history(dataset: Dataset) -> Dataset: function extract_law (line 45) | def extract_law(dataset: Dataset) -> Dataset: function extract_politics (line 51) | def extract_politics(dataset: Dataset) -> Dataset: function extract_kpop (line 55) | def extract_kpop(dataset: Dataset) -> Dataset: function extract_society (line 59) | def extract_society(dataset: Dataset) -> Dataset: function extract_tradition (line 63) | def extract_tradition(dataset: Dataset) -> Dataset: FILE: lm_eval/tasks/click/click_lang/utils.py function get_context (line 6) | def get_context(doc) -> str: function get_target (line 18) | def get_target(doc) -> str: function get_choices (line 25) | def get_choices(doc) -> List[str]: function extract_text (line 31) | def extract_text(dataset: Dataset) -> Dataset: function extract_grammar (line 41) | def extract_grammar(dataset: Dataset) -> Dataset: function extract_function (line 65) | def extract_function(dataset: Dataset) -> Dataset: FILE: lm_eval/tasks/cmmlu/_generate_configs.py function parse_args (line 87) | def parse_args(): FILE: lm_eval/tasks/cnn_dailymail/utils.py function normalize_text (line 27) | def normalize_text(text: str) -> str: function calculate_rouge_scores (line 44) | def calculate_rouge_scores( function calculate_bertscore (line 82) | def calculate_bertscore( function process_results (line 124) | def process_results(doc: Dict[str, Any], results: List[str]) -> Dict[str... function postprocess_generation (line 186) | def postprocess_generation(generation: str) -> str: function filter_long_articles (line 208) | def filter_long_articles(doc: Dict[str, Any]) -> bool: function doc_to_choice (line 224) | def doc_to_choice(doc: Dict[str, Any]) -> List[str]: function process_docs (line 237) | def process_docs(dataset): function calculate_summary_length (line 269) | def calculate_summary_length(generated: str) -> int: FILE: lm_eval/tasks/code_x_glue/code-text/bleu.py function normalize (line 58) | def normalize(s): function count_ngrams (line 78) | def count_ngrams(words, n=4): function cook_refs (line 87) | def cook_refs(refs, n=4): function cook_test (line 101) | def cook_test(test, item, n=4): function score_cooked (line 132) | def score_cooked(allcomps, n=4, ground=0, smooth=1): function bleu (line 174) | def bleu(refs, candidate, ground=0, smooth=1): function splitPuncts (line 180) | def splitPuncts(line): function computeMaps (line 184) | def computeMaps(predictions, goldfile): function bleuFromMaps (line 210) | def bleuFromMaps(m1, m2): function smoothed_bleu_4 (line 222) | def smoothed_bleu_4(references, predictions, **kwargs): FILE: lm_eval/tasks/code_x_glue/code-text/utils.py function doc_to_text (line 1) | def doc_to_text(doc): function doc_to_target (line 8) | def doc_to_target(doc): FILE: lm_eval/tasks/common_voice/utils.py function doc_to_text (line 10) | def doc_to_text(doc: Dict[str, Any]) -> str: function doc_to_audio (line 14) | def doc_to_audio(doc: Dict[str, Any]) -> List[dict]: FILE: lm_eval/tasks/copal_id/utils.py function convert_choice (line 4) | def convert_choice(choice): function doc_to_text (line 8) | def doc_to_text(doc, connector): function doc_to_choice (line 13) | def doc_to_choice(doc): FILE: lm_eval/tasks/coqa/utils.py function doc_to_text (line 6) | def doc_to_text(doc): function doc_to_target (line 19) | def doc_to_target(doc): function em (line 37) | def em(gold_list, pred): function compute_scores (line 51) | def compute_scores(gold_list, pred): function process_results (line 72) | def process_results(doc, results): FILE: lm_eval/tasks/crows_pairs/utils.py function process_results (line 4) | def process_results(doc, results): function doc_to_choice (line 19) | def doc_to_choice(doc): function filter_dataset (line 23) | def filter_dataset(dataset: datasets.Dataset, bias_type: str) -> dataset... function filter_race_color (line 27) | def filter_race_color(dataset: datasets.Dataset) -> datasets.Dataset: function filter_socio (line 31) | def filter_socio(dataset: datasets.Dataset) -> datasets.Dataset: function filter_gender (line 35) | def filter_gender(dataset: datasets.Dataset) -> datasets.Dataset: function filter_age (line 39) | def filter_age(dataset: datasets.Dataset) -> datasets.Dataset: function filter_religion (line 43) | def filter_religion(dataset: datasets.Dataset) -> datasets.Dataset: function filter_disability (line 47) | def filter_disability(dataset: datasets.Dataset) -> datasets.Dataset: function filter_orientation (line 51) | def filter_orientation(dataset: datasets.Dataset) -> datasets.Dataset: function filter_nationality (line 55) | def filter_nationality(dataset: datasets.Dataset) -> datasets.Dataset: function filter_appearance (line 59) | def filter_appearance(dataset: datasets.Dataset) -> datasets.Dataset: function filter_autre (line 63) | def filter_autre(dataset: datasets.Dataset) -> datasets.Dataset: FILE: lm_eval/tasks/csatqa/_generate_configs.py function parse_args (line 19) | def parse_args(): FILE: lm_eval/tasks/csatqa/utils.py function process_docs (line 4) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset: FILE: lm_eval/tasks/darija_bench/darija_sentiment/utils.py function doc_to_text (line 9) | def doc_to_text(doc): function doc_to_choice_3 (line 21) | def doc_to_choice_3(doc): function doc_to_choice_2 (line 25) | def doc_to_choice_2(doc): function doc_to_target (line 29) | def doc_to_target(doc): FILE: lm_eval/tasks/darija_bench/darija_summarization/utils.py function strip (line 5) | def strip(resps, docs): function doc_to_text (line 12) | def doc_to_text(doc): function doc_to_target (line 19) | def doc_to_target(doc): function bert (line 23) | def bert(items): function Average (line 27) | def Average(lst): function darijabert (line 31) | def darijabert(items): function rouge1 (line 44) | def rouge1(items): function rougeL (line 48) | def rougeL(items): function rouge2 (line 52) | def rouge2(items): function rougeLsum (line 56) | def rougeLsum(items): function agg_rougelsum (line 60) | def agg_rougelsum(items): function agg_rouge1 (line 66) | def agg_rouge1(items): function agg_rouge2 (line 72) | def agg_rouge2(items): function agg_rougel (line 78) | def agg_rougel(items): FILE: lm_eval/tasks/darija_bench/darija_translation/utils.py function strip (line 5) | def strip(resps, docs): function dr_fr (line 12) | def dr_fr(dataset: datasets.Dataset): function dr_en (line 16) | def dr_en(dataset: datasets.Dataset): function dr_msa (line 20) | def dr_msa(dataset: datasets.Dataset): function fr_dr (line 24) | def fr_dr(dataset: datasets.Dataset): function en_dr (line 28) | def en_dr(dataset: datasets.Dataset): function msa_dr (line 32) | def msa_dr(dataset: datasets.Dataset): function doc_to_text (line 46) | def doc_to_text(doc): function doc_to_target (line 51) | def doc_to_target(doc): function bert (line 55) | def bert(items): function Average (line 59) | def Average(lst): function camembert (line 63) | def camembert(items): function darijabert (line 76) | def darijabert(items): function arabert (line 89) | def arabert(items): function bertbase (line 102) | def bertbase(items): function mbert (line 115) | def mbert(items): FILE: lm_eval/tasks/darija_bench/darija_transliteration/utils.py function strip (line 5) | def strip(resps, docs): function dr_ar (line 12) | def dr_ar(dataset: datasets.Dataset): function ar_dr (line 16) | def ar_dr(dataset: datasets.Dataset): function doc_to_text (line 20) | def doc_to_text(doc): function doc_to_target (line 25) | def doc_to_target(doc): function bert (line 29) | def bert(items): function Average (line 33) | def Average(lst): function arabizibert (line 37) | def arabizibert(items): function darijabert (line 50) | def darijabert(items): function mbert (line 63) | def mbert(items): FILE: lm_eval/tasks/darijahellaswag/utils.py function process_docs (line 4) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset: FILE: lm_eval/tasks/darijammlu/_generate_configs.py function parse_args (line 73) | def parse_args(): FILE: lm_eval/tasks/darijammlu/utils.py function doc_to_text (line 7) | def doc_to_text(doc): function doc_to_choice (line 24) | def doc_to_choice(doc): FILE: lm_eval/tasks/discrim_eval/utils.py function _logit (line 8) | def _logit(p: float) -> float: function process_results (line 30) | def process_results( function agg_demographic_bias_regression (line 63) | def agg_demographic_bias_regression(items: List[BiasTuple]) -> float: FILE: lm_eval/tasks/drop/utils.py function process_docs (line 10) | def process_docs(dataset): function get_answers (line 22) | def get_answers(doc): function parse_answer (line 51) | def parse_answer(answer): function process_results (line 64) | def process_results(doc, results): function get_metrics (line 76) | def get_metrics(predicted, gold): function _answer_to_bags (line 100) | def _answer_to_bags(answer): function _align_bags (line 114) | def _align_bags(predicted, gold): function _compute_f1 (line 134) | def _compute_f1(predicted_bag, gold_bag): function _match_numbers_if_present (line 152) | def _match_numbers_if_present(gold_bag, predicted_bag): function _is_number (line 166) | def _is_number(text): function _remove_articles (line 174) | def _remove_articles(text): function _white_space_fix (line 178) | def _white_space_fix(text): function _remove_punc (line 182) | def _remove_punc(text): function _fix_number (line 190) | def _fix_number(text): function _tokenize (line 194) | def _tokenize(text): function _normalize (line 198) | def _normalize(answer): FILE: lm_eval/tasks/e2lmc/mmlu_early_training/custom_metrics.py function loglikelihood_diff (line 4) | def loglikelihood_diff(items): FILE: lm_eval/tasks/e2lmc/noor/_generate_configs.py function parse_args (line 78) | def parse_args(): FILE: lm_eval/tasks/egyhellaswag/utils.py function process_docs (line 4) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset: FILE: lm_eval/tasks/egymmlu/_generate_configs.py function parse_args (line 74) | def parse_args(): FILE: lm_eval/tasks/egymmlu/utils.py function doc_to_text (line 7) | def doc_to_text(doc): function doc_to_choice (line 24) | def doc_to_choice(doc): FILE: lm_eval/tasks/eq_bench/multilingual/utils.py function calculate_score_fullscale (line 6) | def calculate_score_fullscale(docs, results): FILE: lm_eval/tasks/eq_bench/utils.py function calculate_score_fullscale (line 6) | def calculate_score_fullscale(docs, results): FILE: lm_eval/tasks/esbbq/utils.py function _model_answer (line 6) | def _model_answer(lls): function _model_answer_type (line 25) | def _model_answer_type(doc, model_answer): function process_results (line 75) | def process_results(doc, results): function acc_ambig_agg (line 137) | def acc_ambig_agg(results): function acc_disambig_agg (line 159) | def acc_disambig_agg(results): function bias_score_ambig_agg (line 181) | def bias_score_ambig_agg(results): function bias_score_disambig_agg (line 212) | def bias_score_disambig_agg(results): FILE: lm_eval/tasks/eus_exams/configs.py function gen_config_yamls (line 16) | def gen_config_yamls(output_dir: str, overwrite: bool) -> None: function main (line 49) | def main() -> None: FILE: lm_eval/tasks/eus_exams/utils.py function process_docs (line 4) | def process_docs(dataset: datasets.Dataset): FILE: lm_eval/tasks/eus_reading/utils.py function doc_to_text_context (line 7) | def doc_to_text_context(doc) -> str: function doc_to_choice (line 28) | def doc_to_choice(doc) -> List[str]: FILE: lm_eval/tasks/eus_trivia/utils.py function doc_to_text (line 7) | def doc_to_text(doc) -> str: function doc_to_choice (line 28) | def doc_to_choice(doc) -> List[str]: FILE: lm_eval/tasks/evalita_llm/metrics.py function _aggreg_ls (line 10) | def _aggreg_ls(predictions): function _aggreg_sa_v2 (line 37) | def _aggreg_sa_v2(predictions): function _aggreg_sa (line 49) | def _aggreg_sa(predictions): function _aggreg_ner (line 124) | def _aggreg_ner(predictions): function _aggreg_rel (line 143) | def _aggreg_rel(predictions): function _aggreg_dd (line 160) | def _aggreg_dd(items): FILE: lm_eval/tasks/evalita_llm/sum_utils.py function rouge1_score (line 7) | def rouge1_score(references, predictions, **kwargs): function process_results_sum (line 16) | def process_results_sum(doc, results): FILE: lm_eval/tasks/evalita_llm/utils.py function sa_doc_to_target (line 11) | def sa_doc_to_target(x): function sa_doc_to_target_v2 (line 30) | def sa_doc_to_target_v2(x): function sa_doc_to_choice (line 49) | def sa_doc_to_choice(x): function _ls_gold_to_target (line 60) | def _ls_gold_to_target(x): function ls_doc_to_target (line 77) | def ls_doc_to_target(x): function _ls_split_gold (line 91) | def _ls_split_gold(x): function ls_process_results (line 112) | def ls_process_results(doc, results): function _ner_gold_to_target (line 163) | def _ner_gold_to_target(x: list) -> list: function _ner_gold_to_target_v2 (line 171) | def _ner_gold_to_target_v2(x: list) -> list: function ner_doc_to_target (line 179) | def ner_doc_to_target(doc): function ner_process_results (line 193) | def ner_process_results(doc, results): function ner_process_results_v2 (line 246) | def ner_process_results_v2(doc, results): function _ner_process_raw_output (line 313) | def _ner_process_raw_output(llm_result: str) -> list[tuple]: function _ner_process_raw_output_v2 (line 337) | def _ner_process_raw_output_v2(llm_result: str) -> list[tuple]: function _rel_process_raw_output (line 364) | def _rel_process_raw_output(llm_result: str) -> list[str]: function re_doc_to_target (line 391) | def re_doc_to_target(doc): function _rel_gold_to_target (line 403) | def _rel_gold_to_target(x: list) -> list: function rel_doc_to_target (line 410) | def rel_doc_to_target(doc): function _extract_relations (line 422) | def _extract_relations(results): function rel_process_results_v3 (line 439) | def rel_process_results_v3(doc, results): function split_text_with_regex (line 498) | def split_text_with_regex(text, pattern): function faq_doc_to_target (line 526) | def faq_doc_to_target(x): function ht_doc_to_target (line 541) | def ht_doc_to_target(x): FILE: lm_eval/tasks/fda/task.py class FDA (line 10) | class FDA(ConfigurableTask): method __init__ (line 15) | def __init__(self, **kwargs): method has_training_docs (line 18) | def has_training_docs(self): method has_validation_docs (line 21) | def has_validation_docs(self): method has_test_docs (line 24) | def has_test_docs(self): method validation_docs (line 27) | def validation_docs(self): method doc_to_text (line 30) | def doc_to_text(self, doc): method doc_to_target (line 33) | def doc_to_target(self, doc): method construct_requests (line 36) | def construct_requests( method process_results (line 60) | def process_results(self, doc, results): method aggregation (line 75) | def aggregation(self): method higher_is_better (line 85) | def higher_is_better(self): function contains_score (line 96) | def contains_score(prediction: str, labels: List[str]): FILE: lm_eval/tasks/french_bench/preprocess_wikitext.py function wikitext_detokenizer (line 4) | def wikitext_detokenizer(doc): function process_results (line 39) | def process_results(doc, results): FILE: lm_eval/tasks/french_bench/utils.py function normalize_answer (line 9) | def normalize_answer(s): function get_tokens (line 29) | def get_tokens(s): function exact (line 36) | def exact(predictions, references): function f1 (line 41) | def f1(predictions, references): function rouge1 (line 57) | def rouge1(items): function rouge1_agg (line 64) | def rouge1_agg(items): function is_included (line 74) | def is_included(items): function preprocess (line 83) | def preprocess(text): function process_docs (line 92) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset: FILE: lm_eval/tasks/galician_bench/flores_gl/create_yamls_flores_gl.py function doc_to_text (line 257) | def doc_to_text(src: str, tgt: str) -> str: function doc_to_target (line 265) | def doc_to_target(tgt: str) -> str: function gen_lang_yamls (line 272) | def gen_lang_yamls(output_dir: str, overwrite: bool) -> None: function main (line 316) | def main() -> None: FILE: lm_eval/tasks/galician_bench/utils.py function lowercase_first_letter (line 14) | def lowercase_first_letter(text): function process_summarization (line 18) | def process_summarization(dataset): function process_docs_paraphrases (line 28) | def process_docs_paraphrases(dataset): function process_docs_paws (line 56) | def process_docs_paws(dataset): function rouge1 (line 84) | def rouge1(items): function rouge1_agg (line 91) | def rouge1_agg(items): function process_results_mc2 (line 102) | def process_results_mc2(doc, results): function process_docs_gen (line 115) | def process_docs_gen(dataset: datasets.Dataset) -> datasets.Dataset: function preprocess_function_gen (line 119) | def preprocess_function_gen(examples): function process_doc_nli (line 143) | def process_doc_nli(dataset): function process_results_gen (line 170) | def process_results_gen(doc, results): function bleu (line 241) | def bleu(refs, preds): function rouge (line 264) | def rouge(refs, preds): FILE: lm_eval/tasks/glianorex/preprocess_glianorex.py function doc_to_text (line 4) | def doc_to_text(doc) -> str: function doc_to_target (line 10) | def doc_to_target(doc) -> str: function filter_dataset (line 15) | def filter_dataset(dataset: datasets.Dataset, lang: str) -> datasets.Dat... function filter_french (line 19) | def filter_french(dataset: datasets.Dataset) -> datasets.Dataset: function filter_english (line 23) | def filter_english(dataset: datasets.Dataset) -> datasets.Dataset: FILE: lm_eval/tasks/global_mmlu/default/ar/utils.py function process_docs (line 7) | def process_docs(dataset, category): FILE: lm_eval/tasks/global_mmlu/default/bn/utils.py function process_docs (line 7) | def process_docs(dataset, category): FILE: lm_eval/tasks/global_mmlu/default/de/utils.py function process_docs (line 7) | def process_docs(dataset, category): FILE: lm_eval/tasks/global_mmlu/default/en/utils.py function process_docs (line 7) | def process_docs(dataset, category): FILE: lm_eval/tasks/global_mmlu/default/es/utils.py function process_docs (line 7) | def process_docs(dataset, category): FILE: lm_eval/tasks/global_mmlu/default/fr/utils.py function process_docs (line 7) | def process_docs(dataset, category): FILE: lm_eval/tasks/global_mmlu/default/hi/utils.py function process_docs (line 7) | def process_docs(dataset, category): FILE: lm_eval/tasks/global_mmlu/default/id/utils.py function process_docs (line 7) | def process_docs(dataset, category): FILE: lm_eval/tasks/global_mmlu/default/it/utils.py function process_docs (line 7) | def process_docs(dataset, category): FILE: lm_eval/tasks/global_mmlu/default/ja/utils.py function process_docs (line 7) | def process_docs(dataset, category): FILE: lm_eval/tasks/global_mmlu/default/ko/utils.py function process_docs (line 7) | def process_docs(dataset, category): FILE: lm_eval/tasks/global_mmlu/default/pt/utils.py function process_docs (line 7) | def process_docs(dataset, category): FILE: lm_eval/tasks/global_mmlu/default/sw/utils.py function process_docs (line 7) | def process_docs(dataset, category): FILE: lm_eval/tasks/global_mmlu/default/yo/utils.py function process_docs (line 7) | def process_docs(dataset, category): FILE: lm_eval/tasks/global_mmlu/default/zh/utils.py function process_docs (line 7) | def process_docs(dataset, category): FILE: lm_eval/tasks/global_mmlu/full/am/utils.py function process_docs (line 65) | def process_docs(dataset, subject): FILE: lm_eval/tasks/global_mmlu/full/ar/utils.py function process_docs (line 65) | def process_docs(dataset, subject): FILE: lm_eval/tasks/global_mmlu/full/bn/utils.py function process_docs (line 65) | def process_docs(dataset, subject): FILE: lm_eval/tasks/global_mmlu/full/cs/utils.py function process_docs (line 65) | def process_docs(dataset, subject): FILE: lm_eval/tasks/global_mmlu/full/de/utils.py function process_docs (line 65) | def process_docs(dataset, subject): FILE: lm_eval/tasks/global_mmlu/full/el/utils.py function process_docs (line 65) | def process_docs(dataset, subject): FILE: lm_eval/tasks/global_mmlu/full/en/utils.py function process_docs (line 65) | def process_docs(dataset, subject): FILE: lm_eval/tasks/global_mmlu/full/es/utils.py function process_docs (line 65) | def process_docs(dataset, subject): FILE: lm_eval/tasks/global_mmlu/full/fa/utils.py function process_docs (line 65) | def process_docs(dataset, subject): FILE: lm_eval/tasks/global_mmlu/full/fil/utils.py function process_docs (line 65) | def process_docs(dataset, subject): FILE: lm_eval/tasks/global_mmlu/full/fr/utils.py function process_docs (line 65) | def process_docs(dataset, subject): FILE: lm_eval/tasks/global_mmlu/full/ha/utils.py function process_docs (line 65) | def process_docs(dataset, subject): FILE: lm_eval/tasks/global_mmlu/full/he/utils.py function process_docs (line 65) | def process_docs(dataset, subject): FILE: lm_eval/tasks/global_mmlu/full/hi/utils.py function process_docs (line 65) | def process_docs(dataset, subject): FILE: lm_eval/tasks/global_mmlu/full/id/utils.py function process_docs (line 65) | def process_docs(dataset, subject): FILE: lm_eval/tasks/global_mmlu/full/ig/utils.py function process_docs (line 65) | def process_docs(dataset, subject): FILE: lm_eval/tasks/global_mmlu/full/it/utils.py function process_docs (line 65) | def process_docs(dataset, subject): FILE: lm_eval/tasks/global_mmlu/full/ja/utils.py function process_docs (line 65) | def process_docs(dataset, subject): FILE: lm_eval/tasks/global_mmlu/full/ko/utils.py function process_docs (line 65) | def process_docs(dataset, subject): FILE: lm_eval/tasks/global_mmlu/full/ky/utils.py function process_docs (line 65) | def process_docs(dataset, subject): FILE: lm_eval/tasks/global_mmlu/full/lt/utils.py function process_docs (line 65) | def process_docs(dataset, subject): FILE: lm_eval/tasks/global_mmlu/full/mg/utils.py function process_docs (line 65) | def process_docs(dataset, subject): FILE: lm_eval/tasks/global_mmlu/full/ms/utils.py function process_docs (line 65) | def process_docs(dataset, subject): FILE: lm_eval/tasks/global_mmlu/full/ne/utils.py function process_docs (line 65) | def process_docs(dataset, subject): FILE: lm_eval/tasks/global_mmlu/full/nl/utils.py function process_docs (line 65) | def process_docs(dataset, subject): FILE: lm_eval/tasks/global_mmlu/full/ny/utils.py function process_docs (line 65) | def process_docs(dataset, subject): FILE: lm_eval/tasks/global_mmlu/full/pl/utils.py function process_docs (line 65) | def process_docs(dataset, subject): FILE: lm_eval/tasks/global_mmlu/full/pt/utils.py function process_docs (line 65) | def process_docs(dataset, subject): FILE: lm_eval/tasks/global_mmlu/full/ro/utils.py function process_docs (line 65) | def process_docs(dataset, subject): FILE: lm_eval/tasks/global_mmlu/full/ru/utils.py function process_docs (line 65) | def process_docs(dataset, subject): FILE: lm_eval/tasks/global_mmlu/full/si/utils.py function process_docs (line 65) | def process_docs(dataset, subject): FILE: lm_eval/tasks/global_mmlu/full/sn/utils.py function process_docs (line 65) | def process_docs(dataset, subject): FILE: lm_eval/tasks/global_mmlu/full/so/utils.py function process_docs (line 65) | def process_docs(dataset, subject): FILE: lm_eval/tasks/global_mmlu/full/sr/utils.py function process_docs (line 65) | def process_docs(dataset, subject): FILE: lm_eval/tasks/global_mmlu/full/sv/utils.py function process_docs (line 65) | def process_docs(dataset, subject): FILE: lm_eval/tasks/global_mmlu/full/sw/utils.py function process_docs (line 65) | def process_docs(dataset, subject): FILE: lm_eval/tasks/global_mmlu/full/te/utils.py function process_docs (line 65) | def process_docs(dataset, subject): FILE: lm_eval/tasks/global_mmlu/full/tr/utils.py function process_docs (line 65) | def process_docs(dataset, subject): FILE: lm_eval/tasks/global_mmlu/full/uk/utils.py function process_docs (line 65) | def process_docs(dataset, subject): FILE: lm_eval/tasks/global_mmlu/full/vi/utils.py function process_docs (line 65) | def process_docs(dataset, subject): FILE: lm_eval/tasks/global_mmlu/full/yo/utils.py function process_docs (line 65) | def process_docs(dataset, subject): FILE: lm_eval/tasks/global_mmlu/full/zh/utils.py function process_docs (line 65) | def process_docs(dataset, subject): FILE: lm_eval/tasks/global_piqa/completions/_generate_config.py class IndentedDumper (line 7) | class IndentedDumper(yaml.Dumper): method increase_indent (line 8) | def increase_indent(self, flow=False, indentless=False): function format_subset (line 15) | def format_subset(subset: str, preface: str = PREFACE) -> str: FILE: lm_eval/tasks/global_piqa/prompted/_generate_config.py class IndentedDumper (line 7) | class IndentedDumper(yaml.Dumper): method increase_indent (line 8) | def increase_indent(self, flow=False, indentless=False): function format_subset (line 15) | def format_subset(subset: str, preface: str = PREFACE) -> str: FILE: lm_eval/tasks/glue/mnli/utils.py function doc_to_text (line 1) | def doc_to_text(doc) -> str: FILE: lm_eval/tasks/gpqa/cot_n_shot/_generate_configs.py function main (line 5) | def main() -> None: FILE: lm_eval/tasks/gpqa/cot_n_shot/utils.py function preprocess (line 7) | def preprocess(text): function process_docs (line 17) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset: FILE: lm_eval/tasks/gpqa/cot_zeroshot/_generate_configs.py function main (line 5) | def main() -> None: FILE: lm_eval/tasks/gpqa/cot_zeroshot/utils.py function preprocess (line 7) | def preprocess(text): function process_docs (line 17) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset: FILE: lm_eval/tasks/gpqa/generative/_generate_configs.py function main (line 5) | def main() -> None: FILE: lm_eval/tasks/gpqa/generative/utils.py function preprocess (line 7) | def preprocess(text): function process_docs (line 17) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset: FILE: lm_eval/tasks/gpqa/n_shot/_generate_configs.py function main (line 5) | def main() -> None: FILE: lm_eval/tasks/gpqa/n_shot/utils.py function preprocess (line 7) | def preprocess(text): function process_docs (line 20) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset: FILE: lm_eval/tasks/gpqa/zeroshot/_generate_configs.py function main (line 5) | def main() -> None: FILE: lm_eval/tasks/gpqa/zeroshot/utils.py function preprocess (line 7) | def preprocess(text): function process_docs (line 17) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset: FILE: lm_eval/tasks/graphwalks/utils.py function load_dataset (line 7) | def load_dataset(**kwargs): function extract_answer_list (line 27) | def extract_answer_list(response: str) -> Tuple[List[str], bool]: function extract_answer_list_flexible (line 65) | def extract_answer_list_flexible(response: str) -> Tuple[List[str], bool]: function process_results (line 100) | def process_results(doc, results): FILE: lm_eval/tasks/groundcocoa/utils.py function process_docs (line 6) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset: FILE: lm_eval/tasks/hellaswag/utils.py function preprocess (line 6) | def preprocess(text): function process_docs (line 15) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset: FILE: lm_eval/tasks/hendrycks_ethics/utils.py function _preproc_doc (line 5) | def _preproc_doc(doc): function doc_to_text (line 18) | def doc_to_text(doc) -> str: function doc_to_target (line 23) | def doc_to_target(doc): FILE: lm_eval/tasks/hendrycks_math/utils.py function process_docs (line 6) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset: function process_results (line 18) | def process_results(doc: dict, results: List[str]) -> Dict[str, int]: function is_equiv (line 36) | def is_equiv(str1, str2, verbose=False): function remove_boxed (line 53) | def remove_boxed(s): function last_boxed_only_string (line 67) | def last_boxed_only_string(string): function fix_fracs (line 97) | def fix_fracs(string): function fix_a_slash_b (line 129) | def fix_a_slash_b(string): function remove_right_units (line 144) | def remove_right_units(string): function fix_sqrt (line 154) | def fix_sqrt(string): function strip_string (line 169) | def strip_string(string): FILE: lm_eval/tasks/histoires_morales/utils.py function process_docs (line 4) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset: FILE: lm_eval/tasks/hrm8k/default/utils.py function doc_to_text (line 5) | def doc_to_text(doc): function doc_to_text_mmmlu (line 14) | def doc_to_text_mmmlu(doc): function doc_to_target (line 23) | def doc_to_target(doc): function postprocess (line 27) | def postprocess(s): function process_results (line 36) | def process_results(doc: dict, results: List[str]) -> Dict[str, int]: function is_equiv (line 54) | def is_equiv(str1, str2, verbose=False): function parse_math_answer (line 74) | def parse_math_answer(raw_string): function _fix_fracs (line 146) | def _fix_fracs(string): function _fix_a_slash_b (line 178) | def _fix_a_slash_b(string): function _remove_right_units (line 193) | def _remove_right_units(string): function _fix_sqrt (line 203) | def _fix_sqrt(string): function _strip_string (line 218) | def _strip_string(string): FILE: lm_eval/tasks/hrm8k/en/utils.py function doc_to_text (line 5) | def doc_to_text(doc): function doc_to_text_mmmlu (line 14) | def doc_to_text_mmmlu(doc): function doc_to_target (line 23) | def doc_to_target(doc): function postprocess (line 27) | def postprocess(s): function process_results (line 36) | def process_results(doc: dict, results: List[str]) -> Dict[str, int]: function is_equiv (line 54) | def is_equiv(str1, str2, verbose=False): function parse_math_answer (line 74) | def parse_math_answer(raw_string): function _fix_fracs (line 146) | def _fix_fracs(string): function _fix_a_slash_b (line 178) | def _fix_a_slash_b(string): function _remove_right_units (line 193) | def _remove_right_units(string): function _fix_sqrt (line 203) | def _fix_sqrt(string): function _strip_string (line 218) | def _strip_string(string): FILE: lm_eval/tasks/humaneval/utils.py function pass_at_k (line 13) | def pass_at_k(references: list[str], predictions: list[list[str]], k: li... function build_predictions (line 26) | def build_predictions(resps: list[list[str]], docs: list[dict]) -> list[... function build_predictions_instruct (line 30) | def build_predictions_instruct( FILE: lm_eval/tasks/humaneval_infilling/utils.py function pass_at_k (line 13) | def pass_at_k(references: list[str], predictions: list[list[str]], k: li... function build_predictions (line 26) | def build_predictions(resps: list[list[str]], docs: list[dict]) -> list[... FILE: lm_eval/tasks/icelandic_winogrande/preprocess_winogrande.py function doc_to_text (line 1) | def doc_to_text(doc): function doc_to_target (line 6) | def doc_to_target(doc): function doc_to_choice (line 14) | def doc_to_choice(doc): FILE: lm_eval/tasks/ifeval/instructions.py class Instruction (line 110) | class Instruction: method __init__ (line 113) | def __init__(self, instruction_id): method build_description (line 116) | def build_description(self, **kwargs): method get_instruction_args (line 119) | def get_instruction_args(self): method get_instruction_args_keys (line 122) | def get_instruction_args_keys(self): method check_following (line 125) | def check_following(self, value): class ResponseLanguageChecker (line 129) | class ResponseLanguageChecker(Instruction): method build_description (line 132) | def build_description(self, *, language=None): method get_instruction_args (line 155) | def get_instruction_args(self): method get_instruction_args_keys (line 159) | def get_instruction_args_keys(self): method check_following (line 163) | def check_following(self, value): class NumberOfSentences (line 184) | class NumberOfSentences(Instruction): method build_description (line 187) | def build_description(self, *, num_sentences=None, relation=None): method get_instruction_args (line 225) | def get_instruction_args(self): method get_instruction_args_keys (line 232) | def get_instruction_args_keys(self): method check_following (line 236) | def check_following(self, value): class PlaceholderChecker (line 256) | class PlaceholderChecker(Instruction): method build_description (line 259) | def build_description(self, *, num_placeholders=None): method get_instruction_args (line 278) | def get_instruction_args(self): method get_instruction_args_keys (line 282) | def get_instruction_args_keys(self): method check_following (line 286) | def check_following(self, value): class BulletListChecker (line 301) | class BulletListChecker(Instruction): method build_description (line 304) | def build_description(self, *, num_bullets=None): method get_instruction_args (line 325) | def get_instruction_args(self): method get_instruction_args_keys (line 329) | def get_instruction_args_keys(self): method check_following (line 333) | def check_following(self, value): class ConstrainedResponseChecker (line 350) | class ConstrainedResponseChecker(Instruction): method build_description (line 353) | def build_description(self): method get_instruction_args (line 364) | def get_instruction_args(self): method get_instruction_args_keys (line 368) | def get_instruction_args_keys(self): method check_following (line 372) | def check_following(self, value): class ConstrainedStartChecker (line 389) | class ConstrainedStartChecker(Instruction): method build_description (line 392) | def build_description(self, *, starter=None): method get_instruction_args (line 411) | def get_instruction_args(self): method get_instruction_args_keys (line 415) | def get_instruction_args_keys(self): method check_following (line 419) | def check_following(self, value): class HighlightSectionChecker (line 436) | class HighlightSectionChecker(Instruction): method build_description (line 439) | def build_description(self, *, num_highlights=None): method get_instruction_args (line 460) | def get_instruction_args(self): method get_instruction_args_keys (line 464) | def get_instruction_args_keys(self): method check_following (line 468) | def check_following(self, value): class SectionChecker (line 492) | class SectionChecker(Instruction): method build_description (line 495) | def build_description(self, *, section_spliter=None, num_sections=None): method get_instruction_args (line 531) | def get_instruction_args(self): method get_instruction_args_keys (line 538) | def get_instruction_args_keys(self): method check_following (line 542) | def check_following(self, value): class ParagraphChecker (line 561) | class ParagraphChecker(Instruction): method build_description (line 564) | def build_description(self, *, num_paragraphs=None): method get_instruction_args (line 584) | def get_instruction_args(self): method get_instruction_args_keys (line 588) | def get_instruction_args_keys(self): method check_following (line 592) | def check_following(self, value): class PostscriptChecker (line 616) | class PostscriptChecker(Instruction): method build_description (line 619) | def build_description(self, *, postscript_marker=None): method get_instruction_args (line 644) | def get_instruction_args(self): method get_instruction_args_keys (line 648) | def get_instruction_args_keys(self): method check_following (line 652) | def check_following(self, value): class RephraseChecker (line 674) | class RephraseChecker(Instruction): method build_description (line 677) | def build_description(self, *, original_message): method get_instruction_args (line 703) | def get_instruction_args(self): method get_instruction_args_keys (line 707) | def get_instruction_args_keys(self): method check_following (line 711) | def check_following(self, value): method is_change (line 733) | def is_change(self, response): method strip_changes (line 737) | def strip_changes(self, response): class KeywordChecker (line 742) | class KeywordChecker(Instruction): method build_description (line 745) | def build_description(self, *, keywords=None): method get_instruction_args (line 768) | def get_instruction_args(self): method get_instruction_args_keys (line 772) | def get_instruction_args_keys(self): method check_following (line 776) | def check_following(self, value): class KeywordFrequencyChecker (line 784) | class KeywordFrequencyChecker(Instruction): method build_description (line 787) | def build_description(self, *, keyword=None, frequency=None, relation=... method get_instruction_args (line 833) | def get_instruction_args(self): method get_instruction_args_keys (line 841) | def get_instruction_args_keys(self): method check_following (line 845) | def check_following(self, value): class NumberOfWords (line 855) | class NumberOfWords(Instruction): method build_description (line 858) | def build_description(self, *, num_words=None, relation=None): method get_instruction_args (line 896) | def get_instruction_args(self): method get_instruction_args_keys (line 900) | def get_instruction_args_keys(self): method check_following (line 904) | def check_following(self, value): class JsonFormat (line 914) | class JsonFormat(Instruction): method build_description (line 917) | def build_description(self): method get_instruction_args (line 924) | def get_instruction_args(self): method get_instruction_args_keys (line 928) | def get_instruction_args_keys(self): method check_following (line 932) | def check_following(self, value): class ParagraphFirstWordCheck (line 949) | class ParagraphFirstWordCheck(Instruction): method build_description (line 952) | def build_description( method get_instruction_args (line 998) | def get_instruction_args(self): method get_instruction_args_keys (line 1006) | def get_instruction_args_keys(self): method check_following (line 1010) | def check_following(self, value): class KeySentenceChecker (line 1056) | class KeySentenceChecker(Instruction): method build_description (line 1059) | def build_description(self, key_sentences=None, num_sentences=None): method get_instruction_args (line 1091) | def get_instruction_args(self): method get_instruction_args_keys (line 1098) | def get_instruction_args_keys(self): method check_following (line 1102) | def check_following(self, value): class ForbiddenWords (line 1113) | class ForbiddenWords(Instruction): method build_description (line 1116) | def build_description(self, forbidden_words=None): method get_instruction_args (line 1140) | def get_instruction_args(self): method get_instruction_args_keys (line 1144) | def get_instruction_args_keys(self): method check_following (line 1148) | def check_following(self, value): class RephraseParagraph (line 1156) | class RephraseParagraph(Instruction): method build_description (line 1159) | def build_description(self, *, original_paragraph, low, high): method get_instruction_args (line 1190) | def get_instruction_args(self): method get_instruction_args_keys (line 1198) | def get_instruction_args_keys(self): method check_following (line 1202) | def check_following(self, value): class TwoResponsesChecker (line 1216) | class TwoResponsesChecker(Instruction): method build_description (line 1219) | def build_description(self): method get_instruction_args (line 1227) | def get_instruction_args(self): method get_instruction_args_keys (line 1231) | def get_instruction_args_keys(self): method check_following (line 1235) | def check_following(self, value): class RepeatPromptThenAnswer (line 1258) | class RepeatPromptThenAnswer(Instruction): method build_description (line 1261) | def build_description(self, *, prompt_to_repeat=None): method get_instruction_args (line 1282) | def get_instruction_args(self): method get_instruction_args_keys (line 1285) | def get_instruction_args_keys(self): method check_following (line 1289) | def check_following(self, value): class EndChecker (line 1295) | class EndChecker(Instruction): method build_description (line 1298) | def build_description(self, *, end_phrase=None): method get_instruction_args (line 1318) | def get_instruction_args(self): method get_instruction_args_keys (line 1321) | def get_instruction_args_keys(self): method check_following (line 1325) | def check_following(self, value): class TitleChecker (line 1332) | class TitleChecker(Instruction): method build_description (line 1335) | def build_description(self): method get_instruction_args (line 1343) | def get_instruction_args(self): method get_instruction_args_keys (line 1346) | def get_instruction_args_keys(self): method check_following (line 1350) | def check_following(self, value): class LetterFrequencyChecker (line 1362) | class LetterFrequencyChecker(Instruction): method build_description (line 1365) | def build_description(self, *, letter=None, let_frequency=None, let_re... method get_instruction_args (line 1417) | def get_instruction_args(self): method get_instruction_args_keys (line 1425) | def get_instruction_args_keys(self): method check_following (line 1429) | def check_following(self, value): class CapitalLettersEnglishChecker (line 1440) | class CapitalLettersEnglishChecker(Instruction): method build_description (line 1443) | def build_description(self): method get_instruction_args (line 1450) | def get_instruction_args(self): method get_instruction_args_keys (line 1453) | def get_instruction_args_keys(self): method check_following (line 1457) | def check_following(self, value): class LowercaseLettersEnglishChecker (line 1471) | class LowercaseLettersEnglishChecker(Instruction): method build_description (line 1474) | def build_description(self): method get_instruction_args (line 1482) | def get_instruction_args(self): method get_instruction_args_keys (line 1485) | def get_instruction_args_keys(self): method check_following (line 1489) | def check_following(self, value): class CommaChecker (line 1503) | class CommaChecker(Instruction): method build_description (line 1506) | def build_description(self): method get_instruction_args (line 1513) | def get_instruction_args(self): method get_instruction_args_keys (line 1516) | def get_instruction_args_keys(self): method check_following (line 1520) | def check_following(self, value): class CapitalWordFrequencyChecker (line 1525) | class CapitalWordFrequencyChecker(Instruction): method build_description (line 1528) | def build_description( method get_instruction_args (line 1566) | def get_instruction_args(self): method get_instruction_args_keys (line 1573) | def get_instruction_args_keys(self): method check_following (line 1577) | def check_following(self, value): class QuotationChecker (line 1591) | class QuotationChecker(Instruction): method build_description (line 1594) | def build_description(self): method get_instruction_args (line 1601) | def get_instruction_args(self): method get_instruction_args_keys (line 1605) | def get_instruction_args_keys(self): method check_following (line 1609) | def check_following(self, value): FILE: lm_eval/tasks/ifeval/instructions_registry.py function conflict_make (line 153) | def conflict_make(conflicts): FILE: lm_eval/tasks/ifeval/instructions_util.py function download_nltk_resources (line 36) | def download_nltk_resources(): function split_into_sentences (line 1628) | def split_into_sentences(text): function count_words (line 1679) | def count_words(text): function _get_sentence_tokenizer (line 1688) | def _get_sentence_tokenizer(): function count_sentences (line 1692) | def count_sentences(text): function generate_keywords (line 1699) | def generate_keywords(num_keywords): FILE: lm_eval/tasks/ifeval/multilingual/instruction_utils/ca_instructions_util.py function lang_code_to_name (line 32) | def lang_code_to_name(lang_code: str): function split_into_sentences (line 46) | def split_into_sentences(text): function count_words (line 98) | def count_words(text): function tokenize_words (line 105) | def tokenize_words(text): function count_sentences (line 113) | def count_sentences(text): function generate_keywords (line 120) | def generate_keywords(num_keywords): FILE: lm_eval/tasks/ifeval/multilingual/instruction_utils/es_instructions_util.py function lang_code_to_name (line 32) | def lang_code_to_name(lang_code: str): function split_into_sentences (line 46) | def split_into_sentences(text): function count_words (line 98) | def count_words(text): function tokenize_words (line 105) | def tokenize_words(text): function count_sentences (line 113) | def count_sentences(text): function generate_keywords (line 120) | def generate_keywords(num_keywords): FILE: lm_eval/tasks/ifeval/multilingual/instructions/ca_instructions.py class Instruction (line 90) | class Instruction: method __init__ (line 93) | def __init__(self, instruction_id): method build_description (line 96) | def build_description(self, **kwargs): method get_instruction_args (line 99) | def get_instruction_args(self): method get_instruction_args_keys (line 102) | def get_instruction_args_keys(self): method check_following (line 105) | def check_following(self, value): class ResponseLanguageChecker (line 109) | class ResponseLanguageChecker(Instruction): method build_description (line 112) | def build_description(self, *, language = None): method get_instruction_args (line 133) | def get_instruction_args(self): method get_instruction_args_keys (line 137) | def get_instruction_args_keys(self): method check_following (line 141) | def check_following(self, value): class NumberOfSentences (line 162) | class NumberOfSentences(Instruction): method build_description (line 165) | def build_description(self, *, num_sentences = None, method get_instruction_args (line 201) | def get_instruction_args(self): method get_instruction_args_keys (line 206) | def get_instruction_args_keys(self): method check_following (line 210) | def check_following(self, value): class PlaceholderChecker (line 248) | class PlaceholderChecker(Instruction): method build_description (line 251) | def build_description(self, *, num_placeholders = None, method get_instruction_args (line 284) | def get_instruction_args(self): method get_instruction_args_keys (line 289) | def get_instruction_args_keys(self): method check_following (line 293) | def check_following(self, value): class BulletListChecker (line 312) | class BulletListChecker(Instruction): method build_description (line 315) | def build_description(self, *, num_bullets = None): method get_instruction_args (line 336) | def get_instruction_args(self): method get_instruction_args_keys (line 340) | def get_instruction_args_keys(self): method check_following (line 344) | def check_following(self, value): class ConstrainedResponseChecker (line 360) | class ConstrainedResponseChecker(Instruction): method build_description (line 363) | def build_description(self): method get_instruction_args (line 372) | def get_instruction_args(self): method get_instruction_args_keys (line 376) | def get_instruction_args_keys(self): method check_following (line 380) | def check_following(self, value): class ConstrainedStartChecker (line 398) | class ConstrainedStartChecker(Instruction): method build_description (line 401) | def build_description(self, *, starter = None): method get_instruction_args (line 419) | def get_instruction_args(self): method get_instruction_args_keys (line 423) | def get_instruction_args_keys(self): method check_following (line 427) | def check_following(self, value): class HighlightSectionChecker (line 443) | class HighlightSectionChecker(Instruction): method build_description (line 446) | def build_description(self, *, num_highlights = None, method get_instruction_args (line 479) | def get_instruction_args(self): method get_instruction_args_keys (line 484) | def get_instruction_args_keys(self): method check_following (line 488) | def check_following(self, value): class SectionChecker (line 516) | class SectionChecker(Instruction): method build_description (line 519) | def build_description(self, *, section_spliter = None, method get_instruction_args (line 563) | def get_instruction_args(self): method get_instruction_args_keys (line 569) | def get_instruction_args_keys(self): method check_following (line 573) | def check_following(self, value): class ParagraphChecker (line 596) | class ParagraphChecker(Instruction): method build_description (line 599) | def build_description(self, *, num_paragraphs = None): method get_instruction_args (line 618) | def get_instruction_args(self): method get_instruction_args_keys (line 622) | def get_instruction_args_keys(self): method check_following (line 626) | def check_following(self, value): class PostscriptChecker (line 650) | class PostscriptChecker(Instruction): method build_description (line 653) | def build_description(self, *, postscript_marker = None method get_instruction_args (line 675) | def get_instruction_args(self): method get_instruction_args_keys (line 679) | def get_instruction_args_keys(self): method check_following (line 683) | def check_following(self, value): class RephraseChecker (line 706) | class RephraseChecker(Instruction): method build_description (line 709) | def build_description(self, *, original_message): method get_instruction_args (line 731) | def get_instruction_args(self): method get_instruction_args_keys (line 735) | def get_instruction_args_keys(self): method check_following (line 739) | def check_following(self, value): method is_change (line 761) | def is_change(self, response): method strip_changes (line 765) | def strip_changes(self, response): class KeywordChecker (line 770) | class KeywordChecker(Instruction): method build_description (line 773) | def build_description(self, *, keywords = None method get_instruction_args (line 796) | def get_instruction_args(self): method get_instruction_args_keys (line 800) | def get_instruction_args_keys(self): method check_following (line 804) | def check_following(self, value): class KeywordFrequencyChecker (line 812) | class KeywordFrequencyChecker(Instruction): method build_description (line 815) | def build_description(self, *, keyword = None, method get_instruction_args (line 859) | def get_instruction_args(self): method get_instruction_args_keys (line 865) | def get_instruction_args_keys(self): method check_following (line 869) | def check_following(self, value): class NumberOfWords (line 880) | class NumberOfWords(Instruction): method build_description (line 883) | def build_description(self, *, num_words = None, method get_instruction_args (line 921) | def get_instruction_args(self): method get_instruction_args_keys (line 926) | def get_instruction_args_keys(self): method check_following (line 930) | def check_following(self, value): class JsonFormat (line 946) | class JsonFormat(Instruction): method build_description (line 949) | def build_description(self): method get_instruction_args (line 955) | def get_instruction_args(self): method get_instruction_args_keys (line 959) | def get_instruction_args_keys(self): method check_following (line 966) | def check_following(self, value): class ParagraphFirstWordCheck (line 983) | class ParagraphFirstWordCheck(Instruction): method build_description (line 986) | def build_description(self, num_paragraphs = None, method get_instruction_args (line 1030) | def get_instruction_args(self): method get_instruction_args_keys (line 1036) | def get_instruction_args_keys(self): method check_following (line 1040) | def check_following(self, value): class KeySentenceChecker (line 1095) | class KeySentenceChecker(Instruction): method build_description (line 1098) | def build_description(self, key_sentences = None, method get_instruction_args (line 1131) | def get_instruction_args(self): method get_instruction_args_keys (line 1136) | def get_instruction_args_keys(self): method check_following (line 1140) | def check_following(self, value): class ForbiddenWords (line 1151) | class ForbiddenWords(Instruction): method build_description (line 1154) | def build_description(self, forbidden_words = None method get_instruction_args (line 1180) | def get_instruction_args(self): method get_instruction_args_keys (line 1184) | def get_instruction_args_keys(self): method check_following (line 1188) | def check_following(self, value): class RephraseParagraph (line 1197) | class RephraseParagraph(Instruction): method build_description (line 1200) | def build_description(self, *, original_paragraph, low, high method get_instruction_args (line 1229) | def get_instruction_args(self): method get_instruction_args_keys (line 1235) | def get_instruction_args_keys(self): method check_following (line 1239) | def check_following(self, value): class TwoResponsesChecker (line 1253) | class TwoResponsesChecker(Instruction): method build_description (line 1256) | def build_description(self): method get_instruction_args (line 1264) | def get_instruction_args(self): method get_instruction_args_keys (line 1268) | def get_instruction_args_keys(self): method check_following (line 1272) | def check_following(self, value): class RepeatPromptThenAnswer (line 1295) | class RepeatPromptThenAnswer(Instruction): method build_description (line 1298) | def build_description(self, *, prompt_to_repeat = None): method get_instruction_args (line 1318) | def get_instruction_args(self): method get_instruction_args_keys (line 1321) | def get_instruction_args_keys(self): method check_following (line 1325) | def check_following(self, value): class EndChecker (line 1331) | class EndChecker(Instruction): method build_description (line 1334) | def build_description(self, *, end_phrase = None): method get_instruction_args (line 1353) | def get_instruction_args(self): method get_instruction_args_keys (line 1356) | def get_instruction_args_keys(self): method check_following (line 1360) | def check_following(self, value): class TitleChecker (line 1371) | class TitleChecker(Instruction): method build_description (line 1374) | def build_description(self): method get_instruction_args (line 1382) | def get_instruction_args(self): method get_instruction_args_keys (line 1385) | def get_instruction_args_keys(self): method check_following (line 1389) | def check_following(self, value): class LetterFrequencyChecker (line 1401) | class LetterFrequencyChecker(Instruction): method build_description (line 1404) | def build_description(self, *, letter = None, method get_instruction_args (line 1458) | def get_instruction_args(self): method get_instruction_args_keys (line 1464) | def get_instruction_args_keys(self): method check_following (line 1468) | def check_following(self, value): class CapitalLettersCatalanChecker (line 1479) | class CapitalLettersCatalanChecker(Instruction): method build_description (line 1482) | def build_description(self): method get_instruction_args (line 1489) | def get_instruction_args(self): method get_instruction_args_keys (line 1492) | def get_instruction_args_keys(self): method check_following (line 1496) | def check_following(self, value): class LowercaseLettersCatalanChecker (line 1521) | class LowercaseLettersCatalanChecker(Instruction): method build_description (line 1524) | def build_description(self): method get_instruction_args (line 1532) | def get_instruction_args(self): method get_instruction_args_keys (line 1535) | def get_instruction_args_keys(self): method check_following (line 1539) | def check_following(self, value): class CommaChecker (line 1553) | class CommaChecker(Instruction): method build_description (line 1556) | def build_description(self): method get_instruction_args (line 1563) | def get_instruction_args(self): method get_instruction_args_keys (line 1566) | def get_instruction_args_keys(self): method check_following (line 1570) | def check_following(self, value): class CapitalWordFrequencyChecker (line 1575) | class CapitalWordFrequencyChecker(Instruction): method build_description (line 1578) | def build_description( method get_instruction_args (line 1616) | def get_instruction_args(self): method get_instruction_args_keys (line 1623) | def get_instruction_args_keys(self): method check_following (line 1627) | def check_following(self, value): class QuotationChecker (line 1641) | class QuotationChecker(Instruction): method build_description (line 1644) | def build_description(self): method get_instruction_args (line 1651) | def get_instruction_args(self): method get_instruction_args_keys (line 1655) | def get_instruction_args_keys(self): method check_following (line 1659) | def check_following(self, value): class QuestionMarkChecker (line 1665) | class QuestionMarkChecker(Instruction): method build_description (line 1668) | def build_description(self): method get_instruction_args (line 1675) | def get_instruction_args(self): method get_instruction_args_keys (line 1679) | def get_instruction_args_keys(self): method check_following (line 1683) | def check_following(self, value): class ExclamationMarkChecker (line 1695) | class ExclamationMarkChecker(Instruction): method build_description (line 1698) | def build_description(self): method get_instruction_args (line 1705) | def get_instruction_args(self): method get_instruction_args_keys (line 1709) | def get_instruction_args_keys(self): method check_following (line 1713) | def check_following(self, value): class EnieChecker (line 1725) | class EnieChecker(Instruction): method build_description (line 1728) | def build_description( method get_instruction_args (line 1754) | def get_instruction_args(self): method get_instruction_args_keys (line 1760) | def get_instruction_args_keys(self): method check_following (line 1764) | def check_following(self, value): class DieresisChecker (line 1778) | class DieresisChecker(Instruction): method build_description (line 1781) | def build_description( method get_instruction_args (line 1807) | def get_instruction_args(self): method get_instruction_args_keys (line 1813) | def get_instruction_args_keys(self): method check_following (line 1817) | def check_following(self, value): class TildesChecker (line 1831) | class TildesChecker(Instruction): method build_description (line 1834) | def build_description(self, *, num_words = None, method get_instruction_args (line 1872) | def get_instruction_args(self): method get_instruction_args_keys (line 1877) | def get_instruction_args_keys(self): method check_following (line 1881) | def check_following(self, value): FILE: lm_eval/tasks/ifeval/multilingual/instructions/es_instructions.py class Instruction (line 92) | class Instruction: method __init__ (line 95) | def __init__(self, instruction_id): method build_description (line 98) | def build_description(self, **kwargs): method get_instruction_args (line 101) | def get_instruction_args(self): method get_instruction_args_keys (line 104) | def get_instruction_args_keys(self): method check_following (line 107) | def check_following(self, value): class ResponseLanguageChecker (line 111) | class ResponseLanguageChecker(Instruction): method build_description (line 114) | def build_description(self, *, language = None): method get_instruction_args (line 135) | def get_instruction_args(self): method get_instruction_args_keys (line 139) | def get_instruction_args_keys(self): method check_following (line 143) | def check_following(self, value): class NumberOfSentences (line 164) | class NumberOfSentences(Instruction): method build_description (line 167) | def build_description(self, *, num_sentences = None, method get_instruction_args (line 203) | def get_instruction_args(self): method get_instruction_args_keys (line 208) | def get_instruction_args_keys(self): method check_following (line 212) | def check_following(self, value): class PlaceholderChecker (line 250) | class PlaceholderChecker(Instruction): method build_description (line 253) | def build_description(self, *, num_placeholders = None, method get_instruction_args (line 286) | def get_instruction_args(self): method get_instruction_args_keys (line 291) | def get_instruction_args_keys(self): method check_following (line 295) | def check_following(self, value): class BulletListChecker (line 314) | class BulletListChecker(Instruction): method build_description (line 317) | def build_description(self, *, num_bullets = None): method get_instruction_args (line 338) | def get_instruction_args(self): method get_instruction_args_keys (line 342) | def get_instruction_args_keys(self): method check_following (line 346) | def check_following(self, value): class ConstrainedResponseChecker (line 362) | class ConstrainedResponseChecker(Instruction): method build_description (line 365) | def build_description(self): method get_instruction_args (line 374) | def get_instruction_args(self): method get_instruction_args_keys (line 378) | def get_instruction_args_keys(self): method check_following (line 382) | def check_following(self, value): class ConstrainedStartChecker (line 400) | class ConstrainedStartChecker(Instruction): method build_description (line 403) | def build_description(self, *, starter = None): method get_instruction_args (line 421) | def get_instruction_args(self): method get_instruction_args_keys (line 425) | def get_instruction_args_keys(self): method check_following (line 429) | def check_following(self, value): class HighlightSectionChecker (line 445) | class HighlightSectionChecker(Instruction): method build_description (line 448) | def build_description(self, *, num_highlights = None, method get_instruction_args (line 481) | def get_instruction_args(self): method get_instruction_args_keys (line 486) | def get_instruction_args_keys(self): method check_following (line 490) | def check_following(self, value): class SectionChecker (line 518) | class SectionChecker(Instruction): method build_description (line 521) | def build_description(self, *, section_spliter = None, method get_instruction_args (line 565) | def get_instruction_args(self): method get_instruction_args_keys (line 571) | def get_instruction_args_keys(self): method check_following (line 575) | def check_following(self, value): class ParagraphChecker (line 598) | class ParagraphChecker(Instruction): method build_description (line 601) | def build_description(self, *, num_paragraphs = None): method get_instruction_args (line 620) | def get_instruction_args(self): method get_instruction_args_keys (line 624) | def get_instruction_args_keys(self): method check_following (line 628) | def check_following(self, value): class PostscriptChecker (line 652) | class PostscriptChecker(Instruction): method build_description (line 655) | def build_description(self, *, postscript_marker = None method get_instruction_args (line 677) | def get_instruction_args(self): method get_instruction_args_keys (line 681) | def get_instruction_args_keys(self): method check_following (line 685) | def check_following(self, value): class RephraseChecker (line 708) | class RephraseChecker(Instruction): method build_description (line 711) | def build_description(self, *, original_message): method get_instruction_args (line 733) | def get_instruction_args(self): method get_instruction_args_keys (line 737) | def get_instruction_args_keys(self): method check_following (line 741) | def check_following(self, value): method is_change (line 763) | def is_change(self, response): method strip_changes (line 767) | def strip_changes(self, response): class KeywordChecker (line 772) | class KeywordChecker(Instruction): method build_description (line 775) | def build_description(self, *, keywords = None method get_instruction_args (line 798) | def get_instruction_args(self): method get_instruction_args_keys (line 802) | def get_instruction_args_keys(self): method check_following (line 806) | def check_following(self, value): class KeywordFrequencyChecker (line 814) | class KeywordFrequencyChecker(Instruction): method build_description (line 817) | def build_description(self, *, keyword = None, method get_instruction_args (line 861) | def get_instruction_args(self): method get_instruction_args_keys (line 867) | def get_instruction_args_keys(self): method check_following (line 871) | def check_following(self, value): class NumberOfWords (line 882) | class NumberOfWords(Instruction): method build_description (line 885) | def build_description(self, *, num_words = None, method get_instruction_args (line 923) | def get_instruction_args(self): method get_instruction_args_keys (line 928) | def get_instruction_args_keys(self): method check_following (line 932) | def check_following(self, value): class JsonFormat (line 948) | class JsonFormat(Instruction): method build_description (line 951) | def build_description(self): method get_instruction_args (line 957) | def get_instruction_args(self): method get_instruction_args_keys (line 961) | def get_instruction_args_keys(self): method check_following (line 968) | def check_following(self, value): class ParagraphFirstWordCheck (line 985) | class ParagraphFirstWordCheck(Instruction): method build_description (line 988) | def build_description(self, num_paragraphs = None, method get_instruction_args (line 1032) | def get_instruction_args(self): method get_instruction_args_keys (line 1038) | def get_instruction_args_keys(self): method check_following (line 1042) | def check_following(self, value): class KeySentenceChecker (line 1097) | class KeySentenceChecker(Instruction): method build_description (line 1100) | def build_description(self, key_sentences = None, method get_instruction_args (line 1133) | def get_instruction_args(self): method get_instruction_args_keys (line 1138) | def get_instruction_args_keys(self): method check_following (line 1142) | def check_following(self, value): class ForbiddenWords (line 1153) | class ForbiddenWords(Instruction): method build_description (line 1156) | def build_description(self, forbidden_words = None method get_instruction_args (line 1182) | def get_instruction_args(self): method get_instruction_args_keys (line 1186) | def get_instruction_args_keys(self): method check_following (line 1190) | def check_following(self, value): class RephraseParagraph (line 1199) | class RephraseParagraph(Instruction): method build_description (line 1202) | def build_description(self, *, original_paragraph, low, high method get_instruction_args (line 1231) | def get_instruction_args(self): method get_instruction_args_keys (line 1237) | def get_instruction_args_keys(self): method check_following (line 1241) | def check_following(self, value): class TwoResponsesChecker (line 1255) | class TwoResponsesChecker(Instruction): method build_description (line 1258) | def build_description(self): method get_instruction_args (line 1266) | def get_instruction_args(self): method get_instruction_args_keys (line 1270) | def get_instruction_args_keys(self): method check_following (line 1274) | def check_following(self, value): class RepeatPromptThenAnswer (line 1297) | class RepeatPromptThenAnswer(Instruction): method build_description (line 1300) | def build_description(self, *, prompt_to_repeat = None): method get_instruction_args (line 1320) | def get_instruction_args(self): method get_instruction_args_keys (line 1323) | def get_instruction_args_keys(self): method check_following (line 1327) | def check_following(self, value): class EndChecker (line 1333) | class EndChecker(Instruction): method build_description (line 1336) | def build_description(self, *, end_phrase = None): method get_instruction_args (line 1355) | def get_instruction_args(self): method get_instruction_args_keys (line 1358) | def get_instruction_args_keys(self): method check_following (line 1362) | def check_following(self, value): class TitleChecker (line 1373) | class TitleChecker(Instruction): method build_description (line 1376) | def build_description(self): method get_instruction_args (line 1384) | def get_instruction_args(self): method get_instruction_args_keys (line 1387) | def get_instruction_args_keys(self): method check_following (line 1391) | def check_following(self, value): class LetterFrequencyChecker (line 1403) | class LetterFrequencyChecker(Instruction): method build_description (line 1406) | def build_description(self, *, letter = None, method get_instruction_args (line 1460) | def get_instruction_args(self): method get_instruction_args_keys (line 1466) | def get_instruction_args_keys(self): method check_following (line 1470) | def check_following(self, value): class CapitalLettersSpanishChecker (line 1481) | class CapitalLettersSpanishChecker(Instruction): method build_description (line 1484) | def build_description(self): method get_instruction_args (line 1491) | def get_instruction_args(self): method get_instruction_args_keys (line 1494) | def get_instruction_args_keys(self): method check_following (line 1498) | def check_following(self, value): class LowercaseLettersSpanishChecker (line 1523) | class LowercaseLettersSpanishChecker(Instruction): method build_description (line 1526) | def build_description(self): method get_instruction_args (line 1534) | def get_instruction_args(self): method get_instruction_args_keys (line 1537) | def get_instruction_args_keys(self): method check_following (line 1541) | def check_following(self, value): class CommaChecker (line 1555) | class CommaChecker(Instruction): method build_description (line 1558) | def build_description(self): method get_instruction_args (line 1565) | def get_instruction_args(self): method get_instruction_args_keys (line 1568) | def get_instruction_args_keys(self): method check_following (line 1572) | def check_following(self, value): class CapitalWordFrequencyChecker (line 1577) | class CapitalWordFrequencyChecker(Instruction): method build_description (line 1580) | def build_description( method get_instruction_args (line 1618) | def get_instruction_args(self): method get_instruction_args_keys (line 1625) | def get_instruction_args_keys(self): method check_following (line 1629) | def check_following(self, value): class QuotationChecker (line 1643) | class QuotationChecker(Instruction): method build_description (line 1646) | def build_description(self): method get_instruction_args (line 1653) | def get_instruction_args(self): method get_instruction_args_keys (line 1657) | def get_instruction_args_keys(self): method check_following (line 1661) | def check_following(self, value): class QuestionMarkChecker (line 1667) | class QuestionMarkChecker(Instruction): method build_description (line 1670) | def build_description(self): method get_instruction_args (line 1677) | def get_instruction_args(self): method get_instruction_args_keys (line 1681) | def get_instruction_args_keys(self): method check_following (line 1685) | def check_following(self, value): class ExclamationMarkChecker (line 1697) | class ExclamationMarkChecker(Instruction): method build_description (line 1700) | def build_description(self): method get_instruction_args (line 1707) | def get_instruction_args(self): method get_instruction_args_keys (line 1711) | def get_instruction_args_keys(self): method check_following (line 1715) | def check_following(self, value): class EnieChecker (line 1727) | class EnieChecker(Instruction): method build_description (line 1730) | def build_description( method get_instruction_args (line 1756) | def get_instruction_args(self): method get_instruction_args_keys (line 1762) | def get_instruction_args_keys(self): method check_following (line 1766) | def check_following(self, value): class DieresisChecker (line 1780) | class DieresisChecker(Instruction): method build_description (line 1783) | def build_description( method get_instruction_args (line 1809) | def get_instruction_args(self): method get_instruction_args_keys (line 1815) | def get_instruction_args_keys(self): method check_following (line 1819) | def check_following(self, value): class TildesChecker (line 1833) | class TildesChecker(Instruction): method build_description (line 1836) | def build_description(self, *, num_words = None, method get_instruction_args (line 1874) | def get_instruction_args(self): method get_instruction_args_keys (line 1879) | def get_instruction_args_keys(self): method check_following (line 1883) | def check_following(self, value): FILE: lm_eval/tasks/ifeval/multilingual/utils.py class InputExample (line 7) | class InputExample: class OutputExample (line 15) | class OutputExample: function test_instruction_following_strict (line 23) | def test_instruction_following_strict( function test_instruction_following_loose (line 56) | def test_instruction_following_loose( function process_results (line 110) | def process_results(doc, results): function agg_inst_level_acc (line 130) | def agg_inst_level_acc(items): FILE: lm_eval/tasks/ifeval/utils.py class InputExample (line 8) | class InputExample: class OutputExample (line 16) | class OutputExample: function test_instruction_following_strict (line 24) | def test_instruction_following_strict( function test_instruction_following_loose (line 57) | def test_instruction_following_loose( function process_results (line 111) | def process_results(doc, results): function agg_inst_level_acc (line 131) | def agg_inst_level_acc(items): FILE: lm_eval/tasks/include/default/Albanian/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/default/Arabic/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/default/Armenian/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/default/Azerbaijani/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/default/Basque/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/default/Belarusian/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/default/Bengali/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/default/Bulgarian/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/default/Chinese/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/default/Croatian/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/default/Dutch/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/default/Estonian/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/default/Finnish/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/default/French/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/default/Georgian/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/default/German/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/default/Greek/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/default/Hebrew/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/default/Hindi/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/default/Hungarian/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/default/Indonesian/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/default/Italian/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/default/Japanese/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/default/Kazakh/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/default/Korean/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/default/Lithuanian/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/default/Malay/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/default/Malayalam/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/default/Nepali/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/default/North Macedonian/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/default/Persian/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/default/Polish/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/default/Portuguese/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/default/Russian/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/default/Serbian/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/default/Spanish/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/default/Tagalog/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/default/Tamil/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/default/Telugu/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/default/Turkish/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/default/Ukrainian/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/default/Urdu/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/default/Uzbek/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/default/Vietnamese/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_en/Albanian/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_en/Arabic/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_en/Armenian/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_en/Azerbaijani/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_en/Basque/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_en/Belarusian/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_en/Bengali/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_en/Bulgarian/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_en/Chinese/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_en/Croatian/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_en/Dutch/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_en/Estonian/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_en/Finnish/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_en/French/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_en/Georgian/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_en/German/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_en/Greek/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_en/Hebrew/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_en/Hindi/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_en/Hungarian/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_en/Indonesian/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_en/Italian/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_en/Japanese/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_en/Kazakh/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_en/Korean/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_en/Lithuanian/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_en/Malay/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_en/Malayalam/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_en/Nepali/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_en/North Macedonian/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_en/Persian/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_en/Polish/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_en/Portuguese/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_en/Russian/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_en/Serbian/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_en/Spanish/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_en/Tagalog/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_en/Tamil/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_en/Telugu/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_en/Turkish/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_en/Ukrainian/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_en/Urdu/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_en/Uzbek/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_en/Vietnamese/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_og/Albanian/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_og/Arabic/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_og/Armenian/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_og/Azerbaijani/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_og/Basque/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_og/Belarusian/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_og/Bengali/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_og/Bulgarian/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_og/Chinese/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_og/Croatian/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_og/Dutch/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_og/Estonian/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_og/Finnish/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_og/French/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_og/Georgian/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_og/German/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_og/Greek/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_og/Hebrew/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_og/Hindi/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_og/Hungarian/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_og/Indonesian/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_og/Italian/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_og/Japanese/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_og/Kazakh/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_og/Korean/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_og/Lithuanian/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_og/Malay/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_og/Malayalam/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_og/Nepali/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_og/North Macedonian/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_og/Persian/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_og/Polish/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_og/Portuguese/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_og/Russian/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_og/Serbian/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_og/Spanish/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_og/Tagalog/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_og/Tamil/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_og/Telugu/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_og/Turkish/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_og/Ukrainian/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_og/Urdu/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_og/Uzbek/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/include/few_shot_og/Vietnamese/utils.py function process_docs (line 19) | def process_docs(dataset, category): FILE: lm_eval/tasks/japanese_leaderboard/ja_leaderboard_jcommonsenseqa.py function process_docs (line 1) | def process_docs(dataset): FILE: lm_eval/tasks/japanese_leaderboard/ja_leaderboard_mgsm.py function _extract_answer (line 9) | def _extract_answer(completion): function process_results (line 25) | def process_results(doc, results): FILE: lm_eval/tasks/japanese_leaderboard/ja_leaderboard_xlsum.py function _missing_module_message (line 4) | def _missing_module_message(name): class MecabTokenizer (line 17) | class MecabTokenizer: method __init__ (line 18) | def __init__(self) -> None: method normalize_answer (line 21) | def normalize_answer(self, text): method tokenize (line 47) | def tokenize(self, text): function rouge2 (line 51) | def rouge2(items): function rouge2_agg (line 55) | def rouge2_agg(items): FILE: lm_eval/tasks/japanese_leaderboard/ja_leaderboard_xwinograd.py function process_docs (line 1) | def process_docs(dataset): FILE: lm_eval/tasks/jfinqa/test_jfinqa_utils.py class TestNormalize (line 16) | class TestNormalize: method test_normalize (line 32) | def test_normalize(self, text, expected): method test_normalize_comma_only_between_digits (line 35) | def test_normalize_comma_only_between_digits(self): class TestExtractAnswer (line 40) | class TestExtractAnswer: method test_extract_answer (line 55) | def test_extract_answer(self, text, expected): method test_extract_answer_multiline_with_answer (line 58) | def test_extract_answer_multiline_with_answer(self): class TestTryParseNumber (line 63) | class TestTryParseNumber: method test_parse_number (line 81) | def test_parse_number(self, text, expected): method test_parse_unparseable (line 86) | def test_parse_unparseable(self): method test_parse_negative (line 90) | def test_parse_negative(self): class TestNumericalMatch (line 96) | class TestNumericalMatch: method test_tolerance_constant (line 97) | def test_tolerance_constant(self): method test_exact_numerical_match (line 100) | def test_exact_numerical_match(self): method test_within_tolerance (line 103) | def test_within_tolerance(self): method test_outside_tolerance (line 107) | def test_outside_tolerance(self): method test_zero_gold (line 111) | def test_zero_gold(self): method test_non_numeric_fallback (line 115) | def test_non_numeric_fallback(self): method test_unit_match (line 119) | def test_unit_match(self): method test_same_unit_different_values (line 123) | def test_same_unit_different_values(self): class TestDocToText (line 128) | class TestDocToText: method test_complete_document (line 129) | def test_complete_document(self): method test_missing_optional_fields (line 145) | def test_missing_optional_fields(self): method test_no_table (line 151) | def test_no_table(self): class TestProcessResults (line 162) | class TestProcessResults: method test_exact_and_numerical_match (line 163) | def test_exact_and_numerical_match(self): method test_numerical_match_only (line 169) | def test_numerical_match_only(self): method test_no_match (line 175) | def test_no_match(self): method test_empty_results (line 181) | def test_empty_results(self): method test_japanese_text_match (line 186) | def test_japanese_text_match(self): FILE: lm_eval/tasks/jfinqa/utils.py function doc_to_text (line 25) | def doc_to_text(doc: dict[str, Any]) -> str: function process_results (line 56) | def process_results(doc: dict[str, Any], results: list[str]) -> dict[str... function _extract_answer (line 68) | def _extract_answer(text: str) -> str: function _normalize (line 77) | def _normalize(text: str) -> str: function _try_parse_number (line 110) | def _try_parse_number(text: str) -> float | None: function _numerical_match (line 136) | def _numerical_match( FILE: lm_eval/tasks/jsonschema_bench/metrics.py function is_json_schema_valid (line 20) | def is_json_schema_valid(schema: dict): function ipv4_check (line 41) | def ipv4_check(value): function ipv6_check (line 46) | def ipv6_check(value): function uuid_check (line 51) | def uuid_check(value): function schema_conform_with_format_checker (line 55) | def schema_conform_with_format_checker( function schema_compliance (line 76) | def schema_compliance(references: list[str], predictions: list[str]) -> ... function json_validity (line 101) | def json_validity(references: list[str], predictions: list[str]) -> bool: FILE: lm_eval/tasks/kobest/utils.py function copa_doc_to_text (line 4) | def copa_doc_to_text(doc: dict) -> str: function copa_doc_to_target (line 9) | def copa_doc_to_target(doc: dict) -> str: function copa_doc_to_choice (line 14) | def copa_doc_to_choice(doc: dict) -> list: function sentineg_doc_to_text (line 18) | def sentineg_doc_to_text(doc: dict): function wic_doc_to_text (line 22) | def wic_doc_to_text(doc: dict) -> str: function hellaswag_process_doc (line 26) | def hellaswag_process_doc(doc: Dataset) -> Dataset: function macro_f1_score (line 42) | def macro_f1_score(items): FILE: lm_eval/tasks/leaderboard/gpqa/utils.py function preprocess (line 7) | def preprocess(text): function process_docs (line 17) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset: FILE: lm_eval/tasks/leaderboard/ifeval/instructions.py class Instruction (line 110) | class Instruction: method __init__ (line 113) | def __init__(self, instruction_id): method build_description (line 116) | def build_description(self, **kwargs): method get_instruction_args (line 119) | def get_instruction_args(self): method get_instruction_args_keys (line 122) | def get_instruction_args_keys(self): method check_following (line 125) | def check_following(self, value): class ResponseLanguageChecker (line 129) | class ResponseLanguageChecker(Instruction): method build_description (line 132) | def build_description(self, *, language=None): method get_instruction_args (line 155) | def get_instruction_args(self): method get_instruction_args_keys (line 159) | def get_instruction_args_keys(self): method check_following (line 163) | def check_following(self, value): class NumberOfSentences (line 184) | class NumberOfSentences(Instruction): method build_description (line 187) | def build_description(self, *, num_sentences=None, relation=None): method get_instruction_args (line 225) | def get_instruction_args(self): method get_instruction_args_keys (line 232) | def get_instruction_args_keys(self): method check_following (line 236) | def check_following(self, value): class PlaceholderChecker (line 256) | class PlaceholderChecker(Instruction): method build_description (line 259) | def build_description(self, *, num_placeholders=None): method get_instruction_args (line 278) | def get_instruction_args(self): method get_instruction_args_keys (line 282) | def get_instruction_args_keys(self): method check_following (line 286) | def check_following(self, value): class BulletListChecker (line 301) | class BulletListChecker(Instruction): method build_description (line 304) | def build_description(self, *, num_bullets=None): method get_instruction_args (line 325) | def get_instruction_args(self): method get_instruction_args_keys (line 329) | def get_instruction_args_keys(self): method check_following (line 333) | def check_following(self, value): class ConstrainedResponseChecker (line 350) | class ConstrainedResponseChecker(Instruction): method build_description (line 353) | def build_description(self): method get_instruction_args (line 364) | def get_instruction_args(self): method get_instruction_args_keys (line 368) | def get_instruction_args_keys(self): method check_following (line 372) | def check_following(self, value): class ConstrainedStartChecker (line 389) | class ConstrainedStartChecker(Instruction): method build_description (line 392) | def build_description(self, *, starter=None): method get_instruction_args (line 411) | def get_instruction_args(self): method get_instruction_args_keys (line 415) | def get_instruction_args_keys(self): method check_following (line 419) | def check_following(self, value): class HighlightSectionChecker (line 436) | class HighlightSectionChecker(Instruction): method build_description (line 439) | def build_description(self, *, num_highlights=None): method get_instruction_args (line 460) | def get_instruction_args(self): method get_instruction_args_keys (line 464) | def get_instruction_args_keys(self): method check_following (line 468) | def check_following(self, value): class SectionChecker (line 492) | class SectionChecker(Instruction): method build_description (line 495) | def build_description(self, *, section_spliter=None, num_sections=None): method get_instruction_args (line 531) | def get_instruction_args(self): method get_instruction_args_keys (line 538) | def get_instruction_args_keys(self): method check_following (line 542) | def check_following(self, value): class ParagraphChecker (line 561) | class ParagraphChecker(Instruction): method build_description (line 564) | def build_description(self, *, num_paragraphs=None): method get_instruction_args (line 584) | def get_instruction_args(self): method get_instruction_args_keys (line 588) | def get_instruction_args_keys(self): method check_following (line 592) | def check_following(self, value): class PostscriptChecker (line 616) | class PostscriptChecker(Instruction): method build_description (line 619) | def build_description(self, *, postscript_marker=None): method get_instruction_args (line 644) | def get_instruction_args(self): method get_instruction_args_keys (line 648) | def get_instruction_args_keys(self): method check_following (line 652) | def check_following(self, value): class RephraseChecker (line 674) | class RephraseChecker(Instruction): method build_description (line 677) | def build_description(self, *, original_message): method get_instruction_args (line 703) | def get_instruction_args(self): method get_instruction_args_keys (line 707) | def get_instruction_args_keys(self): method check_following (line 711) | def check_following(self, value): method is_change (line 733) | def is_change(self, response): method strip_changes (line 737) | def strip_changes(self, response): class KeywordChecker (line 742) | class KeywordChecker(Instruction): method build_description (line 745) | def build_description(self, *, keywords=None): method get_instruction_args (line 768) | def get_instruction_args(self): method get_instruction_args_keys (line 772) | def get_instruction_args_keys(self): method check_following (line 776) | def check_following(self, value): class KeywordFrequencyChecker (line 784) | class KeywordFrequencyChecker(Instruction): method build_description (line 787) | def build_description(self, *, keyword=None, frequency=None, relation=... method get_instruction_args (line 833) | def get_instruction_args(self): method get_instruction_args_keys (line 841) | def get_instruction_args_keys(self): method check_following (line 845) | def check_following(self, value): class NumberOfWords (line 855) | class NumberOfWords(Instruction): method build_description (line 858) | def build_description(self, *, num_words=None, relation=None): method get_instruction_args (line 896) | def get_instruction_args(self): method get_instruction_args_keys (line 900) | def get_instruction_args_keys(self): method check_following (line 904) | def check_following(self, value): class JsonFormat (line 914) | class JsonFormat(Instruction): method build_description (line 917) | def build_description(self): method get_instruction_args (line 924) | def get_instruction_args(self): method get_instruction_args_keys (line 928) | def get_instruction_args_keys(self): method check_following (line 932) | def check_following(self, value): class ParagraphFirstWordCheck (line 949) | class ParagraphFirstWordCheck(Instruction): method build_description (line 952) | def build_description( method get_instruction_args (line 998) | def get_instruction_args(self): method get_instruction_args_keys (line 1006) | def get_instruction_args_keys(self): method check_following (line 1010) | def check_following(self, value): class KeySentenceChecker (line 1056) | class KeySentenceChecker(Instruction): method build_description (line 1059) | def build_description(self, key_sentences=None, num_sentences=None): method get_instruction_args (line 1091) | def get_instruction_args(self): method get_instruction_args_keys (line 1098) | def get_instruction_args_keys(self): method check_following (line 1102) | def check_following(self, value): class ForbiddenWords (line 1113) | class ForbiddenWords(Instruction): method build_description (line 1116) | def build_description(self, forbidden_words=None): method get_instruction_args (line 1140) | def get_instruction_args(self): method get_instruction_args_keys (line 1144) | def get_instruction_args_keys(self): method check_following (line 1148) | def check_following(self, value): class RephraseParagraph (line 1156) | class RephraseParagraph(Instruction): method build_description (line 1159) | def build_description(self, *, original_paragraph, low, high): method get_instruction_args (line 1190) | def get_instruction_args(self): method get_instruction_args_keys (line 1198) | def get_instruction_args_keys(self): method check_following (line 1202) | def check_following(self, value): class TwoResponsesChecker (line 1216) | class TwoResponsesChecker(Instruction): method build_description (line 1219) | def build_description(self): method get_instruction_args (line 1227) | def get_instruction_args(self): method get_instruction_args_keys (line 1231) | def get_instruction_args_keys(self): method check_following (line 1235) | def check_following(self, value): class RepeatPromptThenAnswer (line 1258) | class RepeatPromptThenAnswer(Instruction): method build_description (line 1261) | def build_description(self, *, prompt_to_repeat=None): method get_instruction_args (line 1282) | def get_instruction_args(self): method get_instruction_args_keys (line 1285) | def get_instruction_args_keys(self): method check_following (line 1289) | def check_following(self, value): class EndChecker (line 1295) | class EndChecker(Instruction): method build_description (line 1298) | def build_description(self, *, end_phrase=None): method get_instruction_args (line 1318) | def get_instruction_args(self): method get_instruction_args_keys (line 1321) | def get_instruction_args_keys(self): method check_following (line 1325) | def check_following(self, value): class TitleChecker (line 1332) | class TitleChecker(Instruction): method build_description (line 1335) | def build_description(self): method get_instruction_args (line 1343) | def get_instruction_args(self): method get_instruction_args_keys (line 1346) | def get_instruction_args_keys(self): method check_following (line 1350) | def check_following(self, value): class LetterFrequencyChecker (line 1362) | class LetterFrequencyChecker(Instruction): method build_description (line 1365) | def build_description(self, *, letter=None, let_frequency=None, let_re... method get_instruction_args (line 1417) | def get_instruction_args(self): method get_instruction_args_keys (line 1425) | def get_instruction_args_keys(self): method check_following (line 1429) | def check_following(self, value): class CapitalLettersEnglishChecker (line 1440) | class CapitalLettersEnglishChecker(Instruction): method build_description (line 1443) | def build_description(self): method get_instruction_args (line 1450) | def get_instruction_args(self): method get_instruction_args_keys (line 1453) | def get_instruction_args_keys(self): method check_following (line 1457) | def check_following(self, value): class LowercaseLettersEnglishChecker (line 1471) | class LowercaseLettersEnglishChecker(Instruction): method build_description (line 1474) | def build_description(self): method get_instruction_args (line 1482) | def get_instruction_args(self): method get_instruction_args_keys (line 1485) | def get_instruction_args_keys(self): method check_following (line 1489) | def check_following(self, value): class CommaChecker (line 1503) | class CommaChecker(Instruction): method build_description (line 1506) | def build_description(self): method get_instruction_args (line 1513) | def get_instruction_args(self): method get_instruction_args_keys (line 1516) | def get_instruction_args_keys(self): method check_following (line 1520) | def check_following(self, value): class CapitalWordFrequencyChecker (line 1525) | class CapitalWordFrequencyChecker(Instruction): method build_description (line 1528) | def build_description( method get_instruction_args (line 1566) | def get_instruction_args(self): method get_instruction_args_keys (line 1573) | def get_instruction_args_keys(self): method check_following (line 1577) | def check_following(self, value): class QuotationChecker (line 1591) | class QuotationChecker(Instruction): method build_description (line 1594) | def build_description(self): method get_instruction_args (line 1601) | def get_instruction_args(self): method get_instruction_args_keys (line 1605) | def get_instruction_args_keys(self): method check_following (line 1609) | def check_following(self, value): FILE: lm_eval/tasks/leaderboard/ifeval/instructions_registry.py function conflict_make (line 153) | def conflict_make(conflicts): FILE: lm_eval/tasks/leaderboard/ifeval/instructions_util.py function download_nltk_resources (line 34) | def download_nltk_resources(): function split_into_sentences (line 1623) | def split_into_sentences(text): function count_words (line 1674) | def count_words(text): function _get_sentence_tokenizer (line 1683) | def _get_sentence_tokenizer(): function count_sentences (line 1687) | def count_sentences(text): function generate_keywords (line 1694) | def generate_keywords(num_keywords): FILE: lm_eval/tasks/leaderboard/ifeval/utils.py class InputExample (line 8) | class InputExample: class OutputExample (line 16) | class OutputExample: function test_instruction_following_strict (line 24) | def test_instruction_following_strict( function test_instruction_following_loose (line 57) | def test_instruction_following_loose( function process_results (line 111) | def process_results(doc, results): function agg_inst_level_acc (line 131) | def agg_inst_level_acc(items): FILE: lm_eval/tasks/leaderboard/math/utils.py function doc_to_text (line 26) | def doc_to_text(doc: dict) -> str: function process_docs (line 30) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset: function list_fewshot_samples (line 44) | def list_fewshot_samples() -> list[dict]: function process_results (line 73) | def process_results(doc: dict, results: List[str]) -> Dict[str, int]: function process_result_v1 (line 94) | def process_result_v1(doc: dict, candidates: str) -> int: function last_boxed_only_string (line 108) | def last_boxed_only_string(string: str) -> str: function remove_boxed (line 138) | def remove_boxed(s: str) -> str: class timeout (line 154) | class timeout: method __init__ (line 155) | def __init__(self, seconds=1, error_message="Timeout"): method handle_timeout (line 159) | def handle_timeout(self, signum, frame): method __enter__ (line 162) | def __enter__(self): method __exit__ (line 166) | def __exit__(self, type, value, traceback): function is_equiv (line 170) | def is_equiv(x1: str, x2: str) -> bool: function get_unnormalized_answer (line 214) | def get_unnormalized_answer(text: str) -> str: function normalize_final_answer (line 285) | def normalize_final_answer(final_answer: str) -> str: FILE: lm_eval/tasks/leaderboard/mmlu_pro/utils.py function doc_to_text (line 4) | def doc_to_text(doc): function doc_to_choice (line 14) | def doc_to_choice(doc): FILE: lm_eval/tasks/leaderboard/musr/utils.py function doc_to_choice (line 4) | def doc_to_choice(doc): function doc_to_text (line 14) | def doc_to_text(doc): FILE: lm_eval/tasks/libra/utils.py class PredictionResult (line 20) | class PredictionResult: function filter_dataset_by_page_lengths (line 26) | def filter_dataset_by_page_lengths(*args, **kwargs) -> Dict[str, dataset... function normalize_answer (line 57) | def normalize_answer(sentence: str) -> str: function process_results (line 81) | def process_results(doc: List, results: List[str]) -> Dict: function exact_match_score (line 100) | def exact_match_score(prediction: str, ground_truth: str) -> float: function f1_score (line 107) | def f1_score(prediction: str, ground_truth: str) -> float: function count_score (line 118) | def count_score(prediction: str, ground_truth: str) -> float: function aggregate_results (line 128) | def aggregate_results( function aggregate_results_em (line 161) | def aggregate_results_em(results: List[PredictionResult]) -> Dict[str, f... function aggregate_results_f1 (line 165) | def aggregate_results_f1(results: List[PredictionResult]) -> Dict[str, f... function aggregate_results_count_score (line 169) | def aggregate_results_count_score(results: List[PredictionResult]) -> Di... FILE: lm_eval/tasks/lingoly/script.py function clean_answer (line 6) | def clean_answer(answer: str): function safe_exact (line 31) | def safe_exact(references: list[str], predictions: list[str]): function parse_str_list_score (line 42) | def parse_str_list_score(model, correct, scoring_func): function exact_match (line 91) | def exact_match(references: list[str], predictions: list[str]): function aggregate_scores (line 124) | def aggregate_scores(input): function aggregate_metrics (line 128) | def aggregate_metrics( FILE: lm_eval/tasks/lingoly/utils.py function load_questionsheet (line 6) | def load_questionsheet(qsheet: dict, no_context: bool = False): function format_answers (line 31) | def format_answers(questionpart_ns: list[str], answers: list[str]): function load_question (line 43) | def load_question( function load_all_questions (line 77) | def load_all_questions( FILE: lm_eval/tasks/llama3/instruct/arc_challenge/utils.py function process_docs (line 4) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset: FILE: lm_eval/tasks/llama3/instruct/mmlu_de/utils.py function process_docs (line 6) | def process_docs(dataset: datasets.Dataset, subtask) -> datasets.Dataset: FILE: lm_eval/tasks/llama3/instruct/mmlu_es/utils.py function process_docs (line 6) | def process_docs(dataset: datasets.Dataset, subtask) -> datasets.Dataset: FILE: lm_eval/tasks/llama3/instruct/mmlu_fr/utils.py function process_docs (line 6) | def process_docs(dataset: datasets.Dataset, subtask) -> datasets.Dataset: FILE: lm_eval/tasks/llama3/instruct/mmlu_hi/utils.py function process_docs (line 6) | def process_docs(dataset: datasets.Dataset, subtask) -> datasets.Dataset: FILE: lm_eval/tasks/llama3/instruct/mmlu_it/utils.py function process_docs (line 6) | def process_docs(dataset: datasets.Dataset, subtask) -> datasets.Dataset: FILE: lm_eval/tasks/llama3/instruct/mmlu_pro/utils.py function process_docs (line 5) | def process_docs(dataset, subject): function fewshot_to_text (line 9) | def fewshot_to_text(example): FILE: lm_eval/tasks/llama3/instruct/mmlu_pt/utils.py function process_docs (line 6) | def process_docs(dataset: datasets.Dataset, subtask) -> datasets.Dataset: FILE: lm_eval/tasks/llama3/instruct/mmlu_th/utils.py function process_docs (line 6) | def process_docs(dataset: datasets.Dataset, subtask) -> datasets.Dataset: FILE: lm_eval/tasks/logiqa/utils_logiqa.py function doc_to_text (line 2) | def doc_to_text(doc) -> str: function doc_to_target (line 22) | def doc_to_target(doc) -> int: FILE: lm_eval/tasks/logiqa2/utils_logiqa2.py function doc_to_text (line 2) | def doc_to_text(doc) -> str: FILE: lm_eval/tasks/longbench/_generate_config.py function parse_args (line 139) | def parse_args(): FILE: lm_eval/tasks/longbench/metrics.py function normalize_answer (line 40) | def normalize_answer(s: str) -> str: function normalize_zh_answer (line 59) | def normalize_zh_answer(s: str) -> str: function count_score (line 76) | def count_score(prediction: str, ground_truth: str, **kwargs): function get_count_score (line 86) | def get_count_score(doc: dict, results: list[str], **kwargs): function retrieval_score (line 95) | def retrieval_score(prediction: str, ground_truth: str, **kwargs): function get_retrieval_score (line 108) | def get_retrieval_score(doc: dict, results: list[str], **kwargs): function retrieval_zh_score (line 117) | def retrieval_zh_score(prediction: str, ground_truth: str, **kwargs): function get_retrieval_zh_score (line 130) | def get_retrieval_zh_score(doc: dict, results: list[str], **kwargs): function code_sim_score (line 139) | def code_sim_score(prediction: str, ground_truth: str, **kwargs): function get_code_sim_score (line 149) | def get_code_sim_score(doc: dict, results: list[str], **kwargs): function classification_score (line 158) | def classification_score(prediction: str, ground_truth: str, **kwargs): function get_classification_score (line 174) | def get_classification_score(doc: dict, results: list[str]) -> dict: function rouge_score (line 185) | def rouge_score(predictions: str, ground_truth: str, **kwargs) -> float: function get_rouge_score (line 197) | def get_rouge_score(doc: dict, results: list[str], **kwargs): function rouge_zh_score (line 206) | def rouge_zh_score(prediction: str, ground_truth: str, **kwargs): function get_rouge_zh_score (line 213) | def get_rouge_zh_score(doc, results, **kwargs): function f1_score (line 222) | def f1_score(prediction: Union[str, list], ground_truth: Union[str, list... function get_f1_score (line 233) | def get_f1_score(doc: dict, results: list[str], **kwargs): function qa_f1_score (line 242) | def qa_f1_score(prediction: str, ground_truth: str, **kwargs): function qa_f1_zh_score (line 251) | def qa_f1_zh_score(prediction: str, ground_truth: str, **kwargs): function get_qa_f1_score (line 261) | def get_qa_f1_score(doc: dict, results: list[str], **kwargs): function get_qa_f1_zh_score (line 270) | def get_qa_f1_zh_score(doc: dict, results: list[str], **kwargs): function get_qa_f1_with_score (line 284) | def get_qa_f1_with_score(doc: dict, results: list[str], **kwargs): function get_qa_f1_zh_with_score (line 290) | def get_qa_f1_zh_with_score(doc: dict, results: list[str], **kwargs): function get_rouge_with_score (line 296) | def get_rouge_with_score(doc: dict, results: list[str], **kwargs): function get_rouge_zh_with_score (line 302) | def get_rouge_zh_with_score(doc: dict, results: list[str], **kwargs): function get_classification_with_score (line 308) | def get_classification_with_score(doc: dict, results: list[str], **kwargs): function get_count_with_score (line 314) | def get_count_with_score(doc: dict, results: list[str], **kwargs): function get_retrieval_with_score (line 320) | def get_retrieval_with_score(doc: dict, results: list[str], **kwargs): function get_retrieval_zh_with_score (line 326) | def get_retrieval_zh_with_score(doc: dict, results: list[str], **kwargs): function get_code_sim_with_score (line 332) | def get_code_sim_with_score(doc: dict, results: list[str], **kwargs): FILE: lm_eval/tasks/longbench/utils.py function scorer_e (line 50) | def scorer_e(dataset, predictions, answers, lengths, all_classes): function scorer (line 74) | def scorer(dataset, predictions, answers, all_classes): FILE: lm_eval/tasks/manager.py class TaskDict (line 23) | class TaskDict(TypedDict): class TaskManager (line 37) | class TaskManager: method __init__ (line 53) | def __init__( method all_tasks (line 109) | def all_tasks(self) -> list[str]: method all_groups (line 114) | def all_groups(self) -> list[str]: method all_subtasks (line 119) | def all_subtasks(self) -> list[str]: method all_tags (line 124) | def all_tags(self) -> list[str]: method task_index (line 129) | def task_index(self) -> dict[str, Entry]: method _entry (line 134) | def _entry(self, name: str) -> Entry | None: method _load_spec (line 138) | def _load_spec(self, spec: str | dict[str, Any]) -> Task | Group | lis... method load (line 179) | def load( method load_task_or_group (line 242) | def load_task_or_group(self, task_list: str | list[str | dict]) -> dict: method _check_duplicates (line 283) | def _check_duplicates(built: list[Task | Group]) -> None: method match_tasks (line 309) | def match_tasks(self, task_list: list[str]) -> list[str]: method list_all_tasks (line 313) | def list_all_tasks( FILE: lm_eval/tasks/mathqa/utils.py function doc_to_choice (line 4) | def doc_to_choice(doc): FILE: lm_eval/tasks/mbpp/utils.py function pass_at_1 (line 18) | def pass_at_1( function extract_code_blocks (line 32) | def extract_code_blocks(text: str) -> str: function build_predictions (line 47) | def build_predictions(resps: list[list[str]], docs: list[dict]) -> list[... function list_fewshot_samples (line 51) | def list_fewshot_samples(): FILE: lm_eval/tasks/med_concepts_qa/_generate_configs.py function generate_yaml_content (line 6) | def generate_yaml_content(vocab_name: str, level: str): function generate_yaml_files (line 17) | def generate_yaml_files( FILE: lm_eval/tasks/med_prescriptions/utils.py function get_full_med_list (line 2048) | def get_full_med_list(): function process_docs (line 2052) | def process_docs(dataset: datasets.Dataset): function contains_indian_characters (line 2088) | def contains_indian_characters(text): function check_list_for_indian_characters (line 2110) | def check_list_for_indian_characters(string_list): function doc_to_text_easy (line 2118) | def doc_to_text_easy(doc) -> str: function doc_to_text_hard (line 2134) | def doc_to_text_hard(doc) -> str: function get_diagnosis (line 2150) | def get_diagnosis(doc): function get_medicines_list (line 2178) | def get_medicines_list(doc): function doc_to_target (line 2250) | def doc_to_target(doc): function doc_to_target_obtain (line 2254) | def doc_to_target_obtain(doc): function doc_to_choice_easy (line 2263) | def doc_to_choice_easy(doc): function doc_to_choice_hard (line 2271) | def doc_to_choice_hard(doc): FILE: lm_eval/tasks/med_text_classification/utils.py function process_docs_hard (line 6) | def process_docs_hard(dataset: datasets.Dataset): function process_docs (line 10) | def process_docs(dataset: datasets.Dataset): function doc_to_choice_easy (line 23) | def doc_to_choice_easy(doc): function doc_to_text_easy (line 33) | def doc_to_text_easy(doc) -> str: function doc_to_target_easy (line 51) | def doc_to_target_easy(doc): function doc_to_text_hard (line 55) | def doc_to_text_hard(doc) -> str: function doc_to_choice_hard (line 67) | def doc_to_choice_hard(doc): function doc_to_target_hard (line 113) | def doc_to_target_hard(doc): FILE: lm_eval/tasks/meddialog/utils.py function doc_eval (line 24) | def doc_eval(pred, refs): function doc_to_text_raw (line 67) | def doc_to_text_raw(doc) -> str: function doc_to_target_raw (line 71) | def doc_to_target_raw(doc) -> str: function process_results_gen_raw (line 75) | def process_results_gen_raw(doc, results): function doc_to_text_qsumm (line 100) | def doc_to_text_qsumm(doc) -> str: function doc_to_target_qsumm (line 104) | def doc_to_target_qsumm(doc) -> str: function process_results_gen_qsumm (line 108) | def process_results_gen_qsumm(doc, results): FILE: lm_eval/tasks/meddialog/utils_perplexity.py function process_results_qsumm (line 6) | def process_results_qsumm(doc, results): function process_results_raw (line 17) | def process_results_raw(doc, results): FILE: lm_eval/tasks/mediqa_qa2019/utils.py function doc_eval (line 24) | def doc_eval(pred, refs): function doc_to_text (line 67) | def doc_to_text(doc) -> str: function doc_to_target (line 71) | def doc_to_target(doc) -> str: function process_results_gen (line 75) | def process_results_gen(doc, results): FILE: lm_eval/tasks/mediqa_qa2019/utils_perplexity.py function doc_to_target (line 5) | def doc_to_target(doc) -> str: function process_results (line 9) | def process_results(doc, results): FILE: lm_eval/tasks/medmcqa/utils_medmcqa.py function doc_to_text (line 2) | def doc_to_text(doc) -> str: FILE: lm_eval/tasks/medqa/preprocess_medqa.py function doc_to_text (line 1) | def doc_to_text(doc) -> str: function doc_to_target (line 12) | def doc_to_target(doc) -> int: FILE: lm_eval/tasks/medtext/utils.py function doc_eval (line 24) | def doc_eval(pred, refs): function doc_to_text (line 67) | def doc_to_text(doc) -> str: function doc_to_target (line 71) | def doc_to_target(doc) -> str: function process_results (line 75) | def process_results(doc, results): FILE: lm_eval/tasks/medtext/utils_perplexity.py function process_results (line 6) | def process_results(doc, results): FILE: lm_eval/tasks/meqsum/utils.py function doc_to_text (line 24) | def doc_to_text(doc) -> str: function doc_to_target (line 33) | def doc_to_target(doc) -> str: function process_results_gen (line 37) | def process_results_gen(doc, results): FILE: lm_eval/tasks/metabench/process_docs.py function hash_string (line 7) | def hash_string(string: str) -> str: function process_arc (line 11) | def process_arc(dataset: datasets.Dataset) -> datasets.Dataset: function process_gsm8k (line 33) | def process_gsm8k(dataset: datasets.Dataset) -> datasets.Dataset: function process_hellaswag (line 52) | def process_hellaswag(dataset: datasets.Dataset) -> datasets.Dataset: function process_mmlu (line 109) | def process_mmlu(dataset: datasets.Dataset) -> datasets.Dataset: function process_truthfulqa (line 136) | def process_truthfulqa(dataset: datasets.Dataset) -> datasets.Dataset: function process_winogrande (line 144) | def process_winogrande(dataset: datasets.Dataset) -> datasets.Dataset: function winogrande_doc_to_text (line 173) | def winogrande_doc_to_text(doc): # Mirrored from the winogrande task function winogrande_doc_to_target (line 178) | def winogrande_doc_to_target(doc): # Mirrored from the winogrande task function winogrande_doc_to_choice (line 183) | def winogrande_doc_to_choice(doc): # Mirrored from the winogrande task FILE: lm_eval/tasks/metabench/process_docs_permute.py function hash_string (line 8) | def hash_string(string: str) -> str: function process_arc (line 12) | def process_arc(dataset: datasets.Dataset) -> datasets.Dataset: function process_hellaswag (line 44) | def process_hellaswag(dataset: datasets.Dataset) -> datasets.Dataset: function process_mmlu (line 109) | def process_mmlu(dataset: datasets.Dataset) -> datasets.Dataset: function process_truthfulqa (line 147) | def process_truthfulqa(dataset: datasets.Dataset) -> datasets.Dataset: function process_winogrande (line 170) | def process_winogrande(dataset: datasets.Dataset) -> datasets.Dataset: function winogrande_doc_to_text (line 213) | def winogrande_doc_to_text(doc): # Mirrored from the winogrande task function winogrande_doc_to_target (line 218) | def winogrande_doc_to_target(doc): # Mirrored from the winogrande task function winogrande_doc_to_choice (line 223) | def winogrande_doc_to_choice(doc): # Mirrored from the winogrande task FILE: lm_eval/tasks/mgsm/utils.py function add_regex_pattern (line 97) | def add_regex_pattern(regex_pattern): function gen_lang_yamls (line 131) | def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None: function main (line 204) | def main() -> None: FILE: lm_eval/tasks/mimic_repsum/utils.py function doc_eval (line 28) | def doc_eval(pred, refs): function doc_to_text (line 74) | def doc_to_text(doc) -> str: function doc_to_target (line 101) | def doc_to_target(doc) -> str: function is_non_str_iterable (line 123) | def is_non_str_iterable(obj): function process_results (line 127) | def process_results(doc, results): FILE: lm_eval/tasks/mimic_repsum/utils_perplexity.py function process_results (line 6) | def process_results(doc, results): FILE: lm_eval/tasks/minerva_math/utils.py function doc_to_text (line 29) | def doc_to_text(doc: dict) -> str: function process_docs (line 33) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset: function list_fewshot_samples (line 49) | def list_fewshot_samples() -> list[dict]: function process_results (line 74) | def process_results(doc: dict, results: list[str]) -> dict[str, int]: function last_boxed_only_string (line 99) | def last_boxed_only_string(string: str) -> Optional[str]: function remove_boxed (line 129) | def remove_boxed(s: str) -> str: class timeout (line 143) | class timeout: method __init__ (line 144) | def __init__(self, seconds=1, error_message="Timeout"): method handle_timeout (line 148) | def handle_timeout(self, signum, frame): method __enter__ (line 151) | def __enter__(self): method __exit__ (line 155) | def __exit__(self, type, value, traceback): function is_equiv (line 159) | def is_equiv(x1: str, x2: str) -> bool: function get_unnormalized_answer (line 202) | def get_unnormalized_answer(text: str) -> str: function normalize_final_answer (line 274) | def normalize_final_answer(final_answer: str) -> str: FILE: lm_eval/tasks/mlqa/generate_tasks.py function main (line 23) | def main() -> None: FILE: lm_eval/tasks/mlqa/utils.py function whitespace_tokenize (line 24) | def whitespace_tokenize(text): function mixed_segmentation (line 28) | def mixed_segmentation(text): function normalize_answer (line 48) | def normalize_answer(s, lang): function f1_score (line 91) | def f1_score(prediction, ground_truth, lang): function exact_match_score (line 104) | def exact_match_score(prediction, ground_truth, lang): function metric_max_over_ground_truths (line 108) | def metric_max_over_ground_truths(metric_fn, prediction, ground_truths, ... function process_docs (line 116) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset: function process_results_lang (line 129) | def process_results_lang(doc, results, lang): function process_results_en (line 140) | def process_results_en(doc, results): function process_results_es (line 144) | def process_results_es(doc, results): function process_results_hi (line 148) | def process_results_hi(doc, results): function process_results_vi (line 152) | def process_results_vi(doc, results): function process_results_de (line 156) | def process_results_de(doc, results): function process_results_ar (line 160) | def process_results_ar(doc, results): function process_results_zh (line 164) | def process_results_zh(doc, results): FILE: lm_eval/tasks/mmlu-pro-plus/utils.py function format_cot_example (line 24) | def format_cot_example(example, including_answer=True): function process_docs (line 46) | def process_docs(dataset, subject): FILE: lm_eval/tasks/mmlu/_generate_configs.py function parse_args (line 78) | def parse_args(): FILE: lm_eval/tasks/mmlu/flan_cot_zeroshot/utils.py class MultiChoiceRegexFilter (line 8) | class MultiChoiceRegexFilter(RegexFilter): method __init__ (line 11) | def __init__( method apply (line 34) | def apply(self, resps, docs): FILE: lm_eval/tasks/mmlu/flan_n_shot/generative/utils.py class MultiChoiceRegexFilter (line 8) | class MultiChoiceRegexFilter(RegexFilter): method __init__ (line 11) | def __init__( method apply (line 34) | def apply(self, resps, docs): FILE: lm_eval/tasks/mmlu_pro/utils.py function format_cot_example (line 7) | def format_cot_example(example, including_answer=True): function process_docs (line 34) | def process_docs(dataset, subject): FILE: lm_eval/tasks/mmlu_prox/af/utils.py function format_cot_example (line 32) | def format_cot_example(example, including_answer=True): function process_docs (line 53) | def process_docs(dataset, subject): FILE: lm_eval/tasks/mmlu_prox/ar/utils.py function format_cot_example (line 32) | def format_cot_example(example, including_answer=True): function process_docs (line 53) | def process_docs(dataset, subject): FILE: lm_eval/tasks/mmlu_prox/bn/utils.py function format_cot_example (line 32) | def format_cot_example(example, including_answer=True): function process_docs (line 53) | def process_docs(dataset, subject): FILE: lm_eval/tasks/mmlu_prox/cs/utils.py function format_cot_example (line 32) | def format_cot_example(example, including_answer=True): function process_docs (line 53) | def process_docs(dataset, subject): FILE: lm_eval/tasks/mmlu_prox/de/utils.py function format_cot_example (line 32) | def format_cot_example(example, including_answer=True): function process_docs (line 53) | def process_docs(dataset, subject): FILE: lm_eval/tasks/mmlu_prox/en/utils.py function format_cot_example (line 32) | def format_cot_example(example, including_answer=True): function process_docs (line 53) | def process_docs(dataset, subject): FILE: lm_eval/tasks/mmlu_prox/es/utils.py function format_cot_example (line 32) | def format_cot_example(example, including_answer=True): function process_docs (line 53) | def process_docs(dataset, subject): FILE: lm_eval/tasks/mmlu_prox/fr/utils.py function format_cot_example (line 32) | def format_cot_example(example, including_answer=True): function process_docs (line 53) | def process_docs(dataset, subject): FILE: lm_eval/tasks/mmlu_prox/hi/utils.py function format_cot_example (line 32) | def format_cot_example(example, including_answer=True): function process_docs (line 53) | def process_docs(dataset, subject): FILE: lm_eval/tasks/mmlu_prox/hu/utils.py function format_cot_example (line 32) | def format_cot_example(example, including_answer=True): function process_docs (line 53) | def process_docs(dataset, subject): FILE: lm_eval/tasks/mmlu_prox/id/utils.py function format_cot_example (line 32) | def format_cot_example(example, including_answer=True): function process_docs (line 53) | def process_docs(dataset, subject): FILE: lm_eval/tasks/mmlu_prox/it/utils.py function format_cot_example (line 32) | def format_cot_example(example, including_answer=True): function process_docs (line 53) | def process_docs(dataset, subject): FILE: lm_eval/tasks/mmlu_prox/ja/utils.py function format_cot_example (line 32) | def format_cot_example(example, including_answer=True): function process_docs (line 53) | def process_docs(dataset, subject): FILE: lm_eval/tasks/mmlu_prox/ko/utils.py function format_cot_example (line 32) | def format_cot_example(example, including_answer=True): function process_docs (line 53) | def process_docs(dataset, subject): FILE: lm_eval/tasks/mmlu_prox/mr/utils.py function format_cot_example (line 32) | def format_cot_example(example, including_answer=True): function process_docs (line 53) | def process_docs(dataset, subject): FILE: lm_eval/tasks/mmlu_prox/ne/utils.py function format_cot_example (line 32) | def format_cot_example(example, including_answer=True): function process_docs (line 53) | def process_docs(dataset, subject): FILE: lm_eval/tasks/mmlu_prox/pt/utils.py function format_cot_example (line 32) | def format_cot_example(example, including_answer=True): function process_docs (line 53) | def process_docs(dataset, subject): FILE: lm_eval/tasks/mmlu_prox/ru/utils.py function format_cot_example (line 32) | def format_cot_example(example, including_answer=True): function process_docs (line 53) | def process_docs(dataset, subject): FILE: lm_eval/tasks/mmlu_prox/sr/utils.py function format_cot_example (line 32) | def format_cot_example(example, including_answer=True): function process_docs (line 53) | def process_docs(dataset, subject): FILE: lm_eval/tasks/mmlu_prox/sw/utils.py function format_cot_example (line 32) | def format_cot_example(example, including_answer=True): function process_docs (line 53) | def process_docs(dataset, subject): FILE: lm_eval/tasks/mmlu_prox/te/utils.py function format_cot_example (line 32) | def format_cot_example(example, including_answer=True): function process_docs (line 53) | def process_docs(dataset, subject): FILE: lm_eval/tasks/mmlu_prox/template/utils.py function format_cot_example (line 32) | def format_cot_example(example, including_answer=True): function process_docs (line 53) | def process_docs(dataset, subject): FILE: lm_eval/tasks/mmlu_prox/th/utils.py function format_cot_example (line 32) | def format_cot_example(example, including_answer=True): function process_docs (line 53) | def process_docs(dataset, subject): FILE: lm_eval/tasks/mmlu_prox/uk/utils.py function format_cot_example (line 32) | def format_cot_example(example, including_answer=True): function process_docs (line 53) | def process_docs(dataset, subject): FILE: lm_eval/tasks/mmlu_prox/ur/utils.py function format_cot_example (line 32) | def format_cot_example(example, including_answer=True): function process_docs (line 53) | def process_docs(dataset, subject): FILE: lm_eval/tasks/mmlu_prox/vi/utils.py function format_cot_example (line 32) | def format_cot_example(example, including_answer=True): function process_docs (line 53) | def process_docs(dataset, subject): FILE: lm_eval/tasks/mmlu_prox/wo/utils.py function format_cot_example (line 32) | def format_cot_example(example, including_answer=True): function process_docs (line 53) | def process_docs(dataset, subject): FILE: lm_eval/tasks/mmlu_prox/yo/utils.py function format_cot_example (line 32) | def format_cot_example(example, including_answer=True): function process_docs (line 53) | def process_docs(dataset, subject): FILE: lm_eval/tasks/mmlu_prox/zh/utils.py function format_cot_example (line 32) | def format_cot_example(example, including_answer=True): function process_docs (line 53) | def process_docs(dataset, subject): FILE: lm_eval/tasks/mmlu_prox/zu/utils.py function format_cot_example (line 32) | def format_cot_example(example, including_answer=True): function process_docs (line 53) | def process_docs(dataset, subject): FILE: lm_eval/tasks/mmlusr/answer_only/utils.py function process_docs (line 4) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset: FILE: lm_eval/tasks/mmlusr/config.py function parse_args (line 79) | def parse_args(): FILE: lm_eval/tasks/mmlusr/question_and_answer/utils.py function process_docs (line 4) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset: FILE: lm_eval/tasks/mmlusr/question_only/utils.py function process_docs (line 4) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset: FILE: lm_eval/tasks/mmmu/utils.py function doc_to_image (line 26) | def doc_to_image(doc): function doc_to_text (line 42) | def doc_to_text(doc): function _doc_to_text (line 54) | def _doc_to_text(doc): function process_results (line 76) | def process_results(doc, results): function parse_multi_choice_response (line 105) | def parse_multi_choice_response(response, all_choices, index2ans): function check_is_number (line 163) | def check_is_number(string): function normalize_str (line 175) | def normalize_str(string): function extract_numbers (line 200) | def extract_numbers(string): function parse_open_response (line 223) | def parse_open_response(response): function eval_multi_choice (line 299) | def eval_multi_choice(gold_i, pred_i): function eval_open (line 316) | def eval_open(gold_i, pred_i): FILE: lm_eval/tasks/model_written_evals/advanced_ai_risk/_generate_configs.py function main (line 6) | def main() -> None: FILE: lm_eval/tasks/model_written_evals/persona/_generate_configs.py function main (line 6) | def main() -> None: FILE: lm_eval/tasks/moral_stories/utils.py function process_docs (line 4) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset: FILE: lm_eval/tasks/mts_dialog/utils.py function doc_eval (line 24) | def doc_eval(pred, refs): function doc_to_text (line 67) | def doc_to_text(doc) -> str: function doc_to_target (line 71) | def doc_to_target(doc) -> str: function process_results (line 75) | def process_results(doc, results): FILE: lm_eval/tasks/mts_dialog/utils_perplexity.py function process_results (line 6) | def process_results(doc, results): FILE: lm_eval/tasks/mutual/utils.py function process_docs (line 4) | def process_docs(dataset): function process_results (line 30) | def process_results(doc, results): FILE: lm_eval/tasks/noreval/ask_gec/errant.py function parse_args (line 9) | def parse_args(): function read_examples (line 30) | def read_examples(fpath: str): function save_results (line 47) | def save_results(fpath: str, obj: dict): function evaluate (line 58) | def evaluate(fpath: str, out_fpath: str): function main (line 93) | def main(): FILE: lm_eval/tasks/noreval/norec/utils.py function multi_f1 (line 5) | def multi_f1(items): FILE: lm_eval/tasks/noreval/noridiom/utils.py function normalize (line 7) | def normalize(text): function f1 (line 12) | def f1(prediction, completion): function process_results (line 27) | def process_results(doc, results): function filter_dataset_nb (line 39) | def filter_dataset_nb(dataset): function filter_dataset_nn (line 43) | def filter_dataset_nn(dataset): FILE: lm_eval/tasks/noreval/noropenbookqa/utils.py function filter_dataset (line 4) | def filter_dataset(dataset: datasets.Dataset) -> datasets.Dataset: FILE: lm_eval/tasks/noreval/norquad/utils.py function process_results (line 5) | def process_results(doc, results): function process_docs (line 13) | def process_docs(dataset: datasets.Dataset): function p0 (line 23) | def p0(doc): function p1 (line 31) | def p1(doc): function p2 (line 39) | def p2(doc): function p3 (line 49) | def p3(doc): function p4 (line 57) | def p4(doc): FILE: lm_eval/tasks/noreval/norsumm/utils.py function process_results (line 20) | def process_results(doc, results): function bleu (line 50) | def bleu(refs, preds): function rouge (line 73) | def rouge(refs, preds): function bertscore_f1 (line 107) | def bertscore_f1(references, predictions): FILE: lm_eval/tasks/noreval/nortruthfulqa/generation/utils.py function preprocess_function (line 19) | def preprocess_function(examples): function process_docs (line 41) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset: function process_results (line 45) | def process_results(doc, results): function bleu (line 98) | def bleu(refs, preds): function rouge (line 121) | def rouge(refs, preds): FILE: lm_eval/tasks/noreval/nortruthfulqa/multiple_choice/nno/utils.py function p0_nn (line 1) | def p0_nn(doc): function p1_nn (line 6) | def p1_nn(doc): function p2_nn (line 14) | def p2_nn(doc): function p3_nn (line 22) | def p3_nn(doc): function p4_nn (line 30) | def p4_nn(doc): FILE: lm_eval/tasks/noreval/nortruthfulqa/multiple_choice/nob/utils.py function p0_nb (line 1) | def p0_nb(doc): function p1_nb (line 6) | def p1_nb(doc): function p2_nb (line 14) | def p2_nb(doc): function p3_nb (line 22) | def p3_nb(doc): function p4_nb (line 30) | def p4_nb(doc): FILE: lm_eval/tasks/noreval/nrk_quiz_qa/nno/utils.py function p0_nn (line 1) | def p0_nn(doc): function p1_nn (line 6) | def p1_nn(doc): function p2_nn (line 12) | def p2_nn(doc): function p3_nn (line 30) | def p3_nn(doc): function p4_nn (line 41) | def p4_nn(doc): FILE: lm_eval/tasks/noreval/nrk_quiz_qa/nob/utils.py function p0_nb (line 1) | def p0_nb(doc): function p1_nb (line 6) | def p1_nb(doc): function p2_nb (line 12) | def p2_nb(doc): function p3_nb (line 32) | def p3_nb(doc): function p4_nb (line 43) | def p4_nb(doc): FILE: lm_eval/tasks/noticia/utils.py function clean_text (line 6) | def clean_text(text: str) -> str: function rouge1 (line 20) | def rouge1(items): function average_len (line 27) | def average_len(items): function rouge1_agg (line 34) | def rouge1_agg(items): function average_len_agg (line 48) | def average_len_agg(items): FILE: lm_eval/tasks/okapi/arc_multilingual/utils.py function preprocess (line 6) | def preprocess(text): function process_docs (line 14) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset: FILE: lm_eval/tasks/okapi/hellaswag_multilingual/utils.py function preprocess (line 6) | def preprocess(text): function process_docs (line 15) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset: FILE: lm_eval/tasks/okapi/mmlu_multilingual/_generate_configs.py function main (line 6) | def main() -> None: FILE: lm_eval/tasks/okapi/truthfulqa_multilingual/utils.py function preprocess (line 23) | def preprocess(text): function process_docs (line 33) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset: function process_results_mc2 (line 48) | def process_results_mc2(doc, results): FILE: lm_eval/tasks/olaph/utils.py function doc_eval (line 25) | def doc_eval(pred, refs): function doc_to_text (line 68) | def doc_to_text(doc) -> str: function doc_to_target (line 72) | def doc_to_target(doc) -> str: function process_docs (line 76) | def process_docs(dataset: datasets.Dataset): function process_results (line 89) | def process_results(doc, results): FILE: lm_eval/tasks/olaph/utils_perplexity.py function process_results (line 6) | def process_results(doc, results): FILE: lm_eval/tasks/openai-mmmlu/_generate_configs.py function load_json (line 31) | def load_json(path: Path): function description_for (line 36) | def description_for(subject: str, display_name: str) -> str: function subject_alias (line 44) | def subject_alias(subject: str, display_name: str) -> str: function quote (line 48) | def quote(value: str) -> str: function write_file (line 57) | def write_file(path: Path, content: str) -> None: function subject_yaml (line 62) | def subject_yaml(base_yaml: str, language: dict, subject: str, category:... function category_yaml (line 78) | def category_yaml(language: dict, category: str) -> str: function language_group_yaml (line 91) | def language_group_yaml(language: dict, categories: Iterable[str]) -> str: function master_group_yaml (line 102) | def master_group_yaml(language_groups: Iterable[str]) -> str: function parse_args (line 112) | def parse_args(): FILE: lm_eval/tasks/openai-mmmlu/default/utils.py function _normalize_subject_name (line 10) | def _normalize_subject_name(name: str) -> str: function _filter_subject (line 25) | def _filter_subject(dataset, subject): function _register_subject_filters (line 35) | def _register_subject_filters(): FILE: lm_eval/tasks/paloma/paloma_utils.py function doc_to_target (line 1) | def doc_to_target(doc): FILE: lm_eval/tasks/paws-x/_generate_config.py function gen_lang_yamls (line 49) | def gen_lang_yamls(output_dir: str, overwrite: bool) -> None: function main (line 91) | def main() -> None: FILE: lm_eval/tasks/paws-x/utils.py function general_detokenize (line 4) | def general_detokenize(string): function lowercase_first_letter (line 14) | def lowercase_first_letter(text): function process_docs_paraphrases (line 18) | def process_docs_paraphrases(dataset): FILE: lm_eval/tasks/pisa/utils.py function replace_images_tokens (line 31) | def replace_images_tokens(input_string): function parse_options (line 40) | def parse_options(options): function construct_prompt (line 51) | def construct_prompt(doc, mc_prompt=""): function pisa_doc_to_text (line 58) | def pisa_doc_to_text(doc): function pisa_doc_to_visual (line 63) | def pisa_doc_to_visual(doc): function pisa_process_results (line 70) | def pisa_process_results(doc, results, **kwargs): function pisa_process_results_llm_judged (line 85) | def pisa_process_results_llm_judged(doc, results, **kwargs): function eval_multi_choice (line 114) | def eval_multi_choice(gold_i, pred_i): function eval_open (line 129) | def eval_open(gold_i, pred_i): function parse_multi_choice_response (line 158) | def parse_multi_choice_response(response, all_choices, index2ans): function extract_numbers (line 219) | def extract_numbers(string): function check_is_number (line 242) | def check_is_number(string): function normalize_str (line 251) | def normalize_str(string): function get_multi_choice_info (line 274) | def get_multi_choice_info(options): function build_user_prompt (line 291) | def build_user_prompt(student_answer: str, options: List[str], correct: ... function judge_mcq (line 312) | def judge_mcq(pred: str, options: List[str], correct: str) -> int: FILE: lm_eval/tasks/portuguese_bench/flores_pt/create_yamls_flores_pt.py function doc_to_text (line 257) | def doc_to_text(src: str, tgt: str) -> str: function doc_to_target (line 265) | def doc_to_target(tgt: str) -> str: function gen_lang_yamls (line 272) | def gen_lang_yamls(output_dir: str, overwrite: bool) -> None: function main (line 315) | def main() -> None: FILE: lm_eval/tasks/pubmedqa/preprocess_pubmedqa.py function doc_to_text (line 1) | def doc_to_text(doc) -> str: FILE: lm_eval/tasks/qa4mre/preprocess_qa4mre.py function qa4mre_process (line 1) | def qa4mre_process(doc): function doc_to_target (line 5) | def doc_to_target(doc): FILE: lm_eval/tasks/qasper/metrics.py function normalize_answer (line 6) | def normalize_answer(s): function f1_abstractive (line 28) | def f1_abstractive(predictions, references): FILE: lm_eval/tasks/qasper/utils.py function process_docs (line 6) | def process_docs(dataset, set_answer_type="bool"): FILE: lm_eval/tasks/race/preprocess_race.py function process_ast (line 4) | def process_ast(string): function last_problem (line 8) | def last_problem(doc): function get_answer_option (line 12) | def get_answer_option(problem): function doc_to_choice (line 18) | def doc_to_choice(doc): function doc_to_text (line 24) | def doc_to_text(doc): function doc_to_target (line 37) | def doc_to_target(doc): FILE: lm_eval/tasks/realtoxicityprompts/metric.py function toxicity_perspective_api (line 12) | def toxicity_perspective_api( FILE: lm_eval/tasks/ruler/common_utils.py function get_tokenizer (line 21) | def get_tokenizer( function postprocess_pred (line 30) | def postprocess_pred(prediction: list[str]) -> list[str]: function string_match_all (line 43) | def string_match_all(preds: list[str], refs: list[list[str]]) -> float: function string_match_part (line 53) | def string_match_part(preds: list[str], refs: list[list[str]]) -> float: function process_results (line 63) | def process_results(doc: dict, results: list[str]) -> dict[str, float]: function process_results_part (line 73) | def process_results_part(doc: dict, results: list[str]) -> dict[str, flo... function aggregate_metrics (line 83) | def aggregate_metrics(metrics: list[float]) -> float: FILE: lm_eval/tasks/ruler/cwe_utils.py function get_example (line 43) | def get_example(num_words, common_repeats=30, uncommon_repeats=3, common... function generate_input_output (line 55) | def generate_input_output( function sys_word_pair_random (line 84) | def sys_word_pair_random( function get_dataset (line 168) | def get_dataset(pretrained, seq=None, **kwargs): function get_cw_dataset (line 176) | def get_cw_dataset(**kwargs): FILE: lm_eval/tasks/ruler/essays.py function fetch_url (line 27) | async def fetch_url(client: httpx.AsyncClient, url: str) -> str: function process_html_essay (line 34) | async def process_html_essay( function process_text_essay (line 56) | async def process_text_essay( function get_essays (line 71) | async def get_essays() -> Dict[str, str]: function get_all_essays (line 121) | def get_all_essays() -> Dict[str, str]: FILE: lm_eval/tasks/ruler/fwe_utils.py function generate_input_output (line 38) | def generate_input_output( function sys_kwext (line 85) | def sys_kwext( function get_dataset (line 145) | def get_dataset(pretrained, max_seq_length=None, **kwargs): function fwe_download (line 154) | def fwe_download(**kwargs): FILE: lm_eval/tasks/ruler/niah_utils.py function download_dataset (line 15) | def download_dataset(df: Generator) -> dict[str, datasets.Dataset]: function niah_single_1 (line 23) | def niah_single_1(**kwargs): function niah_single_2 (line 40) | def niah_single_2(**kwargs): function niah_single_3 (line 57) | def niah_single_3(**kwargs): function niah_multikey_1 (line 74) | def niah_multikey_1(**kwargs): function niah_multikey_2 (line 92) | def niah_multikey_2(**kwargs): function niah_multikey_3 (line 109) | def niah_multikey_3(**kwargs): function niah_multivalue (line 126) | def niah_multivalue(**kwargs): function niah_multiquery (line 144) | def niah_multiquery(**kwargs): FILE: lm_eval/tasks/ruler/prepare_niah.py function cached_sent_tokenize (line 65) | def cached_sent_tokenize(text: str) -> List[str]: function download_nltk_resources (line 69) | def download_nltk_resources(): function generate_random_number (line 88) | def generate_random_number(num_digits=7) -> str: function generate_random_word (line 94) | def generate_random_word() -> str: function generate_random_uuid (line 99) | def generate_random_uuid() -> str: function generate_random (line 103) | def generate_random(type_needle: str) -> str: function generate_input_output (line 114) | def generate_input_output( function generate_samples (line 213) | def generate_samples( function get_haystack (line 327) | def get_haystack( FILE: lm_eval/tasks/ruler/qa_utils.py function download_json (line 37) | def download_json(url) -> dict: function read_squad (line 45) | def read_squad( function read_hotpotqa (line 76) | def read_hotpotqa( function generate_input_output (line 99) | def generate_input_output( function generate_samples (line 134) | def generate_samples( function get_dataset (line 203) | def get_dataset(pretrained, docs, qas, max_seq_length=None, **kwargs) ->... function get_qa_dataset (line 216) | def get_qa_dataset(ds, **kwargs) -> dict[str, datasets.Dataset]: function get_squad (line 234) | def get_squad(**kwargs): function get_hotpotqa (line 238) | def get_hotpotqa(**kwargs): FILE: lm_eval/tasks/ruler/vt_utils.py function generate_chains (line 45) | def generate_chains( function generate_input_output (line 70) | def generate_input_output(num_noises, num_chains, num_hops, is_icl=False): function randomize_icl (line 120) | def randomize_icl(icl_example: str) -> str: function sys_vartrack_w_noise_random (line 129) | def sys_vartrack_w_noise_random( function get_dataset (line 224) | def get_dataset( function get_vt_dataset (line 244) | def get_vt_dataset(**kwargs) -> dict[str, datasets.Dataset]: FILE: lm_eval/tasks/score/agi_eval/utils_agieval.py function initial_process_docs (line 46) | def initial_process_docs(doc: Dataset) -> Dataset: function prompt_robustness_process_results (line 107) | def prompt_robustness_process_results(doc, results) -> Dict[str, float]: function option_order_robustness_process_results (line 121) | def option_order_robustness_process_results(doc, results) -> Dict[str, f... function non_greedy_robustness_process_results (line 148) | def non_greedy_robustness_process_results(doc, results) -> Dict[str, flo... function per_prompt_accuracy (line 159) | def per_prompt_accuracy(results: List[Dict[str, Any]], p_id=0) -> float: function per_option_accuracy (line 185) | def per_option_accuracy(results: List[Dict[str, Any]], always_opt="a") -... function non_greedy_accuracy (line 207) | def non_greedy_accuracy(results: List[Dict[str, Any]]) -> float: FILE: lm_eval/tasks/score/math/math_grader.py function _check_antlr_version (line 91) | def _check_antlr_version(): function _fix_fracs (line 109) | def _fix_fracs(string): function _str_is_int (line 144) | def _str_is_int(x: str) -> bool: function _str_to_int (line 153) | def _str_to_int(x: str) -> bool: function _inject_implicit_mixed_number (line 162) | def _inject_implicit_mixed_number(step: str): function _strip_properly_formatted_commas (line 172) | def _strip_properly_formatted_commas(expr: str): function _remove_right_units (line 183) | def _remove_right_units(expr): function _process_and_or_inside_text (line 204) | def _process_and_or_inside_text(string): function _remove_left_and_right (line 210) | def _remove_left_and_right(expr): function _fix_sqrt (line 217) | def _fix_sqrt(string): function _fix_interval (line 222) | def _fix_interval(expr): function _inject_implicit_mixed_fraction (line 230) | def _inject_implicit_mixed_fraction(step: str): function normalize_answer_string (line 251) | def normalize_answer_string(expr: str) -> str: function is_digit (line 351) | def is_digit(s): function normalize (line 363) | def normalize(answer) -> str: function math_equal (line 378) | def math_equal( function symbolic_equal (line 531) | def symbolic_equal(a, b, tolerance, timeout=10.0): function extract_answer (line 564) | def extract_answer( class TimeoutException (line 615) | class TimeoutException(Exception): function time_limit (line 620) | def time_limit(seconds: float): function format_intervals (line 632) | def format_intervals(prediction): FILE: lm_eval/tasks/score/math/utils_math.py function find_boxed_entries (line 44) | def find_boxed_entries(answer_str): function extract_answer_dataset (line 92) | def extract_answer_dataset(solution: str, problem: str, corrected_answer... function process_docs (line 119) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset: function prompt_robustness_process_docs (line 143) | def prompt_robustness_process_docs(doc: datasets.Dataset) -> datasets.Da... function non_greedy_robustness_process_docs (line 152) | def non_greedy_robustness_process_docs(doc: datasets.Dataset) -> dataset... function process_results (line 161) | def process_results(doc: dict, results: List[str]) -> Dict[str, int]: function non_greedy_robustness_process_results (line 178) | def non_greedy_robustness_process_results( function per_prompt_accuracy (line 185) | def per_prompt_accuracy(results: List[Dict[str, Any]], p_id=0) -> float: function calculate_consistency_rate (line 211) | def calculate_consistency_rate(responses: List[List[str]]) -> float: function math_prompt_consistency_rate (line 234) | def math_prompt_consistency_rate(results: List[Dict[str, Any]]) -> float: function non_greedy_accuracy (line 257) | def non_greedy_accuracy(results: List[Dict[str, Any]]) -> float: FILE: lm_eval/tasks/score/mmlu_pro/utils_mmlu_pro.py function non_greedy_robustness_process_results (line 61) | def non_greedy_robustness_process_results(doc, results) -> Dict[str, flo... function prompt_robustness_process_results (line 73) | def prompt_robustness_process_results(doc, results) -> Dict[str, float]: function option_order_robustness_process_results (line 94) | def option_order_robustness_process_results(doc, results) -> Dict[str, f... function per_prompt_macro_accuracy (line 123) | def per_prompt_macro_accuracy(results: List[Dict[str, Any]], p_id=0) -> ... function per_option_macro_accuracy (line 154) | def per_option_macro_accuracy(results: List[Dict[str, Any]], always_opt=... function non_greedy_macro_accuracy (line 187) | def non_greedy_macro_accuracy(results: List[Dict[str, Any]]) -> float: FILE: lm_eval/tasks/score/non_greedy_summarizer.py function load_json_logs (line 33) | def load_json_logs(file_paths, subtasks): function calculate_consistency_rate (line 71) | def calculate_consistency_rate(responses: List[List[str]]) -> float: function calculate_math_consistency_rate (line 94) | def calculate_math_consistency_rate(responses: List[List[str]]) -> float: function main (line 117) | def main(): FILE: lm_eval/tasks/score/utils.py function __repeat_elements (line 36) | def __repeat_elements(lst, n): function process_docs_add_prompts (line 43) | def process_docs_add_prompts( function option_order_robustness_process_docs (line 74) | def option_order_robustness_process_docs( function non_greedy_robustness_process_docs (line 135) | def non_greedy_robustness_process_docs( function robustness_doc_to_text (line 165) | def robustness_doc_to_text(doc: Dataset) -> str: function __postprocess_pred (line 189) | def __postprocess_pred(pred): function translate_model_answer_to_labels (line 199) | def translate_model_answer_to_labels(answer, labels, option_format=None): function calculate_consistency_rate (line 220) | def calculate_consistency_rate(responses: List[List[str]]) -> float: function prompt_consistency_rate (line 243) | def prompt_consistency_rate(results: List[Dict[str, Any]]) -> float: function options_consistency_rate (line 266) | def options_consistency_rate(results: List[Dict[str, Any]], labels) -> f... FILE: lm_eval/tasks/scrolls/task.py function _download_metric (line 45) | def _download_metric(): function _process_doc_prepended_question (line 66) | def _process_doc_prepended_question(doc): function _drop_duplicates_in_input (line 82) | def _drop_duplicates_in_input(untokenized_dataset): function _num_cpu_cores (line 103) | def _num_cpu_cores(): class _SCROLLSTask (line 115) | class _SCROLLSTask(ConfigurableTask): method __init__ (line 123) | def __init__(self, config=None): method has_training_docs (line 128) | def has_training_docs(self): method has_validation_docs (line 131) | def has_validation_docs(self): method has_test_docs (line 134) | def has_test_docs(self): method training_docs (line 137) | def training_docs(self): method validation_docs (line 148) | def validation_docs(self): method should_decontaminate (line 159) | def should_decontaminate(self): method doc_to_decontamination_query (line 162) | def doc_to_decontamination_query(self, doc): method download (line 165) | def download(self, *args, **kwargs): method _get_prune_text (line 173) | def _get_prune_text(self, sample): method prune (line 176) | def prune(self): method doc_to_target (line 202) | def doc_to_target(self, doc): method doc_to_text (line 205) | def doc_to_text(self, doc): method higher_is_better (line 208) | def higher_is_better(self): method _scrolls_metrics (line 212) | def _scrolls_metrics(self): method _make_compute_metrics (line 215) | def _make_compute_metrics(self, value): method aggregation (line 225) | def aggregation(self): class _SCROLLSMultipleChoiceTask (line 232) | class _SCROLLSMultipleChoiceTask(_SCROLLSTask): method __post_init__ (line 233) | def __post_init__(self): method _scrolls_metrics (line 236) | def _scrolls_metrics(self): method aggregation (line 239) | def aggregation(self): method higher_is_better (line 242) | def higher_is_better(self): method process_results (line 245) | def process_results(self, doc, results): method construct_requests (line 259) | def construct_requests( class _SCROLLSSummaryTask (line 277) | class _SCROLLSSummaryTask(_SCROLLSTask): method _process_doc (line 278) | def _process_doc(self, doc): method _scrolls_metrics (line 281) | def _scrolls_metrics(self): method process_results (line 288) | def process_results(self, doc, results): method construct_requests (line 295) | def construct_requests( method doc_to_text (line 306) | def doc_to_text(self, doc): class Qasper (line 310) | class Qasper(_SCROLLSTask): method _process_doc (line 317) | def _process_doc(self, doc): method _scrolls_metrics (line 327) | def _scrolls_metrics(self): method process_results (line 330) | def process_results(self, doc, results): method construct_requests (line 339) | def construct_requests( class QuALITY (line 371) | class QuALITY(_SCROLLSMultipleChoiceTask): method _normalize_answer (line 380) | def _normalize_answer(text): method _process_doc (line 383) | def _process_doc(self, doc): class NarrativeQA (line 399) | class NarrativeQA(_SCROLLSTask): method _process_doc (line 406) | def _process_doc(self, doc): method _scrolls_metrics (line 409) | def _scrolls_metrics(self): method _get_prune_text (line 412) | def _get_prune_text(self, doc): method process_results (line 419) | def process_results(self, doc, results): method construct_requests (line 422) | def construct_requests( class ContractNLI (line 434) | class ContractNLI(_SCROLLSMultipleChoiceTask): method _process_doc (line 442) | def _process_doc(self, doc): method doc_to_text (line 448) | def doc_to_text(self, doc): class GovReport (line 452) | class GovReport(_SCROLLSSummaryTask): class SummScreenFD (line 465) | class SummScreenFD(_SCROLLSSummaryTask): class QMSum (line 473) | class QMSum(_SCROLLSSummaryTask): method _process_doc (line 482) | def _process_doc(self, doc): method doc_to_text (line 485) | def doc_to_text(self, doc): FILE: lm_eval/tasks/simple_cooccurrence_bias/utils.py function process_results (line 6) | def process_results(doc, results): function process_results_gen (line 29) | def process_results_gen(doc, results): FILE: lm_eval/tasks/slr_bench/lm_eval_slr_bench.py function process_results (line 19) | def process_results(doc, results): FILE: lm_eval/tasks/spanish_bench/flores_es/create_yamls_flores_es.py function doc_to_text (line 257) | def doc_to_text(src: str, tgt: str) -> str: function doc_to_target (line 265) | def doc_to_target(tgt: str) -> str: function gen_lang_yamls (line 272) | def gen_lang_yamls(output_dir: str, overwrite: bool) -> None: function main (line 315) | def main() -> None: FILE: lm_eval/tasks/spanish_bench/utils.py function lowercase_first_letter (line 10) | def lowercase_first_letter(text): function process_doc_nli (line 14) | def process_doc_nli(dataset): function process_xlsum (line 38) | def process_xlsum(dataset): function process_docs_paraphrases (line 48) | def process_docs_paraphrases(dataset): function process_docs_copa_es (line 76) | def process_docs_copa_es(dataset): function rouge1 (line 85) | def rouge1(items): function rouge1_agg (line 92) | def rouge1_agg(items): FILE: lm_eval/tasks/squad_completion/task.py class SQUADCompletion (line 11) | class SQUADCompletion(ConfigurableTask): method __init__ (line 16) | def __init__(self, **kwargs): method has_training_docs (line 19) | def has_training_docs(self): method has_validation_docs (line 22) | def has_validation_docs(self): method has_test_docs (line 25) | def has_test_docs(self): method validation_docs (line 28) | def validation_docs(self): method doc_to_text (line 31) | def doc_to_text(self, doc): method doc_to_target (line 34) | def doc_to_target(self, doc): method construct_requests (line 37) | def construct_requests( method process_results (line 63) | def process_results(self, doc, results): method aggregation (line 78) | def aggregation(self): method higher_is_better (line 88) | def higher_is_better(self): function contains_score (line 99) | def contains_score(prediction: str, labels: List[str]): FILE: lm_eval/tasks/squadv2/task.py function _squad_metric (line 39) | def _squad_metric(predictions, references): function _squad_agg (line 46) | def _squad_agg(key, items): class SQuAD2 (line 52) | class SQuAD2(ConfigurableTask): method __init__ (line 57) | def __init__(self, config=None): method has_training_docs (line 65) | def has_training_docs(self): method has_validation_docs (line 68) | def has_validation_docs(self): method has_test_docs (line 71) | def has_test_docs(self): method training_docs (line 74) | def training_docs(self): method validation_docs (line 77) | def validation_docs(self): method doc_to_text (line 80) | def doc_to_text(self, doc): method should_decontaminate (line 94) | def should_decontaminate(self): method doc_to_decontamination_query (line 97) | def doc_to_decontamination_query(self, doc): method doc_to_target (line 100) | def doc_to_target(self, doc): method construct_requests (line 108) | def construct_requests( method process_results (line 139) | def process_results(self, doc, results): method aggregation (line 197) | def aggregation(self): method higher_is_better (line 230) | def higher_is_better(self): FILE: lm_eval/tasks/super_glue/cb/aggregate.py function cb_multi_fi (line 4) | def cb_multi_fi(items): FILE: lm_eval/tasks/super_glue/cb/t5_utils.py function mean_3class_f1 (line 1) | def mean_3class_f1(predictions, references): # This is a passthrough fu... function agg_mean_3class_f1 (line 11) | def agg_mean_3class_f1(items): FILE: lm_eval/tasks/super_glue/copa/utils.py function convert_choice (line 1) | def convert_choice(choice): function doc_to_text (line 5) | def doc_to_text(doc): function doc_to_target (line 14) | def doc_to_target(doc): function doc_to_choice (line 20) | def doc_to_choice(doc): FILE: lm_eval/tasks/super_glue/multirc/t5_utils.py function f1 (line 6) | def f1(predictions, references): # This is a passthrough function function agg_f1 (line 20) | def agg_f1(items): function em (line 29) | def em(predictions, references): # This is a passthrough function function agg_em (line 43) | def agg_em(items): FILE: lm_eval/tasks/super_glue/record/t5_utils.py function doc_to_text (line 11) | def doc_to_text(doc): function process_docs (line 28) | def process_docs(dataset): function normalize_squad (line 50) | def normalize_squad(answer): function em (line 76) | def em(predictions, references): # This is a passthrough function function f1 (line 80) | def f1(predictions, references): # This is a passthrough function function squad_em_agg (line 84) | def squad_em_agg(items): function squad_f1_agg (line 104) | def squad_f1_agg(items): FILE: lm_eval/tasks/super_glue/record/util.py function doc_to_text (line 8) | def doc_to_text(doc): function format_answer (line 16) | def format_answer(query, entity): function doc_to_target (line 20) | def doc_to_target(doc): function doc_to_choice (line 25) | def doc_to_choice(doc): function process_docs (line 29) | def process_docs(dataset: datasets.Dataset): function process_results (line 41) | def process_results(doc, results): FILE: lm_eval/tasks/super_glue/wsc/preprocess_wsc.py function default_doc_to_text (line 4) | def default_doc_to_text(x): FILE: lm_eval/tasks/super_glue/wsc/t5_utils.py function doc_to_text (line 5) | def doc_to_text(x): function _wsc_inputs (line 10) | def _wsc_inputs(x): function clean (line 80) | def clean(s: str) -> str: function process_results (line 86) | def process_results(docs: dict, resps: List): FILE: lm_eval/tasks/swde/task.py class SWDE (line 10) | class SWDE(ConfigurableTask): method __init__ (line 15) | def __init__(self, **kwargs): method has_training_docs (line 18) | def has_training_docs(self): method has_validation_docs (line 21) | def has_validation_docs(self): method has_test_docs (line 24) | def has_test_docs(self): method validation_docs (line 27) | def validation_docs(self): method doc_to_text (line 30) | def doc_to_text(self, doc): method doc_to_target (line 33) | def doc_to_target(self, doc): method construct_requests (line 36) | def construct_requests( method process_results (line 60) | def process_results(self, doc, results): method aggregation (line 75) | def aggregation(self): method higher_is_better (line 85) | def higher_is_better(self): function contains_score (line 96) | def contains_score(prediction: str, labels: List[str]): FILE: lm_eval/tasks/tinyBenchmarks/agg_functions.py function agg_pirt (line 15) | def agg_pirt(items: List[float], benchmark: str) -> float: function agg_gpirt_arc (line 21) | def agg_gpirt_arc(items: List[float], benchmark: str = "arc") -> float: function agg_gpirt_gsm8k (line 27) | def agg_gpirt_gsm8k(items: List[float], benchmark: str = "gsm8k") -> float: function agg_gpirt_hellaswag (line 33) | def agg_gpirt_hellaswag(items: List[float], benchmark: str = "hellaswag"... function agg_gpirt_mmlu (line 39) | def agg_gpirt_mmlu(items: List[float], benchmark: str = "mmlu") -> float: function agg_gpirt_truthfulqa (line 45) | def agg_gpirt_truthfulqa(items: List[float], benchmark: str = "truthfulq... function agg_gpirt_winogrande (line 51) | def agg_gpirt_winogrande(items: List[float], benchmark: str = "winogrand... FILE: lm_eval/tasks/tinyBenchmarks/utils_hellaswag.py function preprocess (line 9) | def preprocess(text): function process_docs (line 18) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset: FILE: lm_eval/tasks/tinyBenchmarks/utils_truthfulqa.py function process_results_mc2 (line 12) | def process_results_mc2(doc, results): function process_docs_gen (line 25) | def process_docs_gen(dataset: datasets.Dataset) -> datasets.Dataset: function preprocess_function (line 29) | def preprocess_function(examples): function process_results_gen (line 53) | def process_results_gen(doc, results): function bleu (line 124) | def bleu(refs, preds): function rouge (line 147) | def rouge(refs, preds): FILE: lm_eval/tasks/tinyBenchmarks/utils_winogrande.py function doc_to_text (line 4) | def doc_to_text(doc): function doc_to_target (line 9) | def doc_to_target(doc): function doc_to_choice (line 14) | def doc_to_choice(doc): FILE: lm_eval/tasks/tmlu/default/_generate_configs.py function parse_args (line 79) | def parse_args(): FILE: lm_eval/tasks/tmlu/default/utils.py function process_docs (line 4) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset: FILE: lm_eval/tasks/tmmluplus/default/_generate_configs.py function parse_args (line 109) | def parse_args(): FILE: lm_eval/tasks/tmmluplus/default/utils.py function process_docs (line 4) | def process_docs(dataset: datasets.Dataset) -> datasets.Dataset: FILE: lm_eval/tasks/toxigen/utils.py function doc_to_target (line 4) | def doc_to_target(doc): FILE: lm_eval/tasks/translation/utils.py function code_to_language (line 35) | def code_to_language(code): function gen_lang_yamls (line 41) | def gen_lang_yamls(output_dir: str, overwrite: bool) -> None: function main (line 100) | def main() -> None: FILE: lm_eval/tasks/truthfulqa-multi/utils.py function process_results_mc2 (line 18) | def process_results_mc2(doc, results): function process_docs_gen (line 34) | def process_docs_gen(dataset: datasets.Dataset) -> datasets.Dataset: function preprocess_function (line 38) | def preprocess_function(examples): function process_results_gen (line 80) | def process_results_gen(doc, results): function bleu (line 151) | def bleu(refs, preds): function rouge (line 174) | def rouge(refs, preds): FILE: lm_eval/tasks/truthfulqa/utils.py function process_results_mc2 (line 10) | def process_results_mc2(doc, results): function process_docs_gen (line 27) | def process_docs_gen(dataset: datasets.Dataset) -> datasets.Dataset: function preprocess_function (line 31) | def preprocess_function(examples): function process_results_gen (line 55) | def process_results_gen(doc, results): function bleu (line 126) | def bleu(refs, preds): function rouge (line 149) | def rouge(refs, preds): FILE: lm_eval/tasks/unitxt/task.py function assert_unitxt_installed (line 30) | def assert_unitxt_installed(): function score (line 46) | def score(items, metric): class Unitxt (line 57) | class Unitxt(ConfigurableTask): method __init__ (line 60) | def __init__( method download (line 76) | def download(self, dataset_kwargs: Optional[Dict[str, Any]] = None) ->... method has_training_docs (line 82) | def has_training_docs(self): method has_validation_docs (line 85) | def has_validation_docs(self): method has_test_docs (line 88) | def has_test_docs(self): method training_docs (line 91) | def training_docs(self): method validation_docs (line 94) | def validation_docs(self): method test_docs (line 97) | def test_docs(self): method doc_to_text (line 100) | def doc_to_text(self, doc): method should_decontaminate (line 103) | def should_decontaminate(self): method doc_to_target (line 106) | def doc_to_target(self, doc): method get_arguments (line 109) | def get_arguments(self, doc, ctx): method fewshot_context (line 112) | def fewshot_context(self, doc, **kwargs) -> str: method construct_requests (line 125) | def construct_requests(self, doc, ctx, **kwargs): method process_results (line 148) | def process_results(self, doc, results): method aggregation (line 169) | def aggregation(self): method higher_is_better (line 180) | def higher_is_better(self): function extract_images (line 193) | def extract_images(text, instance): class UnitxtMultiModal (line 206) | class UnitxtMultiModal(Unitxt): method doc_to_text (line 209) | def doc_to_text(self, doc): method doc_to_image (line 212) | def doc_to_image(self, doc): method get_arguments (line 216) | def get_arguments(self, doc, ctx): FILE: lm_eval/tasks/webqs/utils.py function doc_to_choice (line 4) | def doc_to_choice(doc: Dict) -> List[str]: function doc_to_target (line 9) | def doc_to_target(doc: Dict) -> List[int]: function _remove_prefixes (line 15) | def _remove_prefixes(aliases): FILE: lm_eval/tasks/wikitext/preprocess_wikitext.py function wikitext_detokenizer (line 4) | def wikitext_detokenizer(doc): function process_results (line 39) | def process_results(doc, results): FILE: lm_eval/tasks/winogender/utils.py function filter_dataset (line 4) | def filter_dataset(dataset: datasets.Dataset, gender: str) -> datasets.D... function filter_male (line 8) | def filter_male(dataset: datasets.Dataset) -> datasets.Dataset: function filter_female (line 12) | def filter_female(dataset: datasets.Dataset) -> datasets.Dataset: function filter_neutral (line 16) | def filter_neutral(dataset: datasets.Dataset) -> datasets.Dataset: FILE: lm_eval/tasks/winogrande/preprocess_winogrande.py function doc_to_text (line 1) | def doc_to_text(doc): function doc_to_target (line 6) | def doc_to_target(doc): function doc_to_choice (line 11) | def doc_to_choice(doc): FILE: lm_eval/tasks/wmt2016/metrics.py function bleu (line 4) | def bleu(predictions, references): function agg_bleu (line 8) | def agg_bleu(items): FILE: lm_eval/tasks/wsc273/utils.py function process_doc (line 16) | def process_doc(dataset): function __normalize_option (line 27) | def __normalize_option(doc, option): FILE: lm_eval/tasks/xcopa/utils.py function convert_choice (line 4) | def convert_choice(choice): function doc_to_text (line 8) | def doc_to_text(doc, connector): function doc_to_choice (line 14) | def doc_to_choice(doc): FILE: lm_eval/tasks/xnli/utils.py function gen_lang_yamls (line 104) | def gen_lang_yamls(output_dir: str, overwrite: bool) -> None: function main (line 148) | def main() -> None: FILE: lm_eval/tasks/xquad/utils.py function process_results_qa (line 10) | def process_results_qa(doc, results): FILE: lm_eval/tasks/xwinograd/utils.py function doc_to_text (line 13) | def doc_to_text(doc: Dict) -> int: function doc_to_target (line 25) | def doc_to_target(doc: Dict) -> str: function doc_to_choice (line 36) | def doc_to_choice(doc: Dict) -> List[str]: function gen_lang_yamls (line 43) | def gen_lang_yamls(output_dir: str, overwrite: bool) -> None: function main (line 76) | def main() -> None: FILE: lm_eval/utils.py class _LMEvalFormatter (line 35) | class _LMEvalFormatter(logging.Formatter): method format (line 38) | def format(self, record): function is_torch_available (line 43) | def is_torch_available() -> bool: function is_transformers_available (line 47) | def is_transformers_available() -> bool: function wrap_text (line 51) | def wrap_text(string: str, width: int = 140, **kwargs) -> str | None: function setup_logging (line 68) | def setup_logging(verbosity=logging.INFO): function warning_once (line 111) | def warning_once(logger: logging.Logger, msg: str, *args): function info_once (line 117) | def info_once(logger: logging.Logger, msg: str, *args): function maybe_warn (line 122) | def maybe_warn(msg: str, verbose: bool = True): function hash_string (line 129) | def hash_string(string: str) -> str: function escaped_split (line 133) | def escaped_split(text, sep_char, maxsplit=-1): function handle_arg_string (line 156) | def handle_arg_string(arg): function handle_non_serializable (line 199) | def handle_non_serializable(o): function sanitize_list (line 208) | def sanitize_list(sub): function simple_parse_args_string (line 220) | def simple_parse_args_string(args_string: str | None) -> dict: function join_iters (line 239) | def join_iters(iters): function group (line 244) | def group(arr, fn): function pattern_match (line 255) | def pattern_match(patterns, source_list): function softmax (line 266) | def softmax(x) -> np.ndarray: function general_detokenize (line 272) | def general_detokenize(string) -> str: function get_file_task_name (line 282) | def get_file_task_name(filename: str) -> str: function get_file_datetime (line 289) | def get_file_datetime(filename: str) -> str: function sanitize_model_name (line 296) | def sanitize_model_name(model_name: str) -> str: function sanitize_task_name (line 303) | def sanitize_task_name(task_name: str) -> str: function get_latest_filename (line 310) | def get_latest_filename(filenames: list[str]) -> str: function get_results_filenames (line 317) | def get_results_filenames(filenames: list[str]) -> list[str]: function get_sample_results_filenames (line 324) | def get_sample_results_filenames(filenames: list[str]) -> list[str]: function get_rolling_token_windows (line 331) | def get_rolling_token_windows( function make_disjoint_window (line 374) | def make_disjoint_window( class EnhancedJSONEncoder (line 382) | class EnhancedJSONEncoder(json.JSONEncoder): method default (line 388) | def default(self, o): class Reorderer (line 394) | class Reorderer: method __init__ (line 395) | def __init__(self, arr: list[Any], fn: Callable) -> None: method get_reordered (line 412) | def get_reordered(self): method get_original (line 420) | def get_original(self, newarr): function _build_hierarchy_info (line 442) | def _build_hierarchy_info( function make_table (line 475) | def make_table(result_dict, column: str = "results", sort_results: bool ... function positional_deprecated (line 561) | def positional_deprecated(fn): function ignore_constructor (line 580) | def ignore_constructor(loader, node): function import_function (line 584) | def import_function(loader: yaml.Loader, node, yaml_path: Path): function regex_replace (line 606) | def regex_replace(string, pattern, repl, count: int = 0): function apply_template (line 617) | def apply_template(template: str, doc: dict) -> str: function create_iterator (line 622) | def create_iterator(raw_iterator, *, rank=0, world_size=1, limit=None): function weighted_f1_score (line 631) | def weighted_f1_score(items): function convert_pil_to_hash (line 641) | def convert_pil_to_hash(value): function convert_bytes_to_hash (line 649) | def convert_bytes_to_hash(value): function hash_dict_images (line 653) | def hash_dict_images(data_dict): class RemoteTokenizer (line 697) | class RemoteTokenizer: method __init__ (line 702) | def __init__( method _request_with_retries (line 741) | def _request_with_retries(self, method, url, **kwargs): method _validate_server (line 760) | def _validate_server(self): method tokenizer_info (line 769) | def tokenizer_info(self) -> dict: method eos_token (line 778) | def eos_token(self) -> str | None: method bos_token (line 782) | def bos_token(self) -> str | None: method pad_token (line 786) | def pad_token(self) -> str | None: method eos_token_id (line 790) | def eos_token_id(self) -> int | None: method bos_token_id (line 796) | def bos_token_id(self) -> int | None: method eot_token (line 802) | def eot_token(self) -> int | None: method encode (line 805) | def encode(self, text: str) -> list[int]: method decode (line 814) | def decode(self, tokens: list[int]) -> str: method batch_decode (line 823) | def batch_decode(self, tokens_list: list[list[int]]) -> list[str]: method apply_chat_template (line 826) | def apply_chat_template( method __call__ (line 839) | def __call__(self, text: str, add_special_tokens: bool = False, **kwar... function check_remote_tokenizer_support (line 844) | def check_remote_tokenizer_support( function set_torch_seed (line 910) | def set_torch_seed(seed: int): function random_name_id (line 917) | def random_name_id() -> str: FILE: scripts/build_benchmark.py function parse_args (line 17) | def parse_args(): FILE: scripts/clean_training_data/compress_and_package.py function process_task (line 16) | def process_task( function compress_and_move (line 31) | def compress_and_move(working_directory, output_directory, process_count): FILE: scripts/clean_training_data/generate_13_grams.py function handler (line 46) | def handler(signal_received, frame): function yield_pile (line 51) | def yield_pile(start_offsets=None, checkpoint_offset=None): class Buckets (line 86) | class Buckets: method __init__ (line 87) | def __init__(self, directory, num_buckets): method add_data (line 104) | def add_data(self, key, value): method save_checkpoint (line 109) | def save_checkpoint(self): method close_buckets (line 116) | def close_buckets(self): function do_ngrams_in_buckets (line 121) | def do_ngrams_in_buckets(n_value, working_directory, bucket_count): FILE: scripts/clean_training_data/investigate_pile.py function get_file_stats (line 12) | def get_file_stats(file_path, tqdm_func, global_tqdm): function get_files (line 36) | def get_files(): function get_stats (line 43) | def get_stats(): FILE: scripts/clean_training_data/janitor_util.cpp function is_whitespace (line 9) | bool is_whitespace(char ch) noexcept { function is_punctuation (line 15) | bool is_punctuation(char c) noexcept { function clean_ngram (line 24) | std::vector clean_ngram(std::string const &input, function clean_ngram_with_indices (line 109) | std::vector> function PYBIND11_MODULE (line 194) | PYBIND11_MODULE(janitor_util, m) { FILE: scripts/clean_training_data/process_sorted_buckets.py function process_bucket (line 35) | def process_bucket( function process_sorted_buckets (line 97) | def process_sorted_buckets(working_directory, move_dir, process_count): FILE: scripts/clean_training_data/sort_13_gram_buckets.py function handler (line 28) | def handler(signal_received, frame): function sort_13_gram_buckets (line 33) | def sort_13_gram_buckets(working_directory): FILE: scripts/make_table_results.py function make_table (line 16) | def make_table(result_dict): FILE: scripts/make_table_tasks.py function check (line 17) | def check(tf): FILE: scripts/model_comparator.py function memory_stats (line 19) | def memory_stats(): function calculate_z_value (line 25) | def calculate_z_value(res1: dict, res2: dict) -> tuple[float, float]: function print_results (line 36) | def print_results( function parse_args (line 64) | def parse_args(): FILE: scripts/regression.py function parse_args (line 29) | def parse_args(): function eval_models (line 47) | def eval_models(args, branch=None): function extract_value (line 108) | def extract_value(args, results, model, task, err=False): function format_value (line 126) | def format_value(args, results, model, task): function format_diff (line 132) | def format_diff(args, results1, results2, model, task): function main (line 139) | def main(): FILE: scripts/requests_caching.py function run_model_for_task_caching (line 35) | def run_model_for_task_caching(tasks: list[str], cache_requests: str): function request_caching_arg_to_dict (line 96) | def request_caching_arg_to_dict(cache_requests: str) -> dict: FILE: scripts/write_out.py function parse_args (line 19) | def parse_args(): function main (line 42) | def main(): FILE: scripts/zeno_visualize.py function parse_args (line 22) | def parse_args(): function sanitize_string (line 39) | def sanitize_string(model_args_raw: Union[str, dict]) -> str: function main (line 55) | def main(): function tasks_for_model (line 163) | def tasks_for_model(model: str, data_path: str): function generate_dataset (line 182) | def generate_dataset( function generate_system_df (line 230) | def generate_system_df(data, config): FILE: tests/conftest.py function on_ci (line 13) | def on_ci(): function fewshot_config (line 19) | def fewshot_config(): function task_config (line 30) | def task_config(): function mock_configurable_task (line 48) | def mock_configurable_task(task_config): FILE: tests/models/test_api.py function api (line 10) | def api(): function api_tokenized (line 17) | def api_tokenized(): function api_batch_ssl_tokenized (line 26) | def api_batch_ssl_tokenized(): function test_create_payload_generate (line 36) | def test_create_payload_generate(api): function test_create_payload_loglikelihood (line 57) | def test_create_payload_loglikelihood(api): function test_model_generate_call_usage (line 103) | def test_model_generate_call_usage( function test_model_tokenized_call_usage (line 143) | def test_model_tokenized_call_usage( class DummyAsyncContextManager (line 164) | class DummyAsyncContextManager: method __init__ (line 165) | def __init__(self, result): method __aenter__ (line 168) | async def __aenter__(self): method __aexit__ (line 171) | async def __aexit__(self, exc_type, exc, tb): function test_get_batched_requests_with_no_ssl (line 190) | def test_get_batched_requests_with_no_ssl( function test_local_completionsapi_remote_tokenizer_authenticated (line 231) | def test_local_completionsapi_remote_tokenizer_authenticated(monkeypatch): function test_local_completionsapi_remote_tokenizer_unauthenticated (line 254) | def test_local_completionsapi_remote_tokenizer_unauthenticated(monkeypat... function test_localchatcompletion_remote_tokenizer_authenticated (line 277) | def test_localchatcompletion_remote_tokenizer_authenticated(monkeypatch): function test_localchatcompletion_remote_tokenizer_unauthenticated (line 302) | def test_localchatcompletion_remote_tokenizer_unauthenticated(monkeypatch): FILE: tests/models/test_bos_handling.py function _mock_version (line 27) | def _mock_version(name): class MockModuleFinder (line 36) | class MockModuleFinder: method __init__ (line 39) | def __init__(self, modules): method find_spec (line 42) | def find_spec(self, fullname, path, target=None): method create_module (line 51) | def create_module(self, spec): method exec_module (line 60) | def exec_module(self, module): function pythia_tokenizer (line 80) | def pythia_tokenizer(): function olmo_tokenizer (line 97) | def olmo_tokenizer(): function create_hf_mock (line 118) | def create_hf_mock(tokenizer, add_bos_token, backend="causal"): function create_vllm_mock (line 135) | def create_vllm_mock(tokenizer, add_bos_token): class TestHasBosPrefix (line 152) | class TestHasBosPrefix: method test_none_bos_returns_false (line 155) | def test_none_bos_returns_false(self): method test_detects_single_bos_string (line 160) | def test_detects_single_bos_string(self): method test_detects_multiple_bos_variants (line 166) | def test_detects_multiple_bos_variants(self): class TestAddSpecialKwargs (line 175) | class TestAddSpecialKwargs: method test_explicit_add_special_tokens_takes_precedence (line 178) | def test_explicit_add_special_tokens_takes_precedence(self): method test_falls_back_to_add_bos (line 183) | def test_falls_back_to_add_bos(self): method test_both_none_returns_empty (line 188) | def test_both_none_returns_empty(self): class TestDefaultsToNone (line 198) | class TestDefaultsToNone: method test_huggingface_none_uses_tokenizer_default (line 202) | def test_huggingface_none_uses_tokenizer_default(self, tokenizer_name,... method test_vllm_none_uses_tokenizer_default (line 218) | def test_vllm_none_uses_tokenizer_default(self, tokenizer_name, request): class TestNoDuplicateBos (line 239) | class TestNoDuplicateBos: method test_huggingface_detects_bos_in_single_string (line 243) | def test_huggingface_detects_bos_in_single_string(self, tokenizer_name... method test_huggingface_adds_bos_when_missing (line 267) | def test_huggingface_adds_bos_when_missing(self, tokenizer_name, reque... method test_huggingface_follows_tokenizer_default (line 280) | def test_huggingface_follows_tokenizer_default(self, tokenizer_name, r... method test_vllm_handles_mixed_batch (line 297) | def test_vllm_handles_mixed_batch(self, tokenizer_name, add_bos_token,... method test_vllm_preserves_order_in_mixed_batch (line 337) | def test_vllm_preserves_order_in_mixed_batch( class TestChatTemplateCompatibility (line 376) | class TestChatTemplateCompatibility: method test_huggingface_chat_template_no_duplicate_bos (line 380) | def test_huggingface_chat_template_no_duplicate_bos(self, tokenizer_na... method test_vllm_mixed_chat_batch (line 405) | def test_vllm_mixed_chat_batch(self, tokenizer_name, add_bos_token, re... method test_huggingface_seq2seq_skips_causal_bos_logic (line 453) | def test_huggingface_seq2seq_skips_causal_bos_logic(self, pythia_token... class TestLoglikelihoodBosHandling (line 472) | class TestLoglikelihoodBosHandling: method test_empty_context_continuation_with_bos (line 477) | def test_empty_context_continuation_with_bos( method test_empty_context_continuation_without_bos (line 532) | def test_empty_context_continuation_without_bos( method test_context_with_bos_prefix (line 576) | def test_context_with_bos_prefix(self, tokenizer_name, add_bos_token, ... class TestEdgeCases (line 616) | class TestEdgeCases: method test_explicit_override_takes_precedence (line 619) | def test_explicit_override_takes_precedence(self, pythia_tokenizer): method test_vllm_empty_input (line 630) | def test_vllm_empty_input(self): FILE: tests/models/test_gguf.py function gguf_completion_mock (line 15) | def gguf_completion_mock(base_url=None, **kwargs): class GGUFLMTest (line 92) | class GGUFLMTest(unittest.TestCase): method test_loglikelihood (line 96) | def test_loglikelihood(self, gguf_completion_mock): method test_generate_until (line 118) | def test_generate_until(self, gguf_completion_mock): FILE: tests/models/test_gptqmodel.py function assert_less_than (line 8) | def assert_less_than(value, threshold, desc): class Test_GPTQModel (line 14) | class Test_GPTQModel: method test_gptqmodel (line 18) | def test_gptqmodel(self) -> None: FILE: tests/models/test_hf_steered.py class Test_SteeredModel (line 23) | class Test_SteeredModel: method test_load_with_sae_lens (line 114) | def test_load_with_sae_lens(self) -> None: method test_loglikelihood (line 126) | def test_loglikelihood(self) -> None: method test_generate_until (line 145) | def test_generate_until(self) -> None: method test_loglikelihood_rolling (line 149) | def test_loglikelihood_rolling(self) -> None: method test_toc_encode (line 153) | def test_toc_encode(self) -> None: method test_toc_decode (line 157) | def test_toc_decode(self) -> None: method test_batch_encode (line 161) | def test_batch_encode(self) -> None: method test_model_generate (line 165) | def test_model_generate(self) -> None: FILE: tests/models/test_huggingface.py class Test_HFLM (line 26) | class Test_HFLM: method test_logliklihood (line 110) | def test_logliklihood(self) -> None: method test_generate_until (line 129) | def test_generate_until(self) -> None: method test_logliklihood_rolling (line 133) | def test_logliklihood_rolling(self) -> None: method test_toc_encode (line 137) | def test_toc_encode(self) -> None: method test_toc_decode (line 141) | def test_toc_decode(self) -> None: method test_batch_encode (line 145) | def test_batch_encode(self) -> None: method test_model_generate (line 149) | def test_model_generate(self) -> None: FILE: tests/models/test_model_utils.py class TestTruncateTokens (line 6) | class TestTruncateTokens: method test_left (line 7) | def test_left(self): method test_right (line 11) | def test_right(self): method test_middle (line 15) | def test_middle(self): method test_middle_even (line 20) | def test_middle_even(self): method test_no_truncation_needed (line 25) | def test_no_truncation_needed(self): method test_unknown_strategy (line 29) | def test_unknown_strategy(self): class TestMaybeTruncate (line 35) | class TestMaybeTruncate: method test_case1_no_truncation (line 39) | def test_case1_no_truncation(self): method test_case1_no_truncation_with_adjust (line 47) | def test_case1_no_truncation_with_adjust(self): method test_case2_truncate_prompt_no_adjust (line 56) | def test_case2_truncate_prompt_no_adjust(self): method test_case2_no_adjust_is_default (line 65) | def test_case2_no_adjust_is_default(self): method test_case2_prompt_fits_but_gen_too_large_no_adjust (line 73) | def test_case2_prompt_fits_but_gen_too_large_no_adjust(self): method test_case3_reduce_gen_toks (line 83) | def test_case3_reduce_gen_toks(self): method test_case4_truncate_left (line 92) | def test_case4_truncate_left(self): method test_case4_truncate_right (line 105) | def test_case4_truncate_right(self): method test_case4_truncate_middle (line 118) | def test_case4_truncate_middle(self): method test_case4_default_strategy_is_left (line 132) | def test_case4_default_strategy_is_left(self): method test_min_gen_toks_zero_reduces_to_zero (line 144) | def test_min_gen_toks_zero_reduces_to_zero(self): method test_min_gen_toks_zero_truncates_prompt (line 157) | def test_min_gen_toks_zero_truncates_prompt(self): method test_raises_when_max_len_too_small (line 171) | def test_raises_when_max_len_too_small(self): class TestNormalizeGenKwargs (line 183) | class TestNormalizeGenKwargs: method test_until_string_converted_to_list (line 188) | def test_until_string_converted_to_list(self): method test_until_list_passed_through (line 192) | def test_until_list_passed_through(self): method test_until_missing_defaults_to_empty_list (line 196) | def test_until_missing_defaults_to_empty_list(self): method test_max_gen_toks_used_directly (line 202) | def test_max_gen_toks_used_directly(self): method test_max_new_tokens_converted (line 206) | def test_max_new_tokens_converted(self): method test_max_tokens_converted (line 210) | def test_max_tokens_converted(self): method test_max_completion_tokens_converted (line 214) | def test_max_completion_tokens_converted(self): method test_default_max_gen_toks_when_none_provided (line 218) | def test_default_max_gen_toks_when_none_provided(self): method test_custom_default_max_gen_toks (line 222) | def test_custom_default_max_gen_toks(self): method test_max_token_priority_max_gen_toks_first (line 226) | def test_max_token_priority_max_gen_toks_first(self): method test_max_token_priority_max_new_tokens_second (line 236) | def test_max_token_priority_max_new_tokens_second(self): method test_max_token_priority_max_tokens_third (line 246) | def test_max_token_priority_max_tokens_third(self): method test_do_sample_none_temperature_zero_sets_do_sample_false (line 257) | def test_do_sample_none_temperature_zero_sets_do_sample_false(self): method test_do_sample_none_temperature_positive_sets_do_sample_true (line 261) | def test_do_sample_none_temperature_positive_sets_do_sample_true(self): method test_do_sample_false_sets_temperature_zero (line 265) | def test_do_sample_false_sets_temperature_zero(self): method test_do_sample_false_temperature_positive_forces_temperature_zero (line 269) | def test_do_sample_false_temperature_positive_forces_temperature_zero(... method test_do_sample_true_temperature_positive_preserved (line 273) | def test_do_sample_true_temperature_positive_preserved(self): method test_do_sample_true_temperature_zero_preserved (line 278) | def test_do_sample_true_temperature_zero_preserved(self): method test_extra_kwargs_passed_through (line 285) | def test_extra_kwargs_passed_through(self): method test_original_dict_not_mutated (line 297) | def test_original_dict_not_mutated(self): FILE: tests/models/test_openvino.py function test_evaluator (line 33) | def test_evaluator(backend, model_id, task): function test_ov_config (line 89) | def test_ov_config(): FILE: tests/models/test_sglang.py class Test_SGlang (line 15) | class Test_SGlang: method setup_class (line 31) | def setup_class(cls): method test_logliklihood (line 45) | def test_logliklihood(self) -> None: method test_generate_until (line 51) | def test_generate_until(self) -> None: method test_logliklihood_rolling (line 58) | def test_logliklihood_rolling(self) -> None: method test_evaluator (line 88) | def test_evaluator(self) -> None: FILE: tests/models/test_vllm.py class Test_VLLM (line 11) | class Test_VLLM: method test_logliklihood (line 33) | def test_logliklihood(self) -> None: method test_generate_until (line 39) | def test_generate_until(self) -> None: method test_logliklihood_rolling (line 45) | def test_logliklihood_rolling(self) -> None: FILE: tests/models/test_vllm_context_length.py class TestVLLMContextLength (line 19) | class TestVLLMContextLength: method test_loglikelihood_tokens_truncates_to_max_length_minus_one (line 22) | def test_loglikelihood_tokens_truncates_to_max_length_minus_one(self) ... method test_loglikelihood_tokens_no_truncation_when_within_limit (line 65) | def test_loglikelihood_tokens_no_truncation_when_within_limit(self) ->... method test_loglikelihood_tokens_truncates_at_exactly_max_length (line 102) | def test_loglikelihood_tokens_truncates_at_exactly_max_length(self) ->... method test_loglikelihood_tokens_boundary_at_max_length_minus_one (line 139) | def test_loglikelihood_tokens_boundary_at_max_length_minus_one(self) -... method test_loglikelihood_rolling_uses_max_length_minus_two (line 176) | def test_loglikelihood_rolling_uses_max_length_minus_two(self) -> None: FILE: tests/scripts/test_zeno_visualize.py function test_zeno_sanitize_string (line 11) | def test_zeno_sanitize_string(): FILE: tests/test_aggregation_pipeline.py function _m (line 31) | def _m(d: dict[str, Any]) -> _TaskMetrics: class MockTask (line 36) | class MockTask(Task): method __init__ (line 41) | def __init__( method task_name (line 54) | def task_name(self): method dump_config (line 57) | def dump_config(self) -> dict: method aggregation (line 60) | def aggregation(self): method higher_is_better (line 63) | def higher_is_better(self): method eval_docs (line 67) | def eval_docs(self): method has_training_docs (line 71) | def has_training_docs(self): method has_validation_docs (line 74) | def has_validation_docs(self): method has_test_docs (line 77) | def has_test_docs(self): method test_docs (line 80) | def test_docs(self): method doc_to_text (line 83) | def doc_to_text(self, doc): method doc_to_target (line 86) | def doc_to_target(self, doc): method construct_requests (line 89) | def construct_requests(self, doc, ctx, **kwargs): method process_results (line 92) | def process_results(self, doc, results): function _make_acc (line 96) | def _make_acc( class TestTaskToGroupPipeline (line 112) | class TestTaskToGroupPipeline: method test_single_task_single_group (line 115) | def test_single_task_single_group(self): method test_two_tasks_weighted_group (line 132) | def test_two_tasks_weighted_group(self): method test_two_tasks_unweighted_group (line 156) | def test_two_tasks_unweighted_group(self): method test_sample_len_is_total_not_per_filter (line 177) | def test_sample_len_is_total_not_per_filter(self): method test_multiple_metrics_sample_count (line 211) | def test_multiple_metrics_sample_count(self): class TestNestedGroupPipeline (line 248) | class TestNestedGroupPipeline: method test_two_level_hierarchy (line 251) | def test_two_level_hierarchy(self): method test_parent_with_mixed_children (line 285) | def test_parent_with_mixed_children(self): class TestGroupStderrPipeline (line 319) | class TestGroupStderrPipeline: method test_group_stderr_aggregated (line 322) | def test_group_stderr_aggregated(self): method test_group_stderr_na_when_task_has_single_sample (line 346) | def test_group_stderr_na_when_task_has_single_sample(self): class TestGroupAggregationWarnings (line 360) | class TestGroupAggregationWarnings: method test_warns_when_metric_missing_in_some_tasks (line 363) | def test_warns_when_metric_missing_in_some_tasks(self, caplog): method test_warns_when_metric_missing_in_all_tasks (line 400) | def test_warns_when_metric_missing_in_all_tasks(self, caplog): method test_no_warning_when_all_tasks_have_metric (line 423) | def test_no_warning_when_all_tasks_have_metric(self, caplog): FILE: tests/test_cli_subcommands.py class TestHarnessCLI (line 20) | class TestHarnessCLI: method test_harness_cli_init (line 23) | def test_harness_cli_init(self): method test_harness_cli_has_subcommands (line 29) | def test_harness_cli_has_subcommands(self): method test_harness_cli_backward_compatibility (line 37) | def test_harness_cli_backward_compatibility(self): method test_harness_cli_help_default (line 47) | def test_harness_cli_help_default(self): method test_harness_cli_run_help_only (line 58) | def test_harness_cli_run_help_only(self): class TestListCommand (line 65) | class TestListCommand: method test_list_command_creation (line 68) | def test_list_command_creation(self): method test_list_command_arguments (line 75) | def test_list_command_arguments(self): method test_list_command_choices (line 90) | def test_list_command_choices(self): method test_list_command_execute_tasks (line 106) | def test_list_command_execute_tasks(self, mock_task_manager): method test_list_command_execute_groups (line 123) | def test_list_command_execute_groups(self, mock_task_manager): class TestRunCommand (line 142) | class TestRunCommand: method test_run_command_creation (line 145) | def test_run_command_creation(self): method test_run_command_basic_arguments (line 152) | def test_run_command_basic_arguments(self): method test_run_command_tasks_comma_separated (line 164) | def test_run_command_tasks_comma_separated(self): method test_run_command_tasks_mixed_format (line 175) | def test_run_command_tasks_mixed_format(self): method test_run_command_tasks_None (line 185) | def test_run_command_tasks_None(self): method test_run_command_model_args (line 194) | def test_run_command_model_args(self): method test_run_command_batch_size (line 210) | def test_run_command_batch_size(self): method test_run_command_seed_parsing (line 228) | def test_run_command_seed_parsing(self): method test_run_command_execute_basic (line 250) | def test_run_command_execute_basic( class TestValidateCommand (line 299) | class TestValidateCommand: method test_validate_command_creation (line 302) | def test_validate_command_creation(self): method test_validate_command_arguments (line 309) | def test_validate_command_arguments(self): method test_validate_command_requires_tasks (line 325) | def test_validate_command_requires_tasks(self): method test_validate_command_execute_success (line 335) | def test_validate_command_execute_success(self, mock_task_manager): method test_validate_command_execute_missing_tasks (line 354) | def test_validate_command_execute_missing_tasks(self, mock_task_manager): class TestEvaluatorConfigTaskLoading (line 376) | class TestEvaluatorConfigTaskLoading: method test_process_tasks_comma_separated_in_list (line 380) | def test_process_tasks_comma_separated_in_list(self, mock_task_manager): method test_process_tasks_mixed_comma_and_space_separated (line 400) | def test_process_tasks_mixed_comma_and_space_separated(self, mock_task... method test_process_tasks_string_comma_separated (line 421) | def test_process_tasks_string_comma_separated(self, mock_task_manager): method test_custom_yaml_file_relative_path (line 439) | def test_custom_yaml_file_relative_path(self, tmp_path): method test_missing_yaml_file_raises_error (line 466) | def test_missing_yaml_file_raises_error(self, tmp_path): class TestEvaluatorConfigFromCLI (line 480) | class TestEvaluatorConfigFromCLI: method test_defaults_applied (line 483) | def test_defaults_applied(self, tmp_path): method test_cli_args_override_defaults (line 508) | def test_cli_args_override_defaults(self, tmp_path): method test_model_args_dict_passed_through (line 531) | def test_model_args_dict_passed_through(self, tmp_path): method test_gen_kwargs_passed_through (line 549) | def test_gen_kwargs_passed_through(self, tmp_path): method test_none_args_use_defaults (line 567) | def test_none_args_use_defaults(self, tmp_path): method test_fewshot_as_multiturn_defaults_with_chat_template (line 586) | def test_fewshot_as_multiturn_defaults_with_chat_template(self, tmp_pa... method test_empty_tasks_allowed_at_config_level (line 603) | def test_empty_tasks_allowed_at_config_level(self): method test_validation_error_log_samples_without_output (line 615) | def test_validation_error_log_samples_without_output(self): class TestCLIUtils (line 633) | class TestCLIUtils: method test_try_parse_json_with_json_string (line 636) | def test_try_parse_json_with_json_string(self): method test_try_parse_json_with_dict (line 641) | def test_try_parse_json_with_dict(self): method test_try_parse_json_with_none (line 647) | def test_try_parse_json_with_none(self): method test_try_parse_json_with_plain_string (line 652) | def test_try_parse_json_with_plain_string(self): method test_try_parse_json_with_invalid_json (line 657) | def test_try_parse_json_with_invalid_json(self): method test_int_or_none_list_single_value (line 664) | def test_int_or_none_list_single_value(self): method test_int_or_none_list_multiple_values (line 669) | def test_int_or_none_list_multiple_values(self): method test_int_or_none_list_with_none (line 674) | def test_int_or_none_list_with_none(self): method test_int_or_none_list_invalid_value (line 679) | def test_int_or_none_list_invalid_value(self): method test_int_or_none_list_too_few_values (line 684) | def test_int_or_none_list_too_few_values(self): method test_int_or_none_list_too_many_values (line 689) | def test_int_or_none_list_too_many_values(self): method test_request_caching_arg_to_dict_none (line 694) | def test_request_caching_arg_to_dict_none(self): method test_request_caching_arg_to_dict_true (line 699) | def test_request_caching_arg_to_dict_true(self): method test_request_caching_arg_to_dict_refresh (line 708) | def test_request_caching_arg_to_dict_refresh(self): method test_request_caching_arg_to_dict_delete (line 717) | def test_request_caching_arg_to_dict_delete(self): method test_request_caching_arg_to_dict_invalid (line 726) | def test_request_caching_arg_to_dict_invalid(self): method test_cache_requests_argparse_integration (line 731) | def test_cache_requests_argparse_integration(self): method test_check_argument_types_raises_on_untyped (line 748) | def test_check_argument_types_raises_on_untyped(self): method test_check_argument_types_passes_on_typed (line 758) | def test_check_argument_types_passes_on_typed(self): method test_check_argument_types_skips_const_actions (line 766) | def test_check_argument_types_skips_const_actions(self): class TestMergeDictAction (line 775) | class TestMergeDictAction: method test_comma_separated_key_value (line 778) | def test_comma_separated_key_value(self): method test_space_separated_key_value (line 786) | def test_space_separated_key_value(self): method test_json_dict_input (line 794) | def test_json_dict_input(self): method test_json_nested_dict (line 802) | def test_json_nested_dict(self): method test_empty_values (line 810) | def test_empty_values(self): method test_type_coercion (line 818) | def test_type_coercion(self): method test_multiple_invocations_merge (line 828) | def test_multiple_invocations_merge(self): method test_key_overwrite (line 836) | def test_key_overwrite(self): class TestEvaluatorConfigPrecedence (line 845) | class TestEvaluatorConfigPrecedence: method test_cli_overrides_yaml_overrides_defaults (line 848) | def test_cli_overrides_yaml_overrides_defaults(self, tmp_path): method test_yaml_overrides_defaults (line 895) | def test_yaml_overrides_defaults(self, tmp_path): method test_cli_overrides_yaml_with_explicit_zero (line 923) | def test_cli_overrides_yaml_with_explicit_zero(self, tmp_path): FILE: tests/test_evaluator.py function test_evaluator (line 37) | def test_evaluator( function test_printed_results (line 114) | def test_printed_results( FILE: tests/test_evaluator_utils.py function _m (line 32) | def _m(d: dict[str, Any]) -> _TaskMetrics: class MockEvalTask (line 37) | class MockEvalTask(Task): method __init__ (line 42) | def __init__( method task_name (line 58) | def task_name(self): method dump_config (line 62) | def dump_config(self) -> dict: method aggregation (line 65) | def aggregation(self): method higher_is_better (line 68) | def higher_is_better(self): method eval_docs (line 73) | def eval_docs(self): method has_training_docs (line 77) | def has_training_docs(self): method has_validation_docs (line 80) | def has_validation_docs(self): method has_test_docs (line 83) | def has_test_docs(self): method test_docs (line 86) | def test_docs(self): method doc_to_text (line 89) | def doc_to_text(self, doc): method doc_to_target (line 92) | def doc_to_target(self, doc): method construct_requests (line 95) | def construct_requests(self, doc, ctx, **kwargs): method process_results (line 98) | def process_results(self, doc, results): function make_result_acc (line 102) | def make_result_acc( class TestEvalResults (line 120) | class TestEvalResults: method test_default_fields_are_empty (line 121) | def test_default_fields_are_empty(self): method test_fields_are_independent_instances (line 132) | def test_fields_are_independent_instances(self): class TestGetSampleSize (line 144) | class TestGetSampleSize: method _task (line 145) | def _task(self, n: int = 100): method test_limit_none_returns_none (line 148) | def test_limit_none_returns_none(self): method test_limit_integer_returns_int (line 151) | def test_limit_integer_returns_int(self): method test_limit_fractional_rounds_up (line 154) | def test_limit_fractional_rounds_up(self): method test_limit_fractional_small (line 158) | def test_limit_fractional_small(self): method test_limit_one_is_treated_as_integer (line 162) | def test_limit_one_is_treated_as_integer(self): method test_limit_float_exactly_one_is_integer (line 166) | def test_limit_float_exactly_one_is_integer(self): class TestComputeTaskAggregations (line 178) | class TestComputeTaskAggregations: method _task (line 179) | def _task(self, agg=None): method test_single_metric_mean_aggregation (line 182) | def test_single_metric_mean_aggregation(self): method test_stderr_with_bootstrap_iters_zero (line 189) | def test_stderr_with_bootstrap_iters_zero(self): method test_stderr_with_bootstrap_iters_none (line 195) | def test_stderr_with_bootstrap_iters_none(self): method test_stderr_with_positive_bootstrap_iters (line 201) | def test_stderr_with_positive_bootstrap_iters(self): method test_stderr_na_for_single_sample (line 207) | def test_stderr_na_for_single_sample(self): method test_fallback_to_mean_for_unknown_metric (line 214) | def test_fallback_to_mean_for_unknown_metric(self): method test_multiple_metrics_and_filters (line 221) | def test_multiple_metrics_and_filters(self): method test_bleu_metric_bootstrap_cap (line 233) | def test_bleu_metric_bootstrap_cap(self): class TestCollectResults (line 246) | class TestCollectResults: method _simple_acc (line 247) | def _simple_acc(self): method test_single_task_basic_collection (line 257) | def test_single_task_basic_collection(self): method test_alias_from_task_config (line 266) | def test_alias_from_task_config(self): method test_alias_defaults_to_task_name (line 271) | def test_alias_defaults_to_task_name(self): method test_configs_populated (line 278) | def test_configs_populated(self): method test_versions_populated (line 283) | def test_versions_populated(self): method test_num_fewshot_populated (line 288) | def test_num_fewshot_populated(self): method test_higher_is_better_populated (line 293) | def test_higher_is_better_populated(self): method test_samples_populated (line 298) | def test_samples_populated(self): method test_groups_stored (line 303) | def test_groups_stored(self): method test_groups_default_to_empty (line 311) | def test_groups_default_to_empty(self): method test_multiple_tasks (line 316) | def test_multiple_tasks(self): class TestGetRootGroups (line 335) | class TestGetRootGroups: method test_single_root_group (line 336) | def test_single_root_group(self): method test_root_excludes_children (line 341) | def test_root_excludes_children(self): method test_multiple_independent_roots (line 348) | def test_multiple_independent_roots(self): method test_empty_groups (line 355) | def test_empty_groups(self): method test_deep_hierarchy (line 358) | def test_deep_hierarchy(self): method test_deep_hierarchy_multiple_roots (line 367) | def test_deep_hierarchy_multiple_roots(self): class TestCollectGroupsBottomUp (line 399) | class TestCollectGroupsBottomUp: method test_single_group_no_children (line 400) | def test_single_group_no_children(self): method test_parent_child_order (line 405) | def test_parent_child_order(self): method test_deep_hierarchy_order (line 413) | def test_deep_hierarchy_order(self): method test_no_duplicates (line 423) | def test_no_duplicates(self): method test_empty_groups (line 434) | def test_empty_groups(self): class TestAggregateGroups (line 443) | class TestAggregateGroups: method test_group_metrics_added_to_results (line 444) | def test_group_metrics_added_to_results(self): method test_no_groups_noop (line 467) | def test_no_groups_noop(self): method test_bottom_up_aggregation (line 475) | def test_bottom_up_aggregation(self): class TestProcessResults (line 512) | class TestProcessResults: method _basic_acc (line 513) | def _basic_acc(self): method test_returns_eval_results (line 517) | def test_returns_eval_results(self): method test_with_groups (line 521) | def test_with_groups(self): method test_without_groups (line 529) | def test_without_groups(self): class TestGetResultsData (line 540) | class TestGetResultsData: method test_preserves_sample_len (line 541) | def test_preserves_sample_len(self): method test_alias_not_indented (line 547) | def test_alias_not_indented(self): method test_group_with_aggregation_in_group_results (line 571) | def test_group_with_aggregation_in_group_results(self): method test_group_without_aggregation_not_in_group_results (line 585) | def test_group_without_aggregation_not_in_group_results(self): method test_task_only_in_task_results (line 598) | def test_task_only_in_task_results(self): class TestPropagateHigherIsBetter (line 611) | class TestPropagateHigherIsBetter: method test_propagation_from_children (line 612) | def test_propagation_from_children(self): method test_conflicting_values_set_to_none (line 620) | def test_conflicting_values_set_to_none(self): method test_conflicting_values_log_warning (line 630) | def test_conflicting_values_log_warning(self, caplog): method test_no_children_in_higher_is_better (line 641) | def test_no_children_in_higher_is_better(self): method test_multiple_metrics_mixed (line 650) | def test_multiple_metrics_mixed(self): method test_empty_groups_list (line 664) | def test_empty_groups_list(self): class TestToEvalResults (line 676) | class TestToEvalResults: method _make_eval_acc (line 679) | def _make_eval_acc(self, *, with_group: bool = False, has_aggregation:... method test_output_has_required_keys (line 704) | def test_output_has_required_keys(self): method test_results_contain_task_metrics (line 718) | def test_results_contain_task_metrics(self): method test_n_samples_effective_from_sample_len (line 724) | def test_n_samples_effective_from_sample_len(self): method test_groups_key_present_when_group_has_aggregation (line 732) | def test_groups_key_present_when_group_has_aggregation(self): method test_groups_key_absent_when_no_group_has_aggregation (line 738) | def test_groups_key_absent_when_no_group_has_aggregation(self): method test_groups_key_absent_when_no_groups (line 743) | def test_groups_key_absent_when_no_groups(self): method test_samples_included_when_provided (line 748) | def test_samples_included_when_provided(self): method test_samples_absent_when_not_provided (line 754) | def test_samples_absent_when_not_provided(self): method test_higher_is_better_propagated_to_groups (line 759) | def test_higher_is_better_propagated_to_groups(self): method test_configs_sorted (line 765) | def test_configs_sorted(self): method test_versions_sorted (line 770) | def test_versions_sorted(self): class TestCollectResultsNSamples (line 781) | class TestCollectResultsNSamples: method test_n_samples_effective_equals_sample_len (line 784) | def test_n_samples_effective_equals_sample_len(self): method test_n_samples_original_from_eval_docs (line 795) | def test_n_samples_original_from_eval_docs(self): FILE: tests/test_fewshot_context.py function default_delimiters (line 19) | def default_delimiters(): class TestMessage (line 29) | class TestMessage: method test_to_dict_excludes_private_fields (line 32) | def test_to_dict_excludes_private_fields(self): method test_to_text_appends_delimiter (line 41) | def test_to_text_appends_delimiter(self): method test_to_text_empty_delimiter (line 49) | def test_to_text_empty_delimiter(self): class TestMaybeDelimit (line 63) | class TestMaybeDelimit: method test_both_present_no_whitespace (line 66) | def test_both_present_no_whitespace(self): method test_prefix_ends_with_space (line 72) | def test_prefix_ends_with_space(self): method test_suffix_starts_with_space (line 78) | def test_suffix_starts_with_space(self): method test_both_have_whitespace (line 84) | def test_both_have_whitespace(self): method test_prefix_only (line 90) | def test_prefix_only(self): method test_suffix_only (line 95) | def test_suffix_only(self): method test_both_empty (line 100) | def test_both_empty(self): method test_custom_delimiter (line 105) | def test_custom_delimiter(self): class TestMultiturnToSingleturn (line 117) | class TestMultiturnToSingleturn: method test_collapses_user_messages (line 120) | def test_collapses_user_messages(self): method test_preserves_final_assistant (line 134) | def test_preserves_final_assistant(self): method test_preserves_system_message (line 150) | def test_preserves_system_message(self): method test_system_with_assistant_ending (line 164) | def test_system_with_assistant_ending(self): function messages_to_text (line 185) | def messages_to_text(msgs: list[Message]) -> str: class TestBuildQaTurn (line 190) | class TestBuildQaTurn: method task (line 194) | def task(self): method test_basic_qa_format (line 198) | def test_basic_qa_format(self, task): method test_no_answer_format (line 211) | def test_no_answer_format(self, task): method test_choice_with_int_answer (line 220) | def test_choice_with_int_answer(self, task): method test_answer_as_string_directly (line 235) | def test_answer_as_string_directly(self, task): method test_answer_as_list (line 244) | def test_answer_as_list(self, task): method test_gen_prefix_without_answer (line 252) | def test_gen_prefix_without_answer(self, task): method test_gen_prefix_with_answer (line 264) | def test_gen_prefix_with_answer(self, task): method test_gen_prefix_spacing_added_when_needed (line 282) | def test_gen_prefix_spacing_added_when_needed(self, task): method test_gen_prefix_no_extra_space_when_prefix_has_trailing (line 290) | def test_gen_prefix_no_extra_space_when_prefix_has_trailing(self, task): method test_gen_prefix_no_extra_space_when_answer_has_leading (line 298) | def test_gen_prefix_no_extra_space_when_answer_has_leading(self, task): method test_gen_prefix_without_answer_preserves_content (line 306) | def test_gen_prefix_without_answer_preserves_content(self, task): method test_gen_prefix_with_trailing_space_without_answer (line 319) | def test_gen_prefix_with_trailing_space_without_answer(self, task): method test_custom_delimiters (line 327) | def test_custom_delimiters(self, task): method test_empty_delimiters (line 335) | def test_empty_delimiters(self, task): method test_whitespace_delimiter_matrix (line 343) | def test_whitespace_delimiter_matrix(self, task): method test_raises_on_non_string_question (line 419) | def test_raises_on_non_string_question(self, task): method test_answer_index_zero_uses_delimiter (line 424) | def test_answer_index_zero_uses_delimiter(self, task): method test_answer_index_nonzero_uses_delimiter (line 443) | def test_answer_index_nonzero_uses_delimiter(self, task): class TestFewshotContext (line 460) | class TestFewshotContext: method test_zero_shot_format (line 463) | def test_zero_shot_format(self, mock_configurable_task): method test_one_shot_format (line 476) | def test_one_shot_format(self, mock_configurable_task): method test_two_shot_format (line 491) | def test_two_shot_format(self, mock_configurable_task): method test_with_system_instruction (line 509) | def test_with_system_instruction(self, mock_configurable_task): method test_with_description (line 525) | def test_with_description(self, mock_configurable_task): method test_system_instruction_and_description (line 539) | def test_system_instruction_and_description(self, mock_configurable_ta... method test_with_choices (line 557) | def test_with_choices(self, mock_configurable_task): method test_custom_delimiters (line 582) | def test_custom_delimiters(self, mock_configurable_task): method test_gen_prefix_in_fewshot (line 601) | def test_gen_prefix_in_fewshot(self, mock_configurable_task): method test_sampler_excludes_eval_doc_when_same_split (line 622) | def test_sampler_excludes_eval_doc_when_same_split(self, mock_configur... method test_sampler_no_exclusion_when_different_split (line 640) | def test_sampler_no_exclusion_when_different_split(self, mock_configur... method test_chat_template_multiturn (line 658) | def test_chat_template_multiturn(self, mock_configurable_task): method test_chat_template_singleturn (line 688) | def test_chat_template_singleturn(self, mock_configurable_task): class TestChatTemplateFormat (line 722) | class TestChatTemplateFormat: method test_messages_to_dict_list (line 725) | def test_messages_to_dict_list(self): method test_singleturn_collapse_for_chat (line 741) | def test_singleturn_collapse_for_chat(self): FILE: tests/test_group.py class MockTask (line 18) | class MockTask(Task): method __init__ (line 23) | def __init__(self, task_name: str): method task_name (line 27) | def task_name(self): method has_training_docs (line 30) | def has_training_docs(self): method has_validation_docs (line 33) | def has_validation_docs(self): method has_test_docs (line 36) | def has_test_docs(self): method test_docs (line 39) | def test_docs(self): method doc_to_text (line 42) | def doc_to_text(self, doc): method doc_to_target (line 45) | def doc_to_target(self, doc): method construct_requests (line 48) | def construct_requests(self, doc, ctx, **kwargs): method process_results (line 51) | def process_results(self, doc, results): method aggregation (line 54) | def aggregation(self): method higher_is_better (line 57) | def higher_is_better(self): class TestAggMetricConfig (line 61) | class TestAggMetricConfig: method test_default_filter_list_is_none (line 64) | def test_default_filter_list_is_none(self): method test_explicit_filter_list (line 69) | def test_explicit_filter_list(self): method test_string_filter_normalized_to_list (line 74) | def test_string_filter_normalized_to_list(self): method test_empty_filter_list (line 80) | def test_empty_filter_list(self): method test_multiple_filters (line 85) | def test_multiple_filters(self): method test_default_aggregation_is_mean (line 90) | def test_default_aggregation_is_mean(self): method test_default_weight_by_size_is_true (line 95) | def test_default_weight_by_size_is_true(self): class TestGroupFilterDiscovery (line 101) | class TestGroupFilterDiscovery: method setup_method (line 104) | def setup_method(self): method test_discover_filters_single_filter (line 132) | def test_discover_filters_single_filter(self): method test_discover_filters_multiple_filters (line 147) | def test_discover_filters_multiple_filters(self): method test_discover_filters_no_matches (line 156) | def test_discover_filters_no_matches(self): method test_discover_filters_excludes_stderr (line 166) | def test_discover_filters_excludes_stderr(self): method test_discover_filters_partial_availability (line 185) | def test_discover_filters_partial_availability(self): class TestGroupAggregation (line 196) | class TestGroupAggregation: method setup_method (line 199) | def setup_method(self): method test_auto_discovery_aggregates_all_filters (line 225) | def test_auto_discovery_aggregates_all_filters(self): method test_explicit_filter_list_backward_compatibility (line 253) | def test_explicit_filter_list_backward_compatibility(self): method test_multiple_explicit_filters (line 271) | def test_multiple_explicit_filters(self): method test_empty_filter_list_no_aggregation (line 289) | def test_empty_filter_list_no_aggregation(self): method test_multiple_metrics_auto_discovery (line 308) | def test_multiple_metrics_auto_discovery(self): method test_mixed_auto_and_explicit_filters (line 344) | def test_mixed_auto_and_explicit_filters(self): method test_stderr_aggregation_with_auto_discovery (line 379) | def test_stderr_aggregation_with_auto_discovery(self): method test_sample_len_count_with_auto_discovery (line 404) | def test_sample_len_count_with_auto_discovery(self): method test_sample_count_per_metric_with_asymmetric_filters (line 426) | def test_sample_count_per_metric_with_asymmetric_filters(self): class TestGroupWeightedAggregation (line 441) | class TestGroupWeightedAggregation: method test_weighted_aggregation_auto_discovery (line 444) | def test_weighted_aggregation_auto_discovery(self): class TestGroupEdgeCases (line 490) | class TestGroupEdgeCases: method test_no_aggregation_config (line 493) | def test_no_aggregation_config(self): method test_task_not_in_metrics (line 504) | def test_task_not_in_metrics(self): method test_metric_missing_in_some_tasks (line 528) | def test_metric_missing_in_some_tasks(self, caplog): class TestGroup (line 573) | class TestGroup: method setup_method (line 576) | def setup_method(self): method test_add_task_uses_task_name (line 582) | def test_add_task_uses_task_name(self): method test_add_group_uses_name (line 588) | def test_add_group_uses_name(self): method test_pop_existing_child (line 595) | def test_pop_existing_child(self): method test_pop_nonexistent_child_no_error (line 602) | def test_pop_nonexistent_child_no_error(self): method test_get_existing (line 607) | def test_get_existing(self): method test_get_missing_returns_none (line 612) | def test_get_missing_returns_none(self): method test_contains_present (line 618) | def test_contains_present(self): method test_contains_absent (line 623) | def test_contains_absent(self): method test_iter_yields_child_values (line 629) | def test_iter_yields_child_values(self): method test_len (line 640) | def test_len(self): method test_get_all_tasks_recursive (line 650) | def test_get_all_tasks_recursive(self): method test_get_all_tasks_non_recursive (line 662) | def test_get_all_tasks_non_recursive(self): method test_get_all_groups_recursive (line 674) | def test_get_all_groups_recursive(self): method test_get_all_groups_non_recursive (line 686) | def test_get_all_groups_non_recursive(self): method test_child_names_returns_keys (line 698) | def test_child_names_returns_keys(self): method test_has_aggregation_true (line 706) | def test_has_aggregation_true(self): method test_has_aggregation_false_none (line 713) | def test_has_aggregation_false_none(self): method test_has_aggregation_false_empty (line 717) | def test_has_aggregation_false_empty(self): method test_repr (line 723) | def test_repr(self): class TestGroupSerialization (line 732) | class TestGroupSerialization: method test_to_dict_round_trip (line 735) | def test_to_dict_round_trip(self): method test_from_config_basic (line 753) | def test_from_config_basic(self): method test_from_config_single_dict_agg_metric (line 775) | def test_from_config_single_dict_agg_metric(self): method test_from_config_missing_group_key (line 786) | def test_from_config_missing_group_key(self): method test_to_dict_no_optional_fields (line 792) | def test_to_dict_no_optional_fields(self): class TestAggMetricConfigValidation (line 803) | class TestAggMetricConfigValidation: method test_invalid_aggregation_raises (line 806) | def test_invalid_aggregation_raises(self): method test_callable_aggregation_allowed (line 810) | def test_callable_aggregation_allowed(self): FILE: tests/test_janitor.py function simple_ngram (line 39) | def simple_ngram(sequence, n): function test_form_ngrams (line 51) | def test_form_ngrams(): function test_word_ngrams (line 62) | def test_word_ngrams(): function test_split_indices (line 76) | def test_split_indices(): function test_word_ngrams_indices (line 106) | def test_word_ngrams_indices(): function test_janitor1 (line 142) | def test_janitor1(): function test_janitor2 (line 185) | def test_janitor2(): function test_janitor3 (line 222) | def test_janitor3(): function test_janitor4 (line 256) | def test_janitor4(): function test_janitor5 (line 298) | def test_janitor5(): function test_janitor6 (line 342) | def test_janitor6(): function test_janitor7 (line 394) | def test_janitor7(): function test_janitor8 (line 449) | def test_janitor8(): FILE: tests/test_metrics.py class MockConfigurableTask (line 8) | class MockConfigurableTask(ConfigurableTask): method __init__ (line 11) | def __init__(self): method doc_to_choice (line 36) | def doc_to_choice(self, doc): method doc_to_target (line 39) | def doc_to_target(self, doc): method has_training_docs (line 43) | def has_training_docs(self): method has_validation_docs (line 46) | def has_validation_docs(self): method has_test_docs (line 49) | def has_test_docs(self): method download (line 52) | def download(self, **kwargs): function test_acc_mutual_info_slicing (line 56) | def test_acc_mutual_info_slicing(): function test_acc_mutual_info_different_predictions (line 93) | def test_acc_mutual_info_different_predictions(): function test_acc_mutual_info_without_metric (line 127) | def test_acc_mutual_info_without_metric(): function test_bootstrap_internal_no_mp (line 156) | def test_bootstrap_internal_no_mp(): function test_dict_metric_uses_custom_aggregation (line 181) | def test_dict_metric_uses_custom_aggregation(): FILE: tests/test_misc.py function test_bootstrapping (line 8) | def test_bootstrapping(): FILE: tests/test_prompt.py function test_mmlu_prompt_rendering (line 196) | def test_mmlu_prompt_rendering( FILE: tests/test_registry.py class TestRegistryBasics (line 27) | class TestRegistryBasics: method test_create_registry (line 30) | def test_create_registry(self): method test_decorator_registration (line 36) | def test_decorator_registration(self): method test_decorator_multiple_aliases (line 48) | def test_decorator_multiple_aliases(self): method test_decorator_auto_name (line 60) | def test_decorator_auto_name(self): method test_lazy_registration (line 70) | def test_lazy_registration(self): method test_unknown_key_error (line 87) | def test_unknown_key_error(self): method test_default_value (line 96) | def test_default_value(self): method test_iteration (line 104) | def test_iteration(self): method test_contains (line 115) | def test_contains(self): method test_keys_values_items (line 123) | def test_keys_values_items(self): class TestRegistryCollisions (line 134) | class TestRegistryCollisions: method test_duplicate_raises_error (line 137) | def test_duplicate_raises_error(self): method test_placeholder_upgrade (line 153) | def test_placeholder_upgrade(self): method test_same_object_no_error (line 170) | def test_same_object_no_error(self): class TestRegistryFreeze (line 183) | class TestRegistryFreeze: method test_freeze (line 186) | def test_freeze(self): method test_freeze_all (line 199) | def test_freeze_all(self): class TestRegistryThreadSafety (line 207) | class TestRegistryThreadSafety: method test_concurrent_registration (line 210) | def test_concurrent_registration(self): method test_concurrent_access (line 232) | def test_concurrent_access(self): class TestModelRegistry (line 255) | class TestModelRegistry: method test_model_registry_exists (line 258) | def test_model_registry_exists(self): method test_lazy_model_loading (line 262) | def test_lazy_model_loading(self): method test_get_model_error (line 277) | def test_get_model_error(self): class TestFilterRegistry (line 285) | class TestFilterRegistry: method test_filter_registry_exists (line 288) | def test_filter_registry_exists(self): method test_register_filter (line 292) | def test_register_filter(self): method test_get_filter_callable (line 304) | def test_get_filter_callable(self): class TestMetricRegistry (line 313) | class TestMetricRegistry: method test_metric_registry_exists (line 316) | def test_metric_registry_exists(self): method test_aggregation_registry_exists (line 320) | def test_aggregation_registry_exists(self): method test_register_aggregation (line 324) | def test_register_aggregation(self): method test_register_metric (line 334) | def test_register_metric(self): method test_builtin_metrics_loaded (line 355) | def test_builtin_metrics_loaded(self): class TestBackwardCompatibility (line 365) | class TestBackwardCompatibility: method test_registry_aliases (line 368) | def test_registry_aliases(self): class TestRegistryClear (line 387) | class TestRegistryClear: method test_clear (line 390) | def test_clear(self): FILE: tests/test_requests_caching.py function setup_and_teardown (line 26) | def setup_and_teardown(): function clear_cache (line 35) | def clear_cache(): function get_cache_files (line 44) | def get_cache_files(tasks: Optional[List[str]] = None) -> Tuple[List[str... function assert_created (line 57) | def assert_created(tasks: List[str], file_task_names: List[str]): function requests_caching_true (line 65) | def requests_caching_true(tasks: List[str]): function requests_caching_refresh (line 74) | def requests_caching_refresh(tasks: List[str]): function requests_caching_delete (line 94) | def requests_caching_delete(tasks: List[str]): function run_tests (line 108) | def run_tests(): FILE: tests/test_samplers.py function sample_docs (line 21) | def sample_docs() -> list[dict]: function large_docs (line 33) | def large_docs() -> list[dict]: class TestContextSampler (line 43) | class TestContextSampler: method test_sample_returns_exactly_n_documents (line 47) | def test_sample_returns_exactly_n_documents(self, sample_docs, n): method test_sample_with_seed_is_reproducible (line 55) | def test_sample_with_seed_is_reproducible(self, sample_docs): method test_different_seeds_produce_different_samples (line 65) | def test_different_seeds_produce_different_samples(self, large_docs): method test_sample_zero_returns_empty (line 75) | def test_sample_zero_returns_empty(self, sample_docs): method test_sample_negative_raises (line 83) | def test_sample_negative_raises(self, sample_docs): method test_sample_excludes_eval_doc (line 90) | def test_sample_excludes_eval_doc(self, sample_docs): method test_sample_all_docs_with_exclusion (line 100) | def test_sample_all_docs_with_exclusion(self, sample_docs): method test_fewshot_indices_filters_documents (line 110) | def test_fewshot_indices_filters_documents(self, sample_docs): method test_set_rnd_changes_random_state (line 123) | def test_set_rnd_changes_random_state(self, large_docs): method test_replace_df_updates_documents (line 133) | def test_replace_df_updates_documents(self, sample_docs): method test_replace_df_resets_loaded_state (line 143) | def test_replace_df_resets_loaded_state(self, sample_docs): method test_empty_df_raises_on_sample (line 156) | def test_empty_df_raises_on_sample(self): method test_none_df_defaults_to_empty (line 163) | def test_none_df_defaults_to_empty(self): method test_sample_with_df_override (line 169) | def test_sample_with_df_override(self, sample_docs, large_docs): class TestRmEvalDoc (line 180) | class TestRmEvalDoc: method test_removes_matching_doc (line 183) | def test_removes_matching_doc(self): method test_limits_to_n_results (line 193) | def test_limits_to_n_results(self): method test_no_match_returns_all (line 203) | def test_no_match_returns_all(self): class TestFirstNSampler (line 218) | class TestFirstNSampler: method test_returns_first_n_in_order (line 221) | def test_returns_first_n_in_order(self, sample_docs): method test_is_deterministic (line 232) | def test_is_deterministic(self, sample_docs): method test_sample_all (line 242) | def test_sample_all(self, sample_docs): method test_exceeding_available_raises (line 250) | def test_exceeding_available_raises(self, sample_docs): method test_ignores_eval_doc (line 257) | def test_ignores_eval_doc(self, sample_docs): class TestSamplerRegistry (line 273) | class TestSamplerRegistry: method test_registry_contains_default (line 276) | def test_registry_contains_default(self): method test_registry_contains_first_n (line 281) | def test_registry_contains_first_n(self): method test_get_sampler_returns_class (line 286) | def test_get_sampler_returns_class(self): method test_get_sampler_unknown_raises_keyerror (line 293) | def test_get_sampler_unknown_raises_keyerror(self): method test_get_sampler_error_lists_available (line 298) | def test_get_sampler_error_lists_available(self): class TestSamplerIntegration (line 309) | class TestSamplerIntegration: method test_method_chaining (line 312) | def test_method_chaining(self, sample_docs): method test_sampler_from_registry (line 320) | def test_sampler_from_registry(self, sample_docs): method test_first_n_from_registry (line 329) | def test_first_n_from_registry(self, sample_docs): FILE: tests/test_task_manager.py function custom_task_name (line 18) | def custom_task_name(): function custom_task_tag (line 23) | def custom_task_tag(): function task_yaml (line 28) | def task_yaml(pytestconfig, custom_task_name, custom_task_tag): function task_code (line 38) | def task_code(): function custom_task_files_dir (line 57) | def custom_task_files_dir(task_yaml, task_code, custom_task_name): function test_python_task_inclusion (line 68) | def test_python_task_inclusion( class TestConfigLoader (line 89) | class TestConfigLoader: method test_load_simple_yaml (line 90) | def test_load_simple_yaml(self, tmp_path): method test_load_yaml_with_include (line 106) | def test_load_yaml_with_include(self, tmp_path): method test_load_yaml_with_function_tag_resolved (line 130) | def test_load_yaml_with_function_tag_resolved(self, tmp_path): method test_load_yaml_without_function_resolution (line 148) | def test_load_yaml_without_function_resolution(self, tmp_path): method test_load_yaml_recursive_includes (line 163) | def test_load_yaml_recursive_includes(self, tmp_path): method test_load_yaml_cycle_detection (line 188) | def test_load_yaml_cycle_detection(self, tmp_path): class TestKind (line 210) | class TestKind: method test_kind_enum_values (line 211) | def test_kind_enum_values(self): class TestEntry (line 219) | class TestEntry: method test_entry_dataclass_fields (line 220) | def test_entry_dataclass_fields(self): class TestTaskIndex (line 236) | class TestTaskIndex: method test_build_from_directory (line 237) | def test_build_from_directory(self, tmp_path): method test_deterministic_traversal (line 252) | def test_deterministic_traversal(self, tmp_path): method test_duplicate_task_detection (line 267) | def test_duplicate_task_detection(self, tmp_path, caplog): method test_duplicate_group_detection (line 287) | def test_duplicate_group_detection(self, tmp_path, caplog): method test_kind_detection_task (line 312) | def test_kind_detection_task(self): method test_kind_detection_group (line 318) | def test_kind_detection_group(self): method test_kind_detection_py_task (line 324) | def test_kind_detection_py_task(self): method test_tag_registration (line 330) | def test_tag_registration(self, tmp_path): method test_ignore_pycache (line 347) | def test_ignore_pycache(self, tmp_path): function shared_task_manager (line 366) | def shared_task_manager(): function test_configs_task_manager (line 372) | def test_configs_task_manager(): class TestTaskManagerIntegration (line 378) | class TestTaskManagerIntegration: method test_initialization (line 379) | def test_initialization(self, shared_task_manager): method test_all_tasks_sorted (line 383) | def test_all_tasks_sorted(self, shared_task_manager): method test_all_groups_property (line 388) | def test_all_groups_property(self, shared_task_manager): method test_all_subtasks_property (line 398) | def test_all_subtasks_property(self, shared_task_manager): method test_all_tags_property (line 406) | def test_all_tags_property(self, shared_task_manager): method test_load_task_by_name (line 416) | def test_load_task_by_name(self, test_configs_task_manager): method test_load_group_by_name (line 421) | def test_load_group_by_name(self, test_configs_task_manager): method test_load_tag_by_name (line 431) | def test_load_tag_by_name(self, shared_task_manager): method test_include_path (line 438) | def test_include_path(self): method test_include_defaults_false (line 445) | def test_include_defaults_false(self): method test_include_resolution (line 454) | def test_include_resolution(self): method test_include_inheritance_override (line 461) | def test_include_inheritance_override(self): method test_include_custom_metrics (line 476) | def test_include_custom_metrics(self): method test_group_loading (line 489) | def test_group_loading(self): method test_include_group (line 496) | def test_include_group(self): method test_match_tasks_glob (line 507) | def test_match_tasks_glob(self, shared_task_manager): method test_name_is_registered (line 513) | def test_name_is_registered(self, shared_task_manager): method test_name_is_task_tag (line 518) | def test_name_is_task_tag(self, shared_task_manager): method test_include_path_precedence (line 526) | def test_include_path_precedence(self, shared_task_manager): method test_include_defaults_false_with_custom_path (line 585) | def test_include_defaults_false_with_custom_path(self): method test_include_defaults_true_with_new_tasks (line 637) | def test_include_defaults_true_with_new_tasks(self, shared_task_manager): method test_tag_expansion_in_group (line 691) | def test_tag_expansion_in_group(self, test_configs_task_manager): method test_nested_group_with_tag (line 715) | def test_nested_group_with_tag(self, test_configs_task_manager): method test_inline_subgroup_syntax (line 746) | def test_inline_subgroup_syntax(self, test_configs_task_manager): class TestTaskManagerLoad (line 782) | class TestTaskManagerLoad: method test_load_task_by_name (line 789) | def test_load_task_by_name(self, test_configs_task_manager): method test_load_group_by_name (line 794) | def test_load_group_by_name(self, test_configs_task_manager): method test_load_group_map (line 802) | def test_load_group_map(self, test_configs_task_manager): method test_load_tag_by_name (line 810) | def test_load_tag_by_name(self, shared_task_manager): method test_include_inheritance_override (line 818) | def test_include_inheritance_override(self): method test_include_custom_metrics (line 829) | def test_include_custom_metrics(self): method test_tag_expansion_in_group (line 842) | def test_tag_expansion_in_group(self, test_configs_task_manager): method test_nested_group_with_tag (line 852) | def test_nested_group_with_tag(self, test_configs_task_manager): method test_include_path_precedence (line 865) | def test_include_path_precedence(self, shared_task_manager): method test_load_returns_same_tasks_as_legacy (line 902) | def test_load_returns_same_tasks_as_legacy(self, test_configs_task_man... class TestGroupBuilding (line 926) | class TestGroupBuilding: method tm (line 934) | def tm(self): method test_existing_group_ref_has_children (line 940) | def test_existing_group_ref_has_children(self, tm): method test_existing_group_ref_overrides_propagate (line 961) | def test_existing_group_ref_overrides_propagate(self, tm): method test_group_level_config_propagates_to_children (line 978) | def test_group_level_config_propagates_to_children(self, tm): method test_caller_overrides_beat_group_defaults (line 993) | def test_caller_overrides_beat_group_defaults(self, tm): method test_mixed_members_string_ref (line 1009) | def test_mixed_members_string_ref(self, tm): method test_mixed_members_dict_with_overrides (line 1018) | def test_mixed_members_dict_with_overrides(self, tm): method test_mixed_members_inline_subgroup (line 1024) | def test_mixed_members_inline_subgroup(self, tm): method test_empty_group_has_no_children (line 1041) | def test_empty_group_has_no_children(self, tm): method test_parse_aggregation_with_list (line 1055) | def test_parse_aggregation_with_list(self): method test_parse_aggregation_single_dict_normalized (line 1076) | def test_parse_aggregation_single_dict_normalized(self): method test_parse_aggregation_missing_returns_none (line 1093) | def test_parse_aggregation_missing_returns_none(self): method test_group_alias_preserved (line 1105) | def test_group_alias_preserved(self, tm): method test_group_metadata_includes_factory_meta (line 1114) | def test_group_metadata_includes_factory_meta(self): method test_deeply_nested_get_all_tasks_recursive (line 1129) | def test_deeply_nested_get_all_tasks_recursive(self, tm): method test_deeply_nested_get_all_tasks_non_recursive (line 1141) | def test_deeply_nested_get_all_tasks_non_recursive(self, tm): FILE: tests/test_tasks.py function get_new_tasks_else_default (line 20) | def get_new_tasks_else_default(): function task_class (line 32) | def task_class( function limit (line 50) | def limit() -> int: class BaseTasks (line 54) | class BaseTasks: method test_download (line 59) | def test_download(self, task_class: ConfigurableTask): method test_has_training_docs (line 63) | def test_has_training_docs(self, task_class: ConfigurableTask): method test_check_training_docs (line 66) | def test_check_training_docs(self, task_class: ConfigurableTask): method test_has_validation_docs (line 70) | def test_has_validation_docs(self, task_class): method test_check_validation_docs (line 73) | def test_check_validation_docs(self, task_class): method test_has_test_docs (line 77) | def test_has_test_docs(self, task_class): method test_check_test_docs (line 80) | def test_check_test_docs(self, task_class): method test_should_decontaminate (line 85) | def test_should_decontaminate(self, task_class): method test_doc_to_text (line 91) | def test_doc_to_text(self, task_class, limit): method test_create_choices (line 114) | def test_create_choices(self, task_class, limit): method test_doc_to_target (line 126) | def test_doc_to_target(self, task_class, limit): method test_build_all_requests (line 138) | def test_build_all_requests(self, task_class, limit): method test_construct_requests (line 143) | def test_construct_requests(self, task_class, limit): class TestNewTasksElseDefault (line 165) | class TestNewTasksElseDefault(BaseTasks): FILE: tests/test_unitxt_tasks.py function limit (line 17) | def limit() -> int: class TestUnitxtTasks (line 28) | class TestUnitxtTasks(BaseTasks): method test_check_training_docs (line 35) | def test_check_training_docs(self, task_class: ConfigurableTask): method test_check_validation_docs (line 39) | def test_check_validation_docs(self, task_class): method test_check_test_docs (line 43) | def test_check_test_docs(self, task_class): method test_doc_to_text (line 48) | def test_doc_to_text(self, task_class, limit: int): FILE: tests/test_utils.py function test_get_rolling_token_windows_v1 (line 25) | def test_get_rolling_token_windows_v1(): function test_get_rolling_token_windows_v2 (line 55) | def test_get_rolling_token_windows_v2(): function test_get_rolling_token_windows_v3 (line 84) | def test_get_rolling_token_windows_v3(): function test_get_rolling_token_windows_v4 (line 129) | def test_get_rolling_token_windows_v4(): function test_get_rolling_token_windows_v5 (line 170) | def test_get_rolling_token_windows_v5(): function test_get_rolling_token_windows_v6 (line 199) | def test_get_rolling_token_windows_v6(): function test_get_rolling_token_windows_empty (line 223) | def test_get_rolling_token_windows_empty(): function test_make_disjoint_window (line 236) | def test_make_disjoint_window(): class TestCollator (line 245) | class TestCollator: method make_generate_sample (line 246) | def make_generate_sample(self, end=10): method make_loglikelihood_sample (line 259) | def make_loglikelihood_sample(self, end=11): method make_loglikelihood_sample_group (line 266) | def make_loglikelihood_sample_group(self, end=11): method test_generations (line 275) | def test_generations(self, batch_size, end): method test_loglikelihood (line 305) | def test_loglikelihood(self, batch_size, end): method test_context_grouping (line 328) | def test_context_grouping(self, batch_size): function test_aggregate_mean (line 369) | def test_aggregate_mean(): function test_aggregate_stderrs (line 388) | def test_aggregate_stderrs(samples): function test_remote_tokenizer_custom_cert_and_token (line 405) | def test_remote_tokenizer_custom_cert_and_token(monkeypatch): function test_remote_tokenizer_no_cert (line 433) | def test_remote_tokenizer_no_cert(monkeypatch): function test_remote_tokenizer_http_url (line 458) | def test_remote_tokenizer_http_url(monkeypatch): function test_check_remote_tokenizer_support (line 482) | def test_check_remote_tokenizer_support(monkeypatch): function test_apply_chat_template (line 520) | def test_apply_chat_template(monkeypatch): class TestRequiresDelimiter (line 552) | class TestRequiresDelimiter: method test_no_whitespace_requires_delimiter (line 555) | def test_no_whitespace_requires_delimiter(self): method test_prefix_ends_with_space (line 559) | def test_prefix_ends_with_space(self): method test_suffix_starts_with_space (line 563) | def test_suffix_starts_with_space(self): method test_both_have_whitespace (line 567) | def test_both_have_whitespace(self): method test_prefix_ends_with_newline (line 571) | def test_prefix_ends_with_newline(self): method test_suffix_starts_with_tab (line 575) | def test_suffix_starts_with_tab(self): class TestMaybeDelimit (line 580) | class TestMaybeDelimit: method test_both_present_no_whitespace (line 583) | def test_both_present_no_whitespace(self): method test_both_present_prefix_has_space (line 587) | def test_both_present_prefix_has_space(self): method test_both_present_suffix_has_space (line 591) | def test_both_present_suffix_has_space(self): method test_custom_delimiter (line 595) | def test_custom_delimiter(self): method test_prefix_is_none (line 599) | def test_prefix_is_none(self): method test_prefix_is_empty (line 603) | def test_prefix_is_empty(self): method test_suffix_is_none (line 607) | def test_suffix_is_none(self): method test_suffix_is_empty (line 611) | def test_suffix_is_empty(self): method test_both_none (line 615) | def test_both_none(self): method test_both_empty (line 619) | def test_both_empty(self): method test_newline_delimiter (line 623) | def test_newline_delimiter(self): method test_prefix_ends_with_newline_no_extra_delimiter (line 627) | def test_prefix_ends_with_newline_no_extra_delimiter(self): class TestHandleArgString (line 632) | class TestHandleArgString: method test_bool_true (line 635) | def test_bool_true(self): method test_bool_false (line 640) | def test_bool_false(self): method test_none (line 644) | def test_none(self): method test_positive_int (line 648) | def test_positive_int(self): method test_negative_int (line 652) | def test_negative_int(self): method test_float (line 656) | def test_float(self): method test_negative_float (line 660) | def test_negative_float(self): method test_scientific_notation (line 663) | def test_scientific_notation(self): method test_plain_string (line 667) | def test_plain_string(self): method test_explicit_quoted_string_preserves_numeric (line 670) | def test_explicit_quoted_string_preserves_numeric(self): method test_explicit_single_quoted_string (line 675) | def test_explicit_single_quoted_string(self): method test_empty_string (line 679) | def test_empty_string(self): method test_whitespace_stripped (line 682) | def test_whitespace_stripped(self): class TestSimpleParseArgsString (line 686) | class TestSimpleParseArgsString: method test_basic_parsing (line 689) | def test_basic_parsing(self): method test_numeric_revision_stays_int_by_default (line 693) | def test_numeric_revision_stays_int_by_default(self): method test_quoted_revision_stays_string (line 698) | def test_quoted_revision_stays_string(self): method test_none_input (line 704) | def test_none_input(self): method test_empty_input (line 707) | def test_empty_input(self): method test_bool_and_float_coercion (line 710) | def test_bool_and_float_coercion(self): FILE: tests/utils.py function load_changed_files (line 13) | def load_changed_files(file_path: str) -> list[str]: function parser (line 24) | def parser(full_path: list[str]) -> list[str]: function new_tasks (line 36) | def new_tasks() -> list[str] | None: