SYMBOL INDEX (1340 symbols across 74 files) FILE: apply_delta.py function main (line 11) | def main(base_model_path, target_model_path, delta_path): FILE: bbh.py class BBHSample (line 12) | class BBHSample(BaseModel): method as_prompt (line 16) | def as_prompt(self, include_answer: bool = True): class BBHData (line 24) | class BBHData(BaseModel): method get_config_names (line 28) | def get_config_names(cls, path: str = "lukaemon/bbh") -> List[str]: method load_from_huggingface (line 32) | def load_from_huggingface( function gen_prompt (line 40) | def gen_prompt(data: BBHData, k=-1): function evaluate (line 49) | def evaluate(model: EvalModel, data: BBHData, ntrain: int) -> dict: function main (line 75) | def main(data_dir: str = "lukaemon/bbh", ntrain: int = 3, **kwargs): FILE: crass.py class CrassSample (line 14) | class CrassSample(BaseModel): method as_prompt (line 20) | def as_prompt(self, include_answer=True): method get_answer_label (line 32) | def get_answer_label(self) -> str: class CrassData (line 37) | class CrassData(BaseModel): method load_train_set (line 41) | def load_train_set(cls): method load_test_set (line 79) | def load_test_set( method analyze (line 107) | def analyze(self): function test_data (line 122) | def test_data(): function gen_prompt (line 129) | def gen_prompt(data: CrassData, k=-1): function evaluate (line 138) | def evaluate(model: EvalModel, data_train: CrassData, data_test: CrassDa... function main (line 166) | def main(ntrain: int = 3, **kwargs): FILE: drop.py class DropAnswer (line 14) | class DropAnswer(BaseModel): class DropSample (line 19) | class DropSample(BaseModel): method get_answers (line 26) | def get_answers(self) -> List[str]: method as_prompt (line 29) | def as_prompt(self, include_answer=True): class DropData (line 38) | class DropData(BaseModel): method load_from_huggingface (line 42) | def load_from_huggingface(cls, path: str = "drop", split: str = "valid... method load (line 48) | def load(cls, path: str): method save (line 53) | def save(self, path: str): method analyze (line 59) | def analyze(self): method train_test_split (line 63) | def train_test_split(self, num_train: int, seed: int = 0): function test_data (line 72) | def test_data(path_out: str = "data/drop.json"): function gen_prompt (line 78) | def gen_prompt(data: DropData, k=-1): function filter_dataset (line 87) | def filter_dataset(data: DropData) -> DropData: function evaluate (line 98) | def evaluate(model: EvalModel, data: DropData, ntrain: int) -> dict: function main (line 128) | def main(data_dir: str = "drop", ntrain: int = 3, **kwargs): FILE: hhh.py class HHHDataset (line 20) | class HHHDataset(BaseModel): method load_from_dict (line 27) | def load_from_dict(cls, d): method to_prompt (line 37) | def to_prompt(self) -> str: function load_data (line 80) | def load_data(data_path: str) -> List[HHHDataset]: function evaluate (line 113) | def evaluate( function main (line 164) | def main(**kwargs): FILE: human_eval/data.py function read_problems (line 11) | def read_problems(evalset_file: str = HUMAN_EVAL) -> Dict[str, Dict]: function stream_jsonl (line 15) | def stream_jsonl(filename: str) -> Iterable[Dict]: function write_jsonl (line 32) | def write_jsonl(filename: str, data: Iterable[Dict], append: bool = False): FILE: human_eval/evaluation.py function estimate_pass_at_k (line 13) | def estimate_pass_at_k( function evaluate_functional_correctness (line 39) | def evaluate_functional_correctness( FILE: human_eval/execution.py function check_correctness (line 13) | def check_correctness(problem: Dict, completion: str, timeout: float, function time_limit (line 91) | def time_limit(seconds: float): function swallow_io (line 103) | def swallow_io(): function create_tempdir (line 112) | def create_tempdir(): class TimeoutException (line 118) | class TimeoutException(Exception): class WriteOnlyStringIO (line 122) | class WriteOnlyStringIO(io.StringIO): method read (line 125) | def read(self, *args, **kwargs): method readline (line 128) | def readline(self, *args, **kwargs): method readlines (line 131) | def readlines(self, *args, **kwargs): method readable (line 134) | def readable(self, *args, **kwargs): class redirect_stdin (line 139) | class redirect_stdin(contextlib._RedirectStream): # type: ignore function chdir (line 144) | def chdir(root): function reliability_guard (line 158) | def reliability_guard(maximum_memory_bytes: Optional[int] = None): FILE: human_eval/main.py function entry_point (line 11) | def entry_point( function filter_code (line 30) | def filter_code(completion: str, model: EvalModel) -> str: function gen_prompt (line 40) | def gen_prompt(prompt: str, model: EvalModel) -> str: function count_indent (line 53) | def count_indent(text: str) -> int: function fix_indents (line 63) | def fix_indents(text: str, multiple: int = 2): function test_fix_indents (line 72) | def test_fix_indents(): function evaluate (line 77) | def evaluate(model: EvalModel, data_path: str, **kwargs) -> dict: function main (line 112) | def main(data_path: str = "human_eval/HumanEval.jsonl.gz", **kwargs): FILE: lm_eval/base.py class LM (line 17) | class LM(abc.ABC): method __init__ (line 18) | def __init__(self): method loglikelihood (line 22) | def loglikelihood(self, requests): method loglikelihood_rolling (line 46) | def loglikelihood_rolling(self, requests): method greedy_until (line 89) | def greedy_until(self, requests): method create_from_arg_string (line 107) | def create_from_arg_string(cls, arg_string, additional_config=None): method set_cache_hook (line 113) | def set_cache_hook(self, cache_hook): class BaseLM (line 117) | class BaseLM(LM): method eot_token_id (line 120) | def eot_token_id(self): method max_length (line 125) | def max_length(self): method max_gen_toks (line 130) | def max_gen_toks(self): method batch_size (line 135) | def batch_size(self): method device (line 140) | def device(self): method tok_encode (line 144) | def tok_encode(self, string: str): method tok_decode (line 148) | def tok_decode(self, tokens: Iterable[int]): method _model_generate (line 152) | def _model_generate(self, context, max_length, eos_token_id): method _model_call (line 156) | def _model_call(self, inps): method loglikelihood (line 169) | def loglikelihood(self, requests): method loglikelihood_rolling (line 184) | def loglikelihood_rolling(self, requests): method _loglikelihood_tokens (line 218) | def _loglikelihood_tokens(self, requests, disable_tqdm=False): method greedy_until (line 329) | def greedy_until(self, requests): class Task (line 369) | class Task(abc.ABC): method __init__ (line 386) | def __init__(self, data_dir=None, cache_dir=None, download_mode=None): method download (line 413) | def download(self, data_dir=None, cache_dir=None, download_mode=None): method should_decontaminate (line 446) | def should_decontaminate(self): method has_training_docs (line 451) | def has_training_docs(self): method has_validation_docs (line 456) | def has_validation_docs(self): method has_test_docs (line 461) | def has_test_docs(self): method training_docs (line 465) | def training_docs(self): method validation_docs (line 472) | def validation_docs(self): method test_docs (line 479) | def test_docs(self): method _process_doc (line 486) | def _process_doc(self, doc): method fewshot_examples (line 497) | def fewshot_examples(self, k, rnd): method doc_to_decontamination_query (line 503) | def doc_to_decontamination_query(self, doc): method doc_to_text (line 510) | def doc_to_text(self, doc): method doc_to_target (line 514) | def doc_to_target(self, doc): method construct_requests (line 518) | def construct_requests(self, doc, ctx): method process_results (line 532) | def process_results(self, doc, results): method aggregation (line 545) | def aggregation(self): method higher_is_better (line 554) | def higher_is_better(self): method fewshot_description (line 562) | def fewshot_description(self): method fewshot_context (line 573) | def fewshot_context( class MultipleChoiceTask (line 642) | class MultipleChoiceTask(Task): method doc_to_target (line 643) | def doc_to_target(self, doc): method construct_requests (line 646) | def construct_requests(self, doc, ctx): method process_results (line 653) | def process_results(self, doc, results): method higher_is_better (line 665) | def higher_is_better(self): method aggregation (line 671) | def aggregation(self): class PerplexityTask (line 678) | class PerplexityTask(Task, abc.ABC): method should_decontaminate (line 679) | def should_decontaminate(self): method has_training_docs (line 683) | def has_training_docs(self): method fewshot_examples (line 686) | def fewshot_examples(self, k, rnd): method fewshot_context (line 690) | def fewshot_context( method higher_is_better (line 712) | def higher_is_better(self): method doc_to_decontamination_query (line 719) | def doc_to_decontamination_query(self, doc): method doc_to_text (line 722) | def doc_to_text(self, doc): method doc_to_target (line 725) | def doc_to_target(self, doc): method construct_requests (line 728) | def construct_requests(self, doc, ctx): method process_results (line 733) | def process_results(self, doc, results): method aggregation (line 743) | def aggregation(self): method count_bytes (line 751) | def count_bytes(cls, doc): method count_words (line 755) | def count_words(cls, doc): function hash_args (line 760) | def hash_args(attr, args): class CacheHook (line 765) | class CacheHook: method __init__ (line 766) | def __init__(self, cachinglm): method add_partial (line 773) | def add_partial(self, attr, req, res): class Request (line 787) | class Request: method __init__ (line 788) | def __init__(self, request_type, args, index=None): method __iter__ (line 798) | def __iter__(self): method __getitem__ (line 804) | def __getitem__(self, i): method __eq__ (line 809) | def __eq__(self, other): method __repr__ (line 816) | def __repr__(self): class RequestFactory (line 820) | class RequestFactory: method __getattr__ (line 821) | def __getattr__(self, attr): FILE: lm_eval/evaluator.py function simple_evaluate (line 13) | def simple_evaluate( function evaluate (line 109) | def evaluate( function make_table (line 288) | def make_table(result_dict): FILE: lm_eval/main.py class MultiChoice (line 11) | class MultiChoice: method __init__ (line 12) | def __init__(self, choices): method __contains__ (line 16) | def __contains__(self, values): method __iter__ (line 23) | def __iter__(self): function parse_args (line 28) | def parse_args(): function pattern_match (line 47) | def pattern_match(patterns, source_list): function main (line 55) | def main(): FILE: lm_eval/metrics.py function mean (line 10) | def mean(arr): function pop_stddev (line 14) | def pop_stddev(arr): function sample_stddev (line 19) | def sample_stddev(arr): function mean_stderr (line 24) | def mean_stderr(arr): function median (line 28) | def median(arr): function matthews_corrcoef (line 32) | def matthews_corrcoef(items): function f1_score (line 39) | def f1_score(items): function acc_all (line 48) | def acc_all(items): function acc_all_stderr (line 67) | def acc_all_stderr(items): function metric_max_over_ground_truths (line 85) | def metric_max_over_ground_truths(metric_fn, prediction, ground_truths): function perplexity (line 94) | def perplexity(items): function weighted_mean (line 98) | def weighted_mean(items): function weighted_perplexity (line 103) | def weighted_perplexity(items): function bits_per_byte (line 107) | def bits_per_byte(items): function bleu (line 111) | def bleu(items): function chrf (line 128) | def chrf(items): function ter (line 142) | def ter(items): function is_non_str_iterable (line 157) | def is_non_str_iterable(obj): function _sacreformat (line 161) | def _sacreformat(refs, preds): class _bootstrap_internal (line 192) | class _bootstrap_internal: method __init__ (line 193) | def __init__(self, f, n): method __call__ (line 197) | def __call__(self, v): function bootstrap_stderr (line 207) | def bootstrap_stderr(f, xs, iters): function stderr_for_metric (line 236) | def stderr_for_metric(metric, bootstrap_iters): function yesno (line 255) | def yesno(x): FILE: lm_eval/models/__init__.py function get_model (line 17) | def get_model(model_name): FILE: lm_eval/models/dummy.py class DummyLM (line 5) | class DummyLM(LM): method __init__ (line 6) | def __init__(self): method create_from_arg_string (line 10) | def create_from_arg_string(cls, arg_string, additional_config=None): method loglikelihood (line 13) | def loglikelihood(self, requests): method greedy_until (line 21) | def greedy_until(self, requests): method loglikelihood_rolling (line 30) | def loglikelihood_rolling(self, requests): FILE: lm_eval/models/gpt2.py class HFLM (line 6) | class HFLM(BaseLM): method __init__ (line 7) | def __init__( method eot_token_id (line 81) | def eot_token_id(self): method max_length (line 86) | def max_length(self): method max_gen_toks (line 94) | def max_gen_toks(self): method batch_size (line 98) | def batch_size(self): method device (line 103) | def device(self): method tok_encode (line 107) | def tok_encode(self, string: str): method tok_decode (line 110) | def tok_decode(self, tokens): method _model_call (line 113) | def _model_call(self, inps): method _model_generate (line 124) | def _model_generate(self, context, max_length, eos_token_id): FILE: lm_eval/models/gpt3.py function get_result (line 10) | def get_result(response, ctxlen): function oa_completion (line 38) | def oa_completion(**kwargs): class GPT3LM (line 57) | class GPT3LM(BaseLM): method __init__ (line 60) | def __init__(self, engine, truncate=False): method eot_token_id (line 89) | def eot_token_id(self): method max_length (line 93) | def max_length(self): method max_gen_toks (line 98) | def max_gen_toks(self): method batch_size (line 102) | def batch_size(self): method device (line 107) | def device(self): method tok_encode (line 111) | def tok_encode(self, string: str): method tok_decode (line 114) | def tok_decode(self, tokens): method _loglikelihood_tokens (line 117) | def _loglikelihood_tokens(self, requests, disable_tqdm=False): method greedy_until (line 168) | def greedy_until(self, requests): method _model_call (line 224) | def _model_call(self, inps): method _model_generate (line 228) | def _model_generate(self, context, max_length, eos_token_id): FILE: lm_eval/models/llama.py class LlamaLM (line 7) | class LlamaLM(BaseLM): method __init__ (line 8) | def __init__( method eot_token_id (line 55) | def eot_token_id(self): method max_length (line 60) | def max_length(self): method max_gen_toks (line 67) | def max_gen_toks(self): method batch_size (line 71) | def batch_size(self): method device (line 75) | def device(self): method tok_encode (line 78) | def tok_encode(self, string: str): method tok_decode (line 81) | def tok_decode(self, tokens): method _model_call (line 84) | def _model_call(self, inps): method _model_generate (line 95) | def _model_generate(self, context, max_length, eos_token_id): FILE: lm_eval/models/textsynth.py function textsynth_completion (line 25) | def textsynth_completion(**kwargs): class TextSynthLM (line 41) | class TextSynthLM(BaseLM): method __init__ (line 42) | def __init__(self, engine, truncate=False): method eot_token_id (line 58) | def eot_token_id(self): method max_length (line 63) | def max_length(self): method max_gen_toks (line 68) | def max_gen_toks(self): method batch_size (line 72) | def batch_size(self): method device (line 77) | def device(self): method tok_encode (line 81) | def tok_encode(self, string: str): method tok_decode (line 85) | def tok_decode(self, tokens): method loglikelihood (line 89) | def loglikelihood(self, requests): method loglikelihood_rolling (line 109) | def loglikelihood_rolling(self, requests): method greedy_until (line 119) | def greedy_until(self, requests): method _model_call (line 149) | def _model_call(self, inps): method _model_generate (line 153) | def _model_generate(self, context, max_length, eos_token_id): FILE: lm_eval/tasks/__init__.py function get_task (line 280) | def get_task(task_name): function get_task_name_from_object (line 289) | def get_task_name_from_object(task_object): function get_task_dict (line 302) | def get_task_dict(task_name_list: List[Union[str, lm_eval.base.Task]]): FILE: lm_eval/tasks/anli.py class ANLIBase (line 33) | class ANLIBase(Task): method has_training_docs (line 39) | def has_training_docs(self): method has_validation_docs (line 42) | def has_validation_docs(self): method has_test_docs (line 45) | def has_test_docs(self): method training_docs (line 48) | def training_docs(self): method validation_docs (line 54) | def validation_docs(self): method test_docs (line 58) | def test_docs(self): method doc_to_text (line 62) | def doc_to_text(self, doc): method should_decontaminate (line 74) | def should_decontaminate(self): method doc_to_decontamination_query (line 77) | def doc_to_decontamination_query(self, doc): method doc_to_target (line 80) | def doc_to_target(self, doc): method construct_requests (line 86) | def construct_requests(self, doc, ctx): method process_results (line 102) | def process_results(self, doc, results): method aggregation (line 116) | def aggregation(self): method higher_is_better (line 124) | def higher_is_better(self): class ANLIRound1 (line 133) | class ANLIRound1(ANLIBase): class ANLIRound2 (line 137) | class ANLIRound2(ANLIBase): class ANLIRound3 (line 141) | class ANLIRound3(ANLIBase): FILE: lm_eval/tasks/arc.py class ARCEasy (line 29) | class ARCEasy(MultipleChoiceTask): method has_training_docs (line 34) | def has_training_docs(self): method has_validation_docs (line 37) | def has_validation_docs(self): method has_test_docs (line 40) | def has_test_docs(self): method training_docs (line 43) | def training_docs(self): method validation_docs (line 48) | def validation_docs(self): method test_docs (line 51) | def test_docs(self): method _process_doc (line 54) | def _process_doc(self, doc): method doc_to_text (line 67) | def doc_to_text(self, doc): method should_decontaminate (line 70) | def should_decontaminate(self): method doc_to_decontamination_query (line 73) | def doc_to_decontamination_query(self, doc): class ARCChallenge (line 77) | class ARCChallenge(ARCEasy): FILE: lm_eval/tasks/arithmetic.py class Arithmetic (line 29) | class Arithmetic(Task): method has_training_docs (line 34) | def has_training_docs(self): method has_validation_docs (line 37) | def has_validation_docs(self): method has_test_docs (line 40) | def has_test_docs(self): method training_docs (line 43) | def training_docs(self): method validation_docs (line 46) | def validation_docs(self): method test_docs (line 49) | def test_docs(self): method doc_to_text (line 52) | def doc_to_text(self, doc): method should_decontaminate (line 55) | def should_decontaminate(self): method doc_to_decontamination_query (line 58) | def doc_to_decontamination_query(self, doc): method doc_to_target (line 61) | def doc_to_target(self, doc): method construct_requests (line 64) | def construct_requests(self, doc, ctx): method process_results (line 68) | def process_results(self, doc, results): method aggregation (line 72) | def aggregation(self): method higher_is_better (line 77) | def higher_is_better(self): class Arithmetic2DPlus (line 81) | class Arithmetic2DPlus(Arithmetic): class Arithmetic2DMinus (line 85) | class Arithmetic2DMinus(Arithmetic): class Arithmetic3DPlus (line 89) | class Arithmetic3DPlus(Arithmetic): class Arithmetic3DMinus (line 93) | class Arithmetic3DMinus(Arithmetic): class Arithmetic4DPlus (line 97) | class Arithmetic4DPlus(Arithmetic): class Arithmetic4DMinus (line 101) | class Arithmetic4DMinus(Arithmetic): class Arithmetic5DPlus (line 105) | class Arithmetic5DPlus(Arithmetic): class Arithmetic5DMinus (line 109) | class Arithmetic5DMinus(Arithmetic): class Arithmetic2DMultiplication (line 113) | class Arithmetic2DMultiplication(Arithmetic): class Arithmetic1DComposite (line 117) | class Arithmetic1DComposite(Arithmetic): FILE: lm_eval/tasks/blimp.py class BlimpTask (line 34) | class BlimpTask(Task): method has_training_docs (line 38) | def has_training_docs(self): method has_validation_docs (line 41) | def has_validation_docs(self): method has_test_docs (line 44) | def has_test_docs(self): method validation_docs (line 47) | def validation_docs(self): method fewshot_context (line 53) | def fewshot_context( method doc_to_text (line 73) | def doc_to_text(self, doc): method should_decontaminate (line 77) | def should_decontaminate(self): method doc_to_decontamination_query (line 80) | def doc_to_decontamination_query(self, doc): method doc_to_target (line 83) | def doc_to_target(self, doc): method construct_requests (line 87) | def construct_requests(self, doc, ctx): method process_results (line 97) | def process_results(self, doc, results): method higher_is_better (line 107) | def higher_is_better(self): method aggregation (line 112) | def aggregation(self): class BlimpAdjunctIsland (line 118) | class BlimpAdjunctIsland(BlimpTask): class BlimpAnaphorGenderAgreement (line 122) | class BlimpAnaphorGenderAgreement(BlimpTask): class BlimpAnaphorNumberAgreement (line 126) | class BlimpAnaphorNumberAgreement(BlimpTask): class BlimpAnimateSubjectPassive (line 130) | class BlimpAnimateSubjectPassive(BlimpTask): class BlimpAnimateSubjectTrans (line 134) | class BlimpAnimateSubjectTrans(BlimpTask): class BlimpCausative (line 138) | class BlimpCausative(BlimpTask): class BlimpComplex_NPIsland (line 142) | class BlimpComplex_NPIsland(BlimpTask): class BlimpCoordinateStructureConstraintComplexLeftBranch (line 146) | class BlimpCoordinateStructureConstraintComplexLeftBranch(BlimpTask): class BlimpCoordinateStructureConstraintObjectExtraction (line 150) | class BlimpCoordinateStructureConstraintObjectExtraction(BlimpTask): class BlimpDeterminerNounAgreement_1 (line 154) | class BlimpDeterminerNounAgreement_1(BlimpTask): class BlimpDeterminerNounAgreement_2 (line 158) | class BlimpDeterminerNounAgreement_2(BlimpTask): class BlimpDeterminerNounAgreementIrregular_1 (line 162) | class BlimpDeterminerNounAgreementIrregular_1(BlimpTask): class BlimpDeterminerNounAgreementIrregular_2 (line 166) | class BlimpDeterminerNounAgreementIrregular_2(BlimpTask): class BlimpDeterminerNounAgreementWithAdj_2 (line 170) | class BlimpDeterminerNounAgreementWithAdj_2(BlimpTask): class BlimpDeterminerNounAgreementWithAdjIrregular_1 (line 174) | class BlimpDeterminerNounAgreementWithAdjIrregular_1(BlimpTask): class BlimpDeterminerNounAgreementWithAdjIrregular_2 (line 178) | class BlimpDeterminerNounAgreementWithAdjIrregular_2(BlimpTask): class BlimpDeterminerNounAgreementWithAdjective_1 (line 182) | class BlimpDeterminerNounAgreementWithAdjective_1(BlimpTask): class BlimpDistractorAgreementRelationalNoun (line 186) | class BlimpDistractorAgreementRelationalNoun(BlimpTask): class BlimpDistractorAgreementRelativeClause (line 190) | class BlimpDistractorAgreementRelativeClause(BlimpTask): class BlimpDropArgument (line 194) | class BlimpDropArgument(BlimpTask): class BlimpEllipsisNBar_1 (line 198) | class BlimpEllipsisNBar_1(BlimpTask): class BlimpEllipsisNBar_2 (line 202) | class BlimpEllipsisNBar_2(BlimpTask): class BlimpExistentialThereObjectRaising (line 206) | class BlimpExistentialThereObjectRaising(BlimpTask): class BlimpExistentialThereQuantifiers_1 (line 210) | class BlimpExistentialThereQuantifiers_1(BlimpTask): class BlimpExistentialThereQuantifiers_2 (line 214) | class BlimpExistentialThereQuantifiers_2(BlimpTask): class BlimpExistentialThereSubjectRaising (line 218) | class BlimpExistentialThereSubjectRaising(BlimpTask): class BlimpExpletiveItObjectRaising (line 222) | class BlimpExpletiveItObjectRaising(BlimpTask): class BlimpInchoative (line 226) | class BlimpInchoative(BlimpTask): class BlimpIntransitive (line 230) | class BlimpIntransitive(BlimpTask): class BlimpIrregularPastParticipleAdjectives (line 234) | class BlimpIrregularPastParticipleAdjectives(BlimpTask): class BlimpIrregularPastParticipleVerbs (line 238) | class BlimpIrregularPastParticipleVerbs(BlimpTask): class BlimpIrregularPluralSubjectVerbAgreement_1 (line 242) | class BlimpIrregularPluralSubjectVerbAgreement_1(BlimpTask): class BlimpIrregularPluralSubjectVerbAgreement_2 (line 246) | class BlimpIrregularPluralSubjectVerbAgreement_2(BlimpTask): class BlimpLeftBranchIslandEchoQuestion (line 250) | class BlimpLeftBranchIslandEchoQuestion(BlimpTask): class BlimpLeftBranchIslandSimpleQuestion (line 254) | class BlimpLeftBranchIslandSimpleQuestion(BlimpTask): class BlimpMatrixQuestionNpiLicensorPresent (line 258) | class BlimpMatrixQuestionNpiLicensorPresent(BlimpTask): class BlimpNpiPresent_1 (line 262) | class BlimpNpiPresent_1(BlimpTask): class BlimpNpiPresent_2 (line 266) | class BlimpNpiPresent_2(BlimpTask): class BlimpOnlyNpiLicensorPresent (line 270) | class BlimpOnlyNpiLicensorPresent(BlimpTask): class BlimpOnlyNpiScope (line 274) | class BlimpOnlyNpiScope(BlimpTask): class BlimpPassive_1 (line 278) | class BlimpPassive_1(BlimpTask): class BlimpPassive_2 (line 282) | class BlimpPassive_2(BlimpTask): class BlimpPrinciple_ACCommand (line 286) | class BlimpPrinciple_ACCommand(BlimpTask): class BlimpPrinciple_ACase_1 (line 290) | class BlimpPrinciple_ACase_1(BlimpTask): class BlimpPrinciple_ACase_2 (line 294) | class BlimpPrinciple_ACase_2(BlimpTask): class BlimpPrinciple_ADomain_1 (line 298) | class BlimpPrinciple_ADomain_1(BlimpTask): class BlimpPrinciple_ADomain_2 (line 302) | class BlimpPrinciple_ADomain_2(BlimpTask): class BlimpPrinciple_ADomain_3 (line 306) | class BlimpPrinciple_ADomain_3(BlimpTask): class BlimpPrinciple_AReconstruction (line 310) | class BlimpPrinciple_AReconstruction(BlimpTask): class BlimpRegularPluralSubjectVerbAgreement_1 (line 314) | class BlimpRegularPluralSubjectVerbAgreement_1(BlimpTask): class BlimpRegularPluralSubjectVerbAgreement_2 (line 318) | class BlimpRegularPluralSubjectVerbAgreement_2(BlimpTask): class BlimpSententialNegationNpiLicensorPresent (line 322) | class BlimpSententialNegationNpiLicensorPresent(BlimpTask): class BlimpSententialNegationNpiScope (line 326) | class BlimpSententialNegationNpiScope(BlimpTask): class BlimpSententialSubjectIsland (line 330) | class BlimpSententialSubjectIsland(BlimpTask): class BlimpSuperlativeQuantifiers_1 (line 334) | class BlimpSuperlativeQuantifiers_1(BlimpTask): class BlimpSuperlativeQuantifiers_2 (line 338) | class BlimpSuperlativeQuantifiers_2(BlimpTask): class BlimpToughVsRaising_1 (line 342) | class BlimpToughVsRaising_1(BlimpTask): class BlimpToughVsRaising_2 (line 346) | class BlimpToughVsRaising_2(BlimpTask): class BlimpTransitive (line 350) | class BlimpTransitive(BlimpTask): class BlimpWhIsland (line 354) | class BlimpWhIsland(BlimpTask): class BlimpWhQuestionsObjectGap (line 358) | class BlimpWhQuestionsObjectGap(BlimpTask): class BlimpWhQuestionsSubjectGap (line 362) | class BlimpWhQuestionsSubjectGap(BlimpTask): class BlimpWhQuestionsSubjectGapLongDistance (line 366) | class BlimpWhQuestionsSubjectGapLongDistance(BlimpTask): class BlimpWhVsThatNoGap (line 370) | class BlimpWhVsThatNoGap(BlimpTask): class BlimpWhVsThatNoGapLongDistance (line 374) | class BlimpWhVsThatNoGapLongDistance(BlimpTask): class BlimpWhVsThatWithGap (line 378) | class BlimpWhVsThatWithGap(BlimpTask): class BlimpWhVsThatWithGapLongDistance (line 382) | class BlimpWhVsThatWithGapLongDistance(BlimpTask): FILE: lm_eval/tasks/cbt.py class CBTBase (line 32) | class CBTBase(Task): method has_training_docs (line 37) | def has_training_docs(self): method has_validation_docs (line 40) | def has_validation_docs(self): method has_test_docs (line 43) | def has_test_docs(self): method training_docs (line 46) | def training_docs(self): method validation_docs (line 51) | def validation_docs(self): method test_docs (line 54) | def test_docs(self): method detokenize (line 57) | def detokenize(self, text): method doc_to_text (line 73) | def doc_to_text(self, doc): method should_decontaminate (line 78) | def should_decontaminate(self): method doc_to_decontamination_query (line 81) | def doc_to_decontamination_query(self, doc): method doc_to_target (line 85) | def doc_to_target(self, doc): method fewshot_examples (line 88) | def fewshot_examples(self, k, rnd): method construct_requests (line 94) | def construct_requests(self, doc, ctx): method process_results (line 113) | def process_results(self, doc, results): method aggregation (line 127) | def aggregation(self): method higher_is_better (line 135) | def higher_is_better(self): class CBTCN (line 144) | class CBTCN(CBTBase): class CBTNE (line 148) | class CBTNE(CBTBase): FILE: lm_eval/tasks/coqa.py class CoQA (line 29) | class CoQA(Task): method has_training_docs (line 33) | def has_training_docs(self): method has_validation_docs (line 36) | def has_validation_docs(self): method has_test_docs (line 39) | def has_test_docs(self): method training_docs (line 42) | def training_docs(self): method validation_docs (line 45) | def validation_docs(self): method test_docs (line 48) | def test_docs(self): method doc_to_text (line 51) | def doc_to_text(self, doc): method should_decontaminate (line 63) | def should_decontaminate(self): method doc_to_decontamination_query (line 66) | def doc_to_decontamination_query(self, doc): method get_answers (line 70) | def get_answers(cls, doc, turn_id): method get_answer_choice (line 87) | def get_answer_choice(self, raw_text): method compute_scores (line 101) | def compute_scores(gold_list, pred): method doc_to_target (line 123) | def doc_to_target(self, doc, turnid=None): method construct_requests (line 130) | def construct_requests(self, doc, ctx): method process_results (line 144) | def process_results(self, doc, results): method higher_is_better (line 165) | def higher_is_better(self): method aggregation (line 171) | def aggregation(self): FILE: lm_eval/tasks/drop.py class DROP (line 39) | class DROP(Task): method has_training_docs (line 43) | def has_training_docs(self): method has_validation_docs (line 46) | def has_validation_docs(self): method has_test_docs (line 49) | def has_test_docs(self): method training_docs (line 52) | def training_docs(self): method validation_docs (line 57) | def validation_docs(self): method _process_doc (line 60) | def _process_doc(self, doc): method get_answers (line 69) | def get_answers(cls, qa): method parse_answer (line 100) | def parse_answer(cls, answer): method doc_to_text (line 112) | def doc_to_text(self, doc): method should_decontaminate (line 115) | def should_decontaminate(self): method doc_to_decontamination_query (line 118) | def doc_to_decontamination_query(self, doc): method doc_to_target (line 121) | def doc_to_target(self, doc): method construct_requests (line 124) | def construct_requests(self, doc, ctx): method process_results (line 138) | def process_results(self, doc, results): method get_metrics (line 158) | def get_metrics(self, predicted, gold): method _answer_to_bags (line 181) | def _answer_to_bags(self, answer): method _align_bags (line 194) | def _align_bags(self, predicted, gold): method _compute_f1 (line 213) | def _compute_f1(self, predicted_bag, gold_bag): method _match_numbers_if_present (line 230) | def _match_numbers_if_present(self, gold_bag, predicted_bag): method _is_number (line 243) | def _is_number(self, text): method _remove_articles (line 250) | def _remove_articles(self, text): method _white_space_fix (line 253) | def _white_space_fix(self, text): method _remove_punc (line 256) | def _remove_punc(self, text): method _fix_number (line 263) | def _fix_number(self, text): method _tokenize (line 266) | def _tokenize(self, text): method _normalize (line 269) | def _normalize(self, answer): method aggregation (line 282) | def aggregation(self): method higher_is_better (line 290) | def higher_is_better(self): FILE: lm_eval/tasks/glue.py class CoLA (line 48) | class CoLA(Task): method has_training_docs (line 53) | def has_training_docs(self): method has_validation_docs (line 56) | def has_validation_docs(self): method has_test_docs (line 59) | def has_test_docs(self): method training_docs (line 62) | def training_docs(self): method validation_docs (line 67) | def validation_docs(self): method doc_to_text (line 70) | def doc_to_text(self, doc): method should_decontaminate (line 75) | def should_decontaminate(self): method doc_to_decontamination_query (line 78) | def doc_to_decontamination_query(self, doc): method doc_to_target (line 81) | def doc_to_target(self, doc): method construct_requests (line 84) | def construct_requests(self, doc, ctx): method process_results (line 89) | def process_results(self, doc, results): method higher_is_better (line 95) | def higher_is_better(self): method aggregation (line 98) | def aggregation(self): class SST (line 102) | class SST(Task): method has_training_docs (line 107) | def has_training_docs(self): method has_validation_docs (line 110) | def has_validation_docs(self): method has_test_docs (line 113) | def has_test_docs(self): method training_docs (line 116) | def training_docs(self): method validation_docs (line 121) | def validation_docs(self): method doc_to_text (line 124) | def doc_to_text(self, doc): method doc_to_target (line 129) | def doc_to_target(self, doc): method construct_requests (line 132) | def construct_requests(self, doc, ctx): method process_results (line 137) | def process_results(self, doc, results): method higher_is_better (line 143) | def higher_is_better(self): method aggregation (line 146) | def aggregation(self): class MNLI (line 153) | class MNLI(Task): method has_training_docs (line 158) | def has_training_docs(self): method has_validation_docs (line 161) | def has_validation_docs(self): method has_test_docs (line 164) | def has_test_docs(self): method training_docs (line 167) | def training_docs(self): method validation_docs (line 172) | def validation_docs(self): method test_docs (line 176) | def test_docs(self): method doc_to_text (line 180) | def doc_to_text(self, doc): method doc_to_target (line 187) | def doc_to_target(self, doc): method construct_requests (line 193) | def construct_requests(self, doc, ctx): method process_results (line 199) | def process_results(self, doc, results): method higher_is_better (line 204) | def higher_is_better(self): method aggregation (line 207) | def aggregation(self): class MNLIMismatched (line 211) | class MNLIMismatched(MNLI): method validation_docs (line 214) | def validation_docs(self): method test_docs (line 218) | def test_docs(self): class QNLI (line 223) | class QNLI(Task): method has_training_docs (line 228) | def has_training_docs(self): method has_validation_docs (line 231) | def has_validation_docs(self): method has_test_docs (line 234) | def has_test_docs(self): method training_docs (line 237) | def training_docs(self): method validation_docs (line 242) | def validation_docs(self): method doc_to_text (line 245) | def doc_to_text(self, doc): method doc_to_target (line 253) | def doc_to_target(self, doc): method construct_requests (line 258) | def construct_requests(self, doc, ctx): method process_results (line 263) | def process_results(self, doc, results): method higher_is_better (line 269) | def higher_is_better(self): method aggregation (line 272) | def aggregation(self): class WNLI (line 276) | class WNLI(Task): method has_training_docs (line 281) | def has_training_docs(self): method has_validation_docs (line 284) | def has_validation_docs(self): method has_test_docs (line 287) | def has_test_docs(self): method training_docs (line 290) | def training_docs(self): method validation_docs (line 295) | def validation_docs(self): method doc_to_text (line 298) | def doc_to_text(self, doc): method doc_to_target (line 304) | def doc_to_target(self, doc): method construct_requests (line 309) | def construct_requests(self, doc, ctx): method process_results (line 314) | def process_results(self, doc, results): method higher_is_better (line 320) | def higher_is_better(self): method aggregation (line 323) | def aggregation(self): class RTE (line 327) | class RTE(Task): method has_training_docs (line 332) | def has_training_docs(self): method has_validation_docs (line 335) | def has_validation_docs(self): method has_test_docs (line 338) | def has_test_docs(self): method training_docs (line 341) | def training_docs(self): method validation_docs (line 346) | def validation_docs(self): method doc_to_text (line 349) | def doc_to_text(self, doc): method doc_to_target (line 355) | def doc_to_target(self, doc): method construct_requests (line 360) | def construct_requests(self, doc, ctx): method process_results (line 365) | def process_results(self, doc, results): method higher_is_better (line 371) | def higher_is_better(self): method aggregation (line 374) | def aggregation(self): class MRPC (line 381) | class MRPC(Task): method has_training_docs (line 386) | def has_training_docs(self): method has_validation_docs (line 389) | def has_validation_docs(self): method has_test_docs (line 392) | def has_test_docs(self): method training_docs (line 395) | def training_docs(self): method validation_docs (line 400) | def validation_docs(self): method doc_to_text (line 403) | def doc_to_text(self, doc): method doc_to_target (line 409) | def doc_to_target(self, doc): method construct_requests (line 412) | def construct_requests(self, doc, ctx): method process_results (line 417) | def process_results(self, doc, results): method higher_is_better (line 426) | def higher_is_better(self): method aggregation (line 429) | def aggregation(self): class QQP (line 433) | class QQP(Task): method has_training_docs (line 438) | def has_training_docs(self): method has_validation_docs (line 441) | def has_validation_docs(self): method has_test_docs (line 444) | def has_test_docs(self): method training_docs (line 447) | def training_docs(self): method validation_docs (line 452) | def validation_docs(self): method doc_to_text (line 455) | def doc_to_text(self, doc): method doc_to_target (line 461) | def doc_to_target(self, doc): method construct_requests (line 464) | def construct_requests(self, doc, ctx): method process_results (line 469) | def process_results(self, doc, results): method higher_is_better (line 478) | def higher_is_better(self): method aggregation (line 481) | def aggregation(self): class STSB (line 485) | class STSB(Task): method has_training_docs (line 490) | def has_training_docs(self): method has_validation_docs (line 493) | def has_validation_docs(self): method has_test_docs (line 496) | def has_test_docs(self): method training_docs (line 499) | def training_docs(self): method validation_docs (line 504) | def validation_docs(self): method test_docs (line 507) | def test_docs(self): method doc_to_text (line 510) | def doc_to_text(self, doc): method doc_to_target (line 516) | def doc_to_target(self, doc): method construct_requests (line 519) | def construct_requests(self, doc, ctx): method process_results (line 533) | def process_results(self, doc, results): method aggregation (line 546) | def aggregation(self): method higher_is_better (line 555) | def higher_is_better(self): FILE: lm_eval/tasks/gsm8k.py class GradeSchoolMath8K (line 40) | class GradeSchoolMath8K(Task): method has_training_docs (line 45) | def has_training_docs(self): method has_validation_docs (line 48) | def has_validation_docs(self): method has_test_docs (line 51) | def has_test_docs(self): method training_docs (line 54) | def training_docs(self): method validation_docs (line 57) | def validation_docs(self): method test_docs (line 60) | def test_docs(self): method doc_to_text (line 63) | def doc_to_text(self, doc): method doc_to_target (line 66) | def doc_to_target(self, doc): method construct_requests (line 69) | def construct_requests(self, doc, ctx): method _extract_answer (line 85) | def _extract_answer(self, completion): method _is_correct (line 94) | def _is_correct(self, completion, answer): method process_results (line 99) | def process_results(self, doc, results): method aggregation (line 113) | def aggregation(self): method higher_is_better (line 121) | def higher_is_better(self): FILE: lm_eval/tasks/headqa.py class HeadQABase (line 26) | class HeadQABase(MultipleChoiceTask): method has_training_docs (line 31) | def has_training_docs(self): method has_validation_docs (line 34) | def has_validation_docs(self): method has_test_docs (line 37) | def has_test_docs(self): method training_docs (line 40) | def training_docs(self): method validation_docs (line 45) | def validation_docs(self): method test_docs (line 48) | def test_docs(self): method _process_doc (line 51) | def _process_doc(self, doc): method doc_to_text (line 60) | def doc_to_text(self, doc): method should_decontaminate (line 63) | def should_decontaminate(self): method doc_to_decontamination_query (line 66) | def doc_to_decontamination_query(self, doc): class HeadQAEn (line 70) | class HeadQAEn(HeadQABase): class HeadQAEs (line 74) | class HeadQAEs(HeadQABase): class HeadQAEsDeprecated (line 79) | class HeadQAEsDeprecated(HeadQABase): method __init__ (line 82) | def __init__(self): FILE: lm_eval/tasks/hellaswag.py class HellaSwag (line 30) | class HellaSwag(MultipleChoiceTask): method has_training_docs (line 35) | def has_training_docs(self): method has_validation_docs (line 38) | def has_validation_docs(self): method has_test_docs (line 41) | def has_test_docs(self): method training_docs (line 44) | def training_docs(self): method validation_docs (line 49) | def validation_docs(self): method _process_doc (line 52) | def _process_doc(self, doc): method preprocess (line 62) | def preprocess(cls, text): method doc_to_text (line 70) | def doc_to_text(self, doc): method should_decontaminate (line 73) | def should_decontaminate(self): method doc_to_decontamination_query (line 76) | def doc_to_decontamination_query(self, doc): FILE: lm_eval/tasks/hendrycks_ethics.py class Ethics (line 35) | class Ethics(Task): method has_training_docs (line 39) | def has_training_docs(self): method has_validation_docs (line 42) | def has_validation_docs(self): method has_test_docs (line 45) | def has_test_docs(self): method training_docs (line 50) | def training_docs(self): method validation_docs (line 53) | def validation_docs(self): method test_docs (line 56) | def test_docs(self): method doc_to_text (line 60) | def doc_to_text(self, doc): method doc_to_target (line 64) | def doc_to_target(self, doc): method construct_requests (line 68) | def construct_requests(self, doc, ctx): method process_results (line 72) | def process_results(self, doc, results): method aggregation (line 76) | def aggregation(self): method higher_is_better (line 80) | def higher_is_better(self): class EthicsCM (line 84) | class EthicsCM(Ethics): method doc_to_text (line 88) | def doc_to_text(self, doc): method should_decontaminate (line 91) | def should_decontaminate(self): method doc_to_decontamination_query (line 94) | def doc_to_decontamination_query(self, doc): method doc_to_target (line 97) | def doc_to_target(self, doc): method construct_requests (line 100) | def construct_requests(self, doc, ctx): method process_results (line 105) | def process_results(self, doc, results): method aggregation (line 111) | def aggregation(self): method higher_is_better (line 114) | def higher_is_better(self): class EthicsDeontology (line 118) | class EthicsDeontology(Ethics): method doc_to_text (line 122) | def doc_to_text(self, doc): method should_decontaminate (line 128) | def should_decontaminate(self): method doc_to_decontamination_query (line 131) | def doc_to_decontamination_query(self, doc): method doc_to_target (line 134) | def doc_to_target(self, doc): method construct_requests (line 138) | def construct_requests(self, doc, ctx): method process_results (line 143) | def process_results(self, doc, results): method calc_em (line 148) | def calc_em(self, items): method aggregation (line 162) | def aggregation(self): method higher_is_better (line 165) | def higher_is_better(self): class EthicsJustice (line 169) | class EthicsJustice(Ethics): method doc_to_text (line 173) | def doc_to_text(self, doc): method should_decontaminate (line 178) | def should_decontaminate(self): method doc_to_decontamination_query (line 181) | def doc_to_decontamination_query(self, doc): method doc_to_target (line 184) | def doc_to_target(self, doc): method construct_requests (line 188) | def construct_requests(self, doc, ctx): method process_results (line 193) | def process_results(self, doc, results): method calc_em (line 198) | def calc_em(self, items): method aggregation (line 212) | def aggregation(self): method higher_is_better (line 215) | def higher_is_better(self): class EthicsUtilitarianismOriginal (line 219) | class EthicsUtilitarianismOriginal(Ethics): method has_training_docs (line 223) | def has_training_docs(self): method fewshot_examples (line 227) | def fewshot_examples(self, k, rnd): method doc_to_text (line 251) | def doc_to_text(self, doc): method should_decontaminate (line 254) | def should_decontaminate(self): method doc_to_decontamination_query (line 257) | def doc_to_decontamination_query(self, doc): method doc_to_target (line 260) | def doc_to_target(self, doc): method construct_requests (line 263) | def construct_requests(self, doc, ctx): method process_results (line 271) | def process_results(self, doc, results): method aggregation (line 285) | def aggregation(self): method higher_is_better (line 288) | def higher_is_better(self): class EthicsUtilitarianism (line 292) | class EthicsUtilitarianism(Ethics): method training_docs (line 301) | def training_docs(self): method validation_docs (line 305) | def validation_docs(self): method test_docs (line 308) | def test_docs(self): method _process_doc (line 312) | def _process_doc(self, doc): method doc_to_text (line 323) | def doc_to_text(self, doc): method doc_to_target (line 328) | def doc_to_target(self, doc): method construct_requests (line 331) | def construct_requests(self, doc, ctx): method process_results (line 336) | def process_results(self, doc, results): method aggregation (line 342) | def aggregation(self): method higher_is_better (line 345) | def higher_is_better(self): class EthicsVirtue (line 349) | class EthicsVirtue(Ethics): method _process_doc (line 353) | def _process_doc(self, doc): method doc_to_text (line 356) | def doc_to_text(self, doc): method doc_to_target (line 361) | def doc_to_target(self, doc): method construct_requests (line 364) | def construct_requests(self, doc, ctx): method process_results (line 369) | def process_results(self, doc, results): method calc_em (line 375) | def calc_em(self, items): method aggregation (line 390) | def aggregation(self): method higher_is_better (line 393) | def higher_is_better(self): FILE: lm_eval/tasks/hendrycks_math.py class Math (line 26) | class Math(Task): method has_training_docs (line 29) | def has_training_docs(self): method has_validation_docs (line 32) | def has_validation_docs(self): method has_test_docs (line 35) | def has_test_docs(self): method training_docs (line 38) | def training_docs(self): method validation_docs (line 41) | def validation_docs(self): method test_docs (line 44) | def test_docs(self): method _process_doc (line 47) | def _process_doc(self, doc): method doc_to_text (line 51) | def doc_to_text(self, doc): method should_decontaminate (line 54) | def should_decontaminate(self): method doc_to_decontamination_query (line 57) | def doc_to_decontamination_query(self, doc): method doc_to_target (line 60) | def doc_to_target(self, doc): method construct_requests (line 63) | def construct_requests(self, doc, ctx): method process_results (line 66) | def process_results(self, doc, results): method aggregation (line 80) | def aggregation(self): method higher_is_better (line 83) | def higher_is_better(self): method is_equiv (line 86) | def is_equiv(self, str1, str2, verbose=False): method remove_boxed (line 102) | def remove_boxed(self, s): method last_boxed_only_string (line 115) | def last_boxed_only_string(self, string): method fix_fracs (line 145) | def fix_fracs(self, string): method fix_a_slash_b (line 176) | def fix_a_slash_b(self, string): method remove_right_units (line 190) | def remove_right_units(self, string): method fix_sqrt (line 199) | def fix_sqrt(self, string): class NotEqual (line 213) | class NotEqual: method __eq__ (line 214) | def __eq__(self, other): method strip_string (line 217) | def strip_string(self, string): class MathAlgebra (line 282) | class MathAlgebra(Math): class MathCountingAndProbability (line 287) | class MathCountingAndProbability(Math): class MathGeometry (line 292) | class MathGeometry(Math): class MathIntermediateAlgebra (line 297) | class MathIntermediateAlgebra(Math): class MathNumberTheory (line 302) | class MathNumberTheory(Math): class MathPrealgebra (line 307) | class MathPrealgebra(Math): class MathPrecalculus (line 312) | class MathPrecalculus(Math): FILE: lm_eval/tasks/hendrycks_test.py function create_all_tasks (line 89) | def create_all_tasks(): function create_task (line 97) | def create_task(subject): class GeneralHendrycksTest (line 105) | class GeneralHendrycksTest(MultipleChoiceTask): method __init__ (line 110) | def __init__(self, subject): method has_training_docs (line 114) | def has_training_docs(self): method has_validation_docs (line 117) | def has_validation_docs(self): method has_test_docs (line 120) | def has_test_docs(self): method validation_docs (line 123) | def validation_docs(self): method test_docs (line 126) | def test_docs(self): method _process_doc (line 129) | def _process_doc(self, doc): method fewshot_examples (line 156) | def fewshot_examples(self, k, rnd): method doc_to_text (line 165) | def doc_to_text(self, doc): method should_decontaminate (line 168) | def should_decontaminate(self): method doc_to_decontamination_query (line 171) | def doc_to_decontamination_query(self, doc): FILE: lm_eval/tasks/lambada.py class LambadaBase (line 32) | class LambadaBase(Task): method training_docs (line 35) | def training_docs(self): method validation_docs (line 39) | def validation_docs(self): method test_docs (line 43) | def test_docs(self): method doc_to_text (line 47) | def doc_to_text(self, doc): method should_decontaminate (line 50) | def should_decontaminate(self): method doc_to_decontamination_query (line 53) | def doc_to_decontamination_query(self, doc): method doc_to_target (line 56) | def doc_to_target(self, doc): method construct_requests (line 59) | def construct_requests(self, doc, ctx): method process_results (line 64) | def process_results(self, doc, results): method aggregation (line 69) | def aggregation(self): method higher_is_better (line 72) | def higher_is_better(self): class LambadaStandard (line 76) | class LambadaStandard(LambadaBase): method has_training_docs (line 82) | def has_training_docs(self): method has_validation_docs (line 85) | def has_validation_docs(self): method has_test_docs (line 88) | def has_test_docs(self): class LambadaOpenAI (line 92) | class LambadaOpenAI(LambadaBase): method has_training_docs (line 103) | def has_training_docs(self): method has_validation_docs (line 106) | def has_validation_docs(self): method has_test_docs (line 109) | def has_test_docs(self): FILE: lm_eval/tasks/lambada_cloze.py class LambadaStandardCloze (line 31) | class LambadaStandardCloze(LambadaStandard): method doc_to_text (line 36) | def doc_to_text(self, doc): method should_decontaminate (line 39) | def should_decontaminate(self): method doc_to_decontamination_query (line 42) | def doc_to_decontamination_query(self, doc): method doc_to_target (line 45) | def doc_to_target(self, doc): class LambadaOpenAICloze (line 49) | class LambadaOpenAICloze(LambadaOpenAI): method doc_to_text (line 54) | def doc_to_text(self, doc): method should_decontaminate (line 57) | def should_decontaminate(self): method doc_to_decontamination_query (line 60) | def doc_to_decontamination_query(self, doc): method doc_to_target (line 63) | def doc_to_target(self, doc): FILE: lm_eval/tasks/lambada_multilingual.py class LambadaOpenAIMultilingualEnglish (line 33) | class LambadaOpenAIMultilingualEnglish(LambadaOpenAI): class LambadaOpenAIMultilingualFrench (line 38) | class LambadaOpenAIMultilingualFrench(LambadaOpenAI): class LambadaOpenAIMultilingualGerman (line 43) | class LambadaOpenAIMultilingualGerman(LambadaOpenAI): class LambadaOpenAIMultilingualItalian (line 48) | class LambadaOpenAIMultilingualItalian(LambadaOpenAI): class LambadaOpenAIMultilingualSpanish (line 53) | class LambadaOpenAIMultilingualSpanish(LambadaOpenAI): function construct_tasks (line 67) | def construct_tasks(): FILE: lm_eval/tasks/mathqa.py class MathQA (line 27) | class MathQA(MultipleChoiceTask): method has_training_docs (line 32) | def has_training_docs(self): method has_validation_docs (line 35) | def has_validation_docs(self): method has_test_docs (line 38) | def has_test_docs(self): method training_docs (line 41) | def training_docs(self): method validation_docs (line 46) | def validation_docs(self): method test_docs (line 49) | def test_docs(self): method _process_doc (line 52) | def _process_doc(self, doc): method doc_to_text (line 66) | def doc_to_text(self, doc): method should_decontaminate (line 69) | def should_decontaminate(self): method doc_to_decontamination_query (line 72) | def doc_to_decontamination_query(self, doc): FILE: lm_eval/tasks/mc_taco.py class MCTACO (line 37) | class MCTACO(Task): method has_training_docs (line 42) | def has_training_docs(self): method has_validation_docs (line 45) | def has_validation_docs(self): method has_test_docs (line 48) | def has_test_docs(self): method validation_docs (line 51) | def validation_docs(self): method test_docs (line 54) | def test_docs(self): method doc_to_text (line 57) | def doc_to_text(self, doc): method should_decontaminate (line 63) | def should_decontaminate(self): method doc_to_decontamination_query (line 66) | def doc_to_decontamination_query(self, doc): method doc_to_target (line 69) | def doc_to_target(self, doc): method construct_requests (line 72) | def construct_requests(self, doc, ctx): method process_results (line 87) | def process_results(self, doc, results): method _question2id (line 104) | def _question2id(self, doc): method aggregation (line 108) | def aggregation(self): method higher_is_better (line 114) | def higher_is_better(self): function exact_match (line 121) | def exact_match(items): function f1 (line 133) | def f1(items): FILE: lm_eval/tasks/naturalqs.py class NaturalQs (line 32) | class NaturalQs(Task): method has_training_docs (line 37) | def has_training_docs(self): method has_validation_docs (line 40) | def has_validation_docs(self): method has_test_docs (line 43) | def has_test_docs(self): method training_docs (line 46) | def training_docs(self): method validation_docs (line 53) | def validation_docs(self): method fewshot_examples (line 56) | def fewshot_examples(self, k, rnd): method doc_to_text (line 63) | def doc_to_text(self, doc): method should_decontaminate (line 66) | def should_decontaminate(self): method doc_to_decontamination_query (line 69) | def doc_to_decontamination_query(self, doc): method doc_to_target (line 72) | def doc_to_target(self, doc): method construct_requests (line 91) | def construct_requests(self, doc, ctx): method process_results (line 105) | def process_results(self, doc, results): method aggregation (line 118) | def aggregation(self): method higher_is_better (line 127) | def higher_is_better(self): FILE: lm_eval/tasks/openbookqa.py class OpenBookQA (line 30) | class OpenBookQA(MultipleChoiceTask): method has_training_docs (line 35) | def has_training_docs(self): method has_validation_docs (line 38) | def has_validation_docs(self): method has_test_docs (line 41) | def has_test_docs(self): method training_docs (line 44) | def training_docs(self): method validation_docs (line 49) | def validation_docs(self): method test_docs (line 52) | def test_docs(self): method _process_doc (line 55) | def _process_doc(self, doc): method doc_to_text (line 64) | def doc_to_text(self, doc): method should_decontaminate (line 67) | def should_decontaminate(self): method doc_to_decontamination_query (line 70) | def doc_to_decontamination_query(self, doc): FILE: lm_eval/tasks/pile.py class PilePerplexityTask (line 26) | class PilePerplexityTask(PerplexityTask): method has_validation_docs (line 31) | def has_validation_docs(self): method has_test_docs (line 34) | def has_test_docs(self): method validation_docs (line 37) | def validation_docs(self): method test_docs (line 41) | def test_docs(self): class PileArxiv (line 46) | class PileArxiv(PilePerplexityTask): class PileBooks3 (line 50) | class PileBooks3(PilePerplexityTask): class PileBookCorpus2 (line 54) | class PileBookCorpus2(PilePerplexityTask): class PileDmMathematics (line 58) | class PileDmMathematics(PilePerplexityTask): class PileEnron (line 62) | class PileEnron(PilePerplexityTask): class PileEuroparl (line 66) | class PileEuroparl(PilePerplexityTask): class PileFreeLaw (line 70) | class PileFreeLaw(PilePerplexityTask): class PileGithub (line 74) | class PileGithub(PilePerplexityTask): class PileGutenberg (line 78) | class PileGutenberg(PilePerplexityTask): class PileHackernews (line 82) | class PileHackernews(PilePerplexityTask): class PileNIHExporter (line 86) | class PileNIHExporter(PilePerplexityTask): class PileOpenSubtitles (line 90) | class PileOpenSubtitles(PilePerplexityTask): class PileOpenWebText2 (line 94) | class PileOpenWebText2(PilePerplexityTask): class PilePhilPapers (line 98) | class PilePhilPapers(PilePerplexityTask): class PilePileCc (line 102) | class PilePileCc(PilePerplexityTask): class PilePubmedAbstracts (line 106) | class PilePubmedAbstracts(PilePerplexityTask): class PilePubmedCentral (line 110) | class PilePubmedCentral(PilePerplexityTask): class PileStackExchange (line 114) | class PileStackExchange(PilePerplexityTask): class PileUspto (line 118) | class PileUspto(PilePerplexityTask): class PileUbuntuIrc (line 122) | class PileUbuntuIrc(PilePerplexityTask): class PileWikipedia (line 126) | class PileWikipedia(PilePerplexityTask): class PileYoutubeSubtitles (line 130) | class PileYoutubeSubtitles(PilePerplexityTask): FILE: lm_eval/tasks/piqa.py class PiQA (line 29) | class PiQA(MultipleChoiceTask): method has_training_docs (line 34) | def has_training_docs(self): method has_validation_docs (line 37) | def has_validation_docs(self): method has_test_docs (line 40) | def has_test_docs(self): method training_docs (line 43) | def training_docs(self): method validation_docs (line 48) | def validation_docs(self): method _process_doc (line 51) | def _process_doc(self, doc): method doc_to_text (line 59) | def doc_to_text(self, doc): method should_decontaminate (line 62) | def should_decontaminate(self): method doc_to_decontamination_query (line 65) | def doc_to_decontamination_query(self, doc): FILE: lm_eval/tasks/prost.py class PROST (line 38) | class PROST(MultipleChoiceTask): method has_training_docs (line 43) | def has_training_docs(self): method has_validation_docs (line 46) | def has_validation_docs(self): method has_test_docs (line 49) | def has_test_docs(self): method test_docs (line 52) | def test_docs(self): method fewshot_context (line 55) | def fewshot_context( method _process_doc (line 65) | def _process_doc(self, doc): method doc_to_text (line 73) | def doc_to_text(self, doc): method should_decontaminate (line 76) | def should_decontaminate(self): method doc_to_decontamination_query (line 79) | def doc_to_decontamination_query(self, doc): FILE: lm_eval/tasks/pubmedqa.py class Pubmed_QA (line 34) | class Pubmed_QA(Task): method has_training_docs (line 39) | def has_training_docs(self): method has_validation_docs (line 42) | def has_validation_docs(self): method has_test_docs (line 45) | def has_test_docs(self): method test_docs (line 48) | def test_docs(self): method doc_to_text (line 53) | def doc_to_text(self, doc): method should_decontaminate (line 59) | def should_decontaminate(self): method doc_to_decontamination_query (line 62) | def doc_to_decontamination_query(self, doc): method doc_to_target (line 65) | def doc_to_target(self, doc): method construct_requests (line 68) | def construct_requests(self, doc, ctx): method process_results (line 77) | def process_results(self, doc, results): method aggregation (line 85) | def aggregation(self): method higher_is_better (line 88) | def higher_is_better(self): FILE: lm_eval/tasks/qa4mre.py class QA4MRE (line 29) | class QA4MRE(MultipleChoiceTask): method has_training_docs (line 34) | def has_training_docs(self): method has_validation_docs (line 37) | def has_validation_docs(self): method has_test_docs (line 40) | def has_test_docs(self): method test_docs (line 43) | def test_docs(self): method _process_doc (line 47) | def _process_doc(self, doc): method doc_to_text (line 57) | def doc_to_text(self, doc): method should_decontaminate (line 60) | def should_decontaminate(self): method doc_to_decontamination_query (line 63) | def doc_to_decontamination_query(self, doc): class QA4MRE_2011 (line 67) | class QA4MRE_2011(QA4MRE): class QA4MRE_2012 (line 71) | class QA4MRE_2012(QA4MRE): class QA4MRE_2013 (line 75) | class QA4MRE_2013(QA4MRE): FILE: lm_eval/tasks/qasper.py function normalize_answer (line 43) | def normalize_answer(s): function categorise_answer (line 65) | def categorise_answer(answer_blob): function token_f1_score (line 88) | def token_f1_score(prediction, ground_truth): class QASPER (line 104) | class QASPER(Task): method has_training_docs (line 109) | def has_training_docs(self): method has_validation_docs (line 112) | def has_validation_docs(self): method has_test_docs (line 115) | def has_test_docs(self): method doc_to_text (line 118) | def doc_to_text(self, doc): method doc_to_target (line 132) | def doc_to_target(self, doc): method training_docs (line 138) | def training_docs(self): method validation_docs (line 142) | def validation_docs(self): method _process_doc (line 146) | def _process_doc(self, doc): method process_results (line 167) | def process_results(self, doc, results): method aggregation (line 198) | def aggregation(self): method construct_requests (line 204) | def construct_requests(self, doc, ctx): method higher_is_better (line 225) | def higher_is_better(self): FILE: lm_eval/tasks/quac.py class QuAC (line 26) | class QuAC(Task): method has_training_docs (line 30) | def has_training_docs(self): method has_validation_docs (line 33) | def has_validation_docs(self): method has_test_docs (line 36) | def has_test_docs(self): method training_docs (line 39) | def training_docs(self): method validation_docs (line 44) | def validation_docs(self): method test_docs (line 47) | def test_docs(self): method _process_doc (line 50) | def _process_doc(self, doc): method doc_to_text (line 54) | def doc_to_text(self, doc): method should_decontaminate (line 68) | def should_decontaminate(self): method doc_to_decontamination_query (line 71) | def doc_to_decontamination_query(self, doc): method doc_to_target (line 74) | def doc_to_target(self, doc): method construct_requests (line 77) | def construct_requests(self, doc, ctx): method process_results (line 91) | def process_results(self, doc, results): method aggregation (line 104) | def aggregation(self): method higher_is_better (line 113) | def higher_is_better(self): FILE: lm_eval/tasks/race.py class each (line 29) | class each: method __init__ (line 30) | def __init__(self, f): method __rrshift__ (line 33) | def __rrshift__(self, other): class RACE (line 37) | class RACE(Task): method has_training_docs (line 45) | def has_training_docs(self): method has_validation_docs (line 48) | def has_validation_docs(self): method has_test_docs (line 51) | def has_test_docs(self): method _collate_data (line 54) | def _collate_data(self, set): method training_docs (line 87) | def training_docs(self): method validation_docs (line 90) | def validation_docs(self): method test_docs (line 93) | def test_docs(self): method get_answer_option (line 97) | def get_answer_option(cls, problem): method last_problem (line 102) | def last_problem(cls, doc): method doc_to_text (line 105) | def doc_to_text(self, doc): method should_decontaminate (line 119) | def should_decontaminate(self): method doc_to_decontamination_query (line 122) | def doc_to_decontamination_query(self, doc): method doc_to_target (line 125) | def doc_to_target(self, doc): method construct_requests (line 128) | def construct_requests(self, doc, ctx): method process_results (line 145) | def process_results(self, doc, results): method aggregation (line 159) | def aggregation(self): method higher_is_better (line 167) | def higher_is_better(self): FILE: lm_eval/tasks/sciq.py class SciQ (line 25) | class SciQ(MultipleChoiceTask): method has_training_docs (line 30) | def has_training_docs(self): method has_validation_docs (line 33) | def has_validation_docs(self): method has_test_docs (line 36) | def has_test_docs(self): method training_docs (line 39) | def training_docs(self): method validation_docs (line 44) | def validation_docs(self): method test_docs (line 47) | def test_docs(self): method _process_doc (line 50) | def _process_doc(self, doc): method doc_to_text (line 66) | def doc_to_text(self, doc): method should_decontaminate (line 69) | def should_decontaminate(self): method doc_to_decontamination_query (line 72) | def doc_to_decontamination_query(self, doc): FILE: lm_eval/tasks/squad.py function _squad_metric (line 35) | def _squad_metric(predictions, references): function _squad_agg (line 40) | def _squad_agg(key, items): class SQuAD2 (line 46) | class SQuAD2(Task): method has_training_docs (line 56) | def has_training_docs(self): method has_validation_docs (line 59) | def has_validation_docs(self): method has_test_docs (line 62) | def has_test_docs(self): method training_docs (line 65) | def training_docs(self): method validation_docs (line 68) | def validation_docs(self): method doc_to_text (line 71) | def doc_to_text(self, doc): method should_decontaminate (line 85) | def should_decontaminate(self): method doc_to_decontamination_query (line 88) | def doc_to_decontamination_query(self, doc): method doc_to_target (line 91) | def doc_to_target(self, doc): method construct_requests (line 99) | def construct_requests(self, doc, ctx): method process_results (line 114) | def process_results(self, doc, results): method aggregation (line 171) | def aggregation(self): method higher_is_better (line 204) | def higher_is_better(self): FILE: lm_eval/tasks/storycloze.py class StoryCloze (line 36) | class StoryCloze(Task): method __init__ (line 41) | def __init__(self, data_dir: str): method has_training_docs (line 49) | def has_training_docs(self): method has_validation_docs (line 52) | def has_validation_docs(self): method has_test_docs (line 55) | def has_test_docs(self): method training_docs (line 58) | def training_docs(self): method validation_docs (line 61) | def validation_docs(self): method test_docs (line 64) | def test_docs(self): method doc_to_text (line 67) | def doc_to_text(self, doc): method should_decontaminate (line 77) | def should_decontaminate(self): method doc_to_decontamination_query (line 80) | def doc_to_decontamination_query(self, doc): method doc_to_target (line 90) | def doc_to_target(self, doc): method construct_requests (line 95) | def construct_requests(self, doc, ctx): method process_results (line 110) | def process_results(self, doc, results): method aggregation (line 124) | def aggregation(self): method higher_is_better (line 132) | def higher_is_better(self): class StoryCloze2016 (line 141) | class StoryCloze2016(StoryCloze): class StoryCloze2018 (line 145) | class StoryCloze2018(StoryCloze): FILE: lm_eval/tasks/superglue.py class BoolQ (line 35) | class BoolQ(Task): method has_training_docs (line 40) | def has_training_docs(self): method has_validation_docs (line 43) | def has_validation_docs(self): method has_test_docs (line 46) | def has_test_docs(self): method training_docs (line 49) | def training_docs(self): method validation_docs (line 54) | def validation_docs(self): method doc_to_text (line 57) | def doc_to_text(self, doc): method should_decontaminate (line 60) | def should_decontaminate(self): method doc_to_decontamination_query (line 63) | def doc_to_decontamination_query(self, doc): method doc_to_target (line 66) | def doc_to_target(self, doc): method construct_requests (line 69) | def construct_requests(self, doc, ctx): method process_results (line 76) | def process_results(self, doc, results): method higher_is_better (line 84) | def higher_is_better(self): method aggregation (line 87) | def aggregation(self): class CommitmentBank (line 91) | class CommitmentBank(Task): method has_training_docs (line 96) | def has_training_docs(self): method has_validation_docs (line 99) | def has_validation_docs(self): method has_test_docs (line 102) | def has_test_docs(self): method training_docs (line 105) | def training_docs(self): method validation_docs (line 110) | def validation_docs(self): method doc_to_text (line 113) | def doc_to_text(self, doc): method doc_to_target (line 119) | def doc_to_target(self, doc): method construct_requests (line 125) | def construct_requests(self, doc, ctx): method process_results (line 132) | def process_results(self, doc, results): method higher_is_better (line 139) | def higher_is_better(self): method cb_multi_fi (line 143) | def cb_multi_fi(cls, items): method aggregation (line 153) | def aggregation(self): class Copa (line 160) | class Copa(Task): method has_training_docs (line 165) | def has_training_docs(self): method has_validation_docs (line 168) | def has_validation_docs(self): method has_test_docs (line 171) | def has_test_docs(self): method training_docs (line 174) | def training_docs(self): method validation_docs (line 179) | def validation_docs(self): method doc_to_text (line 182) | def doc_to_text(self, doc): method doc_to_target (line 190) | def doc_to_target(self, doc): method construct_requests (line 195) | def construct_requests(self, doc, ctx): method process_results (line 204) | def process_results(self, doc, results): method higher_is_better (line 211) | def higher_is_better(self): method aggregation (line 214) | def aggregation(self): method convert_choice (line 218) | def convert_choice(choice): class MultiRC (line 222) | class MultiRC(Task): method has_training_docs (line 227) | def has_training_docs(self): method has_validation_docs (line 230) | def has_validation_docs(self): method has_test_docs (line 233) | def has_test_docs(self): method training_docs (line 236) | def training_docs(self): method validation_docs (line 241) | def validation_docs(self): method doc_to_text (line 244) | def doc_to_text(self, doc): method doc_to_target (line 247) | def doc_to_target(self, doc): method format_answer (line 251) | def format_answer(answer, label): method construct_requests (line 255) | def construct_requests(self, doc, ctx): method process_results (line 264) | def process_results(self, doc, results): method higher_is_better (line 269) | def higher_is_better(self): method aggregation (line 272) | def aggregation(self): class ReCoRD (line 276) | class ReCoRD(Task): method has_training_docs (line 281) | def has_training_docs(self): method has_validation_docs (line 284) | def has_validation_docs(self): method has_test_docs (line 287) | def has_test_docs(self): method training_docs (line 290) | def training_docs(self): method validation_docs (line 299) | def validation_docs(self): method _process_doc (line 305) | def _process_doc(cls, doc): method doc_to_text (line 313) | def doc_to_text(self, doc): method format_answer (line 321) | def format_answer(cls, query, entity): method doc_to_target (line 324) | def doc_to_target(self, doc): method construct_requests (line 328) | def construct_requests(self, doc, ctx): method process_results (line 335) | def process_results(self, doc, results): method higher_is_better (line 356) | def higher_is_better(self): method aggregation (line 362) | def aggregation(self): class WordsInContext (line 369) | class WordsInContext(Task): method has_training_docs (line 374) | def has_training_docs(self): method has_validation_docs (line 377) | def has_validation_docs(self): method has_test_docs (line 380) | def has_test_docs(self): method training_docs (line 383) | def training_docs(self): method validation_docs (line 388) | def validation_docs(self): method doc_to_text (line 391) | def doc_to_text(self, doc): method doc_to_target (line 401) | def doc_to_target(self, doc): method construct_requests (line 404) | def construct_requests(self, doc, ctx): method process_results (line 410) | def process_results(self, doc, results): method higher_is_better (line 418) | def higher_is_better(self): method aggregation (line 421) | def aggregation(self): class SGWinogradSchemaChallenge (line 425) | class SGWinogradSchemaChallenge(Task): method has_training_docs (line 432) | def has_training_docs(self): method has_validation_docs (line 435) | def has_validation_docs(self): method has_test_docs (line 438) | def has_test_docs(self): method training_docs (line 441) | def training_docs(self): method validation_docs (line 450) | def validation_docs(self): method doc_to_text (line 453) | def doc_to_text(self, doc): method doc_to_target (line 468) | def doc_to_target(self, doc): method construct_requests (line 471) | def construct_requests(self, doc, ctx): method process_results (line 478) | def process_results(self, doc, results): method higher_is_better (line 486) | def higher_is_better(self): method aggregation (line 489) | def aggregation(self): FILE: lm_eval/tasks/swag.py class SWAG (line 28) | class SWAG(MultipleChoiceTask): method has_training_docs (line 33) | def has_training_docs(self): method has_validation_docs (line 36) | def has_validation_docs(self): method has_test_docs (line 39) | def has_test_docs(self): method training_docs (line 42) | def training_docs(self): method validation_docs (line 47) | def validation_docs(self): method _process_doc (line 50) | def _process_doc(self, doc): method doc_to_text (line 58) | def doc_to_text(self, doc): FILE: lm_eval/tasks/translation.py function create_tasks_from_benchmarks (line 52) | def create_tasks_from_benchmarks(benchmark_dict): function zh_split (line 78) | def zh_split(zh_text: List[str]) -> List[str]: function ja_split (line 89) | def ja_split(ja_text: List[str]) -> List[str]: function create_translation_task (line 107) | def create_translation_task(dataset, language_pair, version=0): class GeneralTranslationTask (line 117) | class GeneralTranslationTask(Task): method __init__ (line 121) | def __init__(self, sacrebleu_dataset, sacrebleu_language_pair=None): method download (line 128) | def download(self, data_dir=None, cache_dir=None, download_mode=None): method has_training_docs (line 138) | def has_training_docs(self): method has_validation_docs (line 143) | def has_validation_docs(self): method has_test_docs (line 147) | def has_test_docs(self): method test_docs (line 151) | def test_docs(self): method doc_to_text (line 160) | def doc_to_text(self, doc): method should_decontaminate (line 166) | def should_decontaminate(self): method doc_to_decontamination_query (line 169) | def doc_to_decontamination_query(self, doc): method doc_to_target (line 172) | def doc_to_target(self, doc): method construct_requests (line 176) | def construct_requests(self, doc, ctx): method process_results (line 189) | def process_results(self, doc, results): method aggregation (line 205) | def aggregation(self): method higher_is_better (line 217) | def higher_is_better(self): method __str__ (line 229) | def __str__(self): function code_to_language (line 241) | def code_to_language(code): FILE: lm_eval/tasks/triviaqa.py class TriviaQA (line 29) | class TriviaQA(Task): method has_training_docs (line 34) | def has_training_docs(self): method has_validation_docs (line 37) | def has_validation_docs(self): method has_test_docs (line 40) | def has_test_docs(self): method training_docs (line 43) | def training_docs(self): method validation_docs (line 46) | def validation_docs(self): method test_docs (line 49) | def test_docs(self): method doc_to_text (line 52) | def doc_to_text(self, doc): method should_decontaminate (line 55) | def should_decontaminate(self): method doc_to_decontamination_query (line 58) | def doc_to_decontamination_query(self, doc): method doc_to_target (line 61) | def doc_to_target(self, doc): method _remove_prefixes (line 64) | def _remove_prefixes(self, aliases): method construct_requests (line 74) | def construct_requests(self, doc, ctx): method process_results (line 81) | def process_results(self, doc, results): method aggregation (line 84) | def aggregation(self): method higher_is_better (line 89) | def higher_is_better(self): FILE: lm_eval/tasks/truthfulqa.py class TruthfulQAMultipleChoice (line 67) | class TruthfulQAMultipleChoice(Task): method has_training_docs (line 72) | def has_training_docs(self): method has_validation_docs (line 75) | def has_validation_docs(self): method has_test_docs (line 78) | def has_test_docs(self): method training_docs (line 81) | def training_docs(self): method validation_docs (line 84) | def validation_docs(self): method test_docs (line 87) | def test_docs(self): method doc_to_text (line 90) | def doc_to_text(self, doc): method should_decontaminate (line 93) | def should_decontaminate(self): method doc_to_decontamination_query (line 96) | def doc_to_decontamination_query(self, doc): method doc_to_target (line 99) | def doc_to_target(self, doc): method fewshot_context (line 102) | def fewshot_context( method construct_requests (line 112) | def construct_requests(self, doc, ctx): method process_results (line 133) | def process_results(self, doc, results): method aggregation (line 161) | def aggregation(self): method higher_is_better (line 164) | def higher_is_better(self): class TruthfulQAGeneration (line 168) | class TruthfulQAGeneration(Task): method __init__ (line 173) | def __init__(self): method has_training_docs (line 183) | def has_training_docs(self): method has_validation_docs (line 186) | def has_validation_docs(self): method has_test_docs (line 189) | def has_test_docs(self): method training_docs (line 192) | def training_docs(self): method _format_answers (line 195) | def _format_answers(self, answers): method validation_docs (line 207) | def validation_docs(self): method test_docs (line 219) | def test_docs(self): method doc_to_text (line 222) | def doc_to_text(self, doc): method doc_to_target (line 225) | def doc_to_target(self, doc): method fewshot_context (line 228) | def fewshot_context( method construct_requests (line 238) | def construct_requests(self, doc, ctx): method process_results (line 253) | def process_results(self, doc, results): method aggregation (line 332) | def aggregation(self): method higher_is_better (line 351) | def higher_is_better(self): method bleu (line 370) | def bleu(self, refs, preds): method rouge (line 392) | def rouge(self, refs, preds): FILE: lm_eval/tasks/webqs.py class WebQs (line 34) | class WebQs(Task): method has_training_docs (line 39) | def has_training_docs(self): method has_validation_docs (line 42) | def has_validation_docs(self): method has_test_docs (line 45) | def has_test_docs(self): method training_docs (line 48) | def training_docs(self): method test_docs (line 53) | def test_docs(self): method doc_to_text (line 56) | def doc_to_text(self, doc): method should_decontaminate (line 59) | def should_decontaminate(self): method doc_to_decontamination_query (line 62) | def doc_to_decontamination_query(self, doc): method doc_to_target (line 65) | def doc_to_target(self, doc): method _remove_prefixes (line 71) | def _remove_prefixes(self, aliases): method construct_requests (line 82) | def construct_requests(self, doc, ctx): method process_results (line 89) | def process_results(self, doc, results): method aggregation (line 92) | def aggregation(self): method higher_is_better (line 97) | def higher_is_better(self): FILE: lm_eval/tasks/wikitext.py function wikitext_detokenizer (line 28) | def wikitext_detokenizer(string): class WikiText (line 62) | class WikiText(PerplexityTask): method has_training_docs (line 67) | def has_training_docs(self): method has_validation_docs (line 70) | def has_validation_docs(self): method has_test_docs (line 73) | def has_test_docs(self): method training_docs (line 76) | def training_docs(self): method validation_docs (line 79) | def validation_docs(self): method test_docs (line 82) | def test_docs(self): method _process_doc (line 85) | def _process_doc(self, doc): method doc_to_target (line 88) | def doc_to_target(self, doc): method should_decontaminate (line 91) | def should_decontaminate(self): method count_words (line 94) | def count_words(self, doc): FILE: lm_eval/tasks/winogrande.py class Winogrande (line 32) | class Winogrande(Task): method has_training_docs (line 39) | def has_training_docs(self): method has_validation_docs (line 42) | def has_validation_docs(self): method has_test_docs (line 45) | def has_test_docs(self): method training_docs (line 48) | def training_docs(self): method validation_docs (line 53) | def validation_docs(self): method doc_to_text (line 56) | def doc_to_text(self, doc): method should_decontaminate (line 59) | def should_decontaminate(self): method doc_to_decontamination_query (line 62) | def doc_to_decontamination_query(self, doc): method partial_context (line 66) | def partial_context(cls, doc, option): method doc_to_target (line 72) | def doc_to_target(self, doc): method partial_target (line 76) | def partial_target(cls, doc): method construct_requests (line 81) | def construct_requests(self, doc, ctx): method append_context (line 101) | def append_context(cls, ctx, partial_ctx): method process_results (line 106) | def process_results(self, doc, results): method aggregation (line 118) | def aggregation(self): method higher_is_better (line 126) | def higher_is_better(self): FILE: lm_eval/tasks/wsc273.py class WinogradSchemaChallenge273 (line 38) | class WinogradSchemaChallenge273(Task): method has_training_docs (line 57) | def has_training_docs(self): method has_validation_docs (line 60) | def has_validation_docs(self): method has_test_docs (line 63) | def has_test_docs(self): method test_docs (line 66) | def test_docs(self): method _process_doc (line 69) | def _process_doc(self, doc): method __normalize_option (line 76) | def __normalize_option(self, doc, option): method fewshot_examples (line 87) | def fewshot_examples(self, k, rnd): method doc_to_text (line 96) | def doc_to_text(self, doc): method should_decontaminate (line 99) | def should_decontaminate(self): method doc_to_decontamination_query (line 102) | def doc_to_decontamination_query(self, doc): method partial_context (line 106) | def partial_context(cls, doc, option): method doc_to_target (line 111) | def doc_to_target(self, doc): method partial_target (line 115) | def partial_target(cls, doc): method construct_requests (line 120) | def construct_requests(self, doc, ctx): method append_context (line 140) | def append_context(cls, ctx, partial_ctx): method process_results (line 145) | def process_results(self, doc, results): method aggregation (line 157) | def aggregation(self): method higher_is_better (line 165) | def higher_is_better(self): FILE: lm_eval/utils.py class ExitCodeError (line 9) | class ExitCodeError(Exception): function sh (line 13) | def sh(x): function simple_parse_args_string (line 18) | def simple_parse_args_string(args_string): function join_iters (line 35) | def join_iters(iters): function chunks (line 40) | def chunks(iter, n): function group (line 52) | def group(arr, fn): function general_detokenize (line 61) | def general_detokenize(string): function get_rolling_token_windows (line 71) | def get_rolling_token_windows(token_list, prefix_token, max_seq_len, con... function make_disjoint_window (line 112) | def make_disjoint_window(pair): class Reorderer (line 118) | class Reorderer: method __init__ (line 119) | def __init__(self, arr, fn): method get_reordered (line 128) | def get_reordered(self): method get_original (line 131) | def get_original(self, newarr): function positional_deprecated (line 145) | def positional_deprecated(fn): function find_test_root (line 165) | def find_test_root(start_path: pathlib.Path) -> pathlib.Path: FILE: main.py function main (line 11) | def main(task_name: str, **kwargs): FILE: mmlu.py function get_choices (line 18) | def get_choices(): function get_subcategories (line 22) | def get_subcategories(): function get_categories (line 84) | def get_categories(): function format_subject (line 106) | def format_subject(subject): function format_example (line 114) | def format_example(df, idx, include_answer=True): function gen_prompt (line 125) | def gen_prompt(train_df, subject, k=-1): function evaluate (line 136) | def evaluate(args, subject, model: EvalModel, dev_df, test_df): function main (line 168) | def main(data_dir: str = "data/mmlu", ntrain: int = 5, **kwargs): FILE: modeling.py class EvalModel (line 34) | class EvalModel(BaseModel, arbitrary_types_allowed=True): method run (line 39) | def run(self, prompt: str, **kwargs) -> str: method count_text_length (line 42) | def count_text_length(self, text: str) -> int: method check_valid_length (line 45) | def check_valid_length(self, text: str) -> bool: method load (line 48) | def load(self): class OpenAIModel (line 52) | class OpenAIModel(EvalModel): method load (line 62) | def load(self): method run (line 76) | def run(self, prompt: str, **kwargs) -> str: method count_text_length (line 105) | def count_text_length(self, text: str) -> int: method get_choice (line 109) | def get_choice(self, prompt: str, **kwargs) -> str: class SeqToSeqModel (line 133) | class SeqToSeqModel(EvalModel): method load (line 141) | def load(self): method run (line 155) | def run(self, prompt: str, **kwargs) -> str: method count_text_length (line 165) | def count_text_length(self, text: str) -> int: method get_choice (line 169) | def get_choice(self, text: str, **kwargs) -> Tuple[float, float]: class CausalModel (line 188) | class CausalModel(SeqToSeqModel): method load (line 189) | def load(self): method run (line 205) | def run(self, prompt: str, **kwargs) -> str: method get_choice (line 220) | def get_choice(self, text: str, **kwargs) -> Tuple[float, float]: class LlamaModel (line 235) | class LlamaModel(SeqToSeqModel): method load (line 243) | def load(self): method run (line 257) | def run(self, prompt: str, **kwargs) -> str: method get_choice (line 287) | def get_choice(self, text: str, **kwargs) -> Tuple[float, float]: function find_layers (line 302) | def find_layers(module, layers=(nn.Conv2d, nn.Linear), name=""): function noop (line 315) | def noop(*args, **kwargs): function load_quant (line 320) | def load_quant( class GPTQModel (line 370) | class GPTQModel(LlamaModel): method load (line 377) | def load(self): method test_max_length (line 398) | def test_max_length(self): class ChatGLMModel (line 404) | class ChatGLMModel(SeqToSeqModel): method load (line 405) | def load(self): method run (line 417) | def run(self, prompt: str, **kwargs) -> str: class RWKVModel (line 428) | class RWKVModel(EvalModel): method download (line 435) | def download(self, url: str) -> str: method load (line 441) | def load(self): method run (line 449) | def run(self, prompt: str, **kwargs) -> str: method count_text_length (line 487) | def count_text_length(self, text: str) -> int: function select_model (line 492) | def select_model(model_name: str, **kwargs) -> EvalModel: function test_model (line 508) | def test_model( FILE: quant/custom_autotune.py class Autotuner (line 14) | class Autotuner(triton.KernelInterface): method __init__ (line 15) | def __init__( method _bench (line 64) | def _bench(self, *args, config, **meta): method run (line 96) | def run(self, *args, **kwargs): method prune_configs (line 133) | def prune_configs(self, kwargs): method warmup (line 157) | def warmup(self, *args, **kwargs): function autotune (line 170) | def autotune( function matmul248_kernel_config_pruner (line 217) | def matmul248_kernel_config_pruner(configs, nargs): FILE: quant/fused_attn.py function rotate_half_kernel (line 9) | def rotate_half_kernel( function triton_rotate_half_ (line 64) | def triton_rotate_half_(qk, position_ids, config=None): class QuantLlamaAttention (line 112) | class QuantLlamaAttention(nn.Module): method __init__ (line 115) | def __init__(self, hidden_size, num_heads, qkv_proj, o_proj): method forward (line 129) | def forward( function make_quant_attn (line 192) | def make_quant_attn(model): FILE: quant/fused_mlp.py function fusedmatmul_248_kernel (line 125) | def fusedmatmul_248_kernel( function silu (line 252) | def silu(x): class QuantLlamaMLP (line 259) | class QuantLlamaMLP(nn.Module): method __init__ (line 260) | def __init__( method forward (line 284) | def forward(self, x): method triton_llama_mlp (line 287) | def triton_llama_mlp(self, x): method fused2cuda (line 326) | def fused2cuda(self): method fused2cpu (line 336) | def fused2cpu(self): function make_fused_mlp (line 347) | def make_fused_mlp(m, parent_name=""): function autotune_warmup_fused (line 362) | def autotune_warmup_fused(model): FILE: quant/quant_linear.py function matmul_248_kernel (line 105) | def matmul_248_kernel( function transpose_matmul_248_kernel (line 290) | def transpose_matmul_248_kernel( function matmul248 (line 394) | def matmul248(input, qweight, scales, qzeros, g_idx, bits, maxq): function transpose_matmul248 (line 427) | def transpose_matmul248(input, qweight, scales, qzeros, g_idx, bits, maxq): class QuantLinearFunction (line 461) | class QuantLinearFunction(torch.autograd.Function): method forward (line 464) | def forward(ctx, input, qweight, scales, qzeros, g_idx, bits, maxq): method backward (line 472) | def backward(ctx, grad_output): class QuantLinear (line 484) | class QuantLinear(nn.Module): method __init__ (line 485) | def __init__(self, bits, groupsize, infeatures, outfeatures, bias): method pack (line 526) | def pack(self, linear, scales, zeros, g_idx=None): method forward (line 583) | def forward(self, x): function make_quant_linear (line 598) | def make_quant_linear(module, names, bits, groupsize, name=""): function autotune_warmup_linear (line 623) | def autotune_warmup_linear(model, transpose=False): FILE: quant/quantizer.py class Quantizer (line 7) | class Quantizer(nn.Module): method __init__ (line 8) | def __init__(self, shape=1): method configure (line 14) | def configure( method _quantize (line 37) | def _quantize(self, x, scale, zero, maxq): method find_params (line 43) | def find_params(self, x, weight=False): method quantize (line 128) | def quantize(self, x): method enabled (line 134) | def enabled(self): method ready (line 137) | def ready(self): FILE: quant/triton_norm.py function rms_norm_fwd_fused (line 9) | def rms_norm_fwd_fused( class TritonLlamaRMSNorm (line 43) | class TritonLlamaRMSNorm(nn.Module): method __init__ (line 44) | def __init__(self, weight, eps=1e-6): method forward (line 52) | def forward(self, x): function make_quant_norm (line 78) | def make_quant_norm(model): FILE: red-eval/generate_responses.py function chat_completion (line 73) | def chat_completion(system, prompt): function clean_thoughts_ (line 99) | def clean_thoughts_(response): function get_context (line 112) | def get_context(file_name): function gen_prompt (line 117) | def gen_prompt(q, ctx): function process_data (line 121) | def process_data(dataset, ctx, nsamples): FILE: red-eval/gpt4_as_judge.py function clean_thoughts_ (line 45) | def clean_thoughts_(response): function chat_completion (line 61) | def chat_completion(system, prompt): function annotate_tag (line 108) | def annotate_tag(r): FILE: subjective.py class SubjectiveSample (line 17) | class SubjectiveSample(BaseModel): class SubjectiveData (line 26) | class SubjectiveData(BaseModel): method load (line 30) | def load(cls, path: str): method load_from_huggingface (line 48) | def load_from_huggingface(cls, path: str): method save (line 54) | def save(self, path: str): method analyze (line 60) | def analyze(self): function test_data (line 72) | def test_data(path: str = "data/SubjectiveData.csv"): function write_answers (line 77) | def write_answers( function score_answers (line 98) | def score_answers(mode: str, folder: str, **kwargs): function analyze_scores (line 152) | def analyze_scores(pattern: str):