gitextract_cw0dli8t/

├── Experiments/
│   ├── HumanAlignment/
│   │   ├── HaluEval/
│   │   │   ├── README.md
│   │   │   ├── claude_halu.py
│   │   │   ├── open-source_model.py
│   │   │   ├── openai_gpt.py
│   │   │   ├── requirements.txt
│   │   │   └── test_halueval.sh
│   │   ├── README.md
│   │   ├── eval_Crows_Pairs.sh
│   │   ├── eval_RealToxityPrompts.sh
│   │   ├── eval_TruthfulQA.sh
│   │   ├── eval_WinoGender.sh
│   │   ├── metric/
│   │   │   ├── Winogender.py
│   │   │   ├── cal_crows_res.py
│   │   │   ├── cal_toxicity_score.py
│   │   │   ├── cal_truth_res.py
│   │   │   ├── cal_wino_res.py
│   │   │   ├── crows_pairs.py
│   │   │   ├── eval_truthfulqa.py
│   │   │   └── real-toxicity-prompts.py
│   │   ├── model/
│   │   │   ├── Alpaca.py
│   │   │   ├── ChatGLM.py
│   │   │   ├── ChatGPT.py
│   │   │   ├── Claude.py
│   │   │   ├── Falcon.py
│   │   │   ├── LLaMA.py
│   │   │   ├── Pythia.py
│   │   │   ├── Vicuna.py
│   │   │   ├── davinci-002.py
│   │   │   ├── davinci-003.py
│   │   │   └── train.py
│   │   └── requirements.txt
│   ├── InstructTuning/
│   │   ├── README.md
│   │   ├── auto_eval/
│   │   │   ├── eval.py
│   │   │   ├── eval_gpt-3.5-turbo-0301.json
│   │   │   └── generate.py
│   │   ├── bbh/
│   │   │   ├── config/
│   │   │   │   ├── benchmark/
│   │   │   │   │   └── bbh10k.yaml
│   │   │   │   └── model/
│   │   │   │       ├── chatgpt.yaml
│   │   │   │       ├── dummy.yaml
│   │   │   │       ├── llama.yaml
│   │   │   │       ├── local.yaml
│   │   │   │       └── openai.yaml
│   │   │   ├── data/
│   │   │   │   └── bbh_full.json
│   │   │   ├── output/
│   │   │   │   └── bbh10kbenchmark/
│   │   │   │       ├── dummy-model_english.json
│   │   │   │       └── llama-model_english.json
│   │   │   ├── scripts/
│   │   │   │   ├── test.sh
│   │   │   │   ├── testChatGPTModel.sh
│   │   │   │   ├── testDummyModel.sh
│   │   │   │   ├── testLocalModel.sh
│   │   │   │   └── testOpenAIModel.sh
│   │   │   └── src/
│   │   │       ├── BBH10KBenchmark.py
│   │   │       ├── main.py
│   │   │       └── model/
│   │   │           ├── ChatGPTModel.py
│   │   │           ├── DummyModel.py
│   │   │           ├── LLaMAModel.py
│   │   │           ├── LocalModel.py
│   │   │           ├── Model.py
│   │   │           ├── OpenAIModel.py
│   │   │           └── __init__.py
│   │   ├── mmlu/
│   │   │   ├── mmlu.py
│   │   │   ├── modeling.py
│   │   │   └── quant/
│   │   │       ├── __init__.py
│   │   │       ├── custom_autotune.py
│   │   │       ├── fused_attn.py
│   │   │       ├── fused_mlp.py
│   │   │       ├── quant_linear.py
│   │   │       ├── quantizer.py
│   │   │       └── triton_norm.py
│   │   ├── requirements.txt
│   │   ├── test.sh
│   │   ├── train.py
│   │   └── train.sh
│   ├── KnowledgeReasoning/
│   │   ├── ChatGPT.py
│   │   ├── Claude.py
│   │   ├── data/
│   │   │   ├── hellaswag_valid.jsonl
│   │   │   ├── openbook_valid.jsonl
│   │   │   └── socialiqa_valid.jsonl
│   │   ├── davinci-002.py
│   │   ├── davinci-003.py
│   │   ├── get_response.py
│   │   └── run_all_models.sh
│   ├── KnowledgeUtilization/
│   │   ├── README.md
│   │   ├── WikiFact/
│   │   │   ├── em.py
│   │   │   ├── open-source_model.py
│   │   │   ├── wikifact_002.py
│   │   │   ├── wikifact_003.py
│   │   │   ├── wikifact_chatgpt.py
│   │   │   └── wikifact_claude.py
│   │   ├── requirements.txt
│   │   └── test_wikifact.sh
│   ├── LanguageGeneration/
│   │   ├── HumanEval/
│   │   │   ├── generate.py
│   │   │   ├── model.py
│   │   │   └── util.py
│   │   ├── LAMBADA/
│   │   │   ├── lambada_002.py
│   │   │   ├── lambada_003.py
│   │   │   ├── lambada_chatgpt.py
│   │   │   └── lambada_claude.py
│   │   ├── README.md
│   │   ├── WMT22/
│   │   │   ├── open-source_model.py
│   │   │   ├── test_bleu.py
│   │   │   ├── wmt-002.py
│   │   │   ├── wmt-003.py
│   │   │   ├── wmt_chatgpt.py
│   │   │   └── wmt_claude.py
│   │   ├── XSum/
│   │   │   ├── get_rougel.py
│   │   │   ├── open-source_model.py
│   │   │   ├── xsum_002.py
│   │   │   ├── xsum_003.py
│   │   │   ├── xsum_chatgpt.py
│   │   │   └── xsum_claude.py
│   │   ├── requirements.txt
│   │   ├── test_humaneval.sh
│   │   ├── test_lambada.sh
│   │   ├── test_wmt.sh
│   │   └── test_xsum.sh
│   ├── MathematicalReasoning/
│   │   ├── claude.py
│   │   ├── data_process.py
│   │   ├── dataset/
│   │   │   ├── gsm8k.json
│   │   │   └── math/
│   │   │       ├── test.json
│   │   │       └── train.json
│   │   ├── demo/
│   │   │   ├── aqua.json
│   │   │   ├── gsm8k.json
│   │   │   └── math.json
│   │   ├── do_gsm8k.py
│   │   ├── do_math.py
│   │   ├── evaluate.py
│   │   ├── evaluate_falcon.py
│   │   ├── prompt_pattern.py
│   │   ├── readme.md
│   │   ├── scripts/
│   │   │   ├── run_eval_gsm8k.sh
│   │   │   └── run_eval_math.sh
│   │   ├── solve_claude.py
│   │   ├── solve_text_002.py
│   │   ├── solve_text_003.py
│   │   ├── solve_turbo.py
│   │   ├── test_falcon_gsm8k.py
│   │   └── test_falcon_math.py
│   ├── README.md
│   ├── SymbolicReasoning/
│   │   ├── claude.py
│   │   ├── data_process.py
│   │   ├── dataset/
│   │   │   ├── colored_objects/
│   │   │   │   ├── demo.txt
│   │   │   │   └── test.json
│   │   │   └── penguins/
│   │   │       ├── demo.txt
│   │   │       ├── demo_falcon.txt
│   │   │       └── test.json
│   │   ├── do_color.py
│   │   ├── do_penguins.py
│   │   ├── evaluate_color.py
│   │   ├── evaluate_penguins.py
│   │   ├── prompt_pattern.py
│   │   ├── readme.md
│   │   ├── scripts/
│   │   │   ├── run_eval_color.sh
│   │   │   └── run_eval_penguins.sh
│   │   ├── solve_claude.py
│   │   ├── solve_text_002.py
│   │   ├── solve_text_003.py
│   │   ├── solve_turbo.py
│   │   ├── test_falcon_color.py
│   │   └── test_falcon_penguins.py
│   └── ToolManipulation/
│       ├── .idea/
│       │   ├── .gitignore
│       │   ├── ToolManipulation.iml
│       │   ├── inspectionProfiles/
│       │   │   └── profiles_settings.xml
│       │   ├── misc.xml
│       │   └── modules.xml
│       ├── Gorilla/
│       │   ├── data/
│       │   │   ├── README.md
│       │   │   ├── api/
│       │   │   │   ├── huggingface_api.jsonl
│       │   │   │   ├── tensorflowhub_api.jsonl
│       │   │   │   └── torchhub_api.jsonl
│       │   │   ├── apibench/
│       │   │   │   ├── huggingface_eval.json
│       │   │   │   ├── huggingface_train.json
│       │   │   │   ├── tensorflow_eval.json
│       │   │   │   ├── tensorflow_train.json
│       │   │   │   ├── torchhub_eval.json
│       │   │   │   └── torchhub_train.json
│       │   │   └── apizoo/
│       │   │       └── shawnharmsen1.json
│       │   ├── eval/
│       │   │   ├── README.md
│       │   │   ├── eval-data/
│       │   │   │   └── questions/
│       │   │   │       ├── huggingface/
│       │   │   │       │   ├── questions_huggingface_0_shot.jsonl
│       │   │   │       │   ├── questions_huggingface_bm25.jsonl
│       │   │   │       │   ├── questions_huggingface_gpt_index.jsonl
│       │   │   │       │   └── questions_huggingface_oracle.jsonl
│       │   │   │       ├── tensorflowhub/
│       │   │   │       │   ├── questions_tensorflowhub_0_shot.jsonl
│       │   │   │       │   ├── questions_tensorflowhub_bm25.jsonl
│       │   │   │       │   ├── questions_tensorflowhub_gpt_index.jsonl
│       │   │   │       │   └── questions_tensorflowhub_oracle.jsonl
│       │   │   │       └── torchhub/
│       │   │   │           ├── questions_torchhub_0_shot.jsonl
│       │   │   │           ├── questions_torchhub_bm25.jsonl
│       │   │   │           ├── questions_torchhub_gpt_index.jsonl
│       │   │   │           └── questions_torchhub_oracle.jsonl
│       │   │   ├── eval-scripts/
│       │   │   │   ├── ast_eval_hf.py
│       │   │   │   ├── ast_eval_tf.py
│       │   │   │   ├── ast_eval_th.py
│       │   │   │   └── codebleu/
│       │   │   │       ├── __init__.py
│       │   │   │       ├── bleu.py
│       │   │   │       ├── dataflow_match.py
│       │   │   │       ├── keywords/
│       │   │   │       │   ├── c_sharp.txt
│       │   │   │       │   ├── java.txt
│       │   │   │       │   └── python.txt
│       │   │   │       ├── parser/
│       │   │   │       │   ├── DFG.py
│       │   │   │       │   ├── __init__.py
│       │   │   │       │   ├── build.py
│       │   │   │       │   ├── build.sh
│       │   │   │       │   ├── tree-sitter-python/
│       │   │   │       │   │   ├── .gitattributes
│       │   │   │       │   │   ├── .github/
│       │   │   │       │   │   │   └── workflows/
│       │   │   │       │   │   │       └── ci.yml
│       │   │   │       │   │   ├── .gitignore
│       │   │   │       │   │   ├── .npmignore
│       │   │   │       │   │   ├── Cargo.toml
│       │   │   │       │   │   ├── LICENSE
│       │   │   │       │   │   ├── README.md
│       │   │   │       │   │   ├── binding.gyp
│       │   │   │       │   │   ├── bindings/
│       │   │   │       │   │   │   ├── node/
│       │   │   │       │   │   │   │   ├── binding.cc
│       │   │   │       │   │   │   │   └── index.js
│       │   │   │       │   │   │   └── rust/
│       │   │   │       │   │   │       ├── README.md
│       │   │   │       │   │   │       ├── build.rs
│       │   │   │       │   │   │       └── lib.rs
│       │   │   │       │   │   ├── examples/
│       │   │   │       │   │   │   ├── compound-statement-without-trailing-newline.py
│       │   │   │       │   │   │   ├── crlf-line-endings.py
│       │   │   │       │   │   │   ├── mixed-spaces-tabs.py
│       │   │   │       │   │   │   ├── multiple-newlines.py
│       │   │   │       │   │   │   ├── python2-grammar-crlf.py
│       │   │   │       │   │   │   ├── python2-grammar.py
│       │   │   │       │   │   │   ├── python3-grammar-crlf.py
│       │   │   │       │   │   │   ├── python3-grammar.py
│       │   │   │       │   │   │   ├── python3.8_grammar.py
│       │   │   │       │   │   │   ├── simple-statements-without-trailing-newline.py
│       │   │   │       │   │   │   ├── tabs.py
│       │   │   │       │   │   │   └── trailing-whitespace.py
│       │   │   │       │   │   ├── grammar.js
│       │   │   │       │   │   ├── package.json
│       │   │   │       │   │   ├── queries/
│       │   │   │       │   │   │   ├── highlights.scm
│       │   │   │       │   │   │   └── tags.scm
│       │   │   │       │   │   ├── script/
│       │   │   │       │   │   │   ├── known_failures.txt
│       │   │   │       │   │   │   └── parse-examples
│       │   │   │       │   │   ├── src/
│       │   │   │       │   │   │   ├── grammar.json
│       │   │   │       │   │   │   ├── node-types.json
│       │   │   │       │   │   │   ├── parser.c
│       │   │   │       │   │   │   ├── scanner.cc
│       │   │   │       │   │   │   └── tree_sitter/
│       │   │   │       │   │   │       └── parser.h
│       │   │   │       │   │   └── test/
│       │   │   │       │   │       ├── corpus/
│       │   │   │       │   │       │   ├── errors.txt
│       │   │   │       │   │       │   ├── expressions.txt
│       │   │   │       │   │       │   ├── literals.txt
│       │   │   │       │   │       │   ├── pattern_matching.txt
│       │   │   │       │   │       │   └── statements.txt
│       │   │   │       │   │       └── highlight/
│       │   │   │       │   │           ├── keywords.py
│       │   │   │       │   │           ├── parameters.py
│       │   │   │       │   │           └── pattern_matching.py
│       │   │   │       │   └── utils.py
│       │   │   │       ├── readme.txt
│       │   │   │       ├── syntax_check.py
│       │   │   │       ├── syntax_match.py
│       │   │   │       ├── utils.py
│       │   │   │       └── weighted_ngram_match.py
│       │   │   ├── get_hf_responses.py
│       │   │   ├── get_llm_responses.py
│       │   │   └── utils.py
│       │   ├── inference/
│       │   │   ├── README.md
│       │   │   ├── apply_delta.py
│       │   │   ├── example_questions/
│       │   │   │   └── example_questions.jsonl
│       │   │   ├── gorilla_eval.py
│       │   │   ├── requirements.txt
│       │   │   └── serve/
│       │   │       ├── conv_template.py
│       │   │       └── gorilla_cli.py
│       │   └── requirements.txt
│       ├── HotPotQA/
│       │   ├── base_config.yaml
│       │   ├── data/
│       │   │   ├── hotpot_dev_v1_simplified.json
│       │   │   ├── hotpot_test_v1_simplified.json
│       │   │   ├── hotpot_train_v1.1_simplified.json
│       │   │   └── paper_dev.jsonl
│       │   ├── hotpotqa-chat.py
│       │   ├── hotpotqa-claude.py
│       │   ├── hotpotqa-hf.py
│       │   ├── hotpotqa.py
│       │   ├── prompts/
│       │   │   ├── alfworld.json
│       │   │   ├── alfworld_3prompts.json
│       │   │   ├── fever.json
│       │   │   └── prompts_naive.json
│       │   ├── wikienv.py
│       │   └── wrappers.py
│       └── README.md
├── Prompts/
│   └── README.md
└── README.md