gitextract_ai_l2u9q/ ├── .gitignore ├── LICENSE ├── README.md ├── data/ │ └── mmlu/ │ ├── five_shot_mmlu_test.json │ ├── five_shot_mmlu_val.json │ ├── zero_shot_mmlu_test.json │ └── zero_shot_mmlu_val.json ├── eval/ │ ├── EVAL_README.md │ ├── eval_gpt_review.py │ ├── generations/ │ │ ├── oa/ │ │ │ ├── 13b-alpaca-oa-generations-topp0.9-temp0.7.jsonl │ │ │ ├── 13b-chip2-oa-generations-topp0.9-temp0.7.jsonl │ │ │ ├── 13b-flan-oa-generations-topp0.9-temp0.7.jsonl │ │ │ ├── 13b-guanaco-oa-generations-topp0.9-temp0.7.jsonl │ │ │ ├── 13b-hh-rlhf-oa-generations-topp0.9-temp0.7.jsonl │ │ │ ├── 13b-longform-oa-generations-topp0.9-temp0.7.jsonl │ │ │ ├── 13b-self-instruct-oa-generations-topp0.9-temp0.7.jsonl │ │ │ ├── 13b-unnatural-instructions-oa-generations-topp0.9-temp0.7.jsonl │ │ │ ├── 30b-alpaca-oa-generations-topp0.9-temp0.7.jsonl │ │ │ ├── 30b-chip2-oa-generations-topp0.9-temp0.7.jsonl │ │ │ ├── 30b-flan-oa-generations-topp0.9-temp0.7.jsonl │ │ │ ├── 30b-guanaco-oa-generations-topp0.9-temp0.7.jsonl │ │ │ ├── 30b-hh-rlhf-oa-generations-topp0.9-temp0.7.jsonl │ │ │ ├── 30b-longform-oa-generations-topp0.9-temp0.7.jsonl │ │ │ ├── 30b-self-instruct-oa-generations-topp0.9-temp0.7.jsonl │ │ │ ├── 30b-unnatural-instructions-oa-generations-topp0.9-temp0.7.jsonl │ │ │ ├── 65b-alpaca-oa-generations-topp0.9-temp0.7.jsonl │ │ │ ├── 65b-chip2-oa-generations-topp0.9-temp0.7.jsonl │ │ │ ├── 65b-flan-oa-generations-topp0.9-temp0.7.jsonl │ │ │ ├── 65b-guanaco-oa-generations-topp0.9-temp0.7.jsonl │ │ │ ├── 65b-hh-rlhf-oa-generations-topp0.9-temp0.7.jsonl │ │ │ ├── 65b-longform-oa-generations-topp0.9-temp0.7.jsonl │ │ │ ├── 65b-self-instruct-oa-generations-topp0.9-temp0.7.jsonl │ │ │ ├── 65b-unnatural-instructions-oa-generations-topp0.9-temp0.7.jsonl │ │ │ ├── 7b-alpaca-oa-generations-topp0.9-temp0.7.jsonl │ │ │ ├── 7b-chip2-oa-generations-topp0.9-temp0.7.jsonl │ │ │ ├── 7b-flan-oa-generations-topp0.9-temp0.7.jsonl │ │ │ ├── 7b-guanaco-oa-generations-topp0.9-temp0.7.jsonl │ │ │ ├── 7b-hh-rlhf-oa-generations-topp0.9-temp0.7.jsonl │ │ │ ├── 7b-longform-oa-generations-topp0.9-temp0.7.jsonl │ │ │ ├── 7b-self-instruct-oa-generations-topp0.9-temp0.7.jsonl │ │ │ ├── 7b-unnatural-instructions-oa-generations-topp0.9-temp0.7.jsonl │ │ │ ├── gpt-3.5-oa-generations.jsonl │ │ │ ├── gpt-4-oa-generations.jsonl │ │ │ └── vicuna-13b-oa-generations.jsonl │ │ └── vicuna/ │ │ ├── 13-self-instruct-vicuna-generations-topp0.9-temp0.7-secondround.jsonl │ │ ├── 13b-alpaca-vicuna-generations-topp0.9-temp0.7.jsonl │ │ ├── 13b-chip2-vicuna-generations-topp0.9-temp0.7.jsonl │ │ ├── 13b-flan-vicuna-generations-topp0.9-temp0.7.jsonl │ │ ├── 13b-guanaco-vicuna-generations-topp0.9-temp0.7.jsonl │ │ ├── 13b-hh-rlhf-vicuna-generations-topp0.9-temp0.7.jsonl │ │ ├── 13b-longform-vicuna-generations-topp0.9-temp0.7.jsonl │ │ ├── 13b-self-instruct-vicuna-generations-topp0.9-temp0.7.jsonl │ │ ├── 13b-unnatural-instructions-vicuna-generations-topp0.9-temp0.7.jsonl │ │ ├── 30b-alpaca-vicuna-generations-topp0.9-temp0.7.jsonl │ │ ├── 30b-chip2-vicuna-generations-topp0.9-temp0.7.jsonl │ │ ├── 30b-flan-vicuna-generations-topp0.9-temp0.7.jsonl │ │ ├── 30b-guanaco-vicuna-generations-topp0.9-temp0.7.jsonl │ │ ├── 30b-hh-rlhf-vicuna-generations-topp0.9-temp0.7.jsonl │ │ ├── 30b-longform-vicuna-generations-topp0.9-temp0.7.jsonl │ │ ├── 30b-self-instruct-vicuna-generations-topp0.9-temp0.7.jsonl │ │ ├── 30b-unnatural-instructions-vicuna-generations-topp0.9-temp0.7.jsonl │ │ ├── 65b-alpaca-vicuna-generations-topp0.9-temp0.7.jsonl │ │ ├── 65b-chip2-vicuna-generations-topp0.9-temp0.7.jsonl │ │ ├── 65b-flan-vicuna-generations-topp0.9-temp0.7.jsonl │ │ ├── 65b-guanaco-vicuna-generations-topp0.9-temp0.7.jsonl │ │ ├── 65b-hh-rlhf-vicuna-generations-topp0.9-temp0.7.jsonl │ │ ├── 65b-longform-vicuna-generations-topp0.9-temp0.7.jsonl │ │ ├── 65b-self-instruct-vicuna-generations-topp0.9-temp0.7.jsonl │ │ ├── 65b-unnatural-instructions-vicuna-generations-topp0.9-temp0.7.jsonl │ │ ├── 7b-alpaca-vicuna-generations-topp0.9-temp0.7.jsonl │ │ ├── 7b-chip2-vicuna-generations-topp0.9-temp0.7.jsonl │ │ ├── 7b-flan-vicuna-generations-topp0.9-temp0.7.jsonl │ │ ├── 7b-guanaco-vicuna-generations-topp0.9-temp0.7.jsonl │ │ ├── 7b-hh-rlhf-vicuna-generations-topp0.9-temp0.7.jsonl │ │ ├── 7b-longform-vicuna-generations-topp0.9-temp0.7.jsonl │ │ ├── 7b-self-instruct-vicuna-generations-topp0.9-temp0.7.jsonl │ │ ├── 7b-unnatural-instructions-vicuna-generations-topp0.9-temp0.7.jsonl │ │ ├── answer_bard.jsonl │ │ ├── answer_gpt35.jsonl │ │ ├── answer_gpt4.jsonl │ │ └── answer_vicuna-13b.jsonl │ ├── generations_qualitative_comparison_guanaco65b_vs_gpt35.ipynb │ ├── prompts/ │ │ ├── oa_prompt_threeclass.jsonl │ │ ├── oa_questions.jsonl │ │ ├── oa_reviewer.jsonl │ │ ├── reviewer.jsonl │ │ ├── vicuna_prompt_relative.jsonl │ │ ├── vicuna_prompt_threeclass.jsonl │ │ └── vicuna_questions.jsonl │ ├── qa_baseline_gpt.py │ ├── ratings-gpt4/ │ │ ├── oa/ │ │ │ ├── 13b-guanaco-oa-generations-topp0.9-temp0.7-vs-30b-guanaco-oa-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 13b-guanaco-oa-generations-topp0.9-temp0.7-vs-65b-guanaco-oa-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 13b-guanaco-oa-generations-topp0.9-temp0.7-vs-7b-guanaco-oa-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 13b-guanaco-oa-generations-topp0.9-temp0.7-vs-gpt-3.5-oa-generations-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 13b-guanaco-oa-generations-topp0.9-temp0.7-vs-gpt-4-oa-generations-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 13b-guanaco-oa-generations-topp0.9-temp0.7-vs-vicuna-13b-oa-generations-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 30b-guanaco-oa-generations-topp0.9-temp0.7-vs-13b-guanaco-oa-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 30b-guanaco-oa-generations-topp0.9-temp0.7-vs-65b-guanaco-oa-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 30b-guanaco-oa-generations-topp0.9-temp0.7-vs-7b-guanaco-oa-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 30b-guanaco-oa-generations-topp0.9-temp0.7-vs-gpt-3.5-oa-generations-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 30b-guanaco-oa-generations-topp0.9-temp0.7-vs-gpt-4-oa-generations-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 30b-guanaco-oa-generations-topp0.9-temp0.7-vs-vicuna-13b-oa-generations-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 65b-guanaco-oa-generations-topp0.9-temp0.7-vs-13b-guanaco-oa-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 65b-guanaco-oa-generations-topp0.9-temp0.7-vs-30b-guanaco-oa-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 65b-guanaco-oa-generations-topp0.9-temp0.7-vs-7b-guanaco-oa-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 65b-guanaco-oa-generations-topp0.9-temp0.7-vs-gpt-3.5-oa-generations-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 65b-guanaco-oa-generations-topp0.9-temp0.7-vs-gpt-4-oa-generations-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 65b-guanaco-oa-generations-topp0.9-temp0.7-vs-vicuna-13b-oa-generations-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 7b-guanaco-oa-generations-topp0.9-temp0.7-vs-13b-guanaco-oa-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 7b-guanaco-oa-generations-topp0.9-temp0.7-vs-30b-guanaco-oa-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 7b-guanaco-oa-generations-topp0.9-temp0.7-vs-65b-guanaco-oa-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 7b-guanaco-oa-generations-topp0.9-temp0.7-vs-gpt-3.5-oa-generations-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 7b-guanaco-oa-generations-topp0.9-temp0.7-vs-gpt-4-oa-generations-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 7b-guanaco-oa-generations-topp0.9-temp0.7-vs-vicuna-13b-oa-generations-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── gpt-3.5-oa-generations-vs-13b-guanaco-oa-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── gpt-3.5-oa-generations-vs-30b-guanaco-oa-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── gpt-3.5-oa-generations-vs-65b-guanaco-oa-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── gpt-3.5-oa-generations-vs-7b-guanaco-oa-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── gpt-3.5-oa-generations-vs-gpt-4-oa-generations-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── gpt-3.5-oa-generations-vs-vicuna-13b-oa-generations-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── gpt-4-oa-generations-vs-13b-guanaco-oa-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── gpt-4-oa-generations-vs-30b-guanaco-oa-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── gpt-4-oa-generations-vs-65b-guanaco-oa-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── gpt-4-oa-generations-vs-7b-guanaco-oa-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── gpt-4-oa-generations-vs-gpt-3.5-oa-generations-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── gpt-4-oa-generations-vs-vicuna-13b-oa-generations-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── vicuna-13b-oa-generations-vs-13b-guanaco-oa-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── vicuna-13b-oa-generations-vs-30b-guanaco-oa-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── vicuna-13b-oa-generations-vs-65b-guanaco-oa-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── vicuna-13b-oa-generations-vs-7b-guanaco-oa-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── vicuna-13b-oa-generations-vs-gpt-3.5-oa-generations-gpt-4-reviewer-threeclass.jsonl │ │ │ └── vicuna-13b-oa-generations-vs-gpt-4-oa-generations-gpt-4-reviewer-threeclass.jsonl │ │ └── vicuna/ │ │ ├── pairwise/ │ │ │ ├── 13b-guanaco-vicuna-generations-topp0.9-temp0.7-vs-30b-guanaco-vicuna-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 13b-guanaco-vicuna-generations-topp0.9-temp0.7-vs-65b-guanaco-vicuna-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 13b-guanaco-vicuna-generations-topp0.9-temp0.7-vs-7b-guanaco-vicuna-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 13b-guanaco-vicuna-generations-topp0.9-temp0.7-vs-answer-bard-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 13b-guanaco-vicuna-generations-topp0.9-temp0.7-vs-answer-gpt35-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 13b-guanaco-vicuna-generations-topp0.9-temp0.7-vs-answer-gpt4-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 13b-guanaco-vicuna-generations-topp0.9-temp0.7-vs-answer-vicuna-13b-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 30b-guanaco-vicuna-generations-topp0.9-temp0.7-vs-13b-guanaco-vicuna-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 30b-guanaco-vicuna-generations-topp0.9-temp0.7-vs-65b-guanaco-vicuna-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 30b-guanaco-vicuna-generations-topp0.9-temp0.7-vs-7b-guanaco-vicuna-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 30b-guanaco-vicuna-generations-topp0.9-temp0.7-vs-answer-bard-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 30b-guanaco-vicuna-generations-topp0.9-temp0.7-vs-answer-gpt35-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 30b-guanaco-vicuna-generations-topp0.9-temp0.7-vs-answer-gpt4-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 30b-guanaco-vicuna-generations-topp0.9-temp0.7-vs-answer-vicuna-13b-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 65b-guanaco-vicuna-generations-topp0.9-temp0.7-vs-13b-guanaco-vicuna-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 65b-guanaco-vicuna-generations-topp0.9-temp0.7-vs-30b-guanaco-vicuna-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 65b-guanaco-vicuna-generations-topp0.9-temp0.7-vs-7b-guanaco-vicuna-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 65b-guanaco-vicuna-generations-topp0.9-temp0.7-vs-answer-bard-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 65b-guanaco-vicuna-generations-topp0.9-temp0.7-vs-answer-gpt35-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 65b-guanaco-vicuna-generations-topp0.9-temp0.7-vs-answer-gpt4-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 65b-guanaco-vicuna-generations-topp0.9-temp0.7-vs-answer-vicuna-13b-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 7b-guanaco-vicuna-generations-topp0.9-temp0.7-vs-13b-guanaco-vicuna-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 7b-guanaco-vicuna-generations-topp0.9-temp0.7-vs-30b-guanaco-vicuna-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 7b-guanaco-vicuna-generations-topp0.9-temp0.7-vs-65b-guanaco-vicuna-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 7b-guanaco-vicuna-generations-topp0.9-temp0.7-vs-answer-bard-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 7b-guanaco-vicuna-generations-topp0.9-temp0.7-vs-answer-gpt35-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 7b-guanaco-vicuna-generations-topp0.9-temp0.7-vs-answer-gpt4-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── 7b-guanaco-vicuna-generations-topp0.9-temp0.7-vs-answer-vicuna-13b-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── answer-bard-vs-13b-guanaco-vicuna-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── answer-bard-vs-30b-guanaco-vicuna-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── answer-bard-vs-65b-guanaco-vicuna-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── answer-bard-vs-7b-guanaco-vicuna-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── answer-bard-vs-answer-gpt35-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── answer-bard-vs-answer-gpt4-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── answer-bard-vs-answer-vicuna-13b-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── answer-gpt35-vs-13b-guanaco-vicuna-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── answer-gpt35-vs-30b-guanaco-vicuna-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── answer-gpt35-vs-65b-guanaco-vicuna-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── answer-gpt35-vs-7b-guanaco-vicuna-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── answer-gpt35-vs-answer-bard-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── answer-gpt35-vs-answer-gpt4-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── answer-gpt35-vs-answer-vicuna-13b-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── answer-gpt4-vs-13b-guanaco-vicuna-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── answer-gpt4-vs-30b-guanaco-vicuna-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── answer-gpt4-vs-65b-guanaco-vicuna-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── answer-gpt4-vs-7b-guanaco-vicuna-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── answer-gpt4-vs-answer-bard-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── answer-gpt4-vs-answer-gpt35-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── answer-gpt4-vs-answer-vicuna-13b-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── answer-vicuna-13b-vs-13b-guanaco-vicuna-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── answer-vicuna-13b-vs-30b-guanaco-vicuna-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── answer-vicuna-13b-vs-65b-guanaco-vicuna-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── answer-vicuna-13b-vs-7b-guanaco-vicuna-generations-topp0.9-temp0.7-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── answer-vicuna-13b-vs-answer-bard-gpt-4-reviewer-threeclass.jsonl │ │ │ ├── answer-vicuna-13b-vs-answer-gpt35-gpt-4-reviewer-threeclass.jsonl │ │ │ └── answer-vicuna-13b-vs-answer-gpt4-gpt-4-reviewer-threeclass.jsonl │ │ └── relative-gpt-3.5/ │ │ ├── 13b-alpaca-vicuna-generations-topp0.9-temp0.7-vs-gpt-3.5-gpt4-reviewer.jsonl │ │ ├── 13b-chip2-vicuna-generations-topp0.9-temp0.7-vs-gpt-3.5-gpt4-reviewer.jsonl │ │ ├── 13b-flan-vicuna-generations-topp0.9-temp0.7-vs-gpt-3.5-gpt4-reviewer.jsonl │ │ ├── 13b-guanaco-vicuna-generations-topp0.9-temp0.7-vs-gpt-3.5-gpt4-reviewer.jsonl │ │ ├── 13b-hh-rlhf-vicuna-generations-topp0.9-temp0.7-vs-gpt-3.5-gpt4-reviewer.jsonl │ │ ├── 13b-longform-vicuna-generations-topp0.9-temp0.7-vs-gpt-3.5-gpt4-reviewer.jsonl │ │ ├── 13b-self-instruct-vicuna-generations-topp0.9-temp0.7-vs-gpt-3.5-gpt4-reviewer.jsonl │ │ ├── 13b-unnatural-instructions-vicuna-generations-topp0.9-temp0.7-vs-gpt-3.5-gpt4-reviewer.jsonl │ │ ├── 30b-alpaca-vicuna-generations-topp0.9-temp0.7-vs-gpt-3.5-gpt4-reviewer.jsonl │ │ ├── 30b-chip2-vicuna-generations-topp0.9-temp0.7-vs-gpt-3.5-gpt4-reviewer.jsonl │ │ ├── 30b-flan-vicuna-generations-topp0.9-temp0.7-vs-gpt-3.5-gpt4-reviewer.jsonl │ │ ├── 30b-guanaco-vicuna-generations-topp0.9-temp0.7-vs-gpt-3.5-gpt4-reviewer.jsonl │ │ ├── 30b-hh-rlhf-vicuna-generations-topp0.9-temp0.7-vs-gpt-3.5-gpt4-reviewer.jsonl │ │ ├── 30b-huggingchat-vicuna-generations-topp0.9-temp0.7-vs-gpt-3.5-gpt4-reviewer.jsonl │ │ ├── 30b-longform-vicuna-generations-topp0.9-temp0.7-vs-gpt-3.5-gpt4-reviewer.jsonl │ │ ├── 30b-self-instruct-vicuna-generations-topp0.9-temp0.7-vs-gpt-3.5-gpt4-reviewer.jsonl │ │ ├── 30b-unnatural-instructions-vicuna-generations-topp0.9-temp0.7-vs-gpt-3.5-gpt4-reviewer.jsonl │ │ ├── 65b-alpaca-vicuna-generations-topp0.9-temp0.7-vs-gpt-3.5-gpt4-reviewer.jsonl │ │ ├── 65b-chip2-vicuna-generations-topp0.9-temp0.7-vs-gpt-3.5-gpt4-reviewer.jsonl │ │ ├── 65b-flan-vicuna-generations-topp0.9-temp0.7-vs-gpt-3.5-gpt4-reviewer.jsonl │ │ ├── 65b-guanaco-vicuna-generations-topp0.9-temp0.7-vs-gpt-3.5-gpt4-reviewer.jsonl │ │ ├── 65b-hh-rlhf-vicuna-generations-topp0.9-temp0.7-vs-gpt-3.5-gpt4-reviewer.jsonl │ │ ├── 65b-longform-vicuna-generations-topp0.9-temp0.7-vs-gpt-3.5-gpt4-reviewer.jsonl │ │ ├── 65b-self-instruct-vicuna-generations-topp0.9-temp0.7-vs-gpt-3.5-gpt4-reviewer.jsonl │ │ ├── 65b-unnatural-instructions-vicuna-generations-topp0.9-temp0.7-vs-gpt-3.5-gpt4-reviewer.jsonl │ │ ├── 7b-alpaca-vicuna-generations-topp0.9-temp0.7-vs-gpt-3.5-gpt4-reviewer.jsonl │ │ ├── 7b-chip2-vicuna-generations-topp0.9-temp0.7-vs-gpt-3.5-gpt4-reviewer.jsonl │ │ ├── 7b-flan-vicuna-generations-topp0.9-temp0.7-vs-gpt-3.5-gpt4-reviewer.jsonl │ │ ├── 7b-guanaco-vicuna-generations-topp0.9-temp0.7-vs-gpt-3.5-gpt4-reviewer.jsonl │ │ ├── 7b-hh-rlhf-vicuna-generations-topp0.9-temp0.7-vs-gpt-3.5-gpt4-reviewer.jsonl │ │ ├── 7b-longform-vicuna-generations-topp0.9-temp0.7-vs-gpt-3.5-gpt4-reviewer.jsonl │ │ ├── 7b-self-instruct-vicuna-generations-topp0.9-temp0.7-vs-gpt-3.5-gpt4-reviewer.jsonl │ │ ├── 7b-unnatural-instructions-vicuna-generations-topp0.9-temp0.7-vs-gpt-3.5-gpt4-reviewer.jsonl │ │ ├── bard-vs-gpt-3.5-gpt4-reviewer.jsonl │ │ ├── gpt-3.5-vs-13b-alpaca-vicuna-generations-topp0.9-temp0.7-gpt4-reviewer.jsonl │ │ ├── gpt-3.5-vs-13b-chip2-vicuna-generations-topp0.9-temp0.7-gpt4-reviewer.jsonl │ │ ├── gpt-3.5-vs-13b-flan-vicuna-generations-topp0.9-temp0.7-gpt4-reviewer.jsonl │ │ ├── gpt-3.5-vs-13b-guanaco-vicuna-generations-topp0.9-temp0.7-gpt4-reviewer.jsonl │ │ ├── gpt-3.5-vs-13b-hh-rlhf-vicuna-generations-topp0.9-temp0.7-gpt4-reviewer.jsonl │ │ ├── gpt-3.5-vs-13b-longform-vicuna-generations-topp0.9-temp0.7-gpt4-reviewer.jsonl │ │ ├── gpt-3.5-vs-13b-self-instruct-vicuna-generations-topp0.9-temp0.7-gpt4-reviewer.jsonl │ │ ├── gpt-3.5-vs-13b-unnatural-instructions-vicuna-generations-topp0.9-temp0.7-gpt4-reviewer.jsonl │ │ ├── gpt-3.5-vs-30b-alpaca-vicuna-generations-topp0.9-temp0.7-gpt4-reviewer.jsonl │ │ ├── gpt-3.5-vs-30b-chip2-vicuna-generations-topp0.9-temp0.7-gpt4-reviewer.jsonl │ │ ├── gpt-3.5-vs-30b-flan-vicuna-generations-topp0.9-temp0.7-gpt4-reviewer.jsonl │ │ ├── gpt-3.5-vs-30b-guanaco-vicuna-generations-topp0.9-temp0.7-gpt4-reviewer.jsonl │ │ ├── gpt-3.5-vs-30b-hh-rlhf-vicuna-generations-topp0.9-temp0.7-gpt4-reviewer.jsonl │ │ ├── gpt-3.5-vs-30b-huggingchat-vicuna-generations-topp0.9-temp0.7-gpt4-reviewer.jsonl │ │ ├── gpt-3.5-vs-30b-longform-vicuna-generations-topp0.9-temp0.7-gpt4-reviewer.jsonl │ │ ├── gpt-3.5-vs-30b-self-instruct-vicuna-generations-topp0.9-temp0.7-gpt4-reviewer.jsonl │ │ ├── gpt-3.5-vs-30b-unnatural-instructions-vicuna-generations-topp0.9-temp0.7-gpt4-reviewer.jsonl │ │ ├── gpt-3.5-vs-65b-alpaca-vicuna-generations-topp0.9-temp0.7-gpt4-reviewer.jsonl │ │ ├── gpt-3.5-vs-65b-chip2-vicuna-generations-topp0.9-temp0.7-gpt4-reviewer.jsonl │ │ ├── gpt-3.5-vs-65b-flan-vicuna-generations-topp0.9-temp0.7-gpt4-reviewer.jsonl │ │ ├── gpt-3.5-vs-65b-guanaco-vicuna-generations-topp0.9-temp0.7-gpt4-reviewer.jsonl │ │ ├── gpt-3.5-vs-65b-hh-rlhf-vicuna-generations-topp0.9-temp0.7-gpt4-reviewer.jsonl │ │ ├── gpt-3.5-vs-65b-longform-vicuna-generations-topp0.9-temp0.7-gpt4-reviewer.jsonl │ │ ├── gpt-3.5-vs-65b-self-instruct-vicuna-generations-topp0.9-temp0.7-gpt4-reviewer.jsonl │ │ ├── gpt-3.5-vs-65b-unnatural-instructions-vicuna-generations-topp0.9-temp0.7-gpt4-reviewer.jsonl │ │ ├── gpt-3.5-vs-7b-alpaca-vicuna-generations-topp0.9-temp0.7-gpt4-reviewer.jsonl │ │ ├── gpt-3.5-vs-7b-chip2-vicuna-generations-topp0.9-temp0.7-gpt4-reviewer.jsonl │ │ ├── gpt-3.5-vs-7b-flan-vicuna-generations-topp0.9-temp0.7-gpt4-reviewer.jsonl │ │ ├── gpt-3.5-vs-7b-guanaco-vicuna-generations-topp0.9-temp0.7-gpt4-reviewer.jsonl │ │ ├── gpt-3.5-vs-7b-hh-rlhf-vicuna-generations-topp0.9-temp0.7-gpt4-reviewer.jsonl │ │ ├── gpt-3.5-vs-7b-longform-vicuna-generations-topp0.9-temp0.7-gpt4-reviewer.jsonl │ │ ├── gpt-3.5-vs-7b-self-instruct-vicuna-generations-topp0.9-temp0.7-gpt4-reviewer.jsonl │ │ ├── gpt-3.5-vs-7b-unnatural-instructions-vicuna-generations-topp0.9-temp0.7-gpt4-reviewer.jsonl │ │ ├── gpt-3.5-vs-bard-gpt4-reviewer.jsonl │ │ ├── gpt-3.5-vs-gpt4-gpt4-reviewer.jsonl │ │ ├── gpt-3.5-vs-vicuna-13b-gpt4-reviewer.jsonl │ │ ├── gpt4-vs-gpt-3.5-gpt4-reviewer.jsonl │ │ └── vicuna-13b-vs-gpt-3.5-gpt4-reviewer.jsonl │ ├── ratings-human/ │ │ ├── mturk_ui.html │ │ └── vicuna_benchmark_human_annotations.csv │ └── requirements.txt ├── examples/ │ ├── guanaco_7B_demo_colab.ipynb │ └── guanaco_generate.py ├── qlora.py ├── requirements.txt └── scripts/ ├── finetune.sh ├── finetune_guanaco_13b.sh ├── finetune_guanaco_33b.sh ├── finetune_guanaco_65b.sh ├── finetune_guanaco_7b.sh ├── finetune_llama2_guanaco_7b.sh └── generate.sh