gitextract_2il4qejt/

├── .github/
│   ├── ISSUE_TEMPLATE/
│   │   ├── ask-a-question.md
│   │   └── bug-report.yaml
│   ├── scripts/
│   │   └── check_double_quotes.py
│   └── workflows/
│       ├── basic-tests-latest-python.yml
│       ├── basic-tests-linux-uv.yml
│       ├── basic-tests-macos-uv.yml
│       ├── basic-tests-old-pytorch.yml
│       ├── basic-tests-pip.yml
│       ├── basic-tests-pixi.yml
│       ├── basic-tests-pytorch-rc.yml
│       ├── basic-tests-windows-uv-pip.yml
│       ├── basic-tests-windows-uv-pip.yml.disabled
│       ├── basic-tests-windows-uv.yml.disabled
│       ├── check-links.yml
│       ├── check-spelling-errors.yml
│       └── pep8-linter.yml
├── .gitignore
├── .gitmodules
├── CITATION.cff
├── LICENSE.txt
├── README.md
├── appendix-A/
│   ├── 01_main-chapter-code/
│   │   ├── DDP-script-torchrun.py
│   │   ├── DDP-script.py
│   │   ├── README.md
│   │   ├── code-part1.ipynb
│   │   ├── code-part2.ipynb
│   │   └── exercise-solutions.ipynb
│   ├── 02_setup-recommendations/
│   │   └── README.md
│   └── README.md
├── appendix-B/
│   └── README.md
├── appendix-C/
│   └── README.md
├── appendix-D/
│   ├── 01_main-chapter-code/
│   │   ├── appendix-D.ipynb
│   │   └── previous_chapters.py
│   └── README.md
├── appendix-E/
│   ├── 01_main-chapter-code/
│   │   ├── appendix-E.ipynb
│   │   ├── gpt_download.py
│   │   └── previous_chapters.py
│   └── README.md
├── ch01/
│   ├── README.md
│   └── reading-recommendations.md
├── ch02/
│   ├── 01_main-chapter-code/
│   │   ├── README.md
│   │   ├── ch02.ipynb
│   │   ├── dataloader.ipynb
│   │   └── exercise-solutions.ipynb
│   ├── 02_bonus_bytepair-encoder/
│   │   ├── README.md
│   │   ├── bpe_openai_gpt2.py
│   │   ├── compare-bpe-tiktoken.ipynb
│   │   └── requirements-extra.txt
│   ├── 03_bonus_embedding-vs-matmul/
│   │   ├── README.md
│   │   └── embeddings-and-linear-layers.ipynb
│   ├── 04_bonus_dataloader-intuition/
│   │   ├── README.md
│   │   └── dataloader-intuition.ipynb
│   ├── 05_bpe-from-scratch/
│   │   ├── README.md
│   │   ├── bpe-from-scratch-simple.ipynb
│   │   ├── bpe-from-scratch.ipynb
│   │   └── tests.py
│   └── README.md
├── ch03/
│   ├── 01_main-chapter-code/
│   │   ├── README.md
│   │   ├── ch03.ipynb
│   │   ├── exercise-solutions.ipynb
│   │   ├── multihead-attention.ipynb
│   │   └── small-text-sample.txt
│   ├── 02_bonus_efficient-multihead-attention/
│   │   ├── README.md
│   │   ├── mha-implementations.ipynb
│   │   └── tests/
│   │       └── test_mha_implementations.py
│   ├── 03_understanding-buffers/
│   │   ├── README.md
│   │   └── understanding-buffers.ipynb
│   └── README.md
├── ch04/
│   ├── 01_main-chapter-code/
│   │   ├── README.md
│   │   ├── ch04.ipynb
│   │   ├── exercise-solutions.ipynb
│   │   ├── gpt.py
│   │   ├── previous_chapters.py
│   │   └── tests.py
│   ├── 02_performance-analysis/
│   │   ├── README.md
│   │   ├── flops-analysis.ipynb
│   │   └── requirements-extra.txt
│   ├── 03_kv-cache/
│   │   ├── README.md
│   │   ├── gpt_ch04.py
│   │   ├── gpt_with_kv_cache.py
│   │   ├── gpt_with_kv_cache_optimized.py
│   │   └── tests.py
│   ├── 04_gqa/
│   │   ├── README.md
│   │   ├── gpt_with_kv_gqa.py
│   │   ├── gpt_with_kv_mha.py
│   │   ├── memory_estimator_gqa.py
│   │   └── plot_memory_estimates_gqa.py
│   ├── 05_mla/
│   │   ├── README.md
│   │   ├── gpt_with_kv_mha.py
│   │   ├── gpt_with_kv_mla.py
│   │   ├── memory_estimator_mla.py
│   │   └── plot_memory_estimates_mla.py
│   ├── 06_swa/
│   │   ├── README.md
│   │   ├── gpt_with_kv_mha.py
│   │   ├── gpt_with_kv_swa.py
│   │   ├── memory_estimator_swa.py
│   │   └── plot_memory_estimates_swa.py
│   ├── 07_moe/
│   │   ├── README.md
│   │   ├── gpt_with_kv_ffn.py
│   │   ├── gpt_with_kv_moe.py
│   │   ├── memory_estimator_moe.py
│   │   └── plot_memory_estimates_moe.py
│   ├── 08_deltanet/
│   │   ├── README.md
│   │   └── plot_memory_estimates_gated_deltanet.py
│   └── README.md
├── ch05/
│   ├── 01_main-chapter-code/
│   │   ├── README.md
│   │   ├── ch05.ipynb
│   │   ├── exercise-solutions.ipynb
│   │   ├── gpt_download.py
│   │   ├── gpt_generate.py
│   │   ├── gpt_train.py
│   │   ├── previous_chapters.py
│   │   └── tests.py
│   ├── 02_alternative_weight_loading/
│   │   ├── README.md
│   │   ├── weight-loading-hf-safetensors.ipynb
│   │   ├── weight-loading-hf-transformers.ipynb
│   │   └── weight-loading-pytorch.ipynb
│   ├── 03_bonus_pretraining_on_gutenberg/
│   │   ├── README.md
│   │   ├── prepare_dataset.py
│   │   ├── pretraining_simple.py
│   │   └── tests.py
│   ├── 04_learning_rate_schedulers/
│   │   └── README.md
│   ├── 05_bonus_hparam_tuning/
│   │   ├── README.md
│   │   └── hparam_search.py
│   ├── 06_user_interface/
│   │   ├── README.md
│   │   ├── app_orig.py
│   │   ├── app_own.py
│   │   └── requirements-extra.txt
│   ├── 07_gpt_to_llama/
│   │   ├── README.md
│   │   ├── converting-gpt-to-llama2.ipynb
│   │   ├── converting-llama2-to-llama3.ipynb
│   │   ├── previous_chapters.py
│   │   ├── requirements-extra.txt
│   │   ├── standalone-llama32.ipynb
│   │   └── tests/
│   │       ├── test-requirements-extra.txt
│   │       ├── test_llama32_nb.py
│   │       └── tests_rope_and_parts.py
│   ├── 08_memory_efficient_weight_loading/
│   │   ├── README.md
│   │   ├── memory-efficient-state-dict.ipynb
│   │   └── previous_chapters.py
│   ├── 09_extending-tokenizers/
│   │   ├── README.md
│   │   └── extend-tiktoken.ipynb
│   ├── 10_llm-training-speed/
│   │   ├── 00_orig.py
│   │   ├── 01_opt_single_gpu.py
│   │   ├── 02_opt_multi_gpu_ddp.py
│   │   └── README.md
│   ├── 11_qwen3/
│   │   ├── README.md
│   │   ├── qwen3-chat-interface/
│   │   │   ├── README.md
│   │   │   ├── qwen3-chat-interface-multiturn.py
│   │   │   ├── qwen3-chat-interface.py
│   │   │   └── requirements-extra.txt
│   │   ├── standalone-qwen3-moe-plus-kvcache.ipynb
│   │   ├── standalone-qwen3-moe.ipynb
│   │   ├── standalone-qwen3-plus-kvcache.ipynb
│   │   ├── standalone-qwen3.ipynb
│   │   └── tests/
│   │       ├── test_qwen3_kvcache_nb.py
│   │       └── test_qwen3_nb.py
│   ├── 12_gemma3/
│   │   ├── README.md
│   │   ├── standalone-gemma3-plus-kvcache.ipynb
│   │   ├── standalone-gemma3.ipynb
│   │   └── tests/
│   │       ├── test_gemma3_kv_nb.py
│   │       └── test_gemma3_nb.py
│   ├── 13_olmo3/
│   │   ├── README.md
│   │   ├── standalone-olmo3-plus-kv-cache.ipynb
│   │   ├── standalone-olmo3.ipynb
│   │   └── tests/
│   │       ├── olmo3_layer_debugger.py
│   │       ├── test_olmo3_kvcache_nb.py
│   │       └── test_olmo3_nb.py
│   ├── 14_ch05_with_other_llms/
│   │   ├── README.md
│   │   ├── ch05-llama32.ipynb
│   │   └── ch05-qwen3.ipynb
│   ├── 15_tiny-aya/
│   │   ├── README.md
│   │   ├── standalone-tiny-aya-plus-kv-cache.ipynb
│   │   ├── standalone-tiny-aya.ipynb
│   │   └── tests/
│   │       ├── test_tiny_aya_kvcache_nb.py
│   │       ├── test_tiny_aya_nb.py
│   │       └── tiny_aya_layer_debugger.py
│   ├── 16_qwen3.5/
│   │   ├── README.md
│   │   ├── qwen3.5-plus-kv-cache.ipynb
│   │   ├── qwen3.5.ipynb
│   │   ├── qwen3_5_transformers.py
│   │   └── tests/
│   │       ├── qwen3_5_layer_debugger.py
│   │       └── test_qwen3_5_nb.py
│   └── README.md
├── ch06/
│   ├── 01_main-chapter-code/
│   │   ├── README.md
│   │   ├── ch06.ipynb
│   │   ├── exercise-solutions.ipynb
│   │   ├── gpt_class_finetune.py
│   │   ├── gpt_download.py
│   │   ├── load-finetuned-model.ipynb
│   │   ├── previous_chapters.py
│   │   └── tests.py
│   ├── 02_bonus_additional-experiments/
│   │   ├── README.md
│   │   ├── additional_experiments.py
│   │   ├── gpt_download.py
│   │   └── previous_chapters.py
│   ├── 03_bonus_imdb-classification/
│   │   ├── README.md
│   │   ├── download_prepare_dataset.py
│   │   ├── gpt_download.py
│   │   ├── previous_chapters.py
│   │   ├── requirements-extra.txt
│   │   ├── sklearn-baseline.ipynb
│   │   ├── train_bert_hf.py
│   │   ├── train_bert_hf_spam.py
│   │   ├── train_gpt.py
│   │   └── train_sklearn_logreg.py
│   ├── 04_user_interface/
│   │   ├── README.md
│   │   ├── app.py
│   │   └── requirements-extra.txt
│   └── README.md
├── ch07/
│   ├── 01_main-chapter-code/
│   │   ├── README.md
│   │   ├── ch07.ipynb
│   │   ├── exercise-solutions.ipynb
│   │   ├── exercise_experiments.py
│   │   ├── gpt_download.py
│   │   ├── gpt_instruction_finetuning.py
│   │   ├── instruction-data-with-response.json
│   │   ├── instruction-data.json
│   │   ├── load-finetuned-model.ipynb
│   │   ├── ollama_evaluate.py
│   │   ├── previous_chapters.py
│   │   └── tests.py
│   ├── 02_dataset-utilities/
│   │   ├── README.md
│   │   ├── create-passive-voice-entries.ipynb
│   │   ├── find-near-duplicates.py
│   │   ├── instruction-examples.json
│   │   └── requirements-extra.txt
│   ├── 03_model-evaluation/
│   │   ├── README.md
│   │   ├── eval-example-data.json
│   │   ├── llm-instruction-eval-ollama.ipynb
│   │   ├── llm-instruction-eval-openai.ipynb
│   │   ├── requirements-extra.txt
│   │   └── scores/
│   │       ├── correlation-analysis.ipynb
│   │       ├── gpt4-model-1-response.json
│   │       ├── gpt4-model-2-response.json
│   │       ├── llama3-8b-model-1-response.json
│   │       └── llama3-8b-model-2-response.json
│   ├── 04_preference-tuning-with-dpo/
│   │   ├── README.md
│   │   ├── create-preference-data-ollama.ipynb
│   │   ├── dpo-from-scratch.ipynb
│   │   ├── instruction-data-with-preference.json
│   │   └── previous_chapters.py
│   ├── 05_dataset-generation/
│   │   ├── README.md
│   │   ├── instruction-data-llama3-7b.json
│   │   ├── llama3-ollama.ipynb
│   │   ├── reflection-gpt4.ipynb
│   │   └── requirements-extra.txt
│   ├── 06_user_interface/
│   │   ├── README.md
│   │   ├── app.py
│   │   └── requirements-extra.txt
│   └── README.md
├── conftest.py
├── pixi.toml
├── pkg/
│   └── llms_from_scratch/
│       ├── README.md
│       ├── __init__.py
│       ├── appendix_a.py
│       ├── appendix_d.py
│       ├── appendix_e.py
│       ├── ch02.py
│       ├── ch03.py
│       ├── ch04.py
│       ├── ch05.py
│       ├── ch06.py
│       ├── ch07.py
│       ├── generate.py
│       ├── kv_cache/
│       │   ├── __init__.py
│       │   ├── generate.py
│       │   ├── gpt2.py
│       │   ├── llama3.py
│       │   ├── qwen3.py
│       │   └── utils.py
│       ├── kv_cache_batched/
│       │   ├── __init__.py
│       │   ├── generate.py
│       │   ├── qwen3.py
│       │   └── utils.py
│       ├── llama3.py
│       ├── qwen3.py
│       ├── tests/
│       │   ├── test_appendix_a.py
│       │   ├── test_appendix_d.py
│       │   ├── test_appendix_e.py
│       │   ├── test_ch02.py
│       │   ├── test_ch03.py
│       │   ├── test_ch04.py
│       │   ├── test_ch05.py
│       │   ├── test_ch06.py
│       │   ├── test_ch07.py
│       │   ├── test_generate.py
│       │   ├── test_llama3.py
│       │   └── test_qwen3.py
│       └── utils.py
├── pyproject.toml
├── requirements.txt
└── setup/
    ├── 01_optional-python-setup-preferences/
    │   ├── README.md
    │   ├── native-pixi.md
    │   └── native-uv.md
    ├── 02_installing-python-libraries/
    │   ├── README.md
    │   ├── python_environment_check.ipynb
    │   ├── python_environment_check.py
    │   └── tests.py
    ├── 03_optional-docker-environment/
    │   ├── .devcontainer/
    │   │   ├── Dockerfile
    │   │   ├── README.md
    │   │   └── devcontainer.json
    │   └── README.md
    ├── 04_optional-aws-sagemaker-notebook/
    │   ├── README.md
    │   └── cloudformation-template.yml
    └── README.md