gitextract_mj0bsng8/

├── .dockerignore
├── .github/
│   ├── ISSUE_TEMPLATE/
│   │   ├── bug_report.md
│   │   └── feature_request.md
│   ├── dependabot.yml
│   └── workflows/
│       ├── build-and-release.yaml
│       ├── build-docker.yaml
│       ├── build-wheels-cuda.yaml
│       ├── build-wheels-metal.yaml
│       ├── generate-index-from-release.yaml
│       ├── publish-to-test.yaml
│       ├── publish.yaml
│       ├── test-pypi.yaml
│       └── test.yaml
├── .gitignore
├── .gitmodules
├── .readthedocs.yaml
├── CHANGELOG.md
├── CMakeLists.txt
├── LICENSE.md
├── Makefile
├── README.md
├── docker/
│   ├── README.md
│   ├── cuda_simple/
│   │   └── Dockerfile
│   ├── open_llama/
│   │   ├── Dockerfile
│   │   ├── build.sh
│   │   ├── hug_model.py
│   │   ├── start.sh
│   │   └── start_server.sh
│   ├── openblas_simple/
│   │   └── Dockerfile
│   └── simple/
│       ├── Dockerfile
│       └── run.sh
├── docs/
│   ├── api-reference.md
│   ├── changelog.md
│   ├── index.md
│   ├── install/
│   │   └── macos.md
│   ├── requirements.txt
│   └── server.md
├── examples/
│   ├── batch-processing/
│   │   └── server.py
│   ├── gradio_chat/
│   │   ├── local.py
│   │   └── server.py
│   ├── hf_pull/
│   │   └── main.py
│   ├── high_level_api/
│   │   ├── fastapi_server.py
│   │   ├── high_level_api_embedding.py
│   │   ├── high_level_api_inference.py
│   │   ├── high_level_api_infill.py
│   │   ├── high_level_api_streaming.py
│   │   └── langchain_custom_llm.py
│   ├── low_level_api/
│   │   ├── Chat.py
│   │   ├── Miku.py
│   │   ├── ReasonAct.py
│   │   ├── common.py
│   │   ├── low_level_api_chat_cpp.py
│   │   ├── low_level_api_llama_cpp.py
│   │   ├── quantize.py
│   │   ├── readme/
│   │   │   └── low_level_api_llama_cpp.md
│   │   └── util.py
│   ├── notebooks/
│   │   ├── Batching.ipynb
│   │   ├── Clients.ipynb
│   │   ├── Functions.ipynb
│   │   ├── Guidance.ipynb
│   │   ├── Multimodal.ipynb
│   │   ├── OpenHermesFunctionCalling.ipynb
│   │   └── PerformanceTuning.ipynb
│   └── ray/
│       ├── README.md
│       ├── llm.py
│       └── requirements.txt
├── llama_cpp/
│   ├── __init__.py
│   ├── _ctypes_extensions.py
│   ├── _ggml.py
│   ├── _internals.py
│   ├── _logger.py
│   ├── _utils.py
│   ├── llama.py
│   ├── llama_cache.py
│   ├── llama_chat_format.py
│   ├── llama_cpp.py
│   ├── llama_grammar.py
│   ├── llama_speculative.py
│   ├── llama_tokenizer.py
│   ├── llama_types.py
│   ├── llava_cpp.py
│   ├── mtmd_cpp.py
│   ├── py.typed
│   └── server/
│       ├── __init__.py
│       ├── __main__.py
│       ├── app.py
│       ├── cli.py
│       ├── errors.py
│       ├── model.py
│       ├── settings.py
│       └── types.py
├── mkdocs.yml
├── pyproject.toml
├── scripts/
│   ├── get-releases.sh
│   └── releases-to-pep-503.sh
└── tests/
    ├── test_llama.py
    ├── test_llama_chat_format.py
    ├── test_llama_grammar.py
    └── test_llama_speculative.py