gitextract_s4bq7ahm/

├── .clang-format
├── .github/
│   ├── ISSUE_TEMPLATE/
│   │   ├── bug-report.md
│   │   ├── config.yml
│   │   ├── documentation.md
│   │   ├── feature-request.md
│   │   ├── general.md
│   │   ├── model-request.md
│   │   ├── speed-report.md
│   │   └── tracking.md
│   └── workflows/
│       ├── documentation.yaml
│       ├── update-relax.yaml
│       └── windows-build.yaml
├── .gitignore
├── .gitmodules
├── .pre-commit-config.yaml
├── .pylintrc
├── CMakeLists.txt
├── CONTRIBUTORS.md
├── LICENSE
├── NOTICE
├── README.md
├── android/
│   ├── .gitignore
│   ├── MLCChat/
│   │   ├── README.md
│   │   ├── app/
│   │   │   ├── .gitignore
│   │   │   ├── build.gradle
│   │   │   ├── proguard-rules.pro
│   │   │   └── src/
│   │   │       └── main/
│   │   │           ├── AndroidManifest.xml
│   │   │           ├── java/
│   │   │           │   └── ai/
│   │   │           │       └── mlc/
│   │   │           │           └── mlcchat/
│   │   │           │               ├── AppViewModel.kt
│   │   │           │               ├── ChatView.kt
│   │   │           │               ├── MainActivity.kt
│   │   │           │               ├── NavView.kt
│   │   │           │               ├── StartView.kt
│   │   │           │               └── ui/
│   │   │           │                   └── theme/
│   │   │           │                       ├── Color.kt
│   │   │           │                       ├── Theme.kt
│   │   │           │                       └── Type.kt
│   │   │           └── res/
│   │   │               ├── drawable/
│   │   │               │   ├── ic_android_black_24dp.xml
│   │   │               │   └── mlc_logo_108.xml
│   │   │               ├── values/
│   │   │               │   ├── colors.xml
│   │   │               │   ├── strings.xml
│   │   │               │   └── themes.xml
│   │   │               └── xml/
│   │   │                   ├── backup_rules.xml
│   │   │                   └── data_extraction_rules.xml
│   │   ├── build.gradle
│   │   ├── bundle_weight.py
│   │   ├── gradle/
│   │   │   └── wrapper/
│   │   │       ├── gradle-wrapper.jar
│   │   │       └── gradle-wrapper.properties
│   │   ├── gradle.properties
│   │   ├── gradlew
│   │   ├── gradlew.bat
│   │   ├── mlc-package-config.json
│   │   └── settings.gradle
│   ├── MLCEngineExample/
│   │   ├── README.md
│   │   ├── app/
│   │   │   ├── .gitignore
│   │   │   ├── build.gradle
│   │   │   ├── proguard-rules.pro
│   │   │   └── src/
│   │   │       └── main/
│   │   │           ├── AndroidManifest.xml
│   │   │           ├── java/
│   │   │           │   └── ai/
│   │   │           │       └── mlc/
│   │   │           │           └── mlcengineexample/
│   │   │           │               ├── MainActivity.kt
│   │   │           │               └── ui/
│   │   │           │                   └── theme/
│   │   │           │                       ├── Color.kt
│   │   │           │                       ├── Theme.kt
│   │   │           │                       └── Type.kt
│   │   │           └── res/
│   │   │               ├── drawable/
│   │   │               │   ├── ic_android_black_24dp.xml
│   │   │               │   └── mlc_logo_108.xml
│   │   │               ├── values/
│   │   │               │   ├── colors.xml
│   │   │               │   ├── strings.xml
│   │   │               │   └── themes.xml
│   │   │               └── xml/
│   │   │                   ├── backup_rules.xml
│   │   │                   └── data_extraction_rules.xml
│   │   ├── build.gradle
│   │   ├── bundle_weight.py
│   │   ├── gradle/
│   │   │   └── wrapper/
│   │   │       ├── gradle-wrapper.jar
│   │   │       └── gradle-wrapper.properties
│   │   ├── gradle.properties
│   │   ├── gradlew
│   │   ├── gradlew.bat
│   │   ├── mlc-package-config.json
│   │   └── settings.gradle
│   ├── README.md
│   └── mlc4j/
│       ├── .gitignore
│       ├── CMakeLists.txt
│       ├── build.gradle
│       ├── prepare_libs.py
│       └── src/
│           ├── cpp/
│           │   └── tvm_runtime.h
│           └── main/
│               ├── AndroidManifest.xml
│               └── java/
│                   └── ai/
│                       └── mlc/
│                           └── mlcllm/
│                               ├── JSONFFIEngine.java
│                               ├── MLCEngine.kt
│                               └── OpenAIProtocol.kt
├── ci/
│   ├── bash.sh
│   ├── build-environment.yaml
│   ├── jenkinsfile.groovy
│   └── task/
│       ├── black.sh
│       ├── build_clean.sh
│       ├── build_lib.sh
│       ├── build_win.bat
│       ├── clang-format.sh
│       ├── isort.sh
│       ├── mypy.sh
│       ├── pylint.sh
│       ├── test_model_compile.sh
│       └── test_unittest.sh
├── cmake/
│   └── gen_cmake_config.py
├── cpp/
│   ├── base.h
│   ├── json_ffi/
│   │   ├── conv_template.cc
│   │   ├── conv_template.h
│   │   ├── image_utils.cc
│   │   ├── image_utils.h
│   │   ├── json_ffi_engine.cc
│   │   ├── json_ffi_engine.h
│   │   ├── openai_api_protocol.cc
│   │   └── openai_api_protocol.h
│   ├── metadata/
│   │   ├── model.cc
│   │   └── model.h
│   ├── multi_gpu/
│   │   ├── builtin.cc
│   │   └── multi_gpu_loader.cc
│   ├── serve/
│   │   ├── config.cc
│   │   ├── config.h
│   │   ├── data.cc
│   │   ├── data.h
│   │   ├── draft_token_workspace_manager.cc
│   │   ├── draft_token_workspace_manager.h
│   │   ├── engine.cc
│   │   ├── engine.h
│   │   ├── engine_actions/
│   │   │   ├── action.cc
│   │   │   ├── action.h
│   │   │   ├── action_commons.cc
│   │   │   ├── action_commons.h
│   │   │   ├── auto_spec_decode.cc
│   │   │   ├── batch_decode.cc
│   │   │   ├── batch_draft.cc
│   │   │   ├── batch_jumpforward.cc
│   │   │   ├── batch_prefill_base.cc
│   │   │   ├── batch_prefill_base.h
│   │   │   ├── batch_verify.cc
│   │   │   ├── disagg_prepare_recv.cc
│   │   │   ├── disagg_remote_send.cc
│   │   │   ├── eagle_batch_draft.cc
│   │   │   ├── eagle_batch_verify.cc
│   │   │   ├── eagle_new_request_prefill.cc
│   │   │   └── new_request_prefill.cc
│   │   ├── engine_state.cc
│   │   ├── engine_state.h
│   │   ├── event_trace_recorder.cc
│   │   ├── event_trace_recorder.h
│   │   ├── function_table.cc
│   │   ├── function_table.h
│   │   ├── logit_processor.cc
│   │   ├── logit_processor.h
│   │   ├── metrics.cc
│   │   ├── metrics.h
│   │   ├── model.cc
│   │   ├── model.h
│   │   ├── prefix_cache.cc
│   │   ├── prefix_cache.h
│   │   ├── radix_tree.cc
│   │   ├── radix_tree.h
│   │   ├── request.cc
│   │   ├── request.h
│   │   ├── request_state.cc
│   │   ├── request_state.h
│   │   ├── sampler/
│   │   │   ├── cpu_sampler.cc
│   │   │   ├── gpu_sampler.cc
│   │   │   └── sampler.h
│   │   ├── threaded_engine.cc
│   │   └── threaded_engine.h
│   ├── support/
│   │   ├── debug_utils.h
│   │   ├── dynamic_bitset.h
│   │   ├── encoding.cc
│   │   ├── encoding.h
│   │   ├── json_parser.h
│   │   ├── load_bytes_from_file.h
│   │   ├── progress_bar.h
│   │   ├── random.h
│   │   ├── result.h
│   │   ├── utils.h
│   │   ├── vlm_utils.cc
│   │   └── vlm_utils.h
│   └── tokenizers/
│       ├── streamer.cc
│       ├── streamer.h
│       ├── tokenizers.cc
│       └── tokenizers.h
├── docs/
│   ├── .gitignore
│   ├── Makefile
│   ├── README.md
│   ├── community/
│   │   ├── faq.rst
│   │   └── guideline.rst
│   ├── compilation/
│   │   ├── compile_models.rst
│   │   ├── configure_quantization.rst
│   │   ├── convert_weights.rst
│   │   ├── define_new_models.rst
│   │   └── package_libraries_and_weights.rst
│   ├── conf.py
│   ├── deploy/
│   │   ├── android.rst
│   │   ├── cli.rst
│   │   ├── ide_integration.rst
│   │   ├── ios.rst
│   │   ├── mlc_chat_config.rst
│   │   ├── python_engine.rst
│   │   ├── rest.rst
│   │   └── webllm.rst
│   ├── get_started/
│   │   ├── introduction.rst
│   │   └── quick_start.rst
│   ├── index.rst
│   ├── install/
│   │   ├── conda.rst
│   │   ├── emcc.rst
│   │   ├── gpu.rst
│   │   ├── mlc_llm.rst
│   │   └── tvm.rst
│   ├── make.bat
│   ├── microserving/
│   │   └── tutorial.rst
│   ├── privacy.rst
│   └── requirements.txt
├── examples/
│   ├── python/
│   │   ├── microserving/
│   │   │   └── custom_router.py
│   │   └── sample_mlc_engine.py
│   └── rest/
│       ├── nodejs/
│       │   ├── README.MD
│       │   ├── dotenv.example
│       │   ├── package.json
│       │   ├── sample_client.js
│       │   ├── sample_langchain.ts
│       │   ├── sample_openai.js
│       │   └── tsconfig.json
│       ├── python/
│       │   ├── sample_client.py
│       │   ├── sample_langchain.py
│       │   └── sample_openai.py
│       └── resources/
│           ├── linux.txt
│           └── state_of_the_union.txt
├── ios/
│   ├── .gitignore
│   ├── MLCChat/
│   │   ├── MLCChat/
│   │   │   ├── Assets.xcassets/
│   │   │   │   ├── AccentColor.colorset/
│   │   │   │   │   └── Contents.json
│   │   │   │   ├── AppIcon.appiconset/
│   │   │   │   │   └── Contents.json
│   │   │   │   └── Contents.json
│   │   │   ├── Common/
│   │   │   │   └── Constants.swift
│   │   │   ├── Info.plist
│   │   │   ├── MLCChat.entitlements
│   │   │   ├── MLCChatApp.swift
│   │   │   ├── Models/
│   │   │   │   ├── AppConfig.swift
│   │   │   │   ├── ModelConfig.swift
│   │   │   │   └── ParamsConfig.swift
│   │   │   ├── Preview Content/
│   │   │   │   └── Preview Assets.xcassets/
│   │   │   │       └── Contents.json
│   │   │   ├── States/
│   │   │   │   ├── AppState.swift
│   │   │   │   ├── ChatState.swift
│   │   │   │   └── ModelState.swift
│   │   │   └── Views/
│   │   │       ├── ChatView.swift
│   │   │       ├── ImageProcessing.swift
│   │   │       ├── MessageView.swift
│   │   │       ├── ModelView.swift
│   │   │       └── StartView.swift
│   │   ├── MLCChat.xcodeproj/
│   │   │   ├── project.pbxproj
│   │   │   ├── project.xcworkspace/
│   │   │   │   ├── contents.xcworkspacedata
│   │   │   │   └── xcshareddata/
│   │   │   │       ├── IDEWorkspaceChecks.plist
│   │   │   │       ├── WorkspaceSettings.xcsettings
│   │   │   │       └── swiftpm/
│   │   │   │           └── Package.resolved
│   │   │   └── xcshareddata/
│   │   │       └── xcschemes/
│   │   │           └── MLCChat.xcscheme
│   │   ├── README.md
│   │   └── mlc-package-config.json
│   ├── MLCEngineExample/
│   │   ├── MLCEngineExample/
│   │   │   ├── Assets.xcassets/
│   │   │   │   ├── AccentColor.colorset/
│   │   │   │   │   └── Contents.json
│   │   │   │   ├── AppIcon.appiconset/
│   │   │   │   │   └── Contents.json
│   │   │   │   └── Contents.json
│   │   │   ├── ContentView.swift
│   │   │   ├── MLCEngineExample.entitlements
│   │   │   ├── MLCEngineExampleApp.swift
│   │   │   └── Preview Content/
│   │   │       └── Preview Assets.xcassets/
│   │   │           └── Contents.json
│   │   ├── MLCEngineExample.xcodeproj/
│   │   │   ├── project.pbxproj
│   │   │   └── project.xcworkspace/
│   │   │       ├── contents.xcworkspacedata
│   │   │       └── xcshareddata/
│   │   │           └── IDEWorkspaceChecks.plist
│   │   ├── README.md
│   │   └── mlc-package-config.json
│   ├── MLCSwift/
│   │   ├── Package.swift
│   │   ├── README.md
│   │   └── Sources/
│   │       ├── ObjC/
│   │       │   ├── LLMEngine.mm
│   │       │   └── include/
│   │       │       └── LLMEngine.h
│   │       └── Swift/
│   │           ├── LLMEngine.swift
│   │           └── OpenAIProtocol.swift
│   ├── README.md
│   └── prepare_libs.sh
├── pyproject.toml
├── python/
│   ├── mlc_llm/
│   │   ├── __init__.py
│   │   ├── __main__.py
│   │   ├── base.py
│   │   ├── bench/
│   │   │   ├── __init__.py
│   │   │   ├── __main__.py
│   │   │   ├── api_endpoint.py
│   │   │   ├── dataset.py
│   │   │   ├── evaluation/
│   │   │   │   ├── gsm8k.py
│   │   │   │   └── mmlu.py
│   │   │   ├── request_processor.py
│   │   │   └── request_record.py
│   │   ├── cli/
│   │   │   ├── __init__.py
│   │   │   ├── calibrate.py
│   │   │   ├── chat.py
│   │   │   ├── check_device.py
│   │   │   ├── compile.py
│   │   │   ├── convert_weight.py
│   │   │   ├── delivery.py
│   │   │   ├── disco_remote_socket_session.py
│   │   │   ├── gen_config.py
│   │   │   ├── lib_delivery.py
│   │   │   ├── model_metadata.py
│   │   │   ├── package.py
│   │   │   ├── router.py
│   │   │   ├── serve.py
│   │   │   └── worker.py
│   │   ├── compiler_pass/
│   │   │   ├── __init__.py
│   │   │   ├── attach_cuda_graph_alloc_init_func.py
│   │   │   ├── attach_embedding_allocator.py
│   │   │   ├── attach_logit_processor.py
│   │   │   ├── attach_sampler.py
│   │   │   ├── attach_softmax_with_temperature.py
│   │   │   ├── attach_spec_decode_aux_funcs.py
│   │   │   ├── attach_support_info.py
│   │   │   ├── blas_dispatch.py
│   │   │   ├── clean_up_tir_attrs.py
│   │   │   ├── dispatch_kv_cache_creation.py
│   │   │   ├── dispatch_triton_kernel.py
│   │   │   ├── estimate_memory_usage.py
│   │   │   ├── fuse_add_norm.py
│   │   │   ├── fuse_dequantize_matmul_ewise.py
│   │   │   ├── fuse_dequantize_take.py
│   │   │   ├── fuse_dequantize_transpose.py
│   │   │   ├── fuse_ft_dequantize_matmul_epilogue.py
│   │   │   ├── fuse_transpose_matmul.py
│   │   │   ├── lift_global_buffer_alloc.py
│   │   │   ├── low_batch_specialization.py
│   │   │   ├── pipeline.py
│   │   │   ├── pipeline_parallel_rewrite.py
│   │   │   └── scatter_tuple_get_item.py
│   │   ├── contrib/
│   │   │   ├── __init__.py
│   │   │   └── embeddings/
│   │   │       ├── __init__.py
│   │   │       ├── embeddings.py
│   │   │       └── openai.py
│   │   ├── conversation_template/
│   │   │   ├── __init__.py
│   │   │   ├── cohere.py
│   │   │   ├── deepseek.py
│   │   │   ├── dolly.py
│   │   │   ├── gemma.py
│   │   │   ├── glm.py
│   │   │   ├── gorilla.py
│   │   │   ├── gpt.py
│   │   │   ├── hermes.py
│   │   │   ├── llama.py
│   │   │   ├── llava.py
│   │   │   ├── llm_jp.py
│   │   │   ├── ministral3.py
│   │   │   ├── ministral3_reasoning.py
│   │   │   ├── mistral.py
│   │   │   ├── nemotron.py
│   │   │   ├── oasst.py
│   │   │   ├── olmo.py
│   │   │   ├── orion.py
│   │   │   ├── phi.py
│   │   │   ├── qwen2.py
│   │   │   ├── redpajama.py
│   │   │   ├── registry.py
│   │   │   ├── rwkv.py
│   │   │   ├── stablelm.py
│   │   │   ├── tinyllama.py
│   │   │   └── wizardlm.py
│   │   ├── interface/
│   │   │   ├── __init__.py
│   │   │   ├── calibrate.py
│   │   │   ├── chat.py
│   │   │   ├── compile.py
│   │   │   ├── compiler_flags.py
│   │   │   ├── convert_weight.py
│   │   │   ├── gen_config.py
│   │   │   ├── help.py
│   │   │   ├── jit.py
│   │   │   ├── package.py
│   │   │   ├── router.py
│   │   │   └── serve.py
│   │   ├── json_ffi/
│   │   │   ├── __init__.py
│   │   │   └── engine.py
│   │   ├── libinfo.py
│   │   ├── loader/
│   │   │   ├── __init__.py
│   │   │   ├── huggingface_loader.py
│   │   │   ├── loader.py
│   │   │   ├── mapping.py
│   │   │   ├── standard_loader.py
│   │   │   ├── stats.py
│   │   │   └── utils.py
│   │   ├── model/
│   │   │   ├── __init__.py
│   │   │   ├── baichuan/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── baichuan_loader.py
│   │   │   │   └── baichuan_model.py
│   │   │   ├── bert/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── bert_loader.py
│   │   │   │   └── bert_model.py
│   │   │   ├── chatglm3/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── chatglm3_loader.py
│   │   │   │   └── chatglm3_model.py
│   │   │   ├── cohere/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── cohere_loader.py
│   │   │   │   └── cohere_model.py
│   │   │   ├── deepseek/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── deepseek_loader.py
│   │   │   │   └── deepseek_model.py
│   │   │   ├── deepseek_v2/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── deepseek_v2_loader.py
│   │   │   │   └── deepseek_v2_model.py
│   │   │   ├── eagle/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── eagle_loader.py
│   │   │   │   └── eagle_model.py
│   │   │   ├── gemma/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── gemma_loader.py
│   │   │   │   └── gemma_model.py
│   │   │   ├── gemma2/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── gemma2_loader.py
│   │   │   │   └── gemma2_model.py
│   │   │   ├── gemma3/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── gemma3_loader.py
│   │   │   │   └── gemma3_model.py
│   │   │   ├── gpt2/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── gpt2_loader.py
│   │   │   │   └── gpt2_model.py
│   │   │   ├── gpt_bigcode/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── gpt_bigcode_loader.py
│   │   │   │   └── gpt_bigcode_model.py
│   │   │   ├── gpt_j/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── gpt_j_loader.py
│   │   │   │   └── gpt_j_model.py
│   │   │   ├── gpt_neox/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── gpt_neox_loader.py
│   │   │   │   └── gpt_neox_model.py
│   │   │   ├── internlm/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── internlm_loader.py
│   │   │   │   └── internlm_model.py
│   │   │   ├── internlm2/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── internlm2_loader.py
│   │   │   │   └── internlm2_model.py
│   │   │   ├── llama/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── llama_loader.py
│   │   │   │   └── llama_model.py
│   │   │   ├── llama4/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── llama4_loader.py
│   │   │   │   └── llama4_model.py
│   │   │   ├── llava/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── llava_loader.py
│   │   │   │   └── llava_model.py
│   │   │   ├── medusa/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── medusa_loader.py
│   │   │   │   └── medusa_model.py
│   │   │   ├── minicpm/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── minicpm_loader.py
│   │   │   │   └── minicpm_model.py
│   │   │   ├── ministral3/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── ministral3_loader.py
│   │   │   │   └── ministral3_model.py
│   │   │   ├── mistral/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── mistral_loader.py
│   │   │   │   └── mistral_model.py
│   │   │   ├── mixtral/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── mixtral_loader.py
│   │   │   │   └── mixtral_model.py
│   │   │   ├── model.py
│   │   │   ├── model_preset.py
│   │   │   ├── nemotron/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── nemotron_loader.py
│   │   │   │   └── nemotron_model.py
│   │   │   ├── olmo/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── olmo_loader.py
│   │   │   │   └── olmo_model.py
│   │   │   ├── orion/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── orion_loader.py
│   │   │   │   └── orion_model.py
│   │   │   ├── phi/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── phi_loader.py
│   │   │   │   └── phi_model.py
│   │   │   ├── phi3/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── phi3_loader.py
│   │   │   │   └── phi3_model.py
│   │   │   ├── phi3v/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── phi3v_image.py
│   │   │   │   ├── phi3v_loader.py
│   │   │   │   └── phi3v_model.py
│   │   │   ├── qwen/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── qwen_loader.py
│   │   │   │   └── qwen_model.py
│   │   │   ├── qwen2/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── qwen2_loader.py
│   │   │   │   └── qwen2_model.py
│   │   │   ├── qwen2_5_vl/
│   │   │   │   ├── __init__.py
│   │   │   │   └── qwen2_5_vl_model.py
│   │   │   ├── qwen2_moe/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── qwen2_moe_loader.py
│   │   │   │   └── qwen2_moe_model.py
│   │   │   ├── qwen3/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── qwen3_loader.py
│   │   │   │   └── qwen3_model.py
│   │   │   ├── qwen3_moe/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── qwen3_moe_loader.py
│   │   │   │   └── qwen3_moe_model.py
│   │   │   ├── rwkv5/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── rwkv5_loader.py
│   │   │   │   └── rwkv5_model.py
│   │   │   ├── rwkv6/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── rwkv6_loader.py
│   │   │   │   └── rwkv6_model.py
│   │   │   ├── stable_lm/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── stablelm_loader.py
│   │   │   │   └── stablelm_model.py
│   │   │   ├── starcoder2/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── starcoder2_loader.py
│   │   │   │   └── starcoder2_model.py
│   │   │   └── vision/
│   │   │       ├── __init__.py
│   │   │       ├── clip_vision.py
│   │   │       └── image_processing.py
│   │   ├── nn/
│   │   │   ├── __init__.py
│   │   │   ├── expert.py
│   │   │   ├── kv_cache.py
│   │   │   └── rnn_state.py
│   │   ├── op/
│   │   │   ├── __init__.py
│   │   │   ├── attention.py
│   │   │   ├── batch_matmul.py
│   │   │   ├── batch_spec_verify.py
│   │   │   ├── cutlass.py
│   │   │   ├── extern.py
│   │   │   ├── ft_gemm.py
│   │   │   ├── moe_matmul.py
│   │   │   ├── moe_misc.py
│   │   │   ├── mrope.py
│   │   │   ├── pipeline_parallel.py
│   │   │   ├── top_p_pivot.py
│   │   │   └── triton.py
│   │   ├── protocol/
│   │   │   ├── __init__.py
│   │   │   ├── conversation_protocol.py
│   │   │   ├── debug_protocol.py
│   │   │   ├── error_protocol.py
│   │   │   ├── generation_config.py
│   │   │   ├── microserving_protocol.py
│   │   │   ├── mlc_chat_config.py
│   │   │   └── openai_api_protocol.py
│   │   ├── quantization/
│   │   │   ├── __init__.py
│   │   │   ├── awq_quantization.py
│   │   │   ├── block_scale_quantization.py
│   │   │   ├── fp8_quantization.py
│   │   │   ├── ft_quantization.py
│   │   │   ├── group_quantization.py
│   │   │   ├── model_quantization.py
│   │   │   ├── no_quantization.py
│   │   │   ├── per_tensor_quantization.py
│   │   │   ├── quantization.py
│   │   │   └── utils.py
│   │   ├── router/
│   │   │   ├── __init__.py
│   │   │   └── router.py
│   │   ├── serve/
│   │   │   ├── __init__.py
│   │   │   ├── _ffi_api.py
│   │   │   ├── config.py
│   │   │   ├── data.py
│   │   │   ├── embedding_engine.py
│   │   │   ├── engine.py
│   │   │   ├── engine_base.py
│   │   │   ├── engine_utils.py
│   │   │   ├── entrypoints/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── debug_entrypoints.py
│   │   │   │   ├── metrics_entrypoints.py
│   │   │   │   ├── microserving_entrypoints.py
│   │   │   │   └── openai_entrypoints.py
│   │   │   ├── event_trace_recorder.py
│   │   │   ├── radix_tree.py
│   │   │   ├── request.py
│   │   │   ├── server/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── popen_server.py
│   │   │   │   └── server_context.py
│   │   │   └── sync_engine.py
│   │   ├── support/
│   │   │   ├── __init__.py
│   │   │   ├── argparse.py
│   │   │   ├── auto_config.py
│   │   │   ├── auto_device.py
│   │   │   ├── auto_target.py
│   │   │   ├── auto_weight.py
│   │   │   ├── config.py
│   │   │   ├── constants.py
│   │   │   ├── convert_tiktoken.py
│   │   │   ├── download_cache.py
│   │   │   ├── logging.py
│   │   │   ├── max_thread_check.py
│   │   │   ├── preshard.py
│   │   │   ├── random.py
│   │   │   ├── style.py
│   │   │   ├── tensor_parallel.py
│   │   │   └── tqdm.py
│   │   ├── testing/
│   │   │   ├── __init__.py
│   │   │   ├── debug_chat.py
│   │   │   ├── debug_compare.py
│   │   │   └── pytest_utils.py
│   │   └── tokenizers/
│   │       ├── __init__.py
│   │       ├── _ffi_api.py
│   │       ├── streamer.py
│   │       └── tokenizers.py
│   ├── requirements.txt
│   └── setup.py
├── scripts/
│   ├── build_mlc_for_docs.sh
│   ├── build_site.sh
│   ├── check_url_validity.py
│   ├── gh_deploy_site.sh
│   └── local_deploy_site.sh
├── site/
│   ├── .gitignore
│   ├── CNAME
│   ├── Gemfile
│   ├── _config.yml
│   ├── _includes/
│   │   ├── head.html
│   │   └── hero.html
│   ├── assets/
│   │   └── css/
│   │       └── hero.scss
│   ├── index.md
│   └── privacy.md
├── tests/
│   ├── README.md
│   ├── cpp/
│   │   └── conv_template_unittest.cc
│   └── python/
│       ├── __init__.py
│       ├── compiler_pass/
│       │   └── test_fuse_ft_dequantize_matmul_epilogue.py
│       ├── conftest.py
│       ├── conversation_template/
│       │   ├── test_conversation_protocol.py
│       │   └── test_llama_template.py
│       ├── integration/
│       │   └── test_model_compile.py
│       ├── json_ffi/
│       │   ├── test_json_ffi_engine.py
│       │   ├── test_json_ffi_engine_image.py
│       │   └── test_json_ffi_engine_mock.py
│       ├── loader/
│       │   ├── test_awq.py
│       │   └── test_huggingface.py
│       ├── model/
│       │   ├── test_gemma3.py
│       │   ├── test_gpt2.py
│       │   ├── test_gptNeox.py
│       │   ├── test_kv_cache.py
│       │   ├── test_llama.py
│       │   ├── test_llama_quantization.py
│       │   ├── test_mistral.py
│       │   ├── test_phi.py
│       │   └── test_qwen3_embedding.py
│       ├── op/
│       │   ├── test_batch_spec_verify.py
│       │   ├── test_fp8_block_matmul.py
│       │   ├── test_mrope.py
│       │   ├── test_top_p_pivot.py
│       │   ├── test_tree_attn.py
│       │   └── test_two_stage_softmax.py
│       ├── quantization/
│       │   ├── test_awq_quantization.py
│       │   └── test_group_quantization.py
│       ├── router/
│       │   └── test_router.py
│       ├── serve/
│       │   ├── evaluate_engine.py
│       │   ├── server/
│       │   │   ├── conftest.py
│       │   │   ├── test_embedding_server.py
│       │   │   ├── test_server.py
│       │   │   ├── test_server_function_call.py
│       │   │   └── test_server_image.py
│       │   ├── test_embedding_engine.py
│       │   ├── test_event_trace_recorder.py
│       │   ├── test_radix_tree.py
│       │   ├── test_serve_async_engine.py
│       │   ├── test_serve_async_engine_spec.py
│       │   ├── test_serve_engine.py
│       │   ├── test_serve_engine_grammar.py
│       │   ├── test_serve_engine_image.py
│       │   ├── test_serve_engine_mock.py
│       │   ├── test_serve_engine_prefix_cache.py
│       │   ├── test_serve_engine_rnn.py
│       │   ├── test_serve_engine_spec.py
│       │   └── test_serve_sync_engine.py
│       ├── support/
│       │   ├── test_auto_config.py
│       │   ├── test_auto_weight.py
│       │   ├── test_cli_convert_weight.py
│       │   └── test_convert_weight_lora_merge.py
│       └── tokenizers/
│           └── test_streamer.py
├── version.py
└── web/
    ├── Makefile
    ├── README.md
    ├── emcc/
    │   └── mlc_wasm_runtime.cc
    └── prep_emcc_deps.sh