Repository: mlc-ai/mlc-llm Branch: main Commit: 20d7fb309664 Files: 661 Total size: 4.0 MB Directory structure: gitextract_s4bq7ahm/ ├── .clang-format ├── .github/ │ ├── ISSUE_TEMPLATE/ │ │ ├── bug-report.md │ │ ├── config.yml │ │ ├── documentation.md │ │ ├── feature-request.md │ │ ├── general.md │ │ ├── model-request.md │ │ ├── speed-report.md │ │ └── tracking.md │ └── workflows/ │ ├── documentation.yaml │ ├── update-relax.yaml │ └── windows-build.yaml ├── .gitignore ├── .gitmodules ├── .pre-commit-config.yaml ├── .pylintrc ├── CMakeLists.txt ├── CONTRIBUTORS.md ├── LICENSE ├── NOTICE ├── README.md ├── android/ │ ├── .gitignore │ ├── MLCChat/ │ │ ├── README.md │ │ ├── app/ │ │ │ ├── .gitignore │ │ │ ├── build.gradle │ │ │ ├── proguard-rules.pro │ │ │ └── src/ │ │ │ └── main/ │ │ │ ├── AndroidManifest.xml │ │ │ ├── java/ │ │ │ │ └── ai/ │ │ │ │ └── mlc/ │ │ │ │ └── mlcchat/ │ │ │ │ ├── AppViewModel.kt │ │ │ │ ├── ChatView.kt │ │ │ │ ├── MainActivity.kt │ │ │ │ ├── NavView.kt │ │ │ │ ├── StartView.kt │ │ │ │ └── ui/ │ │ │ │ └── theme/ │ │ │ │ ├── Color.kt │ │ │ │ ├── Theme.kt │ │ │ │ └── Type.kt │ │ │ └── res/ │ │ │ ├── drawable/ │ │ │ │ ├── ic_android_black_24dp.xml │ │ │ │ └── mlc_logo_108.xml │ │ │ ├── values/ │ │ │ │ ├── colors.xml │ │ │ │ ├── strings.xml │ │ │ │ └── themes.xml │ │ │ └── xml/ │ │ │ ├── backup_rules.xml │ │ │ └── data_extraction_rules.xml │ │ ├── build.gradle │ │ ├── bundle_weight.py │ │ ├── gradle/ │ │ │ └── wrapper/ │ │ │ ├── gradle-wrapper.jar │ │ │ └── gradle-wrapper.properties │ │ ├── gradle.properties │ │ ├── gradlew │ │ ├── gradlew.bat │ │ ├── mlc-package-config.json │ │ └── settings.gradle │ ├── MLCEngineExample/ │ │ ├── README.md │ │ ├── app/ │ │ │ ├── .gitignore │ │ │ ├── build.gradle │ │ │ ├── proguard-rules.pro │ │ │ └── src/ │ │ │ └── main/ │ │ │ ├── AndroidManifest.xml │ │ │ ├── java/ │ │ │ │ └── ai/ │ │ │ │ └── mlc/ │ │ │ │ └── mlcengineexample/ │ │ │ │ ├── MainActivity.kt │ │ │ │ └── ui/ │ │ │ │ └── theme/ │ │ │ │ ├── Color.kt │ │ 
│ │ ├── Theme.kt │ │ │ │ └── Type.kt │ │ │ └── res/ │ │ │ ├── drawable/ │ │ │ │ ├── ic_android_black_24dp.xml │ │ │ │ └── mlc_logo_108.xml │ │ │ ├── values/ │ │ │ │ ├── colors.xml │ │ │ │ ├── strings.xml │ │ │ │ └── themes.xml │ │ │ └── xml/ │ │ │ ├── backup_rules.xml │ │ │ └── data_extraction_rules.xml │ │ ├── build.gradle │ │ ├── bundle_weight.py │ │ ├── gradle/ │ │ │ └── wrapper/ │ │ │ ├── gradle-wrapper.jar │ │ │ └── gradle-wrapper.properties │ │ ├── gradle.properties │ │ ├── gradlew │ │ ├── gradlew.bat │ │ ├── mlc-package-config.json │ │ └── settings.gradle │ ├── README.md │ └── mlc4j/ │ ├── .gitignore │ ├── CMakeLists.txt │ ├── build.gradle │ ├── prepare_libs.py │ └── src/ │ ├── cpp/ │ │ └── tvm_runtime.h │ └── main/ │ ├── AndroidManifest.xml │ └── java/ │ └── ai/ │ └── mlc/ │ └── mlcllm/ │ ├── JSONFFIEngine.java │ ├── MLCEngine.kt │ └── OpenAIProtocol.kt ├── ci/ │ ├── bash.sh │ ├── build-environment.yaml │ ├── jenkinsfile.groovy │ └── task/ │ ├── black.sh │ ├── build_clean.sh │ ├── build_lib.sh │ ├── build_win.bat │ ├── clang-format.sh │ ├── isort.sh │ ├── mypy.sh │ ├── pylint.sh │ ├── test_model_compile.sh │ └── test_unittest.sh ├── cmake/ │ └── gen_cmake_config.py ├── cpp/ │ ├── base.h │ ├── json_ffi/ │ │ ├── conv_template.cc │ │ ├── conv_template.h │ │ ├── image_utils.cc │ │ ├── image_utils.h │ │ ├── json_ffi_engine.cc │ │ ├── json_ffi_engine.h │ │ ├── openai_api_protocol.cc │ │ └── openai_api_protocol.h │ ├── metadata/ │ │ ├── model.cc │ │ └── model.h │ ├── multi_gpu/ │ │ ├── builtin.cc │ │ └── multi_gpu_loader.cc │ ├── serve/ │ │ ├── config.cc │ │ ├── config.h │ │ ├── data.cc │ │ ├── data.h │ │ ├── draft_token_workspace_manager.cc │ │ ├── draft_token_workspace_manager.h │ │ ├── engine.cc │ │ ├── engine.h │ │ ├── engine_actions/ │ │ │ ├── action.cc │ │ │ ├── action.h │ │ │ ├── action_commons.cc │ │ │ ├── action_commons.h │ │ │ ├── auto_spec_decode.cc │ │ │ ├── batch_decode.cc │ │ │ ├── batch_draft.cc │ │ │ ├── batch_jumpforward.cc │ │ │ ├── 
batch_prefill_base.cc │ │ │ ├── batch_prefill_base.h │ │ │ ├── batch_verify.cc │ │ │ ├── disagg_prepare_recv.cc │ │ │ ├── disagg_remote_send.cc │ │ │ ├── eagle_batch_draft.cc │ │ │ ├── eagle_batch_verify.cc │ │ │ ├── eagle_new_request_prefill.cc │ │ │ └── new_request_prefill.cc │ │ ├── engine_state.cc │ │ ├── engine_state.h │ │ ├── event_trace_recorder.cc │ │ ├── event_trace_recorder.h │ │ ├── function_table.cc │ │ ├── function_table.h │ │ ├── logit_processor.cc │ │ ├── logit_processor.h │ │ ├── metrics.cc │ │ ├── metrics.h │ │ ├── model.cc │ │ ├── model.h │ │ ├── prefix_cache.cc │ │ ├── prefix_cache.h │ │ ├── radix_tree.cc │ │ ├── radix_tree.h │ │ ├── request.cc │ │ ├── request.h │ │ ├── request_state.cc │ │ ├── request_state.h │ │ ├── sampler/ │ │ │ ├── cpu_sampler.cc │ │ │ ├── gpu_sampler.cc │ │ │ └── sampler.h │ │ ├── threaded_engine.cc │ │ └── threaded_engine.h │ ├── support/ │ │ ├── debug_utils.h │ │ ├── dynamic_bitset.h │ │ ├── encoding.cc │ │ ├── encoding.h │ │ ├── json_parser.h │ │ ├── load_bytes_from_file.h │ │ ├── progress_bar.h │ │ ├── random.h │ │ ├── result.h │ │ ├── utils.h │ │ ├── vlm_utils.cc │ │ └── vlm_utils.h │ └── tokenizers/ │ ├── streamer.cc │ ├── streamer.h │ ├── tokenizers.cc │ └── tokenizers.h ├── docs/ │ ├── .gitignore │ ├── Makefile │ ├── README.md │ ├── community/ │ │ ├── faq.rst │ │ └── guideline.rst │ ├── compilation/ │ │ ├── compile_models.rst │ │ ├── configure_quantization.rst │ │ ├── convert_weights.rst │ │ ├── define_new_models.rst │ │ └── package_libraries_and_weights.rst │ ├── conf.py │ ├── deploy/ │ │ ├── android.rst │ │ ├── cli.rst │ │ ├── ide_integration.rst │ │ ├── ios.rst │ │ ├── mlc_chat_config.rst │ │ ├── python_engine.rst │ │ ├── rest.rst │ │ └── webllm.rst │ ├── get_started/ │ │ ├── introduction.rst │ │ └── quick_start.rst │ ├── index.rst │ ├── install/ │ │ ├── conda.rst │ │ ├── emcc.rst │ │ ├── gpu.rst │ │ ├── mlc_llm.rst │ │ └── tvm.rst │ ├── make.bat │ ├── microserving/ │ │ └── tutorial.rst │ ├── privacy.rst │ └── 
requirements.txt ├── examples/ │ ├── python/ │ │ ├── microserving/ │ │ │ └── custom_router.py │ │ └── sample_mlc_engine.py │ └── rest/ │ ├── nodejs/ │ │ ├── README.MD │ │ ├── dotenv.example │ │ ├── package.json │ │ ├── sample_client.js │ │ ├── sample_langchain.ts │ │ ├── sample_openai.js │ │ └── tsconfig.json │ ├── python/ │ │ ├── sample_client.py │ │ ├── sample_langchain.py │ │ └── sample_openai.py │ └── resources/ │ ├── linux.txt │ └── state_of_the_union.txt ├── ios/ │ ├── .gitignore │ ├── MLCChat/ │ │ ├── MLCChat/ │ │ │ ├── Assets.xcassets/ │ │ │ │ ├── AccentColor.colorset/ │ │ │ │ │ └── Contents.json │ │ │ │ ├── AppIcon.appiconset/ │ │ │ │ │ └── Contents.json │ │ │ │ └── Contents.json │ │ │ ├── Common/ │ │ │ │ └── Constants.swift │ │ │ ├── Info.plist │ │ │ ├── MLCChat.entitlements │ │ │ ├── MLCChatApp.swift │ │ │ ├── Models/ │ │ │ │ ├── AppConfig.swift │ │ │ │ ├── ModelConfig.swift │ │ │ │ └── ParamsConfig.swift │ │ │ ├── Preview Content/ │ │ │ │ └── Preview Assets.xcassets/ │ │ │ │ └── Contents.json │ │ │ ├── States/ │ │ │ │ ├── AppState.swift │ │ │ │ ├── ChatState.swift │ │ │ │ └── ModelState.swift │ │ │ └── Views/ │ │ │ ├── ChatView.swift │ │ │ ├── ImageProcessing.swift │ │ │ ├── MessageView.swift │ │ │ ├── ModelView.swift │ │ │ └── StartView.swift │ │ ├── MLCChat.xcodeproj/ │ │ │ ├── project.pbxproj │ │ │ ├── project.xcworkspace/ │ │ │ │ ├── contents.xcworkspacedata │ │ │ │ └── xcshareddata/ │ │ │ │ ├── IDEWorkspaceChecks.plist │ │ │ │ ├── WorkspaceSettings.xcsettings │ │ │ │ └── swiftpm/ │ │ │ │ └── Package.resolved │ │ │ └── xcshareddata/ │ │ │ └── xcschemes/ │ │ │ └── MLCChat.xcscheme │ │ ├── README.md │ │ └── mlc-package-config.json │ ├── MLCEngineExample/ │ │ ├── MLCEngineExample/ │ │ │ ├── Assets.xcassets/ │ │ │ │ ├── AccentColor.colorset/ │ │ │ │ │ └── Contents.json │ │ │ │ ├── AppIcon.appiconset/ │ │ │ │ │ └── Contents.json │ │ │ │ └── Contents.json │ │ │ ├── ContentView.swift │ │ │ ├── MLCEngineExample.entitlements │ │ │ ├── 
MLCEngineExampleApp.swift │ │ │ └── Preview Content/ │ │ │ └── Preview Assets.xcassets/ │ │ │ └── Contents.json │ │ ├── MLCEngineExample.xcodeproj/ │ │ │ ├── project.pbxproj │ │ │ └── project.xcworkspace/ │ │ │ ├── contents.xcworkspacedata │ │ │ └── xcshareddata/ │ │ │ └── IDEWorkspaceChecks.plist │ │ ├── README.md │ │ └── mlc-package-config.json │ ├── MLCSwift/ │ │ ├── Package.swift │ │ ├── README.md │ │ └── Sources/ │ │ ├── ObjC/ │ │ │ ├── LLMEngine.mm │ │ │ └── include/ │ │ │ └── LLMEngine.h │ │ └── Swift/ │ │ ├── LLMEngine.swift │ │ └── OpenAIProtocol.swift │ ├── README.md │ └── prepare_libs.sh ├── pyproject.toml ├── python/ │ ├── mlc_llm/ │ │ ├── __init__.py │ │ ├── __main__.py │ │ ├── base.py │ │ ├── bench/ │ │ │ ├── __init__.py │ │ │ ├── __main__.py │ │ │ ├── api_endpoint.py │ │ │ ├── dataset.py │ │ │ ├── evaluation/ │ │ │ │ ├── gsm8k.py │ │ │ │ └── mmlu.py │ │ │ ├── request_processor.py │ │ │ └── request_record.py │ │ ├── cli/ │ │ │ ├── __init__.py │ │ │ ├── calibrate.py │ │ │ ├── chat.py │ │ │ ├── check_device.py │ │ │ ├── compile.py │ │ │ ├── convert_weight.py │ │ │ ├── delivery.py │ │ │ ├── disco_remote_socket_session.py │ │ │ ├── gen_config.py │ │ │ ├── lib_delivery.py │ │ │ ├── model_metadata.py │ │ │ ├── package.py │ │ │ ├── router.py │ │ │ ├── serve.py │ │ │ └── worker.py │ │ ├── compiler_pass/ │ │ │ ├── __init__.py │ │ │ ├── attach_cuda_graph_alloc_init_func.py │ │ │ ├── attach_embedding_allocator.py │ │ │ ├── attach_logit_processor.py │ │ │ ├── attach_sampler.py │ │ │ ├── attach_softmax_with_temperature.py │ │ │ ├── attach_spec_decode_aux_funcs.py │ │ │ ├── attach_support_info.py │ │ │ ├── blas_dispatch.py │ │ │ ├── clean_up_tir_attrs.py │ │ │ ├── dispatch_kv_cache_creation.py │ │ │ ├── dispatch_triton_kernel.py │ │ │ ├── estimate_memory_usage.py │ │ │ ├── fuse_add_norm.py │ │ │ ├── fuse_dequantize_matmul_ewise.py │ │ │ ├── fuse_dequantize_take.py │ │ │ ├── fuse_dequantize_transpose.py │ │ │ ├── fuse_ft_dequantize_matmul_epilogue.py │ │ │ ├── 
fuse_transpose_matmul.py │ │ │ ├── lift_global_buffer_alloc.py │ │ │ ├── low_batch_specialization.py │ │ │ ├── pipeline.py │ │ │ ├── pipeline_parallel_rewrite.py │ │ │ └── scatter_tuple_get_item.py │ │ ├── contrib/ │ │ │ ├── __init__.py │ │ │ └── embeddings/ │ │ │ ├── __init__.py │ │ │ ├── embeddings.py │ │ │ └── openai.py │ │ ├── conversation_template/ │ │ │ ├── __init__.py │ │ │ ├── cohere.py │ │ │ ├── deepseek.py │ │ │ ├── dolly.py │ │ │ ├── gemma.py │ │ │ ├── glm.py │ │ │ ├── gorilla.py │ │ │ ├── gpt.py │ │ │ ├── hermes.py │ │ │ ├── llama.py │ │ │ ├── llava.py │ │ │ ├── llm_jp.py │ │ │ ├── ministral3.py │ │ │ ├── ministral3_reasoning.py │ │ │ ├── mistral.py │ │ │ ├── nemotron.py │ │ │ ├── oasst.py │ │ │ ├── olmo.py │ │ │ ├── orion.py │ │ │ ├── phi.py │ │ │ ├── qwen2.py │ │ │ ├── redpajama.py │ │ │ ├── registry.py │ │ │ ├── rwkv.py │ │ │ ├── stablelm.py │ │ │ ├── tinyllama.py │ │ │ └── wizardlm.py │ │ ├── interface/ │ │ │ ├── __init__.py │ │ │ ├── calibrate.py │ │ │ ├── chat.py │ │ │ ├── compile.py │ │ │ ├── compiler_flags.py │ │ │ ├── convert_weight.py │ │ │ ├── gen_config.py │ │ │ ├── help.py │ │ │ ├── jit.py │ │ │ ├── package.py │ │ │ ├── router.py │ │ │ └── serve.py │ │ ├── json_ffi/ │ │ │ ├── __init__.py │ │ │ └── engine.py │ │ ├── libinfo.py │ │ ├── loader/ │ │ │ ├── __init__.py │ │ │ ├── huggingface_loader.py │ │ │ ├── loader.py │ │ │ ├── mapping.py │ │ │ ├── standard_loader.py │ │ │ ├── stats.py │ │ │ └── utils.py │ │ ├── model/ │ │ │ ├── __init__.py │ │ │ ├── baichuan/ │ │ │ │ ├── __init__.py │ │ │ │ ├── baichuan_loader.py │ │ │ │ └── baichuan_model.py │ │ │ ├── bert/ │ │ │ │ ├── __init__.py │ │ │ │ ├── bert_loader.py │ │ │ │ └── bert_model.py │ │ │ ├── chatglm3/ │ │ │ │ ├── __init__.py │ │ │ │ ├── chatglm3_loader.py │ │ │ │ └── chatglm3_model.py │ │ │ ├── cohere/ │ │ │ │ ├── __init__.py │ │ │ │ ├── cohere_loader.py │ │ │ │ └── cohere_model.py │ │ │ ├── deepseek/ │ │ │ │ ├── __init__.py │ │ │ │ ├── deepseek_loader.py │ │ │ │ └── deepseek_model.py │ │ │ 
├── deepseek_v2/ │ │ │ │ ├── __init__.py │ │ │ │ ├── deepseek_v2_loader.py │ │ │ │ └── deepseek_v2_model.py │ │ │ ├── eagle/ │ │ │ │ ├── __init__.py │ │ │ │ ├── eagle_loader.py │ │ │ │ └── eagle_model.py │ │ │ ├── gemma/ │ │ │ │ ├── __init__.py │ │ │ │ ├── gemma_loader.py │ │ │ │ └── gemma_model.py │ │ │ ├── gemma2/ │ │ │ │ ├── __init__.py │ │ │ │ ├── gemma2_loader.py │ │ │ │ └── gemma2_model.py │ │ │ ├── gemma3/ │ │ │ │ ├── __init__.py │ │ │ │ ├── gemma3_loader.py │ │ │ │ └── gemma3_model.py │ │ │ ├── gpt2/ │ │ │ │ ├── __init__.py │ │ │ │ ├── gpt2_loader.py │ │ │ │ └── gpt2_model.py │ │ │ ├── gpt_bigcode/ │ │ │ │ ├── __init__.py │ │ │ │ ├── gpt_bigcode_loader.py │ │ │ │ └── gpt_bigcode_model.py │ │ │ ├── gpt_j/ │ │ │ │ ├── __init__.py │ │ │ │ ├── gpt_j_loader.py │ │ │ │ └── gpt_j_model.py │ │ │ ├── gpt_neox/ │ │ │ │ ├── __init__.py │ │ │ │ ├── gpt_neox_loader.py │ │ │ │ └── gpt_neox_model.py │ │ │ ├── internlm/ │ │ │ │ ├── __init__.py │ │ │ │ ├── internlm_loader.py │ │ │ │ └── internlm_model.py │ │ │ ├── internlm2/ │ │ │ │ ├── __init__.py │ │ │ │ ├── internlm2_loader.py │ │ │ │ └── internlm2_model.py │ │ │ ├── llama/ │ │ │ │ ├── __init__.py │ │ │ │ ├── llama_loader.py │ │ │ │ └── llama_model.py │ │ │ ├── llama4/ │ │ │ │ ├── __init__.py │ │ │ │ ├── llama4_loader.py │ │ │ │ └── llama4_model.py │ │ │ ├── llava/ │ │ │ │ ├── __init__.py │ │ │ │ ├── llava_loader.py │ │ │ │ └── llava_model.py │ │ │ ├── medusa/ │ │ │ │ ├── __init__.py │ │ │ │ ├── medusa_loader.py │ │ │ │ └── medusa_model.py │ │ │ ├── minicpm/ │ │ │ │ ├── __init__.py │ │ │ │ ├── minicpm_loader.py │ │ │ │ └── minicpm_model.py │ │ │ ├── ministral3/ │ │ │ │ ├── __init__.py │ │ │ │ ├── ministral3_loader.py │ │ │ │ └── ministral3_model.py │ │ │ ├── mistral/ │ │ │ │ ├── __init__.py │ │ │ │ ├── mistral_loader.py │ │ │ │ └── mistral_model.py │ │ │ ├── mixtral/ │ │ │ │ ├── __init__.py │ │ │ │ ├── mixtral_loader.py │ │ │ │ └── mixtral_model.py │ │ │ ├── model.py │ │ │ ├── model_preset.py │ │ │ ├── nemotron/ │ │ │ │ 
├── __init__.py │ │ │ │ ├── nemotron_loader.py │ │ │ │ └── nemotron_model.py │ │ │ ├── olmo/ │ │ │ │ ├── __init__.py │ │ │ │ ├── olmo_loader.py │ │ │ │ └── olmo_model.py │ │ │ ├── orion/ │ │ │ │ ├── __init__.py │ │ │ │ ├── orion_loader.py │ │ │ │ └── orion_model.py │ │ │ ├── phi/ │ │ │ │ ├── __init__.py │ │ │ │ ├── phi_loader.py │ │ │ │ └── phi_model.py │ │ │ ├── phi3/ │ │ │ │ ├── __init__.py │ │ │ │ ├── phi3_loader.py │ │ │ │ └── phi3_model.py │ │ │ ├── phi3v/ │ │ │ │ ├── __init__.py │ │ │ │ ├── phi3v_image.py │ │ │ │ ├── phi3v_loader.py │ │ │ │ └── phi3v_model.py │ │ │ ├── qwen/ │ │ │ │ ├── __init__.py │ │ │ │ ├── qwen_loader.py │ │ │ │ └── qwen_model.py │ │ │ ├── qwen2/ │ │ │ │ ├── __init__.py │ │ │ │ ├── qwen2_loader.py │ │ │ │ └── qwen2_model.py │ │ │ ├── qwen2_5_vl/ │ │ │ │ ├── __init__.py │ │ │ │ └── qwen2_5_vl_model.py │ │ │ ├── qwen2_moe/ │ │ │ │ ├── __init__.py │ │ │ │ ├── qwen2_moe_loader.py │ │ │ │ └── qwen2_moe_model.py │ │ │ ├── qwen3/ │ │ │ │ ├── __init__.py │ │ │ │ ├── qwen3_loader.py │ │ │ │ └── qwen3_model.py │ │ │ ├── qwen3_moe/ │ │ │ │ ├── __init__.py │ │ │ │ ├── qwen3_moe_loader.py │ │ │ │ └── qwen3_moe_model.py │ │ │ ├── rwkv5/ │ │ │ │ ├── __init__.py │ │ │ │ ├── rwkv5_loader.py │ │ │ │ └── rwkv5_model.py │ │ │ ├── rwkv6/ │ │ │ │ ├── __init__.py │ │ │ │ ├── rwkv6_loader.py │ │ │ │ └── rwkv6_model.py │ │ │ ├── stable_lm/ │ │ │ │ ├── __init__.py │ │ │ │ ├── stablelm_loader.py │ │ │ │ └── stablelm_model.py │ │ │ ├── starcoder2/ │ │ │ │ ├── __init__.py │ │ │ │ ├── starcoder2_loader.py │ │ │ │ └── starcoder2_model.py │ │ │ └── vision/ │ │ │ ├── __init__.py │ │ │ ├── clip_vision.py │ │ │ └── image_processing.py │ │ ├── nn/ │ │ │ ├── __init__.py │ │ │ ├── expert.py │ │ │ ├── kv_cache.py │ │ │ └── rnn_state.py │ │ ├── op/ │ │ │ ├── __init__.py │ │ │ ├── attention.py │ │ │ ├── batch_matmul.py │ │ │ ├── batch_spec_verify.py │ │ │ ├── cutlass.py │ │ │ ├── extern.py │ │ │ ├── ft_gemm.py │ │ │ ├── moe_matmul.py │ │ │ ├── moe_misc.py │ │ │ ├── mrope.py │ │ 
│ ├── pipeline_parallel.py │ │ │ ├── top_p_pivot.py │ │ │ └── triton.py │ │ ├── protocol/ │ │ │ ├── __init__.py │ │ │ ├── conversation_protocol.py │ │ │ ├── debug_protocol.py │ │ │ ├── error_protocol.py │ │ │ ├── generation_config.py │ │ │ ├── microserving_protocol.py │ │ │ ├── mlc_chat_config.py │ │ │ └── openai_api_protocol.py │ │ ├── quantization/ │ │ │ ├── __init__.py │ │ │ ├── awq_quantization.py │ │ │ ├── block_scale_quantization.py │ │ │ ├── fp8_quantization.py │ │ │ ├── ft_quantization.py │ │ │ ├── group_quantization.py │ │ │ ├── model_quantization.py │ │ │ ├── no_quantization.py │ │ │ ├── per_tensor_quantization.py │ │ │ ├── quantization.py │ │ │ └── utils.py │ │ ├── router/ │ │ │ ├── __init__.py │ │ │ └── router.py │ │ ├── serve/ │ │ │ ├── __init__.py │ │ │ ├── _ffi_api.py │ │ │ ├── config.py │ │ │ ├── data.py │ │ │ ├── embedding_engine.py │ │ │ ├── engine.py │ │ │ ├── engine_base.py │ │ │ ├── engine_utils.py │ │ │ ├── entrypoints/ │ │ │ │ ├── __init__.py │ │ │ │ ├── debug_entrypoints.py │ │ │ │ ├── metrics_entrypoints.py │ │ │ │ ├── microserving_entrypoints.py │ │ │ │ └── openai_entrypoints.py │ │ │ ├── event_trace_recorder.py │ │ │ ├── radix_tree.py │ │ │ ├── request.py │ │ │ ├── server/ │ │ │ │ ├── __init__.py │ │ │ │ ├── popen_server.py │ │ │ │ └── server_context.py │ │ │ └── sync_engine.py │ │ ├── support/ │ │ │ ├── __init__.py │ │ │ ├── argparse.py │ │ │ ├── auto_config.py │ │ │ ├── auto_device.py │ │ │ ├── auto_target.py │ │ │ ├── auto_weight.py │ │ │ ├── config.py │ │ │ ├── constants.py │ │ │ ├── convert_tiktoken.py │ │ │ ├── download_cache.py │ │ │ ├── logging.py │ │ │ ├── max_thread_check.py │ │ │ ├── preshard.py │ │ │ ├── random.py │ │ │ ├── style.py │ │ │ ├── tensor_parallel.py │ │ │ └── tqdm.py │ │ ├── testing/ │ │ │ ├── __init__.py │ │ │ ├── debug_chat.py │ │ │ ├── debug_compare.py │ │ │ └── pytest_utils.py │ │ └── tokenizers/ │ │ ├── __init__.py │ │ ├── _ffi_api.py │ │ ├── streamer.py │ │ └── tokenizers.py │ ├── requirements.txt │ └── 
setup.py ├── scripts/ │ ├── build_mlc_for_docs.sh │ ├── build_site.sh │ ├── check_url_validity.py │ ├── gh_deploy_site.sh │ └── local_deploy_site.sh ├── site/ │ ├── .gitignore │ ├── CNAME │ ├── Gemfile │ ├── _config.yml │ ├── _includes/ │ │ ├── head.html │ │ └── hero.html │ ├── assets/ │ │ └── css/ │ │ └── hero.scss │ ├── index.md │ └── privacy.md ├── tests/ │ ├── README.md │ ├── cpp/ │ │ └── conv_template_unittest.cc │ └── python/ │ ├── __init__.py │ ├── compiler_pass/ │ │ └── test_fuse_ft_dequantize_matmul_epilogue.py │ ├── conftest.py │ ├── conversation_template/ │ │ ├── test_conversation_protocol.py │ │ └── test_llama_template.py │ ├── integration/ │ │ └── test_model_compile.py │ ├── json_ffi/ │ │ ├── test_json_ffi_engine.py │ │ ├── test_json_ffi_engine_image.py │ │ └── test_json_ffi_engine_mock.py │ ├── loader/ │ │ ├── test_awq.py │ │ └── test_huggingface.py │ ├── model/ │ │ ├── test_gemma3.py │ │ ├── test_gpt2.py │ │ ├── test_gptNeox.py │ │ ├── test_kv_cache.py │ │ ├── test_llama.py │ │ ├── test_llama_quantization.py │ │ ├── test_mistral.py │ │ ├── test_phi.py │ │ └── test_qwen3_embedding.py │ ├── op/ │ │ ├── test_batch_spec_verify.py │ │ ├── test_fp8_block_matmul.py │ │ ├── test_mrope.py │ │ ├── test_top_p_pivot.py │ │ ├── test_tree_attn.py │ │ └── test_two_stage_softmax.py │ ├── quantization/ │ │ ├── test_awq_quantization.py │ │ └── test_group_quantization.py │ ├── router/ │ │ └── test_router.py │ ├── serve/ │ │ ├── evaluate_engine.py │ │ ├── server/ │ │ │ ├── conftest.py │ │ │ ├── test_embedding_server.py │ │ │ ├── test_server.py │ │ │ ├── test_server_function_call.py │ │ │ └── test_server_image.py │ │ ├── test_embedding_engine.py │ │ ├── test_event_trace_recorder.py │ │ ├── test_radix_tree.py │ │ ├── test_serve_async_engine.py │ │ ├── test_serve_async_engine_spec.py │ │ ├── test_serve_engine.py │ │ ├── test_serve_engine_grammar.py │ │ ├── test_serve_engine_image.py │ │ ├── test_serve_engine_mock.py │ │ ├── test_serve_engine_prefix_cache.py │ │ ├── 
test_serve_engine_rnn.py │ │ ├── test_serve_engine_spec.py │ │ └── test_serve_sync_engine.py │ ├── support/ │ │ ├── test_auto_config.py │ │ ├── test_auto_weight.py │ │ ├── test_cli_convert_weight.py │ │ └── test_convert_weight_lora_merge.py │ └── tokenizers/ │ └── test_streamer.py ├── version.py └── web/ ├── Makefile ├── README.md ├── emcc/ │ └── mlc_wasm_runtime.cc └── prep_emcc_deps.sh ================================================ FILE CONTENTS ================================================ ================================================ FILE: .clang-format ================================================ # Run the following command to reformat a file: # clang-format -i -style=Google # Or use clang-format-diff to only reformat the changed lines: # https://clang.llvm.org/docs/ClangFormat.html BasedOnStyle: Google DerivePointerAlignment: false ColumnLimit: 100 PointerAlignment: Left ================================================ FILE: .github/ISSUE_TEMPLATE/bug-report.md ================================================ --- name: "🐛 Bug Report" about: Submit a bug report to help us improve MLC-LLM title: '[Bug] ' labels: ['bug'] assignees: '' --- ## 🐛 Bug ## To Reproduce Steps to reproduce the behavior: 1. 1. 1. ## Expected behavior ## Environment - Platform (e.g. WebGPU/Vulkan/IOS/Android/CUDA): - Operating system (e.g. Ubuntu/Windows/MacOS/...): - Device (e.g. iPhone 12 Pro, PC+RTX 3090, ...) - How you installed MLC-LLM (`conda`, source): - How you installed TVM (`pip`, source): - Python version (e.g. 
3.10): - GPU driver version (if applicable): - CUDA/cuDNN version (if applicable): - TVM Hash Tag (`python -c "import tvm; print('\n'.join(f'{k}: {v}' for k, v in tvm.support.libinfo().items()))"`, applicable if you compile models): - Any other relevant information: ## Additional context ================================================ FILE: .github/ISSUE_TEMPLATE/config.yml ================================================ blank_issues_enabled: false contact_links: - name: Check the MLC-LLM Documentation url: https://llm.mlc.ai/docs/ about: Our documentation might provide answers to your questions. - name: Chat on Discord url: https://discord.gg/9Xpy2HGBuD about: Join the Discord Server to live chat with the community. ================================================ FILE: .github/ISSUE_TEMPLATE/documentation.md ================================================ --- name: "\U0001F4DA Documentation" about: Report an issue related to https://llm.mlc.ai/docs/ title: '[Doc] ' labels: ['documentation'] assignees: '' --- ## 📚 Documentation ### Suggestion ### Bug - Link to the buggy documentation/tutorial: - Description of the bug: ================================================ FILE: .github/ISSUE_TEMPLATE/feature-request.md ================================================ --- name: "\U0001F680 Feature Request" about: Submit a proposal/request for a new MLC-LLM feature, or an enhancement on existing features. title: '[Feature Request] ' labels: ['feature request'] assignees: '' --- ## 🚀 Feature ## Motivation ## Alternatives ## Additional context ================================================ FILE: .github/ISSUE_TEMPLATE/general.md ================================================ --- name: "❓ General Questions" about: General questions you have about MLC-LLM. 
title: '[Question] ' labels: ['question'] assignees: '' --- ## ❓ General Questions ================================================ FILE: .github/ISSUE_TEMPLATE/model-request.md ================================================ --- name: "️️⚙️ Model Request" about: Request a new model in MLC-LLM title: '[Model Request] ' labels: ['new-models'] assignees: '' --- ## ⚙️ Request New Models - Link to an existing implementation (e.g. Hugging Face/Github): - Is this model architecture supported by MLC-LLM? (the list of [supported models](https://llm.mlc.ai/docs/prebuilt_models.html)) ## Additional context ================================================ FILE: .github/ISSUE_TEMPLATE/speed-report.md ================================================ --- name: " 🏎️ Speed Report" about: Submit a speed report of a model running in MLC-LLM title: '[Speed] ' labels: ['performance'] assignees: '' --- # 🏎️ Speed Report - The model code: - The model configuration (e.g. quantization mode, running data type, etc.): - Device (e.g. 
MacBook Pro M2, PC+RTX 3080): - OS (if applicable): - Encode speed (Token/s): - Decode speed (Token/s): - Memory usage (if applicable): ================================================ FILE: .github/ISSUE_TEMPLATE/tracking.md ================================================ --- name: "Tracking" about: A tracking issue that tracks an ongoing item in the project title: '[Tracking] ' labels: ['status: tracking'] assignees: '' --- ## Overview ## Action Items - [ ] ## Links to Related Issues and PRs ================================================ FILE: .github/workflows/documentation.yaml ================================================ name: Build Docs on: push: branches: - main jobs: test_linux: name: Deploy Docs runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 with: submodules: recursive - name: Configuring build Environment run: | sudo apt-get update python -m pip install -U pip wheel - name: Setup Ruby uses: ruby/setup-ruby@v1 with: ruby-version: '3.0' - name: Installing dependencies run: | python -m pip install -r docs/requirements.txt gem install jekyll jekyll-remote-theme - name: Deploying on GitHub Pages if: github.ref == 'refs/heads/main' run: | git remote set-url origin https://x-access-token:${{ secrets.MLC_GITHUB_TOKEN }}@github.com/$GITHUB_REPOSITORY git config --global user.email "mlc-gh-actions-bot@nomail" git config --global user.name "mlc-gh-actions-bot" ./scripts/gh_deploy_site.sh ================================================ FILE: .github/workflows/update-relax.yaml ================================================ name: 'Relax Submodule Sync' on: workflow_dispatch: jobs: sync: name: 'Relax Submodule Sync' runs-on: ubuntu-latest defaults: run: shell: bash steps: - name: Checkout uses: actions/checkout@v4 with: submodules: true - name: Git Submodule Update run: | git submodule update --remote 3rdparty/tvm - name: Commit update env: GITHUB_TOKEN: ${{ secrets.MLC_GITHUB_TOKEN }} run: | git config --global user.name 'Git bot' git config --global 
user.email 'bot@noreply.github.com' git remote set-url origin https://$GITHUB_TOKEN@github.com/mlc-ai/mlc-llm git commit -am "Auto updated submodule references" && git push || echo "No changes to commit" ================================================ FILE: .github/workflows/windows-build.yaml ================================================ # GH actions. # We use it to cover windows builds # Jenkins is still the primary CI name: Windows CI on: push: branches: - main pull_request: branches: - main jobs: Windows: runs-on: windows-latest defaults: run: shell: 'cmd /C call {0}' steps: - name: Git config run: >- git config --system core.longpaths true - uses: actions/checkout@v3 with: submodules: 'recursive' - uses: conda-incubator/setup-miniconda@v3 with: activate-environment: mlc-llm-build channel-priority: strict environment-file: ci/build-environment.yaml auto-activate-base: false - name: Conda info run: | conda info conda list python --version - name: Build MLC-LLM run: >- ci/task/build_win.bat ================================================ FILE: .gitignore ================================================ tmp/ dist/ params/ debug/ *.bak # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class .DS_Store *.S # C extensions *.so build/ *.ll .npm # Distribution / packaging .Python env/ build/ build-*/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ pip-wheel-metadata/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST .conda/ # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. 
*.manifest *.spec # Generated by python/gen_requirements.py python/requirements/*.txt # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover *.py,cover .hypothesis/ .pytest_cache/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 db.sqlite3-journal # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ docs/_staging/ # PyBuilder target/ /target/ # Jupyter Notebook .ipynb_checkpoints # IPython profile_default/ ipython_config.py # pyenv .python-version # pipenv # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. # However, in case of collaboration, if having platform-specific dependencies or dependencies # having no cross-platform support, pipenv may install dependencies that don't work, or not # install all needed dependencies. #Pipfile.lock # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow __pypackages__/ # Celery stuff celerybeat-schedule celerybeat.pid # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject *~ *.pyc *~ config.mk config.cmake Win32 *.dir perf *.wasm .emscripten ## IOS DerivedData/ ## Java *.class jvm/*/target/ jvm/*/*/target/ jvm/native/*/generated jvm/native/src/main/native/org_apache_tvm_native_c_api.h *.worksheet *.idea *.iml *.classpath *.project *.settings */node_modules/ ## Various settings *.pbxuser !default.pbxuser *.mode1v3 !default.mode1v3 *.mode2v3 !default.mode2v3 *.perspectivev3 !default.perspectivev3 xcuserdata/ .pkl_memoize_* .emscripten* .m2 # Compiled Dynamic libraries *.so *.dylib *.dll # Compiled Object files *.slo *.lo *.o *.obj # Precompiled Headers *.gch *.pch # Compiled Static libraries *.lai *.la *.a *.lib # Executables *.exe *.out *.app ## Other *.moved-aside *.xccheckout *.xcscmblueprint .DS_Store tags cscope* *.lock # vim temporary files *.swp *.swo # TVM generated code perf .bash_history # *.json *.params *.ro *.onnx *.h5 synset.txt cat.jpg cat.png docs.tgz cat.png *.mlmodel tvm_u.* tvm_t.* # Mac OS X .DS_Store # Jetbrain .idea .ipython .jupyter .nv .pylint.d .python_history .pytest_cache .local cmake-build-debug # Visual Studio .vs # Visual Studio Code .vscode # tmp file .nfs* # keys *.pem *.p12 *.pfx *.cer *.crt *.der # patch sentinel patched.txt # Python type checking .mypy_cache/ .pyre/ # pipenv files Pipfile Pipfile.lock # conda package artifacts conda/Dockerfile.cuda* conda/pkg .node_repl_history # nix files .envrc *.nix # Docker files .sudo_as_admin_successful # Downloaded models/datasets .tvm_test_data .dgl .caffe2 # Local docs build _docs/ jvm/target .config/configstore/ .ci-py-scripts/ # Generated Hexagon files src/runtime/hexagon/rpc/hexagon_rpc.h src/runtime/hexagon/rpc/hexagon_rpc_skel.c src/runtime/hexagon/rpc/hexagon_rpc_stub.c # 
Local tvm-site checkout tvm-site/ # Generated docs files gallery/how_to/work_with_microtvm/micro_tvmc.py # Test sample data files !tests/python/ci/sample_prs/*.json # Used in CI to communicate between Python and Jenkins .docker-image-names/ # Printed TIR code on disk *.tir # GDB history file .gdb_history dist ================================================ FILE: .gitmodules ================================================ [submodule "3rdparty/argparse"] path = 3rdparty/argparse url = https://github.com/p-ranav/argparse [submodule "3rdparty/tokenizers-cpp"] path = 3rdparty/tokenizers-cpp url = https://github.com/mlc-ai/tokenizers-cpp [submodule "3rdparty/googletest"] path = 3rdparty/googletest url = https://github.com/google/googletest.git [submodule "3rdparty/tvm"] path = 3rdparty/tvm url = https://github.com/mlc-ai/relax.git [submodule "3rdparty/stb"] path = 3rdparty/stb url = https://github.com/nothings/stb.git [submodule "3rdparty/xgrammar"] path = 3rdparty/xgrammar url = https://github.com/mlc-ai/xgrammar.git ================================================ FILE: .pre-commit-config.yaml ================================================ # To use: # # pre-commit run -a # # Or: # # pre-commit install # (runs every time you commit in git) # # To update this file: # # pre-commit autoupdate # # See https://github.com/pre-commit/pre-commit # Note the pre-commit hooks should only be used for formatting, not for linting. # For linting consider using CI. 
repos: # Standard hooks - repo: https://github.com/pre-commit/pre-commit-hooks rev: v5.0.0 hooks: - id: check-added-large-files - id: check-case-conflict - id: check-merge-conflict - id: check-symlinks - id: end-of-file-fixer - id: mixed-line-ending - id: requirements-txt-fixer - id: trailing-whitespace # Changes tabs to spaces - repo: https://github.com/Lucas-C/pre-commit-hooks rev: v1.5.5 hooks: - id: remove-tabs - id: remove-crlf # Formatters - repo: https://github.com/psf/black-pre-commit-mirror rev: 24.8.0 hooks: - id: black - repo: https://github.com/pycqa/isort rev: 5.13.2 hooks: - id: isort - repo: https://github.com/pre-commit/mirrors-clang-format rev: v19.1.1 hooks: - id: clang-format types_or: [c++, c, cuda] exclude: | (?x)^(.*cubin.cpp$ | .*fmha_cubin.h | 3rdparty/.*)$ - repo: https://github.com/cheshirekow/cmake-format-precommit rev: v0.6.13 hooks: - id: cmake-format additional_dependencies: [pyyaml>=5.1] ================================================ FILE: .pylintrc ================================================ [MESSAGES CONTROL] disable=too-many-positional-arguments,duplicate-code ================================================ FILE: CMakeLists.txt ================================================ cmake_minimum_required(VERSION 3.18) project(mlc_llm C CXX) include(CheckCXXCompilerFlag) if(MSVC) set(CMAKE_CXX_FLAGS "/fp:fast ${CMAKE_CXX_FLAGS}") else() set(CMAKE_CXX_FLAGS "-ffast-math ${CMAKE_CXX_FLAGS}") endif() if(EXISTS ${CMAKE_BINARY_DIR}/config.cmake) include(${CMAKE_BINARY_DIR}/config.cmake) else() if(EXISTS ${CMAKE_SOURCE_DIR}/config.cmake) include(${CMAKE_SOURCE_DIR}/config.cmake) endif() endif() if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING "Build type" FORCE) message(STATUS "Setting default build type to " ${CMAKE_BUILD_TYPE}) endif(NOT CMAKE_BUILD_TYPE) option(MLC_HIDE_PRIVATE_SYMBOLS "Hide private symbols" ON) option(MLC_LLM_BUILD_PYTHON_MODULE "Build Python module with scikit-build-core" OFF) 
# ---------------------------------------------------------------------------
# Build-wide switches
# ---------------------------------------------------------------------------

# A static-lib install requires TVM's runtime to be built statically as well.
if(MLC_LLM_INSTALL_STATIC_LIB)
  set(BUILD_STATIC_RUNTIME ON)
endif()

# Symbol visibility: forward the request to TVM (HIDE_PRIVATE_SYMBOLS) and
# remember the flag that is later appended to the link options of our own
# shared libraries. MSVC has no -fvisibility, so the flag stays empty there.
if(MLC_HIDE_PRIVATE_SYMBOLS)
  set(HIDE_PRIVATE_SYMBOLS ON)
  message(STATUS "Hide private symbols")
endif()
if(MLC_HIDE_PRIVATE_SYMBOLS AND NOT MSVC)
  set(MLC_VISIBILITY_FLAG "-fvisibility=hidden")
else()
  set(MLC_VISIBILITY_FLAG "")
endif()

option(BUILD_CPP_TEST "Build cpp unittests" OFF)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)

# ---------------------------------------------------------------------------
# tvm runtime config: minimize runtime components
# ---------------------------------------------------------------------------
set(USE_RPC OFF)
set(USE_MICRO OFF)
set(USE_GRAPH_EXECUTOR OFF)
set(USE_GRAPH_EXECUTOR_DEBUG OFF)
set(USE_AOT_EXECUTOR OFF)
set(USE_PROFILER OFF)
set(USE_GTEST OFF)
set(USE_LIBBACKTRACE OFF)
set(BUILD_DUMMY_LIBTVM ON)

# Resolve the TVM checkout. Precedence: an already-defined CMake variable,
# then the TVM_SOURCE_DIR environment variable, then the bundled submodule.
if(NOT DEFINED TVM_SOURCE_DIR)
  set(TVM_SOURCE_DIR 3rdparty/tvm)
  if(DEFINED ENV{TVM_SOURCE_DIR})
    set(TVM_SOURCE_DIR "$ENV{TVM_SOURCE_DIR}")
  endif()
endif()
message(STATUS "TVM_SOURCE_DIR: ${TVM_SOURCE_DIR}")
add_subdirectory(${TVM_SOURCE_DIR} tvm EXCLUDE_FROM_ALL)

set(MLC_LLM_RUNTIME_LINKER_LIB "")

# ---------------------------------------------------------------------------
# Third-party sources compiled alongside MLC LLM
# ---------------------------------------------------------------------------
set(TOKENZIER_CPP_PATH 3rdparty/tokenizers-cpp)
add_subdirectory(${TOKENZIER_CPP_PATH} tokenizers EXCLUDE_FROM_ALL)

set(XGRAMMAR_PATH 3rdparty/xgrammar)

# Gather MLC LLM sources plus the xgrammar sources; xgrammar's pybind
# bindings are filtered out before the two lists are merged.
tvm_file_glob(GLOB_RECURSE MLC_LLM_SRCS cpp/*.cc)
tvm_file_glob(GLOB_RECURSE XGRAMMAR_SRCS ${XGRAMMAR_PATH}/cpp/*.cc)
list(FILTER XGRAMMAR_SRCS EXCLUDE REGEX "${XGRAMMAR_PATH}/cpp/pybind/.*\\.cc")
list(APPEND MLC_LLM_SRCS ${XGRAMMAR_SRCS})

# ---------------------------------------------------------------------------
# Object library shared by every mlc_llm library flavour
# ---------------------------------------------------------------------------
add_library(mlc_llm_objs OBJECT ${MLC_LLM_SRCS})

set(MLC_LLM_INCLUDES
    ${TVM_SOURCE_DIR}/include
    ${TVM_SOURCE_DIR}/3rdparty/dlpack/include)

# Keep any definitions supplied earlier (e.g. by config.cmake) and append ours.
set(MLC_LLM_COMPILE_DEFS
    ${MLC_LLM_COMPILE_DEFS}
    __STDC_FORMAT_MACROS=1
    XGRAMMAR_ENABLE_LOG_DEBUG=0)

target_compile_definitions(
  mlc_llm_objs
  PRIVATE ${MLC_LLM_COMPILE_DEFS}
          -DMLC_LLM_EXPORTS)
target_include_directories(
  mlc_llm_objs
  PRIVATE ${MLC_LLM_INCLUDES}
          3rdparty/stb)
target_include_directories(mlc_llm_objs PRIVATE ${TOKENZIER_CPP_PATH}/include)
target_include_directories(mlc_llm_objs PRIVATE ${XGRAMMAR_PATH}/include)
# xgrammar still depends on picojson - use its bundled copy
target_include_directories(mlc_llm_objs
                           PRIVATE ${XGRAMMAR_PATH}/3rdparty/picojson)
target_link_libraries(mlc_llm_objs PRIVATE tvm_ffi_header)

# Shared and static flavours are both assembled from the objects compiled once
# in mlc_llm_objs; the generator expression expands to that object file list.
add_library(mlc_llm SHARED $<TARGET_OBJECTS:mlc_llm_objs>)
add_library(mlc_llm_static STATIC $<TARGET_OBJECTS:mlc_llm_objs>)
add_dependencies(mlc_llm_static tokenizers_cpp sentencepiece-static
                 tokenizers_c tvm_runtime)
# Give the static archive the same base name as the shared library.
set_target_properties(mlc_llm_static PROPERTIES OUTPUT_NAME mlc_llm)

target_link_libraries(mlc_llm PUBLIC tvm_runtime)
target_link_libraries(mlc_llm PRIVATE tokenizers_cpp)

# Optional flash-attention library shipped inside the TVM tree.
find_library(FLASH_ATTN_LIBRARY flash_attn
             HINTS ${TVM_SOURCE_DIR}/*/3rdparty/libflash_attn/src)
# find_library() stores "<VAR>-NOTFOUND" on failure, which if() treats as
# false; testing truthiness also covers an empty or OFF user override.
if(NOT FLASH_ATTN_LIBRARY)
  message(
    WARNING
      "Cannot find libflash_attn. The model must not have been built with --use-flash-attn-mqa option."
  )
else()
  target_link_libraries(mlc_llm PUBLIC -Wl,--no-as-needed
                                       ${FLASH_ATTN_LIBRARY})
endif()

# Debug builds enable verbose TVM logging in every library flavour.
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
  target_compile_definitions(mlc_llm PRIVATE "TVM_LOG_DEBUG")
  target_compile_definitions(mlc_llm_objs PRIVATE "TVM_LOG_DEBUG")
  target_compile_definitions(mlc_llm_static PRIVATE "TVM_LOG_DEBUG")
endif()

if(BUILD_CPP_TEST)
  message(STATUS "Building cpp unittests")
  add_subdirectory(3rdparty/googletest)
  file(GLOB_RECURSE MLC_LLM_TEST_SRCS
       ${PROJECT_SOURCE_DIR}/tests/cpp/*unittest.cc)
  add_executable(mlc_llm_cpp_tests ${MLC_LLM_TEST_SRCS})
  target_include_directories(mlc_llm_cpp_tests PRIVATE ${MLC_LLM_INCLUDES})
  target_include_directories(mlc_llm_cpp_tests
                             PRIVATE ${PROJECT_SOURCE_DIR}/cpp)
  target_include_directories(
    mlc_llm_cpp_tests PRIVATE ${gtest_SOURCE_DIR}/include ${gtest_SOURCE_DIR})
  target_link_libraries(mlc_llm_cpp_tests PUBLIC mlc_llm gtest gtest_main)
endif()

# Android builds additionally link the platform "log" library.
if(CMAKE_SYSTEM_NAME STREQUAL "Android")
  target_link_libraries(mlc_llm PRIVATE log)
  target_link_libraries(tokenizers_cpp PRIVATE log)
endif()

# Variant of the shared library that links against the full `tvm` target
# instead of `tvm_runtime`.
add_library(mlc_llm_module SHARED $<TARGET_OBJECTS:mlc_llm_objs>)
target_link_libraries(mlc_llm_module PUBLIC tvm)
target_link_libraries(mlc_llm_module PRIVATE tokenizers_cpp)

set_property(
  TARGET mlc_llm_module
  APPEND
  PROPERTY LINK_OPTIONS "${MLC_VISIBILITY_FLAG}")
set_property(
  TARGET mlc_llm
  APPEND
  PROPERTY LINK_OPTIONS "${MLC_VISIBILITY_FLAG}")

# The tokenizers dependency is built from Rust, so cargo is mandatory.
find_program(CARGO_EXECUTABLE cargo)
if(NOT CARGO_EXECUTABLE)
  message(FATAL_ERROR "Cargo is not found! Please install cargo.")
endif()

# when this option is on, we install all static lib deps into lib
if(MLC_LLM_INSTALL_STATIC_LIB)
  install(TARGETS mlc_llm_static tokenizers_cpp sentencepiece-static
                  tvm_runtime LIBRARY DESTINATION lib${LIB_SUFFIX})
  # tokenizers need special handling as it builds from rust
  if(MSVC)
    install(FILES ${CMAKE_CURRENT_BINARY_DIR}/tokenizers/libtokenizers_c.lib
            DESTINATION lib${LIB_SUFFIX})
  else()
    install(FILES ${CMAKE_CURRENT_BINARY_DIR}/tokenizers/libtokenizers_c.a
            DESTINATION lib${LIB_SUFFIX})
  endif()
else()
  install(
    TARGETS tvm_runtime
            mlc_llm
            mlc_llm_module
            mlc_llm_static
            tokenizers_cpp
            sentencepiece-static
            RUNTIME_DEPENDENCY_SET
            tokenizers_c
    RUNTIME DESTINATION bin
    LIBRARY DESTINATION lib${LIB_SUFFIX})
endif()

# Python package installation configuration. This section ensures that all
# necessary files are installed for the Python wheel.
if(MLC_LLM_BUILD_PYTHON_MODULE)
  message(STATUS "Configuring Python package installation")

  # Set RPATH for mlc_llm and mlc_llm_module to find other libraries relatively
  if(APPLE)
    # macOS uses @loader_path
    set_target_properties(mlc_llm PROPERTIES INSTALL_RPATH "@loader_path")
    set_target_properties(mlc_llm_module PROPERTIES INSTALL_RPATH
                                                    "@loader_path")
  elseif(LINUX OR CMAKE_SYSTEM_NAME STREQUAL "Linux")
    # Linux uses $ORIGIN. The LINUX variable only exists in CMake >= 3.25,
    # while this project accepts 3.18, so CMAKE_SYSTEM_NAME is checked too;
    # otherwise older CMake would silently skip setting the RPATH.
    set_target_properties(mlc_llm PROPERTIES INSTALL_RPATH "\$ORIGIN")
    set_target_properties(mlc_llm_module PROPERTIES INSTALL_RPATH "\$ORIGIN")
  endif()

  # Install compiled shared libraries
  install(TARGETS mlc_llm DESTINATION ".")
  install(TARGETS
mlc_llm_module DESTINATION ".") install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/cpp/" DESTINATION "cpp/") install(DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/web/" DESTINATION "web/") install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/README.md" "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE" "${CMAKE_CURRENT_SOURCE_DIR}/NOTICE" DESTINATION ".") message(STATUS "Python package installation configured") endif() ================================================ FILE: CONTRIBUTORS.md ================================================ MLC LLM Contributors ==================== ## List of Contributors - [Full List of Contributors](https://github.com/mlc-ai/mlc-llm/graphs/contributors) ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. 
"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. 
Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: NOTICE ================================================ MLC LLM Copyright (c) 2023-2025 by MLC LLM Contributors ================================================ FILE: README.md ================================================
# MLC LLM [![Installation](https://img.shields.io/badge/docs-latest-green)](https://llm.mlc.ai/docs/) [![License](https://img.shields.io/badge/license-apache_2-blue)](https://github.com/mlc-ai/mlc-llm/blob/main/LICENSE) [![Join Discord](https://img.shields.io/badge/Join-Discord-7289DA?logo=discord&logoColor=white)](https://discord.gg/9Xpy2HGBuD) [![Related Repository: WebLLM](https://img.shields.io/badge/Related_Repo-WebLLM-fafbfc?logo=github)](https://github.com/mlc-ai/web-llm/) **Universal LLM Deployment Engine with ML Compilation** [Get Started](https://llm.mlc.ai/docs/get_started/quick_start) | [Documentation](https://llm.mlc.ai/docs) | [Blog](https://blog.mlc.ai/)
## About MLC LLM is a machine learning compiler and high-performance deployment engine for large language models. The mission of this project is to enable everyone to develop, optimize, and deploy AI models natively on everyone's platforms. 
AMD GPU NVIDIA GPU Apple GPU Intel GPU
Linux / Win ✅ Vulkan, ROCm ✅ Vulkan, CUDA N/A ✅ Vulkan
macOS ✅ Metal (dGPU) N/A ✅ Metal ✅ Metal (iGPU)
Web Browser ✅ WebGPU and WASM
iOS / iPadOS ✅ Metal on Apple A-series GPU
Android ✅ OpenCL on Adreno GPU ✅ OpenCL on Mali GPU
MLC LLM compiles and runs code on MLCEngine -- a unified high-performance LLM inference engine across the above platforms. MLCEngine provides an OpenAI-compatible API available through a REST server, Python, JavaScript, iOS, and Android, all backed by the same engine and compiler that we keep improving with the community. ## Get Started Please visit our [documentation](https://llm.mlc.ai/docs/) to get started with MLC LLM. - [Installation](https://llm.mlc.ai/docs/install/mlc_llm) - [Quick start](https://llm.mlc.ai/docs/get_started/quick_start) - [Introduction](https://llm.mlc.ai/docs/get_started/introduction) ## Citation Please consider citing our project if you find it useful: ```bibtex @software{mlc-llm, author = {{MLC team}}, title = {{MLC-LLM}}, url = {https://github.com/mlc-ai/mlc-llm}, year = {2023-2025} } ``` The underlying techniques of MLC LLM include:
References (Click to expand) ```bibtex @inproceedings{tensorir, author = {Feng, Siyuan and Hou, Bohan and Jin, Hongyi and Lin, Wuwei and Shao, Junru and Lai, Ruihang and Ye, Zihao and Zheng, Lianmin and Yu, Cody Hao and Yu, Yong and Chen, Tianqi}, title = {TensorIR: An Abstraction for Automatic Tensorized Program Optimization}, year = {2023}, isbn = {9781450399166}, publisher = {Association for Computing Machinery}, address = {New York, NY, USA}, url = {https://doi.org/10.1145/3575693.3576933}, doi = {10.1145/3575693.3576933}, booktitle = {Proceedings of the 28th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2}, pages = {804–817}, numpages = {14}, keywords = {Tensor Computation, Machine Learning Compiler, Deep Neural Network}, location = {Vancouver, BC, Canada}, series = {ASPLOS 2023} } @inproceedings{metaschedule, author = {Shao, Junru and Zhou, Xiyou and Feng, Siyuan and Hou, Bohan and Lai, Ruihang and Jin, Hongyi and Lin, Wuwei and Masuda, Masahiro and Yu, Cody Hao and Chen, Tianqi}, booktitle = {Advances in Neural Information Processing Systems}, editor = {S. Koyejo and S. Mohamed and A. Agarwal and D. Belgrave and K. Cho and A. 
Oh}, pages = {35783--35796}, publisher = {Curran Associates, Inc.}, title = {Tensor Program Optimization with Probabilistic Programs}, url = {https://proceedings.neurips.cc/paper_files/paper/2022/file/e894eafae43e68b4c8dfdacf742bcbf3-Paper-Conference.pdf}, volume = {35}, year = {2022} } @inproceedings{tvm, author = {Tianqi Chen and Thierry Moreau and Ziheng Jiang and Lianmin Zheng and Eddie Yan and Haichen Shen and Meghan Cowan and Leyuan Wang and Yuwei Hu and Luis Ceze and Carlos Guestrin and Arvind Krishnamurthy}, title = {{TVM}: An Automated {End-to-End} Optimizing Compiler for Deep Learning}, booktitle = {13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18)}, year = {2018}, isbn = {978-1-939133-08-3}, address = {Carlsbad, CA}, pages = {578--594}, url = {https://www.usenix.org/conference/osdi18/presentation/chen}, publisher = {USENIX Association}, month = oct, } ```
================================================ FILE: android/.gitignore ================================================ app/src/main/jni/*.h app/src/main/jni/*.cc app/src/main/obj *.iml .gradle /local.properties /.idea/caches /.idea/libraries /.idea/modules.xml /.idea/workspace.xml /.idea/navEditor.xml /.idea/assetWizardSettings.xml .DS_Store /build /captures .externalNativeBuild .cxx local.properties ================================================ FILE: android/MLCChat/README.md ================================================ # MLC-LLM Android Checkout [Documentation page](https://llm.mlc.ai/docs/deploy/android.html) for more information. - run `mlc_llm package` - open this `MLCChat/` folder as a project in Android Studio ================================================ FILE: android/MLCChat/app/.gitignore ================================================ /build /src/main/libs ================================================ FILE: android/MLCChat/app/build.gradle ================================================ plugins { id 'com.android.application' id 'org.jetbrains.kotlin.android' } android { namespace 'ai.mlc.mlcchat' compileSdk 35 defaultConfig { applicationId "ai.mlc.mlcchat" minSdk 26 targetSdk 33 versionCode 1 versionName "1.0" testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner" vectorDrawables { useSupportLibrary true } } buildTypes { release { minifyEnabled false proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro' } } compileOptions { sourceCompatibility JavaVersion.VERSION_1_8 targetCompatibility JavaVersion.VERSION_1_8 } kotlinOptions { jvmTarget = '1.8' } buildFeatures { compose true } composeOptions { kotlinCompilerExtensionVersion '1.4.3' } packagingOptions { resources { excludes += '/META-INF/{AL2.0,LGPL2.1}' } } } dependencies { implementation project(":mlc4j") implementation 'androidx.core:core-ktx:1.10.1' implementation 'androidx.lifecycle:lifecycle-runtime-ktx:2.6.1' implementation 
'com.github.jeziellago:compose-markdown:0.5.2' implementation 'androidx.activity:activity-compose:1.7.1' implementation platform('androidx.compose:compose-bom:2022.10.00') implementation 'androidx.lifecycle:lifecycle-viewmodel-compose:2.6.1' implementation 'androidx.compose.ui:ui' implementation 'androidx.compose.ui:ui-graphics' implementation 'androidx.compose.ui:ui-tooling-preview' implementation 'androidx.compose.material3:material3:1.1.0' implementation 'androidx.compose.material:material-icons-extended' implementation 'androidx.appcompat:appcompat:1.6.1' implementation 'androidx.navigation:navigation-compose:2.5.3' implementation 'com.google.code.gson:gson:2.10.1' implementation fileTree(dir: 'src/main/libs', include: ['*.aar', '*.jar'], exclude: []) testImplementation 'junit:junit:4.13.2' androidTestImplementation 'androidx.test.ext:junit:1.1.5' androidTestImplementation 'androidx.test.espresso:espresso-core:3.5.1' androidTestImplementation platform('androidx.compose:compose-bom:2022.10.00') androidTestImplementation 'androidx.compose.ui:ui-test-junit4' debugImplementation 'androidx.compose.ui:ui-tooling' debugImplementation 'androidx.compose.ui:ui-test-manifest' } ================================================ FILE: android/MLCChat/app/proguard-rules.pro ================================================ # Add project specific ProGuard rules here. # You can control the set of applied configuration files using the # proguardFiles setting in build.gradle. # # For more details, see # http://developer.android.com/guide/developing/tools/proguard.html # If your project uses WebView with JS, uncomment the following # and specify the fully qualified class name to the JavaScript interface # class: #-keepclassmembers class fqcn.of.javascript.interface.for.webview { # public *; #} # Uncomment this to preserve the line number information for # debugging stack traces. 
#-keepattributes SourceFile,LineNumberTable # If you keep the line number information, uncomment this to # hide the original source file name. #-renamesourcefileattribute SourceFile ================================================ FILE: android/MLCChat/app/src/main/AndroidManifest.xml ================================================ ================================================ FILE: android/MLCChat/app/src/main/java/ai/mlc/mlcchat/AppViewModel.kt ================================================ package ai.mlc.mlcchat import ai.mlc.mlcllm.MLCEngine import ai.mlc.mlcllm.OpenAIProtocol import android.app.Application import android.content.ClipData import android.content.ClipboardManager import android.content.Context import android.os.Environment import android.widget.Toast import androidx.compose.runtime.mutableStateOf import androidx.compose.runtime.toMutableStateList import androidx.lifecycle.AndroidViewModel import androidx.lifecycle.viewModelScope import com.google.gson.Gson import com.google.gson.annotations.SerializedName import kotlinx.coroutines.launch import java.io.File import java.io.FileOutputStream import java.net.URL import java.nio.channels.Channels import java.util.UUID import java.util.concurrent.Executors import kotlin.concurrent.thread import ai.mlc.mlcllm.OpenAIProtocol.ChatCompletionMessage import ai.mlc.mlcllm.OpenAIProtocol.ChatCompletionMessageContent import android.app.Activity import kotlinx.coroutines.* import android.graphics.Bitmap import android.graphics.BitmapFactory import android.net.Uri import java.io.ByteArrayOutputStream import android.util.Base64 import android.util.Log class AppViewModel(application: Application) : AndroidViewModel(application) { val modelList = emptyList().toMutableStateList() val chatState = ChatState() val modelSampleList = emptyList().toMutableStateList() private var showAlert = mutableStateOf(false) private var alertMessage = mutableStateOf("") private var appConfig = AppConfig( 
emptyList().toMutableList(), emptyList().toMutableList() ) private val application = getApplication() private val appDirFile = application.getExternalFilesDir("") private val gson = Gson() private val modelIdSet = emptySet().toMutableSet() companion object { const val AppConfigFilename = "mlc-app-config.json" const val ModelConfigFilename = "mlc-chat-config.json" const val ParamsConfigFilename = "tensor-cache.json" const val ModelUrlSuffix = "resolve/main/" } init { loadAppConfig() } fun isShowingAlert(): Boolean { return showAlert.value } fun errorMessage(): String { return alertMessage.value } fun dismissAlert() { require(showAlert.value) showAlert.value = false } fun copyError() { require(showAlert.value) val clipboard = application.getSystemService(Context.CLIPBOARD_SERVICE) as ClipboardManager clipboard.setPrimaryClip(ClipData.newPlainText("MLCChat", errorMessage())) } private fun issueAlert(error: String) { showAlert.value = true alertMessage.value = error } fun requestDeleteModel(modelId: String) { deleteModel(modelId) issueAlert("Model: $modelId has been deleted") } private fun loadAppConfig() { val appConfigFile = File(appDirFile, AppConfigFilename) val jsonString: String = if (!appConfigFile.exists()) { application.assets.open(AppConfigFilename).bufferedReader().use { it.readText() } } else { appConfigFile.readText() } appConfig = gson.fromJson(jsonString, AppConfig::class.java) appConfig.modelLibs = emptyList().toMutableList() modelList.clear() modelIdSet.clear() modelSampleList.clear() for (modelRecord in appConfig.modelList) { appConfig.modelLibs.add(modelRecord.modelLib) val modelDirFile = File(appDirFile, modelRecord.modelId) val modelConfigFile = File(modelDirFile, ModelConfigFilename) if (modelConfigFile.exists()) { val modelConfigString = modelConfigFile.readText() val modelConfig = gson.fromJson(modelConfigString, ModelConfig::class.java) modelConfig.modelId = modelRecord.modelId modelConfig.modelLib = modelRecord.modelLib 
modelConfig.estimatedVramBytes = modelRecord.estimatedVramBytes addModelConfig(modelConfig, modelRecord.modelUrl, true) } else { downloadModelConfig( if (modelRecord.modelUrl.endsWith("/")) modelRecord.modelUrl else "${modelRecord.modelUrl}/", modelRecord, true ) } } } private fun updateAppConfig(action: () -> Unit) { action() val jsonString = gson.toJson(appConfig) val appConfigFile = File(appDirFile, AppConfigFilename) appConfigFile.writeText(jsonString) } private fun addModelConfig(modelConfig: ModelConfig, modelUrl: String, isBuiltin: Boolean) { require(!modelIdSet.contains(modelConfig.modelId)) modelIdSet.add(modelConfig.modelId) modelList.add( ModelState( modelConfig, modelUrl + if (modelUrl.endsWith("/")) "" else "/", File(appDirFile, modelConfig.modelId) ) ) if (!isBuiltin) { updateAppConfig { appConfig.modelList.add( ModelRecord( modelUrl, modelConfig.modelId, modelConfig.estimatedVramBytes, modelConfig.modelLib ) ) } } } private fun deleteModel(modelId: String) { val modelDirFile = File(appDirFile, modelId) modelDirFile.deleteRecursively() require(!modelDirFile.exists()) modelIdSet.remove(modelId) modelList.removeIf { modelState -> modelState.modelConfig.modelId == modelId } updateAppConfig { appConfig.modelList.removeIf { modelRecord -> modelRecord.modelId == modelId } } } private fun isModelConfigAllowed(modelConfig: ModelConfig): Boolean { if (appConfig.modelLibs.contains(modelConfig.modelLib)) return true viewModelScope.launch { issueAlert("Model lib ${modelConfig.modelLib} is not supported.") } return false } private fun downloadModelConfig( modelUrl: String, modelRecord: ModelRecord, isBuiltin: Boolean ) { thread(start = true) { try { val url = URL("${modelUrl}${ModelUrlSuffix}${ModelConfigFilename}") val tempId = UUID.randomUUID().toString() val tempFile = File( application.getExternalFilesDir(Environment.DIRECTORY_DOWNLOADS), tempId ) url.openStream().use { Channels.newChannel(it).use { src -> FileOutputStream(tempFile).use { fileOutputStream -> 
fileOutputStream.channel.transferFrom(src, 0, Long.MAX_VALUE) } } } require(tempFile.exists()) viewModelScope.launch { try { val modelConfigString = tempFile.readText() val modelConfig = gson.fromJson(modelConfigString, ModelConfig::class.java) modelConfig.modelId = modelRecord.modelId modelConfig.modelLib = modelRecord.modelLib modelConfig.estimatedVramBytes = modelRecord.estimatedVramBytes if (modelIdSet.contains(modelConfig.modelId)) { tempFile.delete() issueAlert("${modelConfig.modelId} has been used, please consider another local ID") return@launch } if (!isModelConfigAllowed(modelConfig)) { tempFile.delete() return@launch } val modelDirFile = File(appDirFile, modelConfig.modelId) val modelConfigFile = File(modelDirFile, ModelConfigFilename) tempFile.copyTo(modelConfigFile, overwrite = true) tempFile.delete() require(modelConfigFile.exists()) addModelConfig(modelConfig, modelUrl, isBuiltin) } catch (e: Exception) { viewModelScope.launch { issueAlert("Add model failed: ${e.localizedMessage}") } } } } catch (e: Exception) { viewModelScope.launch { issueAlert("Download model config failed: ${e.localizedMessage}") } } } } inner class ModelState( val modelConfig: ModelConfig, private val modelUrl: String, private val modelDirFile: File ) { var modelInitState = mutableStateOf(ModelInitState.Initializing) private var paramsConfig = ParamsConfig(emptyList()) val progress = mutableStateOf(0) val total = mutableStateOf(1) val id: UUID = UUID.randomUUID() private val remainingTasks = emptySet().toMutableSet() private val downloadingTasks = emptySet().toMutableSet() private val maxDownloadTasks = 3 private val gson = Gson() init { switchToInitializing() } private fun switchToInitializing() { val paramsConfigFile = File(modelDirFile, ParamsConfigFilename) if (paramsConfigFile.exists()) { loadParamsConfig() switchToIndexing() } else { downloadParamsConfig() } } private fun loadParamsConfig() { val paramsConfigFile = File(modelDirFile, ParamsConfigFilename) 
require(paramsConfigFile.exists())
            val jsonString = paramsConfigFile.readText()
            paramsConfig = gson.fromJson(jsonString, ParamsConfig::class.java)
        }

        /**
         * Downloads the params config into the model directory on a background
         * thread, then loads it and moves on to indexing on the main scope.
         *
         * Any failure (bad URL, network error, I/O error, failed precondition) is
         * caught so that it cannot take the whole process down from a bare thread;
         * the partial temp file is removed and the user is alerted instead. The
         * model then stays in the Initializing state.
         */
        private fun downloadParamsConfig() {
            thread(start = true) {
                // Download to a uniquely named temp file first so that a partial
                // download is never mistaken for the real config file.
                val tempFile = File(modelDirFile, UUID.randomUUID().toString())
                try {
                    val url = URL("${modelUrl}${ModelUrlSuffix}${ParamsConfigFilename}")
                    url.openStream().use {
                        Channels.newChannel(it).use { src ->
                            FileOutputStream(tempFile).use { fileOutputStream ->
                                fileOutputStream.channel.transferFrom(src, 0, Long.MAX_VALUE)
                            }
                        }
                    }
                    require(tempFile.exists())
                    val paramsConfigFile = File(modelDirFile, ParamsConfigFilename)
                    tempFile.renameTo(paramsConfigFile)
                    require(paramsConfigFile.exists())
                    viewModelScope.launch {
                        loadParamsConfig()
                        switchToIndexing()
                    }
                } catch (e: Exception) {
                    tempFile.delete()
                    viewModelScope.launch {
                        issueAlert("Download params config failed: ${e.localizedMessage}")
                    }
                }
            }
        }

        /** Starts (or resumes) downloading the remaining files. */
        fun handleStart() {
            switchToDownloading()
        }

        /** Requests a pause; in-flight downloads are allowed to finish. */
        fun handlePause() {
            switchToPausing()
        }

        /** Requests removal of all downloaded files except the model config. */
        fun handleClear() {
            require(
                modelInitState.value == ModelInitState.Downloading ||
                    modelInitState.value == ModelInitState.Paused ||
                    modelInitState.value == ModelInitState.Finished
            )
            switchToClearing()
        }

        /**
         * Enters the Clearing state. Files are removed immediately when nothing
         * is downloading; when the model is loaded in the chat, the chat is
         * terminated first. While downloads are in flight only the state is set
         * here.
         */
        private fun switchToClearing() {
            if (modelInitState.value == ModelInitState.Paused) {
                modelInitState.value = ModelInitState.Clearing
                clear()
            } else if (modelInitState.value == ModelInitState.Finished) {
                modelInitState.value = ModelInitState.Clearing
                if (chatState.modelName.value == modelConfig.modelId) {
                    chatState.requestTerminateChat { clear() }
                } else {
                    clear()
                }
            } else {
                modelInitState.value = ModelInitState.Clearing
            }
        }

        /** Requests deletion of the whole model, including its config. */
        fun handleDelete() {
            require(
                modelInitState.value == ModelInitState.Downloading ||
                    modelInitState.value == ModelInitState.Paused ||
                    modelInitState.value == ModelInitState.Finished
            )
            switchToDeleting()
        }

        /** Enters the Deleting state; mirrors switchToClearing but calls delete(). */
        private fun switchToDeleting() {
            if (modelInitState.value == ModelInitState.Paused) {
                modelInitState.value = ModelInitState.Deleting
                delete()
            } else if (modelInitState.value == ModelInitState.Finished) {
                modelInitState.value = ModelInitState.Deleting
                if (chatState.modelName.value == modelConfig.modelId) {
chatState.requestTerminateChat { delete() }
                } else {
                    delete()
                }
            } else {
                modelInitState.value = ModelInitState.Deleting
            }
        }

        /**
         * Scans the tokenizer and parameter files of this model, counts the ones
         * already on disk as progress and queues a download task for each missing
         * one. Ends in Paused when something is missing, otherwise in Finished.
         */
        private fun switchToIndexing() {
            modelInitState.value = ModelInitState.Indexing
            progress.value = 0
            total.value = modelConfig.tokenizerFiles.size + paramsConfig.paramsRecords.size
            for (tokenizerFilename in modelConfig.tokenizerFiles) {
                val file = File(modelDirFile, tokenizerFilename)
                if (file.exists()) {
                    ++progress.value
                } else {
                    remainingTasks.add(
                        DownloadTask(
                            URL("${modelUrl}${ModelUrlSuffix}${tokenizerFilename}"),
                            file
                        )
                    )
                }
            }
            for (paramsRecord in paramsConfig.paramsRecords) {
                val file = File(modelDirFile, paramsRecord.dataPath)
                if (file.exists()) {
                    ++progress.value
                } else {
                    remainingTasks.add(
                        DownloadTask(
                            URL("${modelUrl}${ModelUrlSuffix}${paramsRecord.dataPath}"),
                            file
                        )
                    )
                }
            }
            if (progress.value < total.value) {
                switchToPaused()
            } else {
                switchToFinished()
            }
        }

        /** Enters Downloading and starts up to maxDownloadTasks parallel downloads. */
        private fun switchToDownloading() {
            modelInitState.value = ModelInitState.Downloading
            for (downloadTask in remainingTasks) {
                if (downloadingTasks.size < maxDownloadTasks) {
                    handleNewDownload(downloadTask)
                } else {
                    return
                }
            }
        }

        /**
         * Downloads one file on a background thread. The data goes to a uniquely
         * named temp file that is renamed to the target once complete; the result
         * is reported back on the main scope.
         *
         * Failures are caught instead of crashing the process from a bare thread:
         * the temp file is removed and handleFailedDownload releases the task.
         */
        private fun handleNewDownload(downloadTask: DownloadTask) {
            require(modelInitState.value == ModelInitState.Downloading)
            require(!downloadingTasks.contains(downloadTask))
            downloadingTasks.add(downloadTask)
            thread(start = true) {
                val tempFile = File(modelDirFile, UUID.randomUUID().toString())
                try {
                    downloadTask.url.openStream().use {
                        Channels.newChannel(it).use { src ->
                            FileOutputStream(tempFile).use { fileOutputStream ->
                                fileOutputStream.channel.transferFrom(src, 0, Long.MAX_VALUE)
                            }
                        }
                    }
                    require(tempFile.exists())
                    tempFile.renameTo(downloadTask.file)
                    require(downloadTask.file.exists())
                    viewModelScope.launch {
                        handleFinishDownload(downloadTask)
                    }
                } catch (e: Exception) {
                    tempFile.delete()
                    viewModelScope.launch {
                        handleFailedDownload(downloadTask, e)
                    }
                }
            }
        }

        /**
         * Bookkeeping for a download that threw. The task is released from
         * downloadingTasks but kept in remainingTasks so a later start retries it.
         * A failure while Downloading drains the other in-flight downloads and
         * ends in Paused; in the other states the pending transition is completed
         * once nothing is in flight, as handleFinishDownload does.
         */
        private fun handleFailedDownload(downloadTask: DownloadTask, error: Exception) {
            downloadingTasks.remove(downloadTask)
            issueAlert("Download ${downloadTask.file.name} failed: ${error.localizedMessage}")
            if (modelInitState.value == ModelInitState.Downloading) {
                switchToPausing()
            }
            if (downloadingTasks.isNotEmpty()) {
                return
            }
            when (modelInitState.value) {
                ModelInitState.Pausing -> switchToPaused()
                ModelInitState.Clearing -> clear()
                ModelInitState.Deleting -> delete()
                else -> {}
            }
        }

        /** Starts the first queued task that is not already being downloaded. */
        private fun handleNextDownload() {
            require(modelInitState.value == ModelInitState.Downloading)
            for (downloadTask in remainingTasks) {
                if (!downloadingTasks.contains(downloadTask)) {
handleNewDownload(downloadTask) break } } } private fun handleFinishDownload(downloadTask: DownloadTask) { remainingTasks.remove(downloadTask) downloadingTasks.remove(downloadTask) ++progress.value require( modelInitState.value == ModelInitState.Downloading || modelInitState.value == ModelInitState.Pausing || modelInitState.value == ModelInitState.Clearing || modelInitState.value == ModelInitState.Deleting ) if (modelInitState.value == ModelInitState.Downloading) { if (remainingTasks.isEmpty()) { if (downloadingTasks.isEmpty()) { switchToFinished() } } else { handleNextDownload() } } else if (modelInitState.value == ModelInitState.Pausing) { if (downloadingTasks.isEmpty()) { switchToPaused() } } else if (modelInitState.value == ModelInitState.Clearing) { if (downloadingTasks.isEmpty()) { clear() } } else if (modelInitState.value == ModelInitState.Deleting) { if (downloadingTasks.isEmpty()) { delete() } } } private fun clear() { val files = modelDirFile.listFiles { dir, name -> !(dir == modelDirFile && name == ModelConfigFilename) } require(files != null) for (file in files) { file.deleteRecursively() require(!file.exists()) } val modelConfigFile = File(modelDirFile, ModelConfigFilename) require(modelConfigFile.exists()) switchToIndexing() } private fun delete() { modelDirFile.deleteRecursively() require(!modelDirFile.exists()) requestDeleteModel(modelConfig.modelId) } private fun switchToPausing() { modelInitState.value = ModelInitState.Pausing } private fun switchToPaused() { modelInitState.value = ModelInitState.Paused } private fun switchToFinished() { modelInitState.value = ModelInitState.Finished } fun startChat() { chatState.requestReloadChat( modelConfig, modelDirFile.absolutePath, ) } } inner class ChatState { val messages = emptyList().toMutableStateList() val report = mutableStateOf("") val modelName = mutableStateOf("") private var modelChatState = mutableStateOf(ModelChatState.Ready) @Synchronized get @Synchronized set private val engine = MLCEngine() 
private var historyMessages = mutableListOf<ChatCompletionMessage>()
        private var modelLib = ""
        private var modelPath = ""
        private val executorService = Executors.newSingleThreadExecutor()
        private val viewModelScope = CoroutineScope(Dispatchers.Main + Job())
        private var imageUri: Uri? = null

        /** Resets the engine on the executor, then clears the UI history and returns to Ready. */
        private fun mainResetChat() {
            imageUri = null
            executorService.submit {
                callBackend { engine.reset() }
                historyMessages = mutableListOf()
                viewModelScope.launch {
                    clearHistory()
                    switchToReady()
                }
            }
        }

        /** Clears the displayed messages, the report line and the engine-side history. */
        private fun clearHistory() {
            messages.clear()
            report.value = ""
            historyMessages.clear()
        }

        private fun switchToResetting() {
            modelChatState.value = ModelChatState.Resetting
        }

        private fun switchToGenerating() {
            modelChatState.value = ModelChatState.Generating
        }

        private fun switchToReloading() {
            modelChatState.value = ModelChatState.Reloading
        }

        private fun switchToReady() {
            modelChatState.value = ModelChatState.Ready
        }

        private fun switchToFailed() {
            modelChatState.value = ModelChatState.Falied
        }

        /**
         * Runs [callback] and reports whether it completed without throwing.
         * On an exception the stack trace and message are appended to the chat
         * as an assistant message and the chat moves to the failed state.
         */
        private fun callBackend(callback: () -> Unit): Boolean {
            try {
                callback()
            } catch (e: Exception) {
                viewModelScope.launch {
                    val stackTrace = e.stackTraceToString()
                    val errorMessage = e.localizedMessage
                    appendMessage(
                        MessageRole.Assistant,
                        "MLCChat failed\n\nStack trace:\n$stackTrace\n\nError message:\n$errorMessage"
                    )
                    switchToFailed()
                }
                return false
            }
            return true
        }

        /** Resets the conversation; only valid while the chat is interruptable. */
        fun requestResetChat() {
            require(interruptable())
            interruptChat(
                prologue = {
                    switchToResetting()
                },
                epilogue = {
                    mainResetChat()
                }
            )
        }

        /**
         * Interrupts the chat around a state change.
         *
         * [prologue] runs before the interruption and [epilogue] after it. When
         * nothing is generating (Ready, or Falied after a backend error) both run
         * immediately; while Generating, the epilogue is queued behind the work
         * already submitted to the executor.
         *
         * Falied has to be handled here because interruptable() admits it: it used
         * to fall through to require(false), so resetting or leaving a failed chat
         * crashed the app.
         */
        private fun interruptChat(prologue: () -> Unit, epilogue: () -> Unit) {
            require(interruptable())
            when (modelChatState.value) {
                ModelChatState.Ready, ModelChatState.Falied -> {
                    prologue()
                    epilogue()
                }

                ModelChatState.Generating -> {
                    prologue()
                    executorService.submit {
                        viewModelScope.launch { epilogue() }
                    }
                }

                // Unreachable as long as interruptable() and this list stay in sync.
                else -> require(false)
            }
        }

        fun requestTerminateChat(callback: () -> Unit) {
            require(interruptable())
            interruptChat(
                prologue = {
switchToTerminating() }, epilogue = { mainTerminateChat(callback) } ) } private fun mainTerminateChat(callback: () -> Unit) { executorService.submit { callBackend { engine.unload() } viewModelScope.launch { clearHistory() switchToReady() callback() } } } private fun switchToTerminating() { modelChatState.value = ModelChatState.Terminating } fun requestReloadChat(modelConfig: ModelConfig, modelPath: String) { if (this.modelName.value == modelConfig.modelId && this.modelLib == modelConfig.modelLib && this.modelPath == modelPath) { return } require(interruptable()) interruptChat( prologue = { switchToReloading() }, epilogue = { mainReloadChat(modelConfig, modelPath) } ) } private fun mainReloadChat(modelConfig: ModelConfig, modelPath: String) { clearHistory() this.modelName.value = modelConfig.modelId this.modelLib = modelConfig.modelLib this.modelPath = modelPath executorService.submit { viewModelScope.launch { Toast.makeText(application, "Initialize...", Toast.LENGTH_SHORT).show() } if (!callBackend { engine.unload() engine.reload(modelPath, modelConfig.modelLib) }) return@submit viewModelScope.launch { Toast.makeText(application, "Ready to chat", Toast.LENGTH_SHORT).show() switchToReady() } } } fun requestImageBitmap(uri: Uri?) { require(chatable()) switchToGenerating() executorService.submit { imageUri = uri viewModelScope.launch { report.value = "Image process is done, ask any question." 
if (modelChatState.value == ModelChatState.Generating) switchToReady() } } } fun bitmapToURL(bm: Bitmap): String { val targetSize = 336 val scaledBitmap = Bitmap.createScaledBitmap(bm, targetSize, targetSize, true) val outputStream = ByteArrayOutputStream() scaledBitmap.compress(Bitmap.CompressFormat.JPEG, 100, outputStream) scaledBitmap.recycle() val imageBytes = outputStream.toByteArray() val imageBase64 = Base64.encodeToString(imageBytes, Base64.NO_WRAP) return "data:image/jpg;base64,$imageBase64" } fun requestGenerate(prompt: String, activity: Activity) { require(chatable()) switchToGenerating() appendMessage(MessageRole.User, prompt) appendMessage(MessageRole.Assistant, "") var content = ChatCompletionMessageContent(text=prompt) if (imageUri != null) { val uri = imageUri val bitmap = uri?.let { activity.contentResolver.openInputStream(it)?.use { input -> BitmapFactory.decodeStream(input) } } val imageBase64URL = bitmapToURL(bitmap!!) Log.v("requestGenerate", "image base64 url: $imageBase64URL") val parts = listOf( mapOf("type" to "text", "text" to prompt), mapOf("type" to "image_url", "image_url" to imageBase64URL) ) content = ChatCompletionMessageContent(parts=parts) imageUri = null } executorService.submit { historyMessages.add(ChatCompletionMessage( role = OpenAIProtocol.ChatCompletionRole.user, content = content )) viewModelScope.launch { val responses = engine.chat.completions.create( messages = historyMessages, stream_options = OpenAIProtocol.StreamOptions(include_usage = true) ) var finishReasonLength = false var streamingText = "" for (res in responses) { if (!callBackend { for (choice in res.choices) { choice.delta.content?.let { content -> streamingText += content.asText() } choice.finish_reason?.let { finishReason -> if (finishReason == "length") { finishReasonLength = true } } } updateMessage(MessageRole.Assistant, streamingText) res.usage?.let { finalUsage -> report.value = finalUsage.extra?.asTextLabel() ?: "" } if (finishReasonLength) { 
streamingText += " [output truncated due to context length limit...]" updateMessage(MessageRole.Assistant, streamingText) } }); } if (streamingText.isNotEmpty()) { historyMessages.add(ChatCompletionMessage( role = OpenAIProtocol.ChatCompletionRole.assistant, content = streamingText )) streamingText = "" } else { if (historyMessages.isNotEmpty()) { historyMessages.removeAt(historyMessages.size - 1) } } if (modelChatState.value == ModelChatState.Generating) switchToReady() } } } private fun appendMessage(role: MessageRole, text: String) { messages.add(MessageData(role, text)) } private fun updateMessage(role: MessageRole, text: String) { messages[messages.size - 1] = MessageData(role, text) } fun chatable(): Boolean { return modelChatState.value == ModelChatState.Ready } fun interruptable(): Boolean { return modelChatState.value == ModelChatState.Ready || modelChatState.value == ModelChatState.Generating || modelChatState.value == ModelChatState.Falied } } } enum class ModelInitState { Initializing, Indexing, Paused, Downloading, Pausing, Clearing, Deleting, Finished } enum class ModelChatState { Generating, Resetting, Reloading, Terminating, Ready, Falied } enum class MessageRole { Assistant, User } data class DownloadTask(val url: URL, val file: File) data class MessageData(val role: MessageRole, val text: String, val id: UUID = UUID.randomUUID(), var imageUri: Uri? 
= null)

/**
 * Persisted application configuration.
 *
 * @property modelLibs model library names; rebuilt from [modelList] when the config is loaded
 * @property modelList one record per model known to the app
 */
data class AppConfig(
    @SerializedName("model_libs") var modelLibs: MutableList<String>,
    @SerializedName("model_list") val modelList: MutableList<ModelRecord>,
)

/** One entry of the app config's model list. */
data class ModelRecord(
    @SerializedName("model_url") val modelUrl: String,
    @SerializedName("model_id") val modelId: String,
    @SerializedName("estimated_vram_bytes") val estimatedVramBytes: Long?,
    @SerializedName("model_lib") val modelLib: String
)

/**
 * Per-model chat configuration. The id, lib and VRAM estimate are mutable
 * because they are overwritten from the matching [ModelRecord] after parsing.
 */
data class ModelConfig(
    @SerializedName("model_lib") var modelLib: String,
    @SerializedName("model_id") var modelId: String,
    @SerializedName("estimated_vram_bytes") var estimatedVramBytes: Long?,
    @SerializedName("tokenizer_files") val tokenizerFiles: List<String>,
    @SerializedName("context_window_size") val contextWindowSize: Int,
    @SerializedName("prefill_chunk_size") val prefillChunkSize: Int,
)

/** One parameter file entry; [dataPath] is relative to the model directory. */
data class ParamsRecord(
    @SerializedName("dataPath") val dataPath: String
)

/** Parsed params config: the list of parameter files to download. */
data class ParamsConfig(
    @SerializedName("records") val paramsRecords: List<ParamsRecord>
)
================================================
FILE: android/MLCChat/app/src/main/java/ai/mlc/mlcchat/ChatView.kt
================================================
package ai.mlc.mlcchat

import android.app.Activity
import android.graphics.Bitmap
import android.graphics.BitmapFactory
import androidx.compose.foundation.Image
import androidx.compose.foundation.background
import androidx.compose.foundation.gestures.detectTapGestures
import androidx.compose.foundation.layout.Arrangement
import androidx.compose.foundation.layout.Column
import androidx.compose.foundation.layout.IntrinsicSize
import androidx.compose.foundation.layout.Row
import androidx.compose.foundation.layout.aspectRatio
import androidx.compose.foundation.layout.fillMaxSize
import androidx.compose.foundation.layout.fillMaxWidth
import androidx.compose.foundation.layout.height
import androidx.compose.foundation.layout.padding
import androidx.compose.foundation.layout.widthIn
import androidx.compose.foundation.layout.wrapContentHeight
import
androidx.compose.foundation.layout.wrapContentWidth import androidx.compose.foundation.lazy.LazyColumn import androidx.compose.foundation.lazy.items import androidx.compose.foundation.lazy.rememberLazyListState import androidx.compose.foundation.shape.RoundedCornerShape import androidx.compose.foundation.text.selection.SelectionContainer import androidx.compose.material.icons.Icons import androidx.compose.material.icons.filled.AddAPhoto import androidx.compose.material.icons.filled.ArrowBack import androidx.compose.material.icons.filled.Photo import androidx.compose.material.icons.filled.Replay import androidx.compose.material.icons.filled.Send import androidx.compose.material3.Divider import androidx.compose.material3.ExperimentalMaterial3Api import androidx.compose.material3.Icon import androidx.compose.material3.IconButton import androidx.compose.material3.MaterialTheme import androidx.compose.material3.OutlinedTextField import androidx.compose.material3.Scaffold import androidx.compose.material3.Switch import androidx.compose.material3.Text import androidx.compose.material3.TopAppBar import androidx.compose.material3.TopAppBarDefaults import androidx.compose.runtime.Composable import androidx.compose.runtime.getValue import androidx.compose.runtime.mutableStateOf import androidx.compose.runtime.remember import androidx.compose.runtime.rememberCoroutineScope import androidx.compose.runtime.saveable.rememberSaveable import androidx.compose.runtime.setValue import androidx.compose.ui.Alignment import androidx.compose.ui.Modifier import androidx.compose.ui.graphics.asImageBitmap import androidx.compose.ui.input.pointer.pointerInput import androidx.compose.ui.platform.LocalFocusManager import androidx.compose.ui.text.style.TextAlign import androidx.compose.ui.tooling.preview.Preview import androidx.compose.ui.unit.dp import androidx.navigation.NavController import dev.jeziellago.compose.markdowntext.MarkdownText import kotlinx.coroutines.launch 
@ExperimentalMaterial3Api @Composable fun ChatView( navController: NavController, chatState: AppViewModel.ChatState, activity: Activity ) { val localFocusManager = LocalFocusManager.current (activity as MainActivity).chatState = chatState Scaffold(topBar = { TopAppBar( title = { Text( text = "MLCChat: " + chatState.modelName.value.split("-")[0], color = MaterialTheme.colorScheme.onPrimary ) }, colors = TopAppBarDefaults.topAppBarColors(containerColor = MaterialTheme.colorScheme.primary), navigationIcon = { IconButton( onClick = { navController.popBackStack() }, enabled = chatState.interruptable() ) { Icon( imageVector = Icons.Filled.ArrowBack, contentDescription = "back home page", tint = MaterialTheme.colorScheme.onPrimary ) } }, actions = { IconButton( onClick = { chatState.requestResetChat() activity.hasImage = false }, enabled = chatState.interruptable() ) { Icon( imageVector = Icons.Filled.Replay, contentDescription = "reset the chat", tint = MaterialTheme.colorScheme.onPrimary ) } }) }, modifier = Modifier.pointerInput(Unit) { detectTapGestures(onTap = { localFocusManager.clearFocus() }) }) { paddingValues -> Column( modifier = Modifier .fillMaxSize() .padding(paddingValues) .padding(horizontal = 10.dp) ) { val lazyColumnListState = rememberLazyListState() val coroutineScope = rememberCoroutineScope() Text( text = chatState.report.value, textAlign = TextAlign.Center, modifier = Modifier .fillMaxWidth() .wrapContentHeight() .padding(top = 5.dp) ) Divider(thickness = 1.dp, modifier = Modifier.padding(vertical = 5.dp)) LazyColumn( modifier = Modifier.weight(9f), verticalArrangement = Arrangement.spacedBy(5.dp, alignment = Alignment.Bottom), state = lazyColumnListState ) { coroutineScope.launch { lazyColumnListState.animateScrollToItem(chatState.messages.size) } items( items = chatState.messages, key = { message -> message.id }, ) { message -> MessageView(messageData = message, activity) } item { // place holder item for scrolling to the bottom } } 
Divider(thickness = 1.dp, modifier = Modifier.padding(top = 5.dp))
            SendMessageView(chatState = chatState, activity)
        }
    }
}

/**
 * Renders a single chat message.
 *
 * Assistant messages are left-aligned and can be toggled between Markdown and
 * plain text. User messages are right-aligned; a user message carrying an
 * image URI is shown as a scaled bitmap, and the first such message also hands
 * the image to the chat state.
 *
 * @param messageData the message to render
 * @param activity hosting activity; may be null (the preview passes null), in
 *   which case images cannot be loaded and no image is registered
 */
@Composable
fun MessageView(messageData: MessageData, activity: Activity?) {
    // default render the Assistant text as MarkdownText
    var useMarkdown by remember { mutableStateOf(true) }
    // Safe cast: a hard cast of a null activity throws before anything is drawn.
    val localActivity = activity as? MainActivity
    SelectionContainer {
        if (messageData.role == MessageRole.Assistant) {
            Column {
                if (messageData.text.isNotEmpty()) {
                    Row(
                        verticalAlignment = Alignment.CenterVertically,
                    ) {
                        Text(
                            text = "Show as Markdown",
                            color = MaterialTheme.colorScheme.onSecondaryContainer,
                            modifier = Modifier
                                .wrapContentWidth()
                                .padding(end = 8.dp)
                                .widthIn(max = 300.dp)
                        )
                        Switch(
                            checked = useMarkdown,
                            onCheckedChange = { useMarkdown = it }
                        )
                    }
                }
                Row(
                    horizontalArrangement = Arrangement.Start,
                    modifier = Modifier.fillMaxWidth()
                ) {
                    if (useMarkdown) {
                        MarkdownText(
                            isTextSelectable = true,
                            modifier = Modifier
                                .wrapContentWidth()
                                .background(
                                    color = MaterialTheme.colorScheme.secondaryContainer,
                                    shape = RoundedCornerShape(5.dp)
                                )
                                .padding(5.dp)
                                .widthIn(max = 300.dp),
                            markdown = messageData.text,
                        )
                    } else {
                        Text(
                            text = messageData.text,
                            textAlign = TextAlign.Left,
                            color = MaterialTheme.colorScheme.onSecondaryContainer,
                            modifier = Modifier
                                .wrapContentWidth()
                                .background(
                                    color = MaterialTheme.colorScheme.secondaryContainer,
                                    shape = RoundedCornerShape(5.dp)
                                )
                                .padding(5.dp)
                                .widthIn(max = 300.dp)
                        )
                    }
                }
            }
        } else {
            Row(
                horizontalArrangement = Arrangement.End,
                modifier = Modifier.fillMaxWidth()
            ) {
                if (messageData.imageUri != null) {
                    val uri = messageData.imageUri
                    // Decode and scale once per URI instead of on every recomposition.
                    val displayBitmap = remember(uri) {
                        val bitmap = uri?.let {
                            activity?.contentResolver?.openInputStream(it)?.use { input ->
                                BitmapFactory.decodeStream(input)
                            }
                        }
                        bitmap?.let { Bitmap.createScaledBitmap(it, 224, 224, true) }
                    }
                    if (displayBitmap != null) {
                        Image(
                            displayBitmap.asImageBitmap(),
                            "",
                            modifier = Modifier
                                .wrapContentWidth()
                                .background(
                                    color = MaterialTheme.colorScheme.secondaryContainer,
                                    shape = RoundedCornerShape(5.dp)
                                )
                                .padding(5.dp)
                                .widthIn(max = 300.dp)
                        )
                    }
                    if (localActivity != null) {
                        // Only the first image is forwarded to the chat state.
                        if (!localActivity.hasImage) {
                            localActivity.chatState.requestImageBitmap(messageData.imageUri)
                        }
                        localActivity.hasImage = true
                    }
                } else {
                    Text(
                        text = messageData.text,
                        textAlign = TextAlign.Right,
                        color = MaterialTheme.colorScheme.onPrimaryContainer,
                        modifier = Modifier
                            .wrapContentWidth()
                            .background(
                                color = MaterialTheme.colorScheme.primaryContainer,
                                shape = RoundedCornerShape(5.dp)
                            )
                            .padding(5.dp)
                            .widthIn(max = 300.dp)
                    )
                }
            }
        }
    }
}

/**
 * Input row at the bottom of the chat: a text field followed by camera,
 * gallery and send buttons. The image buttons are disabled once an image has
 * been attached; sending requires non-empty text and a chatable state.
 *
 * @param chatState chat state that receives the prompt
 * @param activity hosting activity; must be a MainActivity
 */
@ExperimentalMaterial3Api
@Composable
fun SendMessageView(chatState: AppViewModel.ChatState, activity: Activity) {
    val localFocusManager = LocalFocusManager.current
    val localActivity : MainActivity = activity as MainActivity
    Row(
        horizontalArrangement = Arrangement.spacedBy(5.dp),
        verticalAlignment = Alignment.CenterVertically,
        modifier = Modifier
            .height(IntrinsicSize.Max)
            .fillMaxWidth()
            .padding(bottom = 5.dp)
    ) {
        var text by rememberSaveable { mutableStateOf("") }
        OutlinedTextField(
            value = text,
            onValueChange = { text = it },
            label = { Text(text = "Input") },
            modifier = Modifier
                .weight(9f),
        )
        IconButton(
            onClick = {
                activity.takePhoto()
            },
            modifier = Modifier
                .aspectRatio(1f)
                .weight(1f),
            enabled = (chatState.chatable() && !localActivity.hasImage)
        ) {
            Icon(
                imageVector = Icons.Filled.AddAPhoto,
                contentDescription = "use camera",
            )
        }
        IconButton(
            onClick = {
                activity.pickImageFromGallery()
            },
            modifier = Modifier
                .aspectRatio(1f)
                .weight(1f),
            enabled = (chatState.chatable() && !localActivity.hasImage)
        ) {
            Icon(
                imageVector = Icons.Filled.Photo,
                contentDescription = "select image",
            )
        }
        IconButton(
            onClick = {
                localFocusManager.clearFocus()
                chatState.requestGenerate(text, activity)
                text = ""
            },
            modifier = Modifier
                .aspectRatio(1f)
                .weight(1f),
            enabled = (text != "" && chatState.chatable())
        ) {
            Icon(
                imageVector = Icons.Filled.Send,
                contentDescription = "send message",
            )
        }
    }
}

@Preview
@Composable
fun
MessageViewPreviewWithMarkdown() { MessageView( messageData = MessageData( role = MessageRole.Assistant, text = """ # Sample Header * Markdown * [Link](https://example.com) Google """ ), null ) } ================================================ FILE: android/MLCChat/app/src/main/java/ai/mlc/mlcchat/MainActivity.kt ================================================ package ai.mlc.mlcchat import android.Manifest import android.content.ContentValues import android.content.pm.PackageManager import android.net.Uri import android.os.Build import android.os.Bundle import android.provider.MediaStore import android.util.Log import androidx.activity.ComponentActivity import androidx.activity.compose.setContent import androidx.activity.result.contract.ActivityResultContracts import androidx.annotation.RequiresApi import androidx.compose.foundation.layout.fillMaxSize import androidx.compose.material3.ExperimentalMaterial3Api import androidx.compose.material3.Surface import androidx.compose.ui.Modifier import androidx.core.content.ContextCompat import ai.mlc.mlcchat.ui.theme.MLCChatTheme import java.text.SimpleDateFormat import java.util.Date import java.util.Locale import java.util.UUID class MainActivity : ComponentActivity() { var hasImage = false private val pickImageLauncher = registerForActivityResult( ActivityResultContracts.GetContent() ) { uri: Uri? -> uri?.let { Log.v("pickImageLauncher", "Selected image uri: $it") chatState.messages.add( MessageData( role = MessageRole.User, text = "", id = UUID.randomUUID(), imageUri = it ) ) } } private var cameraImageUri: Uri? 
= null private val takePictureLauncher = registerForActivityResult( ActivityResultContracts.TakePicture() ) { success: Boolean -> if (success && cameraImageUri != null) { Log.v("takePictureLauncher", "Camera image uri: $cameraImageUri") chatState.messages.add( MessageData( role = MessageRole.User, text = "", id = UUID.randomUUID(), imageUri = cameraImageUri ) ) } } private val requestPermissionLauncher = registerForActivityResult(ActivityResultContracts.RequestMultiplePermissions()) { permissions -> permissions.entries.forEach { Log.d("Permissions", "${it.key} = ${it.value}") } } lateinit var chatState: AppViewModel.ChatState @RequiresApi(Build.VERSION_CODES.TIRAMISU) @ExperimentalMaterial3Api override fun onCreate(savedInstanceState: Bundle?) { super.onCreate(savedInstanceState) chatState = AppViewModel(this.application).ChatState() requestNeededPermissions() setContent { Surface( modifier = Modifier.fillMaxSize() ) { MLCChatTheme { NavView(this) } } } } private fun requestNeededPermissions() { val permissionsToRequest = mutableListOf() if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.TIRAMISU) { if (ContextCompat.checkSelfPermission( this, Manifest.permission.READ_MEDIA_IMAGES ) != PackageManager.PERMISSION_GRANTED ) { permissionsToRequest.add(Manifest.permission.READ_MEDIA_IMAGES) } if (ContextCompat.checkSelfPermission( this, Manifest.permission.CAMERA ) != PackageManager.PERMISSION_GRANTED ) { permissionsToRequest.add(Manifest.permission.CAMERA) } } else { if (ContextCompat.checkSelfPermission( this, Manifest.permission.READ_EXTERNAL_STORAGE ) != PackageManager.PERMISSION_GRANTED ) { permissionsToRequest.add(Manifest.permission.READ_EXTERNAL_STORAGE) } if (ContextCompat.checkSelfPermission( this, Manifest.permission.WRITE_EXTERNAL_STORAGE ) != PackageManager.PERMISSION_GRANTED ) { permissionsToRequest.add(Manifest.permission.WRITE_EXTERNAL_STORAGE) } if (ContextCompat.checkSelfPermission( this, Manifest.permission.CAMERA ) != PackageManager.PERMISSION_GRANTED 
) {
                permissionsToRequest.add(Manifest.permission.CAMERA)
            }
        }
        if (permissionsToRequest.isNotEmpty()) {
            requestPermissionLauncher.launch(permissionsToRequest.toTypedArray())
        }
    }

    /** Opens the system content picker restricted to images. */
    fun pickImageFromGallery() {
        pickImageLauncher.launch("image/*")
    }

    /**
     * Creates a MediaStore entry for a new JPEG and launches the camera to
     * write into it. The entry's URI is kept in cameraImageUri for the result
     * callback.
     *
     * ContentResolver.insert may return null; in that case the camera is not
     * launched, because the launcher needs a non-null output URI.
     */
    fun takePhoto() {
        val contentValues = ContentValues().apply {
            val timeFormatter = SimpleDateFormat("yyyyMMdd_HHmmss", Locale.getDefault())
            val fileName = "IMG_${timeFormatter.format(Date())}.jpg"
            put(MediaStore.Images.Media.DISPLAY_NAME, fileName)
            put(MediaStore.Images.Media.MIME_TYPE, "image/jpeg")
            // DATE_ADDED is expressed in seconds, hence the division.
            put(MediaStore.Images.Media.DATE_ADDED, System.currentTimeMillis() / 1000)
        }

        val outputUri = contentResolver.insert(
            MediaStore.Images.Media.EXTERNAL_CONTENT_URI,
            contentValues
        )
        cameraImageUri = outputUri
        if (outputUri == null) {
            Log.e("takePhoto", "Could not create a MediaStore entry for the camera image")
            return
        }

        takePictureLauncher.launch(outputUri)
    }
}
================================================
FILE: android/MLCChat/app/src/main/java/ai/mlc/mlcchat/NavView.kt
================================================
package ai.mlc.mlcchat

import android.app.Activity
import androidx.compose.material3.ExperimentalMaterial3Api
import androidx.compose.runtime.Composable
import androidx.lifecycle.viewmodel.compose.viewModel
import androidx.navigation.compose.NavHost
import androidx.navigation.compose.composable
import androidx.navigation.compose.rememberNavController

/**
 * Navigation root with two destinations: "home" shows the model list and
 * "chat" shows the conversation backed by the view model's chat state.
 */
@ExperimentalMaterial3Api
@Composable
fun NavView(activity: Activity, appViewModel: AppViewModel = viewModel()) {
    val navController = rememberNavController()
    NavHost(navController = navController, startDestination = "home") {
        composable("home") { StartView(navController, appViewModel) }
        composable("chat") { ChatView(navController, appViewModel.chatState, activity) }
    }
}
================================================
FILE: android/MLCChat/app/src/main/java/ai/mlc/mlcchat/StartView.kt
================================================
package ai.mlc.mlcchat

import androidx.compose.foundation.gestures.detectTapGestures
import androidx.compose.foundation.layout.Arrangement
import
androidx.compose.foundation.layout.Column import androidx.compose.foundation.layout.Row import androidx.compose.foundation.layout.aspectRatio import androidx.compose.foundation.layout.fillMaxSize import androidx.compose.foundation.layout.fillMaxWidth import androidx.compose.foundation.layout.height import androidx.compose.foundation.layout.padding import androidx.compose.foundation.layout.width import androidx.compose.foundation.layout.wrapContentHeight import androidx.compose.foundation.lazy.LazyColumn import androidx.compose.foundation.lazy.items import androidx.compose.foundation.text.selection.SelectionContainer import androidx.compose.material.icons.Icons import androidx.compose.material.icons.outlined.Chat import androidx.compose.material.icons.outlined.Delete import androidx.compose.material.icons.outlined.Download import androidx.compose.material.icons.outlined.Pause import androidx.compose.material.icons.outlined.Schedule import androidx.compose.material3.AlertDialog import androidx.compose.material3.Divider import androidx.compose.material3.ExperimentalMaterial3Api import androidx.compose.material3.Icon import androidx.compose.material3.IconButton import androidx.compose.material3.LinearProgressIndicator import androidx.compose.material3.MaterialTheme import androidx.compose.material3.OutlinedTextField import androidx.compose.material3.Scaffold import androidx.compose.material3.Text import androidx.compose.material3.TextButton import androidx.compose.material3.TopAppBar import androidx.compose.material3.TopAppBarDefaults import androidx.compose.runtime.Composable import androidx.compose.runtime.getValue import androidx.compose.runtime.mutableStateOf import androidx.compose.runtime.saveable.rememberSaveable import androidx.compose.runtime.setValue import androidx.compose.ui.Alignment import androidx.compose.ui.Modifier import androidx.compose.ui.input.pointer.pointerInput import androidx.compose.ui.platform.LocalFocusManager import 
androidx.compose.ui.text.style.TextAlign
import androidx.compose.ui.unit.dp
import androidx.navigation.NavController

/**
 * Start screen: a top bar titled "MLCChat" above a list with one [ModelView]
 * per entry of [AppViewModel.modelList].
 *
 * A tap anywhere on the scaffold clears the current focus. While
 * `appViewModel.isShowingAlert()` is true an error dialog is shown on top.
 *
 * @param navController passed down to every [ModelView].
 * @param appViewModel source of the model list and of the alert state.
 */
@ExperimentalMaterial3Api
@Composable
fun StartView(
    navController: NavController,
    appViewModel: AppViewModel
) {
    val localFocusManager = LocalFocusManager.current
    Scaffold(
        topBar = {
            TopAppBar(
                title = {
                    Text(
                        text = "MLCChat",
                        color = MaterialTheme.colorScheme.onPrimary
                    )
                },
                colors = TopAppBarDefaults.topAppBarColors(containerColor = MaterialTheme.colorScheme.primary)
            )
        },
        modifier = Modifier.pointerInput(Unit) {
            detectTapGestures(onTap = {
                localFocusManager.clearFocus()
            })
        }
    ) { paddingValues ->
        Column(
            modifier = Modifier
                .fillMaxSize()
                .padding(paddingValues)
                .padding(horizontal = 10.dp)
        ) {
            Text(text = "Model List", modifier = Modifier.padding(top = 10.dp))
            LazyColumn() {
                items(items = appViewModel.modelList,
                    key = { modelState -> modelState.id }
                ) { modelState ->
                    ModelView(
                        navController = navController,
                        modelState = modelState,
                        appViewModel = appViewModel
                    )
                }
            }
        }
        if (appViewModel.isShowingAlert()) {
            AlertDialog(
                onDismissRequest = { appViewModel.dismissAlert() },
                onConfirmation = { appViewModel.copyError() },
                error = appViewModel.errorMessage()
            )
        }
    }
}

/**
 * Error dialog titled "Error" with a "Copy" confirm button and a "Dismiss"
 * button.
 *
 * @param onDismissRequest invoked by the "Dismiss" button and when the dialog
 *   itself requests dismissal.
 * @param onConfirmation invoked by the "Copy" button.
 * @param error message shown as the dialog body.
 */
@ExperimentalMaterial3Api
@Composable
fun AlertDialog(
    onDismissRequest: () -> Unit,
    onConfirmation: () -> Unit,
    error: String,
) {
    AlertDialog(
        title = { Text(text = "Error") },
        text = { Text(text = error) },
        onDismissRequest = { onDismissRequest() },
        confirmButton = {
            TextButton(onClick = { onConfirmation() }) { Text("Copy") }
        },
        dismissButton = {
            TextButton(onClick = { onDismissRequest() }) { Text("Dismiss") }
        }
    )
}

/**
 * One row of the model list: the model id, an action button that depends on
 * the model's init state, an optional delete button, and a progress bar.
 *
 * Action button by state:
 *  - Paused      -> start downloading
 *  - Downloading -> pause downloading
 *  - Finished    -> start chatting (navigates to the "chat" route)
 *  - otherwise   -> disabled "pending" icon
 *
 * The delete button is shown for Downloading, Paused and Finished; pressing it
 * reveals a confirmation row with "cancel", "clear data" and "delete model".
 *
 * @param navController used to navigate to "chat" once a chat is started.
 * @param modelState state and actions of the model shown in this row.
 * @param appViewModel supplies `chatState.interruptable()`, which gates the
 *   chat button.
 */
@Composable
fun ModelView(
    navController: NavController,
    modelState: AppViewModel.ModelState,
    appViewModel: AppViewModel
) {
    var isDeletingModel by rememberSaveable { mutableStateOf(false) }
    // Read the state once so every branch below acts on the same value.
    val initState = modelState.modelInitState.value
    Column(
        verticalArrangement = Arrangement.SpaceBetween,
        modifier = Modifier
            .wrapContentHeight()
    ) {
        Row(
            horizontalArrangement = Arrangement.spacedBy(5.dp),
            verticalAlignment = Alignment.CenterVertically,
            modifier = Modifier
                .fillMaxWidth()
                .wrapContentHeight()
        ) {
            Text(
                text = modelState.modelConfig.modelId,
                textAlign = TextAlign.Left,
                modifier = Modifier
                    .wrapContentHeight()
                    .weight(8f)
            )
            Divider(
                modifier = Modifier
                    .height(20.dp)
                    .width(1.dp)
            )
            when (initState) {
                ModelInitState.Paused -> {
                    IconButton(
                        onClick = { modelState.handleStart() },
                        modifier = Modifier
                            .aspectRatio(1f)
                            .weight(1f)
                    ) {
                        Icon(
                            imageVector = Icons.Outlined.Download,
                            contentDescription = "start downloading",
                        )
                    }
                }

                ModelInitState.Downloading -> {
                    IconButton(
                        onClick = { modelState.handlePause() },
                        modifier = Modifier
                            .aspectRatio(1f)
                            .weight(1f)
                    ) {
                        Icon(
                            imageVector = Icons.Outlined.Pause,
                            contentDescription = "pause downloading",
                        )
                    }
                }

                ModelInitState.Finished -> {
                    IconButton(
                        onClick = {
                            modelState.startChat()
                            navController.navigate("chat")
                        },
                        enabled = appViewModel.chatState.interruptable(),
                        modifier = Modifier
                            .aspectRatio(1f)
                            .weight(1f)
                    ) {
                        Icon(
                            imageVector = Icons.Outlined.Chat,
                            contentDescription = "start chatting",
                        )
                    }
                }

                else -> {
                    IconButton(
                        enabled = false,
                        onClick = {},
                        modifier = Modifier
                            .aspectRatio(1f)
                            .weight(1f)
                    ) {
                        Icon(
                            imageVector = Icons.Outlined.Schedule,
                            contentDescription = "pending",
                        )
                    }
                }
            }
            if (initState == ModelInitState.Downloading ||
                initState == ModelInitState.Paused ||
                initState == ModelInitState.Finished
            ) {
                IconButton(
                    onClick = { isDeletingModel = true },
                    modifier = Modifier
                        .aspectRatio(1f)
                        .weight(1f)
                ) {
                    Icon(
                        imageVector = Icons.Outlined.Delete,
                        // Was "start downloading" (copy-paste slip), which made
                        // accessibility services announce the wrong action.
                        contentDescription = "delete model",
                        tint = MaterialTheme.colorScheme.error
                    )
                }
            }
        }
        // A zero or negative total would turn the division into NaN/Infinity
        // (or a negative fraction); show an empty bar in that case.
        val total = modelState.total.value
        val fraction = if (total > 0) modelState.progress.value.toFloat() / total else 0f
        LinearProgressIndicator(
            progress = fraction,
            modifier = Modifier.fillMaxWidth()
        )
        if (isDeletingModel) {
            Row(
                horizontalArrangement = Arrangement.End,
                verticalAlignment = Alignment.CenterVertically,
                modifier = Modifier
                    .fillMaxWidth()
                    .wrapContentHeight()
            ) {
                TextButton(onClick = { isDeletingModel = false }) {
                    Text(text = "cancel")
                }
                TextButton(onClick = {
                    isDeletingModel = false
                    modelState.handleClear()
                }) {
                    Text(text = "clear data", color = MaterialTheme.colorScheme.error)
                }
                TextButton(onClick = {
                    isDeletingModel = false
                    modelState.handleDelete()
                }) {
                    Text(text = "delete model", color = MaterialTheme.colorScheme.error)
                }
            }
        }
    }
}

================================================
FILE: android/MLCChat/app/src/main/java/ai/mlc/mlcchat/ui/theme/Color.kt
================================================
package ai.mlc.mlcchat.ui.theme

import androidx.compose.ui.graphics.Color

val Blue10 = Color(0xFF000F5E)
val Blue20 = Color(0xFF001E92)
val Blue30 = Color(0xFF002ECC)
val Blue40 = Color(0xFF1546F6)
val Blue80 = Color(0xFFB8C3FF)
val Blue90 = Color(0xFFDDE1FF)

val DarkBlue10 = Color(0xFF00036B)
val DarkBlue20 = Color(0xFF000BA6)
val DarkBlue30 = Color(0xFF1026D3)
val DarkBlue40 = Color(0xFF3648EA)
val DarkBlue80 = Color(0xFFBBC2FF)
val DarkBlue90 = Color(0xFFDEE0FF)

val Yellow10 = Color(0xFF261900)
val Yellow20 = Color(0xFF402D00)
val Yellow30 = Color(0xFF5C4200)
val Yellow40 = Color(0xFF7A5900)
val Yellow80 = Color(0xFFFABD1B)
val Yellow90 = Color(0xFFFFDE9C)

val Red10 = Color(0xFF410001)
val Red20 = Color(0xFF680003)
val Red30 = Color(0xFF930006)
val Red40 = Color(0xFFBA1B1B)
val Red80 = Color(0xFFFFB4A9)
val Red90 = Color(0xFFFFDAD4)

val Grey10 = Color(0xFF191C1D)
val Grey20 = Color(0xFF2D3132)
val Grey80 = Color(0xFFC4C7C7)
val Grey90 = Color(0xFFE0E3E3)
val Grey95 = Color(0xFFEFF1F1)
val Grey99 = Color(0xFFFBFDFD)

val BlueGrey30 = Color(0xFF45464F)
val BlueGrey50 = Color(0xFF767680)
val BlueGrey60 = Color(0xFF90909A)
val BlueGrey80 = Color(0xFFC6C5D0)
val BlueGrey90 = Color(0xFFE2E1EC)

================================================
FILE: android/MLCChat/app/src/main/java/ai/mlc/mlcchat/ui/theme/Theme.kt
================================================
package ai.mlc.mlcchat.ui.theme

import android.app.Activity
import android.os.Build
import androidx.compose.foundation.isSystemInDarkTheme
import androidx.compose.material3.MaterialTheme
import androidx.compose.material3.darkColorScheme
import androidx.compose.material3.dynamicDarkColorScheme
import androidx.compose.material3.dynamicLightColorScheme
import androidx.compose.material3.lightColorScheme
import androidx.compose.runtime.Composable
import androidx.compose.runtime.SideEffect
import androidx.compose.ui.graphics.Color
import androidx.compose.ui.graphics.toArgb
import androidx.compose.ui.platform.LocalContext
import androidx.compose.ui.platform.LocalView
import androidx.core.view.WindowCompat

// Static palette used for the dark theme when dynamic color is not applied.
private val DarkColorScheme = darkColorScheme(
    primary = Blue80,
    onPrimary = Blue20,
    primaryContainer = Blue30,
    onPrimaryContainer = Blue90,
    inversePrimary = Blue40,

    secondary = DarkBlue80,
    onSecondary = DarkBlue20,
    secondaryContainer = DarkBlue30,
    onSecondaryContainer = DarkBlue90,

    tertiary = Yellow80,
    onTertiary = Yellow20,
    tertiaryContainer = Yellow30,
    onTertiaryContainer = Yellow90,

    error = Red80,
    onError = Red20,
    errorContainer = Red30,
    onErrorContainer = Red90,

    background = Grey10,
    onBackground = Grey90,
    surface = Grey10,
    onSurface = Grey80,
    inverseSurface = Grey90,
    inverseOnSurface = Grey20,
    surfaceVariant = BlueGrey30,
    onSurfaceVariant = BlueGrey80,
    outline = BlueGrey60
)

// Static palette used for the light theme when dynamic color is not applied.
private val LightColorScheme = lightColorScheme(
    primary = Blue40,
    onPrimary = Color.White,
    primaryContainer = Blue90,
    onPrimaryContainer = Blue10,
    inversePrimary = Blue80,

    secondary = DarkBlue40,
    onSecondary = Color.White,
    secondaryContainer = DarkBlue90,
    onSecondaryContainer = DarkBlue10,

    tertiary = Yellow40,
    onTertiary = Color.White,
    tertiaryContainer = Yellow90,
    onTertiaryContainer = Yellow10,

    error = Red40,
    onError = Color.White,
    errorContainer = Red90,
    onErrorContainer = Red10,

    background = Grey99,
    onBackground = Grey10,
    surface = Grey99,
    onSurface = Grey10,
    inverseSurface = Grey20,
    inverseOnSurface = Grey95,
    surfaceVariant = BlueGrey90,
    onSurfaceVariant = BlueGrey30,
    outline = BlueGrey50
)

/**
 * App-wide Material theme.
 *
 * Chooses a dynamic color scheme when [dynamicColor] is set and the device
 * runs API level S or newer; otherwise falls back to the static dark or light
 * scheme. Outside of edit mode it also paints the status bar with the scheme's
 * primary color and sets the light-status-bar appearance flag to [darkTheme].
 *
 * @param darkTheme whether to use the dark variant; defaults to the system setting.
 * @param dynamicColor whether dynamic color may be used.
 * @param content composable content rendered inside the theme.
 */
@Composable
fun MLCChatTheme(
    darkTheme: Boolean = isSystemInDarkTheme(),
    // Dynamic color is available on Android 12+
    dynamicColor: Boolean = true,
    content: @Composable () -> Unit
) {
    val supportsDynamic = Build.VERSION.SDK_INT >= Build.VERSION_CODES.S
    val colorScheme = if (dynamicColor && supportsDynamic) {
        val ctx = LocalContext.current
        if (darkTheme) {
            dynamicDarkColorScheme(ctx)
        } else {
            dynamicLightColorScheme(ctx)
        }
    } else if (darkTheme) {
        DarkColorScheme
    } else {
        LightColorScheme
    }

    val hostView = LocalView.current
    if (!hostView.isInEditMode) {
        SideEffect {
            val activityWindow = (hostView.context as Activity).window
            activityWindow.statusBarColor = colorScheme.primary.toArgb()
            val insetsController = WindowCompat.getInsetsController(activityWindow, hostView)
            insetsController.isAppearanceLightStatusBars = darkTheme
        }
    }

    MaterialTheme(
        colorScheme = colorScheme,
        typography = Typography,
        content = content
    )
}

================================================
FILE: android/MLCChat/app/src/main/java/ai/mlc/mlcchat/ui/theme/Type.kt
================================================
package ai.mlc.mlcchat.ui.theme

import androidx.compose.material3.Typography
import androidx.compose.ui.text.TextStyle
import androidx.compose.ui.text.font.FontFamily
import androidx.compose.ui.text.font.FontWeight
import androidx.compose.ui.unit.sp

// Material typography for the app; only bodyLarge is overridden here.
val Typography = Typography(
    bodyLarge = TextStyle(
        fontFamily = FontFamily.Default,
        fontWeight = FontWeight.Normal,
        fontSize = 16.sp,
        lineHeight = 24.sp,
        letterSpacing = 0.5.sp
    )
    /* Other default text styles to override
    titleLarge = TextStyle(
        fontFamily = FontFamily.Default,
        fontWeight = FontWeight.Normal,
        fontSize = 22.sp,
        lineHeight = 28.sp,
        letterSpacing = 0.sp
    ),
    labelSmall = TextStyle(
        fontFamily = FontFamily.Default,
        fontWeight = FontWeight.Medium,
        fontSize = 11.sp,
        lineHeight = 16.sp,
        letterSpacing = 0.5.sp
    )
    */
)
================================================ FILE: android/MLCChat/app/src/main/res/drawable/ic_android_black_24dp.xml ================================================ ================================================ FILE: android/MLCChat/app/src/main/res/drawable/mlc_logo_108.xml ================================================ ================================================ FILE: android/MLCChat/app/src/main/res/values/colors.xml ================================================ #FFBB86FC #FF6200EE #FF3700B3 #FF03DAC5 #FF018786 #FF000000 #FFFFFFFF ================================================ FILE: android/MLCChat/app/src/main/res/values/strings.xml ================================================ MLCChat ================================================ FILE: android/MLCChat/app/src/main/res/values/themes.xml ================================================