Repository: NexaAI/nexa-sdk Branch: main Commit: 9b0e5e6d8d93 Files: 489 Total size: 5.8 MB Directory structure: gitextract_dr3ob_1u/ ├── .github/ │ ├── ISSUE_TEMPLATE/ │ │ ├── bug_report.yml │ │ ├── feature_request.yml │ │ └── model_request.yml │ └── workflows/ │ ├── build-linux.yml │ ├── build-macos.yml │ ├── build-windows.yml │ ├── lint.yml │ └── test.yml ├── .gitignore ├── LICENSE ├── NOTICE ├── README.md ├── README_zh.md ├── bindings/ │ ├── android/ │ │ ├── .gitignore │ │ ├── README.md │ │ ├── README_zh.md │ │ ├── app/ │ │ │ ├── .gitignore │ │ │ ├── build.gradle.kts │ │ │ ├── libs/ │ │ │ │ ├── okdownload-core.aar │ │ │ │ ├── okdownload-ktx.aar │ │ │ │ ├── okdownload-okhttp.aar │ │ │ │ └── okdownload-sqlite.aar │ │ │ ├── proguard-rules.pro │ │ │ ├── src/ │ │ │ │ ├── androidTest/ │ │ │ │ │ └── java/ │ │ │ │ │ └── com/ │ │ │ │ │ └── nexa/ │ │ │ │ │ └── demo/ │ │ │ │ │ └── ExampleInstrumentedTest.kt │ │ │ │ ├── main/ │ │ │ │ │ ├── AndroidManifest.xml │ │ │ │ │ ├── assets/ │ │ │ │ │ │ └── model_list.json │ │ │ │ │ ├── java/ │ │ │ │ │ │ └── com/ │ │ │ │ │ │ └── nexa/ │ │ │ │ │ │ └── demo/ │ │ │ │ │ │ ├── ChatAdapter.kt │ │ │ │ │ │ ├── FileConfig.kt │ │ │ │ │ │ ├── GenerationConfigSample.kt │ │ │ │ │ │ ├── MainActivity.kt │ │ │ │ │ │ ├── MyApplication.kt │ │ │ │ │ │ ├── activity/ │ │ │ │ │ │ │ └── FileContentActivity.kt │ │ │ │ │ │ ├── bean/ │ │ │ │ │ │ │ ├── DownloadFileConfig.kt │ │ │ │ │ │ │ ├── DownloadState.kt │ │ │ │ │ │ │ ├── DownloadableFile.kt │ │ │ │ │ │ │ ├── ModelData.kt │ │ │ │ │ │ │ ├── NexaManifestBean.kt │ │ │ │ │ │ │ └── S3FileBean.kt │ │ │ │ │ │ ├── listeners/ │ │ │ │ │ │ │ └── CustomDialogInterface.kt │ │ │ │ │ │ ├── ui/ │ │ │ │ │ │ │ └── theme/ │ │ │ │ │ │ │ ├── Color.kt │ │ │ │ │ │ │ ├── Theme.kt │ │ │ │ │ │ │ └── Type.kt │ │ │ │ │ │ └── utils/ │ │ │ │ │ │ ├── BugFixTest.kt │ │ │ │ │ │ ├── ExecShell.java │ │ │ │ │ │ ├── ImgUtil.kt │ │ │ │ │ │ ├── KeyboardUtil.kt │ │ │ │ │ │ ├── KotlinUtil.kt │ │ │ │ │ │ ├── MD5Utils.kt │ │ │ │ │ │ ├── 
ModelFileListingUtil.kt │ │ │ │ │ │ ├── PermissionUtil.kt │ │ │ │ │ │ ├── SharePreferenceKeys.kt │ │ │ │ │ │ └── WavRecorder.kt │ │ │ │ │ └── res/ │ │ │ │ │ ├── color/ │ │ │ │ │ │ └── btn_enable_color.xml │ │ │ │ │ ├── drawable/ │ │ │ │ │ │ ├── bg_ai_message.xml │ │ │ │ │ │ ├── bg_audio_background.xml │ │ │ │ │ │ ├── bg_input_box_background.xml │ │ │ │ │ │ ├── bg_user_message.xml │ │ │ │ │ │ ├── btn_rounded_black_bg_18.xml │ │ │ │ │ │ ├── btn_rounded_border.xml │ │ │ │ │ │ ├── btn_rounded_border_18.xml │ │ │ │ │ │ ├── btn_rounded_border_gray.xml │ │ │ │ │ │ ├── ic_launcher_background.xml │ │ │ │ │ │ ├── ic_launcher_foreground.xml │ │ │ │ │ │ ├── icon_back.xml │ │ │ │ │ │ ├── icon_close.xml │ │ │ │ │ │ ├── icon_database.xml │ │ │ │ │ │ └── icon_file.xml │ │ │ │ │ ├── layout/ │ │ │ │ │ │ ├── activity_file_content.xml │ │ │ │ │ │ ├── activity_main.xml │ │ │ │ │ │ ├── dialog_select_plugin_id.xml │ │ │ │ │ │ ├── item_ai_message.xml │ │ │ │ │ │ ├── item_assistant_image_message.xml │ │ │ │ │ │ ├── item_audio_scroll.xml │ │ │ │ │ │ ├── item_image_item_message.xml │ │ │ │ │ │ ├── item_image_message.xml │ │ │ │ │ │ ├── item_image_scroll.xml │ │ │ │ │ │ ├── item_model.xml │ │ │ │ │ │ ├── item_profile_message.xml │ │ │ │ │ │ ├── item_user_message.xml │ │ │ │ │ │ └── menu_layout.xml │ │ │ │ │ ├── mipmap-anydpi-v26/ │ │ │ │ │ │ ├── ic_launcher.xml │ │ │ │ │ │ └── ic_launcher_round.xml │ │ │ │ │ ├── values/ │ │ │ │ │ │ ├── colors.xml │ │ │ │ │ │ ├── strings.xml │ │ │ │ │ │ └── themes.xml │ │ │ │ │ └── xml/ │ │ │ │ │ ├── backup_rules.xml │ │ │ │ │ ├── data_extraction_rules.xml │ │ │ │ │ └── file_paths.xml │ │ │ │ └── test/ │ │ │ │ └── java/ │ │ │ │ └── com/ │ │ │ │ └── nexa/ │ │ │ │ └── demo/ │ │ │ │ └── ExampleUnitTest.kt │ │ │ └── test │ │ ├── build.gradle.kts │ │ ├── gradle/ │ │ │ ├── libs.versions.toml │ │ │ └── wrapper/ │ │ │ ├── gradle-wrapper.jar │ │ │ └── gradle-wrapper.properties │ │ ├── gradle.properties │ │ ├── gradlew │ │ ├── gradlew.bat │ │ ├── settings.gradle.kts │ │ 
└── transform/ │ │ ├── .gitignore │ │ ├── build.gradle.kts │ │ ├── consumer-rules.pro │ │ ├── proguard-rules.pro │ │ └── src/ │ │ ├── androidTest/ │ │ │ └── java/ │ │ │ └── ai/ │ │ │ └── nexa/ │ │ │ └── transform/ │ │ │ └── ExampleInstrumentedTest.kt │ │ ├── main/ │ │ │ ├── AndroidManifest.xml │ │ │ └── java/ │ │ │ └── ai/ │ │ │ └── nexa/ │ │ │ └── transform/ │ │ │ ├── DownloadUtils.kt │ │ │ ├── ImageUtils.kt │ │ │ └── VlmContentTransfer.kt │ │ └── test/ │ │ └── java/ │ │ └── ai/ │ │ └── nexa/ │ │ └── transform/ │ │ └── ExampleUnitTest.kt │ ├── ios/ │ │ ├── .gitignore │ │ ├── README.md │ │ └── README_zh.md │ └── python/ │ ├── .gitignore │ ├── README.md │ ├── README_env_setup.md │ ├── README_zh.md │ ├── asr.py │ ├── cv_ocr.py │ ├── diarize.py │ ├── embedder.py │ ├── env_setup.ps1 │ ├── image_gen.py │ ├── llm.py │ ├── models.py │ ├── notebook/ │ │ ├── macos.ipynb │ │ ├── windows(arm64).ipynb │ │ └── windows(x64).ipynb │ ├── rerank.py │ ├── tts.py │ └── vlm.py ├── cookbook/ │ ├── .gitignore │ ├── PC/ │ │ ├── Agent-Granite/ │ │ │ ├── Python-Binding-Example/ │ │ │ │ ├── README.md │ │ │ │ ├── agent_nexa.py │ │ │ │ ├── gradio_ui.py │ │ │ │ └── requirements.txt │ │ │ ├── README.md │ │ │ └── Serve-Example/ │ │ │ ├── README.md │ │ │ ├── agent_nexa.py │ │ │ ├── gradio_ui.py │ │ │ └── requirements.txt │ │ ├── Multimodal-Qwen3VL/ │ │ │ ├── Python-Binding-Example/ │ │ │ │ ├── gradio_ui.py │ │ │ │ ├── vlm_service.py │ │ │ │ └── vlm_viewmodel.py │ │ │ └── README.md │ │ ├── RAG-LLM/ │ │ │ ├── Python-Binding-Example/ │ │ │ │ ├── README.md │ │ │ │ ├── gradio_ui.py │ │ │ │ └── rag_nexa.py │ │ │ ├── README.md │ │ │ └── Serve-Example/ │ │ │ ├── README.md │ │ │ ├── gradio_ui.py │ │ │ └── rag_nexa.py │ │ ├── RAG-VLM/ │ │ │ ├── .gitignore │ │ │ ├── README.md │ │ │ ├── gradio_ui.py │ │ │ ├── rag_nexa.py │ │ │ └── requirements.txt │ │ ├── Web-Agent-Qwen3VL/ │ │ │ └── README.md │ │ ├── function-calling/ │ │ │ ├── .gitignore │ │ │ ├── README.md │ │ │ ├── app/ │ │ │ │ ├── flask_ui.py │ │ │ │ 
├── image_utils.py │ │ │ │ └── templates/ │ │ │ │ └── chat.html │ │ │ ├── main.py │ │ │ └── requirements.txt │ │ └── live-translate/ │ │ ├── README.md │ │ ├── app.py │ │ ├── index.html │ │ └── requirements.txt │ ├── README.md │ ├── README_zh.md │ ├── ag2/ │ │ ├── README.md │ │ ├── example.py │ │ └── requirements.txt │ ├── android/ │ │ └── RAG-LLM/ │ │ ├── .gitignore │ │ ├── README.md │ │ ├── app/ │ │ │ ├── .gitignore │ │ │ ├── build.gradle.kts │ │ │ ├── libs/ │ │ │ │ ├── okdownload-core.aar │ │ │ │ ├── okdownload-ktx.aar │ │ │ │ ├── okdownload-okhttp.aar │ │ │ │ └── okdownload-sqlite.aar │ │ │ ├── proguard-rules.pro │ │ │ ├── src/ │ │ │ │ ├── androidTest/ │ │ │ │ │ └── java/ │ │ │ │ │ └── com/ │ │ │ │ │ └── nexa/ │ │ │ │ │ └── demo/ │ │ │ │ │ └── ExampleInstrumentedTest.kt │ │ │ │ ├── main/ │ │ │ │ │ ├── AndroidManifest.xml │ │ │ │ │ ├── assets/ │ │ │ │ │ │ └── model_list.json │ │ │ │ │ ├── java/ │ │ │ │ │ │ └── com/ │ │ │ │ │ │ └── nexa/ │ │ │ │ │ │ └── demo/ │ │ │ │ │ │ ├── ChatAdapter.kt │ │ │ │ │ │ ├── FileConfig.kt │ │ │ │ │ │ ├── GenerationConfigSample.kt │ │ │ │ │ │ ├── MainActivity.kt │ │ │ │ │ │ ├── MyApplication.kt │ │ │ │ │ │ ├── RAGConfig.kt │ │ │ │ │ │ ├── activity/ │ │ │ │ │ │ │ ├── FileContentActivity.kt │ │ │ │ │ │ │ └── FolderActivity.kt │ │ │ │ │ │ ├── adapter/ │ │ │ │ │ │ │ ├── ChunkAdapter.kt │ │ │ │ │ │ │ ├── IndexViewPagerAdapter.kt │ │ │ │ │ │ │ ├── SelectImagesAdapter.kt │ │ │ │ │ │ │ ├── SelectVideosAdapter.kt │ │ │ │ │ │ │ └── ShowFileDirAdapter.kt │ │ │ │ │ │ ├── bean/ │ │ │ │ │ │ │ ├── DownloadFileConfig.kt │ │ │ │ │ │ │ ├── DownloadState.kt │ │ │ │ │ │ │ ├── DownloadableFile.kt │ │ │ │ │ │ │ ├── EmbedFileBean.kt │ │ │ │ │ │ │ ├── EmbedResultBean.kt │ │ │ │ │ │ │ ├── IndexSelectedDirBean.kt │ │ │ │ │ │ │ ├── ModelData.kt │ │ │ │ │ │ │ ├── NexaManifestBean.kt │ │ │ │ │ │ │ └── S3FileBean.kt │ │ │ │ │ │ ├── fragments/ │ │ │ │ │ │ │ ├── IndexFragment.kt │ │ │ │ │ │ │ ├── IndexedImagesFragment.kt │ │ │ │ │ │ │ ├── IndexedVideosFragment.kt │ 
│ │ │ │ │ │ └── UpdatePercentFragment.kt │ │ │ │ │ │ ├── listeners/ │ │ │ │ │ │ │ └── CustomDialogInterface.kt │ │ │ │ │ │ ├── ui/ │ │ │ │ │ │ │ └── theme/ │ │ │ │ │ │ │ ├── Color.kt │ │ │ │ │ │ │ ├── Theme.kt │ │ │ │ │ │ │ └── Type.kt │ │ │ │ │ │ ├── utils/ │ │ │ │ │ │ │ ├── BugFixTest.kt │ │ │ │ │ │ │ ├── DensityUtil.kt │ │ │ │ │ │ │ ├── ExecShell.java │ │ │ │ │ │ │ ├── GenerateEmbedStringsUtil.kt │ │ │ │ │ │ │ ├── ImgUtil.kt │ │ │ │ │ │ │ ├── KeyboardUtil.kt │ │ │ │ │ │ │ ├── KotlinUtil.kt │ │ │ │ │ │ │ ├── MD5Utils.kt │ │ │ │ │ │ │ ├── ModelFileListingUtil.kt │ │ │ │ │ │ │ ├── PermissionUtil.kt │ │ │ │ │ │ │ ├── SharePreferenceKeys.kt │ │ │ │ │ │ │ └── WavRecorder.kt │ │ │ │ │ │ └── widget/ │ │ │ │ │ │ └── WrapRadioGroup.kt │ │ │ │ │ └── res/ │ │ │ │ │ ├── color/ │ │ │ │ │ │ └── btn_enable_color.xml │ │ │ │ │ ├── drawable/ │ │ │ │ │ │ ├── bg_ai_message.xml │ │ │ │ │ │ ├── bg_audio_background.xml │ │ │ │ │ │ ├── bg_circle_gray.xml │ │ │ │ │ │ ├── bg_citations_bottom.xml │ │ │ │ │ │ ├── bg_index_bottom.xml │ │ │ │ │ │ ├── bg_input_box_background.xml │ │ │ │ │ │ ├── bg_round_gray.xml │ │ │ │ │ │ ├── bg_spinner_dropdown.xml │ │ │ │ │ │ ├── bg_topk_bottom.xml │ │ │ │ │ │ ├── bg_user_message.xml │ │ │ │ │ │ ├── btn_bg_enable.xml │ │ │ │ │ │ ├── btn_rounded_black_bg.xml │ │ │ │ │ │ ├── btn_rounded_black_bg_18.xml │ │ │ │ │ │ ├── btn_rounded_border.xml │ │ │ │ │ │ ├── btn_rounded_border_18.xml │ │ │ │ │ │ ├── btn_rounded_border_gray.xml │ │ │ │ │ │ ├── btn_rounded_border_gray_8dp.xml │ │ │ │ │ │ ├── btn_rounded_border_transparent.xml │ │ │ │ │ │ ├── ic_launcher_background.xml │ │ │ │ │ │ ├── ic_launcher_foreground.xml │ │ │ │ │ │ ├── icon_add.xml │ │ │ │ │ │ ├── icon_alert_yellow.xml │ │ │ │ │ │ ├── icon_arrow_down.xml │ │ │ │ │ │ ├── icon_arrow_down_white.xml │ │ │ │ │ │ ├── icon_arrow_right.xml │ │ │ │ │ │ ├── icon_back.xml │ │ │ │ │ │ ├── icon_check.xml │ │ │ │ │ │ ├── icon_clear.xml │ │ │ │ │ │ ├── icon_close.xml │ │ │ │ │ │ ├── icon_config.xml │ │ │ │ │ │ ├── 
icon_database.xml │ │ │ │ │ │ ├── icon_file.xml │ │ │ │ │ │ ├── icon_tag_cpu.xml │ │ │ │ │ │ ├── icon_tag_gpu.xml │ │ │ │ │ │ ├── icon_tag_npu.xml │ │ │ │ │ │ ├── icon_text.xml │ │ │ │ │ │ ├── icon_topk_config.xml │ │ │ │ │ │ ├── img_config.xml │ │ │ │ │ │ ├── img_config_false.xml │ │ │ │ │ │ ├── img_config_true.xml │ │ │ │ │ │ ├── rb_bg.xml │ │ │ │ │ │ ├── rb_bg_false.xml │ │ │ │ │ │ ├── rb_bg_true.xml │ │ │ │ │ │ └── thumb_topk.xml │ │ │ │ │ ├── layout/ │ │ │ │ │ │ ├── activity_file_content.xml │ │ │ │ │ │ ├── activity_folder.xml │ │ │ │ │ │ ├── activity_main.xml │ │ │ │ │ │ ├── dialog_config.xml │ │ │ │ │ │ ├── dialog_select_plugin_id.xml │ │ │ │ │ │ ├── dialog_topk_config.xml │ │ │ │ │ │ ├── fragment_index.xml │ │ │ │ │ │ ├── fragment_indexed_images.xml │ │ │ │ │ │ ├── fragment_indexed_videos.xml │ │ │ │ │ │ ├── item_ai_message.xml │ │ │ │ │ │ ├── item_audio_scroll.xml │ │ │ │ │ │ ├── item_citation.xml │ │ │ │ │ │ ├── item_image_item_message.xml │ │ │ │ │ │ ├── item_image_message.xml │ │ │ │ │ │ ├── item_image_scroll.xml │ │ │ │ │ │ ├── item_model.xml │ │ │ │ │ │ ├── item_profile_message.xml │ │ │ │ │ │ ├── item_select_file.xml │ │ │ │ │ │ ├── item_select_image.xml │ │ │ │ │ │ ├── item_spinner_dropdown.xml │ │ │ │ │ │ ├── item_user_message.xml │ │ │ │ │ │ ├── menu_layout.xml │ │ │ │ │ │ └── spinner_custom.xml │ │ │ │ │ ├── mipmap-anydpi-v26/ │ │ │ │ │ │ ├── ic_launcher.xml │ │ │ │ │ │ └── ic_launcher_round.xml │ │ │ │ │ ├── values/ │ │ │ │ │ │ ├── attrs.xml │ │ │ │ │ │ ├── colors.xml │ │ │ │ │ │ ├── ids.xml │ │ │ │ │ │ ├── strings.xml │ │ │ │ │ │ ├── styles.xml │ │ │ │ │ │ └── themes.xml │ │ │ │ │ └── xml/ │ │ │ │ │ ├── backup_rules.xml │ │ │ │ │ ├── data_extraction_rules.xml │ │ │ │ │ └── file_paths.xml │ │ │ │ └── test/ │ │ │ │ └── java/ │ │ │ │ └── com/ │ │ │ │ └── nexa/ │ │ │ │ └── demo/ │ │ │ │ └── ExampleUnitTest.kt │ │ │ └── test │ │ ├── build.gradle.kts │ │ ├── gradle/ │ │ │ ├── libs.versions.toml │ │ │ └── wrapper/ │ │ │ ├── gradle-wrapper.jar │ │ │ 
└── gradle-wrapper.properties │ │ ├── gradle.properties │ │ ├── gradlew │ │ ├── gradlew.bat │ │ ├── settings.gradle.kts │ │ └── transform/ │ │ ├── .gitignore │ │ ├── build.gradle.kts │ │ ├── consumer-rules.pro │ │ ├── proguard-rules.pro │ │ └── src/ │ │ ├── androidTest/ │ │ │ └── java/ │ │ │ └── ai/ │ │ │ └── nexa/ │ │ │ └── transform/ │ │ │ └── ExampleInstrumentedTest.kt │ │ ├── main/ │ │ │ ├── AndroidManifest.xml │ │ │ └── java/ │ │ │ └── ai/ │ │ │ └── nexa/ │ │ │ └── transform/ │ │ │ ├── DownloadUtils.kt │ │ │ ├── ImageUtils.kt │ │ │ └── VlmContentTransfer.kt │ │ └── test/ │ │ └── java/ │ │ └── ai/ │ │ └── nexa/ │ │ └── transform/ │ │ └── ExampleUnitTest.kt │ ├── clawd/ │ │ ├── README.md │ │ └── clawdbot.example.json │ ├── crewai/ │ │ ├── README.md │ │ ├── example.py │ │ └── requirements.txt │ ├── docker/ │ │ └── RAG-VLM/ │ │ ├── Dockerfile │ │ ├── README.md │ │ ├── gradio_ui.py │ │ ├── requirements.txt │ │ └── start.sh │ ├── edsl/ │ │ ├── README.md │ │ ├── example.py │ │ └── requirements.txt │ ├── langchain/ │ │ ├── README.md │ │ ├── example.py │ │ └── requirements.txt │ └── llama-index/ │ ├── README.md │ ├── config.py │ ├── example.py │ └── requirements.txt ├── docs/ │ ├── README.md │ ├── integrate-ag2.md │ ├── integrate-crewai.md │ ├── integrate-langchain.md │ ├── integrate-llamaindex.md │ └── integrate-openwebui.md ├── pyproject.toml ├── runner/ │ ├── .gitignore │ ├── Makefile │ ├── README.md │ ├── README_zh.md │ ├── cmd/ │ │ ├── nexa-cli/ │ │ │ ├── common/ │ │ │ │ ├── log.go │ │ │ │ ├── process.go │ │ │ │ ├── repl.go │ │ │ │ ├── util_darwin.go │ │ │ │ ├── util_linux.go │ │ │ │ └── util_windows.go │ │ │ ├── config.go │ │ │ ├── function_call.go │ │ │ ├── infer.go │ │ │ ├── logic/ │ │ │ │ └── cv.go │ │ │ ├── main.go │ │ │ ├── model.go │ │ │ ├── model_test.go │ │ │ ├── run.go │ │ │ ├── serve.go │ │ │ ├── update.go │ │ │ ├── version.go │ │ │ └── version_test.go │ │ └── nexa-launcher/ │ │ └── main.go │ ├── go.mod │ ├── go.sum │ ├── internal/ │ │ ├── config/ │ │ │ 
├── config.go │ │ │ ├── config_test.go │ │ │ ├── model_mapping.go │ │ │ └── model_mapping_test.go │ │ ├── downloader/ │ │ │ └── http.go │ │ ├── model_hub/ │ │ │ ├── model_hub.go │ │ │ ├── model_hub_hf.go │ │ │ ├── model_hub_localfs.go │ │ │ ├── model_hub_modelscope.go │ │ │ ├── model_hub_s3.go │ │ │ ├── model_hub_test.go │ │ │ └── model_hub_volces.go │ │ ├── readline/ │ │ │ ├── action.go │ │ │ ├── buffer.go │ │ │ ├── history.go │ │ │ ├── readline.go │ │ │ ├── terminal.go │ │ │ ├── terminal_darwin.go │ │ │ ├── terminal_linux.go │ │ │ └── terminal_windows.go │ │ ├── record/ │ │ │ ├── record.go │ │ │ └── stream_record.go │ │ ├── render/ │ │ │ ├── progressbar.go │ │ │ ├── spinner.go │ │ │ ├── theme.go │ │ │ └── theme_test.go │ │ ├── store/ │ │ │ ├── config.go │ │ │ ├── errors.go │ │ │ ├── lock.go │ │ │ ├── manager.go │ │ │ └── model.go │ │ └── types/ │ │ ├── model.go │ │ └── utils.go │ ├── nexa-sdk/ │ │ ├── .gitignore │ │ ├── asr.go │ │ ├── common.go │ │ ├── cv.go │ │ ├── diarize.go │ │ ├── embedder.go │ │ ├── image_gen.go │ │ ├── llm.go │ │ ├── ml.go │ │ ├── reranker.go │ │ ├── tts.go │ │ └── vlm.go │ ├── release/ │ │ ├── darwin/ │ │ │ ├── Info.plist │ │ │ ├── entitlements.plist │ │ │ ├── nexa-icon.icns │ │ │ └── scripts/ │ │ │ ├── notarize.sh │ │ │ ├── postinstall │ │ │ ├── preinstall │ │ │ ├── prepackage.sh │ │ │ ├── sign.sh │ │ │ └── uninstall.sh │ │ ├── linux/ │ │ │ ├── Dockerfile │ │ │ ├── install.sh │ │ │ └── package.sh │ │ └── windows/ │ │ ├── nexa_installer.iss │ │ └── nexa_launcher.py │ ├── server/ │ │ ├── docs/ │ │ │ ├── swagger.go │ │ │ ├── swagger.yaml │ │ │ └── ui/ │ │ │ ├── index.css │ │ │ ├── index.html │ │ │ ├── oauth2-redirect.html │ │ │ ├── swagger-initializer.js │ │ │ ├── swagger-ui-bundle.js │ │ │ ├── swagger-ui-es-bundle-core.js │ │ │ ├── swagger-ui-es-bundle.js │ │ │ ├── swagger-ui-standalone-preset.js │ │ │ ├── swagger-ui.css │ │ │ └── swagger-ui.js │ │ ├── handler/ │ │ │ ├── audio.go │ │ │ ├── audio_test.go │ │ │ ├── chat.go │ │ │ ├── cv.go │ │ 
│ ├── embeddings.go │ │ │ ├── images.go │ │ │ ├── model.go │ │ │ └── reranking.go │ │ ├── middleware/ │ │ │ ├── auth.go │ │ │ ├── cors.go │ │ │ └── gil.go │ │ ├── route.go │ │ ├── server.go │ │ ├── service/ │ │ │ ├── keepalive.go │ │ │ └── service.go │ │ └── utils/ │ │ ├── common.go │ │ └── common_test.go │ └── tests/ │ ├── .gitattributes │ ├── cases/ │ │ ├── __init__.py │ │ ├── asr.py │ │ ├── audio_multi_round.py │ │ ├── base.py │ │ ├── cv.py │ │ ├── image_multi_round.py │ │ ├── multi_round.py │ │ ├── reranker.py │ │ └── single_round.py │ ├── run.py │ └── scripts/ │ ├── __init__.py │ ├── config.py │ ├── log.py │ └── utils.py └── solutions/ ├── README.md └── embedneural/ ├── README.md ├── gradio_ui.py ├── nexa_client.py ├── requirements.txt ├── search.py ├── style.py └── viewmodel.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/ISSUE_TEMPLATE/bug_report.yml ================================================ name: "Bug report" description: "Something isn't working right with NexaSDK" labels: ["bug"] body: - type: markdown attributes: value: | Thanks for reporting a bug in NexaSDK 🙏 Before filing: - Search existing issues to see if this has already been reported. - Try the latest release, if possible. - type: checkboxes id: checks attributes: label: "Pre-checks" options: - label: "I searched existing issues" required: true - label: "I’m using the latest NexaSDK release" required: false - type: textarea id: what-happened attributes: label: "What happened?" description: "Describe the bug and what you expected to happen." placeholder: | When I do something with model X on backend Y, it does Z. I expected it to ... render: text validations: required: true - type: textarea id: steps-to-reproduce attributes: label: "Steps to reproduce" description: "Minimal steps or code to trigger the issue." placeholder: | 1. Install NexaSDK using ... 2. Run: ... 3. 
See error: ... render: markdown validations: required: true - type: textarea id: logs attributes: label: "Logs & stack traces" description: "Paste any relevant logs or error messages." placeholder: | ```text ``` render: text - type: textarea id: models attributes: label: "Model(s) and quantization" description: "Which model(s) and quantization are you using?" placeholder: | e.g. Ministral-3-3B-Instruct-2512 Q4_K_M, AutoNeural-SA8295P int4, Qwen2.5 fp16 render: text - type: input id: nexasdk-version attributes: label: "NexaSDK version" description: 'Output of `nexa version` or `python -c "import nexa; print(nexa.__version__)"` or similar.' placeholder: | e.g. NexaSDK Bridge Version: v1.0.31 NexaSDK CLI Version: v0.2.60 validations: required: true - type: input id: install-method attributes: label: "Install method" description: "How did you install NexaSDK?" placeholder: "pip, conda, docker, from source, other" validations: required: true - type: input id: os attributes: label: "OS and version" placeholder: "e.g. Windows 11, Ubuntu 22.04, macOS 15" validations: required: true - type: input id: hardware attributes: label: "Hardware / accelerator" description: "CPU/GPU/NPU and device details." placeholder: "e.g. RTX 4090, Snapdragon X Elite NPU, SA8295P, M4 Max" - type: textarea id: extra attributes: label: "Additional context" description: "Anything else that might help (configs, screenshots, links)." render: markdown ================================================ FILE: .github/ISSUE_TEMPLATE/feature_request.yml ================================================ name: "Feature request" description: "Request a new feature or improvement for NexaSDK" labels: ["enhancement"] body: - type: markdown attributes: value: | Thanks for helping improve NexaSDK 🙌 Use this template for concrete improvements to the SDK. 
- type: checkboxes id: checks attributes: label: "Pre-checks" options: - label: "I searched existing issues and discussions" required: true - type: textarea id: problem attributes: label: "What problem are you trying to solve?" description: "Describe your use case and what’s painful today." placeholder: | I’m building X (e.g. local copilot on Snapdragon PC) and I’m blocked by Y in NexaSDK. render: text validations: required: true - type: textarea id: solution attributes: label: "What would you like NexaSDK to do?" description: "Describe your ideal solution (APIs, CLI flags, configs, etc.)." placeholder: | Example: - Add a `nexa run --backend npu` option that ... - Provide a Python helper `nexa.auto_backend(...)` that ... render: markdown validations: required: true - type: textarea id: alternatives attributes: label: "Alternatives you've considered" description: "Workarounds or other tools you’ve tried." placeholder: | I can currently do this by ..., but it’s slow / complex / fragile. render: text - type: textarea id: impact attributes: label: "Who does this help, and how much?" description: "Rough sense of impact (devx, perf, device coverage, etc.)." placeholder: | This would be useful for all Windows NPU developers using NexaSDK because ... render: text - type: textarea id: extra attributes: label: "Additional context" description: "Links, mock APIs, diagrams, prior art, etc." render: markdown ================================================ FILE: .github/ISSUE_TEMPLATE/model_request.yml ================================================ name: "Model request" description: "Request support for a new model or variant in NexaSDK" labels: ["model-request"] body: - type: markdown attributes: value: | Request support for a new model to be added to NexaSDK. Please include as much detail as you can so we can prioritize properly. 
- type: checkboxes id: checks attributes: label: "Pre-checks" options: - label: "I searched existing issues and discussions for this model" required: true - type: input id: model-name attributes: label: "Model name" description: "Name of the model you’d like supported." placeholder: "e.g. Mixtral-8x7B, Qwen3-VL-32B, etc." validations: required: true - type: input id: model-source attributes: label: "Model source / link" description: "Link to the model (Hugging Face, GitHub, vendor page, etc.)." placeholder: "https://huggingface.co/..." validations: required: true - type: textarea id: use-cases attributes: label: "Primary use cases" description: "What are you planning to build with this model?" placeholder: | e.g. On-device coding assistant, in-car multimodal assistant, document RAG, etc. render: text validations: required: true - type: textarea id: hardware attributes: label: "Target hardware / backend" description: "Where do you want this model to run?" placeholder: | e.g. Snapdragon X Elite NPU, SA8295P, RTX 4080, Apple M4, CPU-only, etc. render: text validations: required: true - type: textarea id: extra attributes: label: "Additional context" description: "Benchmarks, papers, licensing notes, or anything else that helps us evaluate." 
render: markdown ================================================ FILE: .github/workflows/build-linux.yml ================================================ name: NexaCLI on Linux on: push: tags: - "v*" jobs: build-cli: name: Build ${{ matrix.runner }} runs-on: ${{ matrix.runner }} strategy: fail-fast: false matrix: include: - runner: ubuntu-22.04 arch: x86_64 - runner: ubuntu-22.04-arm arch: arm64 env: VERSION: ${{ github.ref_name }} steps: - name: Checkout repository uses: actions/checkout@v4 - name: Setup Go uses: actions/setup-go@v5 with: go-version: 1.25.4 cache-dependency-path: runner/go.sum - name: Download runtime run: | cd runner make download - name: Build Nexa Cli run: | cd runner make build rm build/ml.h - name: Upload Artifact uses: actions/upload-artifact@v4 with: name: "nexa-cli_linux_${{ matrix.arch }}" path: runner/build include-hidden-files: true package-release: name: Package Release ${{ matrix.runner }} needs: build-cli runs-on: ${{ matrix.runner }} strategy: fail-fast: false matrix: include: - runner: ubuntu-22.04 arch: x86_64 - runner: ubuntu-22.04-arm arch: arm64 env: VERSION: ${{ github.ref_name }} steps: - name: Checkout repository uses: actions/checkout@v4 - name: Download build artifacts uses: actions/download-artifact@v4 with: name: "nexa-cli_linux_${{ matrix.arch }}" path: artifacts - name: Create Self-Contained Installer Script id: package run: | chmod +x runner/release/linux/package.sh ./runner/release/linux/package.sh artifacts \ runner/release/linux/install.sh \ nexa-cli_linux_${{ matrix.arch }}.sh - name: Create GitHub Release uses: softprops/action-gh-release@v2 with: repository: NexaAI/nexa-sdk tag_name: ${{ env.VERSION }} files: | nexa-cli_linux_${{ matrix.arch }}.sh draft: ${{ contains(github.ref, '-rc') }} generate_release_notes: ${{ matrix.arch == 'x86_64' }} make_latest: ${{ !contains(github.ref, '-rc') }} env: GITHUB_TOKEN: ${{ secrets.ZACK_ACCESS_TOKEN }} - name: Configure AWS credentials if: ${{ !contains(github.ref, '-rc') 
}} uses: aws-actions/configure-aws-credentials@v4 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} aws-region: us-west-1 - name: Sync to S3 if: ${{ !contains(github.ref, '-rc') }} shell: bash run: | aws s3 cp "nexa-cli_linux_${{ matrix.arch }}.sh" "s3://nexa-model-hub-bucket/public/nexa_sdk/downloads/nexa-cli_linux_${{ matrix.arch }}.sh" docker-release: name: Docker Release needs: build-cli runs-on: ubuntu-22.04 env: VERSION: ${{ github.ref_name }} steps: - name: Checkout repository uses: actions/checkout@v4 - name: Download build artifacts uses: actions/download-artifact@v4 with: path: artifacts - name: Prepare Build Context run: | mkdir -p runner/release/linux mv artifacts/nexa-cli_linux_x86_64 runner/release/linux/nexa_sdk.amd64 mv artifacts/nexa-cli_linux_arm64 runner/release/linux/nexa_sdk.arm64 chmod +x runner/release/linux/*/nexa chmod +x runner/release/linux/*/nexa-cli - name: Login to Docker Hub uses: docker/login-action@v3 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - name: Set up QEMU uses: docker/setup-qemu-action@v3 - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - name: Build and Push Docker Image (Base) uses: docker/build-push-action@v6 with: context: runner/release/linux push: true platforms: linux/amd64,linux/arm64 tags: | nexa4ai/nexasdk${{ contains(github.ref, '-rc') && '-test' || '' }}:${{ env.VERSION }} ${{ !contains(github.ref, '-rc') && 'nexa4ai/nexasdk:latest' || '' }} - name: Build and Push Docker Image (Cuda) uses: docker/build-push-action@v6 with: context: runner/release/linux push: true platforms: linux/amd64 build-args: | BASE_IMAGE=nvidia/cuda:12.9.1-runtime-ubuntu22.04 tags: | nexa4ai/nexasdk${{ contains(github.ref, '-rc') && '-test' || '' }}:${{ env.VERSION }}-cuda ${{ !contains(github.ref, '-rc') && 'nexa4ai/nexasdk:latest-cuda' || '' }} ================================================ FILE: 
.github/workflows/build-macos.yml ================================================ name: NexaCLI on MacOS on: push: tags: - "v*" jobs: build-cli: name: macos_${{ matrix.arch }} runs-on: ${{ matrix.runner }} strategy: fail-fast: false matrix: include: - runner: macos-15-intel arch: x86_64 - runner: macos-15 arch: arm64 env: VERSION: ${{ github.ref_name }} steps: - name: Checkout repository uses: actions/checkout@v4 - uses: actions/setup-go@v5 with: go-version: 1.25.4 cache-dependency-path: runner/go.sum - name: Download runtime run: | cd runner make download - name: Build Nexa Cli run: | cd runner make build rm build/ml.h - name: Upload Artifact uses: actions/upload-artifact@v4 with: name: nexa-cli_macos_${{ matrix.arch }} path: runner/build include-hidden-files: true package-and-release: name: Package for macos_${{ matrix.arch }} needs: build-cli runs-on: ${{ matrix.runner }} strategy: fail-fast: false matrix: include: - runner: macos-15-intel arch: x86_64 - runner: macos-15 arch: arm64 env: VERSION: ${{ github.ref_name }} steps: - name: Checkout repository uses: actions/checkout@v4 - name: Download build artifacts uses: actions/download-artifact@v4 with: name: nexa-cli_macos_${{ matrix.arch }} path: artifacts - name: Prepare file structure for packaging id: prep_files run: | chmod +x runner/release/darwin/scripts/prepackage.sh bash runner/release/darwin/scripts/prepackage.sh "${{ env.VERSION }}" - name: Fix dylib Linkages (RPATH) run: | RESOURCES_PATH="${{ env.APP_PATH }}/Contents/Resources" install_name_tool -add_rpath "@loader_path" "${RESOURCES_PATH}/nexa-cli" - name: Import Code Signing Certificates env: APP_CERTIFICATE_BASE64: ${{ secrets.APPLE_CERTIFICATE_BASE64 }} APP_CERTIFICATE_PASSWORD: ${{ secrets.APPLE_CERTIFICATE_PASSWORD }} INSTALLER_CERTIFICATE_BASE64: ${{ secrets.APPLE_INSTALLER_CERTIFICATE_BASE64 }} INSTALLER_CERTIFICATE_PASSWORD: ${{ secrets.APPLE_CERTIFICATE_PASSWORD }} run: | echo $APP_CERTIFICATE_BASE64 | base64 --decode > app_certificate.p12 
echo $INSTALLER_CERTIFICATE_BASE64 | base64 --decode > installer_certificate.p12 security create-keychain -p "" build.keychain security default-keychain -s build.keychain security unlock-keychain -p "" build.keychain security import app_certificate.p12 -k build.keychain -P $APP_CERTIFICATE_PASSWORD -T /usr/bin/codesign security import installer_certificate.p12 -k build.keychain -P $INSTALLER_CERTIFICATE_PASSWORD -T /usr/bin/productsign security set-key-partition-list -S apple-tool:,apple:,codesign: -s -k "" build.keychain - name: Sign binaries and libraries run: | chmod +x runner/release/darwin/scripts/sign.sh bash runner/release/darwin/scripts/sign.sh "${{ env.APP_PATH }}" "${{ secrets.APPLE_ID_APPLICATION }}" - name: Build PKG run: | pkgbuild --root "${{ steps.prep_files.outputs.STAGING_DIR }}" \ --scripts "${{ steps.prep_files.outputs.SCRIPTS_DIR }}" \ --identifier "com.nexaai.nexa-sdk" \ --version "${{ env.VERSION }}" \ --install-location / \ "artifacts/nexa-cli_macos_${{ matrix.arch }}-unsigned.pkg" - name: Productsign PKG run: | productsign --sign "${{ secrets.APPLE_ID_INSTALLER }}" --timestamp "artifacts/nexa-cli_macos_${{ matrix.arch }}-unsigned.pkg" "artifacts/nexa-cli_macos_${{ matrix.arch }}.pkg" pkgutil --check-signature "artifacts/nexa-cli_macos_${{ matrix.arch }}.pkg" rm "artifacts/nexa-cli_macos_${{ matrix.arch }}-unsigned.pkg" - name: Notarize & Staple PKG run: | chmod +x runner/release/darwin/scripts/notarize.sh bash runner/release/darwin/scripts/notarize.sh \ "artifacts/nexa-cli_macos_${{ matrix.arch }}.pkg" \ "${{ secrets.APPLE_ID }}" \ "${{ secrets.APPLE_ID_PASSWORD }}" \ "${{ secrets.APPLE_TEAM_ID }}" - name: Create GitHub Release uses: softprops/action-gh-release@v2 with: repository: NexaAI/nexa-sdk tag_name: ${{ env.VERSION }} files: | artifacts/nexa-cli_macos_${{ matrix.arch }}.pkg draft: ${{ contains(github.ref, '-rc') }} generate_release_notes: false make_latest: ${{ !contains(github.ref, '-rc') }} env: GITHUB_TOKEN: ${{ 
secrets.ZACK_ACCESS_TOKEN }} - name: Configure AWS credentials if: ${{ !contains(github.ref, '-rc') }} uses: aws-actions/configure-aws-credentials@v4 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} aws-region: us-west-1 - name: Sync to S3 if: ${{ !contains(github.ref, '-rc') }} shell: bash run: | aws s3 cp "artifacts/nexa-cli_macos_${{ matrix.arch }}.pkg" "s3://nexa-model-hub-bucket/public/nexa_sdk/downloads/nexa-cli_macos_${{ matrix.arch }}.pkg" ================================================ FILE: .github/workflows/build-windows.yml ================================================ name: NexaCLI on Windows on: push: tags: - "v*" jobs: build-cli: name: windows_${{ matrix.arch }} runs-on: ${{ matrix.runner }} strategy: fail-fast: false matrix: include: - runner: sdk-windows-8-core-2 arch: x86_64 - runner: windows-11-arm arch: arm64 env: VERSION: ${{ github.ref_name }} steps: - name: Checkout repository uses: actions/checkout@v4 - name: Setup Go uses: actions/setup-go@v5 with: go-version: 1.25.4 cache-dependency-path: runner/go.sum - name: Setup MSYS2 (ARM64) if: matrix.arch == 'arm64' uses: msys2/setup-msys2@v2 with: msystem: CLANGARM64 location: C:/Tools update: true install: mingw-w64-clang-aarch64-clang - name: Setup Compile Environment (ARM64) if: matrix.arch == 'arm64' shell: bash run: | echo "CC=C:/Tools/msys64/clangarm64/bin/clang.exe" >> $GITHUB_ENV echo "CXX=C:/Tools/msys64/clangarm64/bin/clang++.exe" >> $GITHUB_ENV - name: Download runtime shell: bash run: | cd runner make download ARCH=${{ matrix.arch }} ls build - name: Build Nexa Cli shell: bash run: | cd runner make build ARCH=${{ matrix.arch }} rm -f build/ml.h - name: Upload Artifact uses: actions/upload-artifact@v4 with: name: nexa-cli_windows_${{ matrix.arch }} path: runner/build include-hidden-files: true build-installer: name: Build Windows Installer & Release needs: build-cli runs-on: ${{ matrix.runner }} strategy: fail-fast: 
false matrix: include: - runner: sdk-windows-8-core-2 arch: x86_64 - runner: windows-11-arm arch: arm64 env: VERSION: ${{ github.ref_name }} ARCH: ${{ matrix.arch }} steps: - name: Checkout repository uses: actions/checkout@v4 - name: Download build artifacts uses: actions/download-artifact@v4 with: name: nexa-cli_windows_${{ matrix.arch }} path: artifacts - name: Build launcher with PyInstaller run: | pip install pyinstaller pyinstaller --onefile --noconsole --distpath artifacts --name "nexa-cli-launcher" --icon="runner/release/windows/nexa_logo.ico" runner/release/windows/nexa_launcher.py # === Install Inno Setup (before compiling installers) === - name: Install Inno Setup shell: pwsh run: | if (Get-Command ISCC.exe -ErrorAction SilentlyContinue) { return } if (Get-Command choco -ErrorAction SilentlyContinue) { choco install innosetup -y --no-progress } else { winget install --id JRSoftware.InnoSetup -e --accept-source-agreements --accept-package-agreements } $inno = "${env:ProgramFiles(x86)}\Inno Setup 6" if (Test-Path "$inno\ISCC.exe") { "$inno" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append } - name: Sign executables with AzureSignTool shell: pwsh run: | dotnet tool install --global AzureSignTool Get-ChildItem -Path "artifacts" -Recurse -Include *.exe, *.dll | ForEach-Object { azuresigntool sign ` -kvu "${{ secrets.AZURE_KEY_VAULT_URI }}" ` -kvc "${{ secrets.AZURE_CERT_NAME }}" ` -kvi "${{ secrets.AZURE_CLIENT_ID }}" ` -kvs "${{ secrets.AZURE_CLIENT_SECRET }}" ` --azure-key-vault-tenant-id "${{ secrets.AZURE_TENANT_ID }}" ` -tr http://timestamp.globalsign.com/tsa/advanced ` -td sha256 ` $_.FullName } - name: Prepare Installer Files (X86_64) if: matrix.arch == 'x86_64' shell: bash run: | cp -r artifacts artifacts_cuda rm -r artifacts_cuda/nexaai* rm -r artifacts/nexa_cuda* - name: Compile Installer (CUDA) (X86_64) if: matrix.arch == 'x86_64' uses: Minionguyjpro/Inno-Setup-Action@v1.2.6 with: path: runner/release/windows/nexa_installer.iss options: 
/O+ env: ORT: "_cuda" - name: Compile Installer uses: Minionguyjpro/Inno-Setup-Action@v1.2.6 with: path: runner/release/windows/nexa_installer.iss options: /O+ env: ORT: "" - name: Sign Installer (ARM64) if: matrix.arch == 'arm64' shell: pwsh run: | azuresigntool sign ` -kvu "${{ secrets.AZURE_KEY_VAULT_URI }}" ` -kvc "${{ secrets.AZURE_CERT_NAME }}" ` -kvi "${{ secrets.AZURE_CLIENT_ID }}" ` -kvs "${{ secrets.AZURE_CLIENT_SECRET }}" ` --azure-key-vault-tenant-id "${{ secrets.AZURE_TENANT_ID }}" ` -tr http://timestamp.globalsign.com/tsa/advanced ` -td sha256 ` "nexa-cli_windows_${{ matrix.arch }}.exe" - name: Sign Installer (X86_64) if: matrix.arch == 'x86_64' run: | azuresigntool sign ` -kvu "${{ secrets.AZURE_KEY_VAULT_URI }}" ` -kvc "${{ secrets.AZURE_CERT_NAME }}" ` -kvi "${{ secrets.AZURE_CLIENT_ID }}" ` -kvs "${{ secrets.AZURE_CLIENT_SECRET }}" ` --azure-key-vault-tenant-id "${{ secrets.AZURE_TENANT_ID }}" ` -tr http://timestamp.globalsign.com/tsa/advanced ` -td sha256 ` "nexa-cli_windows_${{ matrix.arch }}_cuda.exe" azuresigntool sign ` -kvu "${{ secrets.AZURE_KEY_VAULT_URI }}" ` -kvc "${{ secrets.AZURE_CERT_NAME }}" ` -kvi "${{ secrets.AZURE_CLIENT_ID }}" ` -kvs "${{ secrets.AZURE_CLIENT_SECRET }}" ` --azure-key-vault-tenant-id "${{ secrets.AZURE_TENANT_ID }}" ` -tr http://timestamp.globalsign.com/tsa/advanced ` -td sha256 ` "nexa-cli_windows_${{ matrix.arch }}.exe" - name: Create GitHub Release uses: softprops/action-gh-release@v2 with: repository: NexaAI/nexa-sdk tag_name: ${{ env.VERSION }} files: | nexa-cli_windows_${{ matrix.arch }}_cuda.exe nexa-cli_windows_${{ matrix.arch }}.exe draft: ${{ contains(github.ref, '-rc') }} generate_release_notes: false make_latest: ${{ !contains(github.ref, '-rc') }} env: GITHUB_TOKEN: ${{ secrets.ZACK_ACCESS_TOKEN }} - name: Configure AWS credentials if: ${{ !contains(github.ref, '-rc') }} uses: aws-actions/configure-aws-credentials@v4 with: aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ 
secrets.AWS_SECRET_ACCESS_KEY }} aws-region: us-west-1 - name: Sync to S3 if: ${{ !contains(github.ref, '-rc') }} shell: bash run: | aws s3 cp "nexa-cli_windows_${{ matrix.arch }}.exe" "s3://nexa-model-hub-bucket/public/nexa_sdk/downloads/nexa-cli_windows_${{ matrix.arch }}.exe" - name: Sync to S3 (CUDA) (X86_64) if: ${{ !contains(github.ref, '-rc') && matrix.arch == 'x86_64' }} shell: bash run: | aws s3 cp "nexa-cli_windows_${{ matrix.arch }}_cuda.exe" "s3://nexa-model-hub-bucket/public/nexa_sdk/downloads/nexa-cli_windows_${{ matrix.arch }}_cuda.exe" ================================================ FILE: .github/workflows/lint.yml ================================================ name: Lint on: pull_request: branches: [main] paths: - 'runner/**' - '.github/workflows/lint.yml' jobs: lint: runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v4 - uses: actions/setup-go@v5 with: go-version: '1.25' cache-dependency-path: runner/go.sum - name: go fmt working-directory: runner run: | unformatted=$(gofmt -s -l .) [ -z "$unformatted" ] || (echo "Unformatted: $unformatted"; exit 1) - name: Download runtime working-directory: runner run: make download - name: go vet working-directory: runner run: go vet ./... 
================================================ FILE: .github/workflows/test.yml ================================================ name: Test on: # pull_request: # branches: [main, main-test] workflow_dispatch: jobs: test-llm: name: ${{ matrix.os }} / ${{ matrix.backend }} runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: os: [macos-15] backend: [llama-cpp-metal] steps: - name: Checkout code uses: actions/checkout@v4 - name: Setup Go uses: actions/setup-go@v5 with: go-version: 1.24.3 cache-dependency-path: runner/go.sum - name: Download backend runtime run: make download BRIDGE_BACKEND=${{ matrix.backend }} - name: Cache Nexa model files uses: actions/cache@v4 with: path: ~/.cache/models/Qwen3-0.6B-Q8_0.gguf key: Qwen3-0.6B-Q8_0.gguf restore-keys: | Qwen3-0.6B-Q8_0.gguf - name: Download model shell: bash run: | MODEL_PATH="$HOME/.cache/models/Qwen3-0.6B-Q8_0.gguf" if [ ! -f "$MODEL_PATH" ]; then mkdir -p "$(dirname "$MODEL_PATH")" wget -nv -O "$MODEL_PATH" https://huggingface.co/nexaml/Qwen3-0.6B/resolve/main/Qwen3-0.6B-Q8_0.gguf else echo "Model already cached, skip download." fi - name: Test LLM Multi-Turn Chat shell: bash run: | if [[ "$RUNNER_OS" == "macOS" ]]; then export DYLD_LIBRARY_PATH="$PWD/build/lib/${{ matrix.backend }}" elif [[ "$RUNNER_OS" == "Linux" ]]; then export LD_LIBRARY_PATH="$PWD/build/lib/${{ matrix.backend }}" elif [[ "$RUNNER_OS" == "Windows" ]]; then export PATH="$PWD/build/lib/${{ matrix.backend }};$PATH" fi export CGO_LDFLAGS="-L$PWD/build/lib/${{ matrix.backend }}" cd runner/nexa-sdk && go test -v -run "Test.*LLM|TestChatMultiTurn" ================================================ FILE: .gitignore ================================================ .DS_Store .vscode/ tempnexaai-env/ ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. 
Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. 
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. 
To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright 2024-2026 Nexa AI, Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: NOTICE ================================================ # Third-Party Licenses This document provides information about third-party libraries used in the NexaSDK project and their respective licenses. All libraries listed below are licensed under permissive open-source licenses (MIT, Apache 2.0, BSD, or equivalent) that are compatible with NexaSDK's Apache 2.0 license. 
--- ## Go Dependencies ### gin-gonic/gin - **License**: MIT License - **URL**: https://github.com/gin-gonic/gin - **Description**: HTTP web framework written in Go ### spf13/cobra - **License**: Apache License 2.0 - **URL**: https://github.com/spf13/cobra - **Description**: Commander for modern Go CLI applications ### spf13/viper - **License**: MIT License - **URL**: https://github.com/spf13/viper - **Description**: Go configuration with fangs ### AWS SDK for Go v2 - **License**: Apache License 2.0 - **URL**: https://github.com/aws/aws-sdk-go-v2 - **Description**: AWS SDK for the Go programming language - **Packages**: aws-sdk-go-v2, aws-sdk-go-v2/config, aws-sdk-go-v2/service/s3 ### bytedance/sonic - **License**: Apache License 2.0 - **URL**: https://github.com/bytedance/sonic - **Description**: Blazingly fast JSON serializing & deserializing library ### charmbracelet/huh - **License**: MIT License - **URL**: https://github.com/charmbracelet/huh - **Description**: Build terminal forms and prompts ### charmbracelet/lipgloss - **License**: MIT License - **URL**: https://github.com/charmbracelet/lipgloss - **Description**: Style definitions for nice terminal layouts ### charmbracelet/bubbletea - **License**: MIT License - **URL**: https://github.com/charmbracelet/bubbletea - **Description**: Powerful little TUI framework ### dustin/go-humanize - **License**: MIT License - **URL**: https://github.com/dustin/go-humanize - **Description**: Formatters for units to human friendly sizes ### gofrs/flock - **License**: BSD 3-Clause License - **URL**: https://github.com/gofrs/flock - **Description**: Thread-safe file locking library ### chzyer/readline - **License**: MIT License - **URL**: https://github.com/chzyer/readline - **Description**: Pure Go readline library with support for command line editing and history ### openai/openai-go - **License**: Apache License 2.0 - **URL**: https://github.com/openai/openai-go - **Description**: OpenAI Go API library ### 
schollz/progressbar - **License**: MIT License - **URL**: https://github.com/schollz/progressbar - **Description**: Simple command line progress bar ### valyala/fasthttp - **License**: MIT License - **URL**: https://github.com/valyala/fasthttp - **Description**: Fast HTTP implementation for Go --- ## C/C++ Dependencies ### ggml - **License**: MIT License - **Copyright**: Copyright (c) 2023-2024 The ggml authors - **URL**: https://github.com/ggml-org/ggml - **Description**: Tensor library for machine learning - **License File**: https://github.com/ggml-org/ggml/blob/master/LICENSE ### pybind11 - **License**: BSD 3-Clause License - **Copyright**: Copyright (c) 2016 Wenzel Jakob - **URL**: https://github.com/pybind/pybind11 - **Description**: Seamless operability between C++11 and Python - **License File**: [nexasdk-bridge/third-party/pybind11/LICENSE](https://github.com/pybind/pybind11/blob/master/LICENSE) ### mlx-audio - **License**: MIT License - **Copyright**: Copyright (c) 2024 Prince Canuma - **URL**: https://github.com/Blaizzy/mlx-audio - **Description**: Audio processing with MLX - **License File**: [nexasdk-bridge/third-party/mlx-audio/LICENSE](https://github.com/Blaizzy/mlx-audio/blob/main/LICENSE) ### mlx-vlm - **License**: MIT License - **Copyright**: Copyright © 2023 Apple Inc. 
- **URL**: https://github.com/Blaizzy/mlx-vlm - **Description**: Vision-language models with MLX - **License File**: [nexasdk-bridge/third-party/mlx-vlm/LICENSE](https://github.com/Blaizzy/mlx-vlm/blob/main/LICENSE) ### xtensor - **License**: BSD 3-Clause License - **Copyright**: Copyright (c) 2016 Johan Mabille, Sylvain Corlay and Wolf Vollprecht, Copyright (c) 2016 QuantStack - **URL**: https://github.com/xtensor-stack/xtensor - **Description**: C++ tensors with broadcasting and lazy computing - **License File**: https://github.com/xtensor-stack/xtensor/blob/master/LICENSE ### cpp-httplib - **License**: MIT License - **Copyright**: Copyright (c) 2017 yhirose - **URL**: https://github.com/yhirose/cpp-httplib - **Description**: C++ single-file header-only HTTP/HTTPS library - **Used in**: llama.cpp - **License File**: [nexasdk-bridge/third-party/llama.cpp/licenses/LICENSE-httplib](https://github.com/ggerganov/llama.cpp/blob/master/licenses/LICENSE-httplib) ### nlohmann/json - **License**: MIT License - **Copyright**: Copyright (c) 2013-2025 Niels Lohmann - **URL**: https://github.com/nlohmann/json - **Description**: JSON for Modern C++ - **Used in**: llama.cpp - **License File**: [nexasdk-bridge/third-party/llama.cpp/licenses/LICENSE-jsonhpp](https://github.com/ggerganov/llama.cpp/blob/master/licenses/LICENSE-jsonhpp) --- ## Python Dependencies ### gradio - **License**: Apache License 2.0 - **URL**: https://github.com/gradio-app/gradio - **Description**: Build machine learning web apps ### requests - **License**: Apache License 2.0 - **URL**: https://github.com/psf/requests - **Description**: HTTP library for Python ### Flask - **License**: BSD 3-Clause License - **URL**: https://github.com/pallets/flask - **Description**: Lightweight WSGI web application framework ### faiss-cpu - **License**: MIT License - **URL**: https://github.com/facebookresearch/faiss - **Description**: Library for efficient similarity search and clustering of dense vectors ### 
sentence-transformers - **License**: Apache License 2.0 - **URL**: https://github.com/UKPLab/sentence-transformers - **Description**: Compute dense vector representations for sentences and paragraphs ### langchain - **License**: MIT License - **URL**: https://github.com/langchain-ai/langchain - **Description**: Building applications with LLMs through composability ### pydantic - **License**: MIT License - **URL**: https://github.com/pydantic/pydantic - **Description**: Data validation using Python type hints ### python-docx - **License**: MIT License - **URL**: https://github.com/python-openxml/python-docx - **Description**: Create and modify Word documents with Python ### psutil - **License**: BSD 3-Clause License - **URL**: https://github.com/giampaolo/psutil - **Description**: Cross-platform library for process and system monitoring ### openai - **License**: Apache License 2.0 - **URL**: https://github.com/openai/openai-python - **Description**: Python library for the OpenAI API ### mcp - **License**: MIT License - **URL**: https://github.com/modelcontextprotocol/python-sdk - **Description**: Model Context Protocol Python SDK --- ## Android/Java Dependencies (from bindings/android) ### AndroidX Libraries - **License**: Apache License 2.0 - **URL**: https://developer.android.com/jetpack/androidx - **Description**: Android Jetpack libraries - **Packages**: core-ktx, lifecycle-runtime-ktx, activity-compose, compose-bom, appcompat, constraintlayout ### Kotlin Standard Library - **License**: Apache License 2.0 - **URL**: https://github.com/JetBrains/kotlin - **Description**: Kotlin programming language standard library ### AWS Android SDK S3 - **License**: Apache License 2.0 - **URL**: https://github.com/aws/aws-sdk-android - **Description**: AWS SDK for Android - S3 support ### OkHttp - **License**: Apache License 2.0 - **URL**: https://github.com/square/okhttp - **Description**: HTTP client for Android and Java ### Gson - **License**: Apache License 2.0 - **URL**: 
https://github.com/google/gson - **Description**: Java serialization/deserialization library to convert Java Objects into JSON ### Glide - **License**: BSD, MIT, and Apache License 2.0 (Multi-license) - **URL**: https://github.com/bumptech/glide - **Description**: Fast and efficient image loading library for Android ### Kotlinx Serialization - **License**: Apache License 2.0 - **URL**: https://github.com/Kotlin/kotlinx.serialization - **Description**: Multiplatform serialization for Kotlin ### Kotlinx Coroutines - **License**: Apache License 2.0 - **URL**: https://github.com/Kotlin/kotlinx.coroutines - **Description**: Library support for Kotlin coroutines ### Material Components for Android - **License**: Apache License 2.0 - **URL**: https://github.com/material-components/material-components-android - **Description**: Modular and customizable Material Design UI components --- ## Summary All third-party libraries used in NexaSDK are licensed under permissive open-source licenses that allow for commercial and non-commercial use, modification, and distribution. The primary licenses used are: - **MIT License**: Highly permissive, allows almost unrestricted use - **Apache License 2.0**: Permissive license with patent grant and attribution requirements - **BSD 3-Clause License**: Permissive license with attribution requirements ================================================ FILE: README.md ================================================
Nexa AI Banner

简体中文 | English

🤝 Supported chipmakers

NexaSDK for Mobile - #1 Product of the Day | NexaAI/nexa-sdk - #1 Repository of the Day

Documentation | Vote for Next Models | X account | Join us on Discord | Join us on Slack

# NexaSDK **NexaSDK lets you build the smartest and fastest on-device AI with minimum energy.** It is a highly performant local inference framework that runs the latest multimodal AI models locally on NPU, GPU, and CPU - across Android, Windows, Linux, macOS, and iOS devices with a few lines of code. NexaSDK supports the latest models **weeks or months before anyone else** — Qwen3-VL, DeepSeek-OCR, Gemma3n (Vision), and more. > ⭐ **Star this repo** to keep up with exciting updates and new releases about the latest on-device AI capabilities. ## 🏆 Recognized Milestones - **Qualcomm** featured us **3 times** in official blogs. - [Innovating Multimodal AI on Qualcomm Hexagon NPU](https://www.qualcomm.com/developer/blog/2025/09/omnineural-4b-nexaml-qualcomm-hexagon-npu). - [First-ever Day-0 model support on Qualcomm Hexagon NPU for compute and mobile platforms, Auto and IoT](https://www.qualcomm.com/developer/blog/2025/10/granite-4-0-to-the-edge-on-device-ai-for-real-world-performance). - [A simple way to bring on-device AI to smartphones with Snapdragon](https://www.qualcomm.com/developer/blog/2025/11/nexa-ai-for-android-simple-way-to-bring-on-device-ai-to-smartphones-with-snapdragon). - **Qwen** featured us for [Day-0 Qwen3-VL support on NPU, GPU, and CPU](https://x.com/Alibaba_Qwen/status/1978154384098754943). We were 3 weeks ahead of Ollama and llama.cpp on GGUF support, and no one else supports it on NPU to date. - **IBM** featured our NexaML inference engine alongside vLLM, llama.cpp, and MLX in the [official IBM blog](https://www.ibm.com/new/announcements/ibm-granite-4-0-hyper-efficient-high-performance-hybrid-models) and also for Day-0 Granite 4.0 support. - **Google** featured us for [EmbeddingGemma Day-0 NPU support](https://x.com/googleaidevs/status/1969188152049889511).
- **AMD** featured us for [enabling SDXL-turbo image generation on AMD NPU](https://www.amd.com/en/developer/resources/technical-articles/2025/advancing-ai-with-nexa-ai--image-generation-on-amd-npu-with-sdxl.html). - **NVIDIA** featured Hyperlink, a viral local AI app powered by NexaSDK, in their [official blog](https://blogs.nvidia.com/blog/rtx-ai-garage-nexa-hyperlink-local-agent/). - **Microsoft** presented us on stage at Microsoft Ignite 2025 as an [official partner](https://www.linkedin.com/posts/mixen_excited-to-celebrate-our-developer-partnerships-activity-7396601602327007232-AmCR?utm_source=share&utm_medium=member_desktop&rcm=ACoAAChXnS8B4gqbBLUlWfwt-ck0XAv472NzT4k). - **Intel** featured us for [Intel NPU support in NexaSDK](https://www.linkedin.com/posts/intel-software_ai-ondeviceai-nexasdk-activity-7376337062087667712-xw7i?utm_source=share&utm_medium=member_desktop&rcm=ACoAAChXnS8B4gqbBLUlWfwt-ck0XAv472NzT4k). ## 🚀 Quick Start | Platform | Links | | --------------- | ----------------------------------------------------------------------------------------- | | 🖥️ CLI | [Quick Start](#-cli) \| [Docs](https://docs.nexa.ai/en/nexa-sdk-go/NexaCLI) | | 🐍 Python | [Quick Start](#-python-sdk) \| [Docs](https://docs.nexa.ai/en/nexa-sdk-python/overview) | | 🤖 Android | [Quick Start](#-android-sdk) \| [Docs](https://docs.nexa.ai/en/nexa-sdk-android/overview) | | 🐳 Linux Docker | [Quick Start](#-linux-docker) \| [Docs](https://docs.nexa.ai/en/nexa-sdk-docker/overview) | | 🍎 iOS | [Quick Start](#-ios-sdk) \| [Docs](https://docs.nexa.ai/en/nexa-sdk-ios/overview) | --- ### 🖥️ CLI **Download:** | Windows | macOS | Linux | | -------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------- | | [arm64 (Qualcomm
NPU)](https://public-storage.nexa4ai.com/nexa_sdk/downloads/nexa-cli_windows_arm64.exe) | [arm64 (Apple Silicon)](https://public-storage.nexa4ai.com/nexa_sdk/downloads/nexa-cli_macos_arm64.pkg) | [arm64](https://github.com/NexaAI/nexa-sdk/releases/latest/download/nexa-cli_linux_arm64.sh) | | [x64 (Intel/AMD NPU)](https://public-storage.nexa4ai.com/nexa_sdk/downloads/nexa-cli_windows_x86_64.exe) | [x64](https://public-storage.nexa4ai.com/nexa_sdk/downloads/nexa-cli_macos_x86_64.pkg) | [x64](https://github.com/NexaAI/nexa-sdk/releases/latest/download/nexa-cli_linux_x86_64.sh) | **Run your first model:** ```bash # Chat with Qwen3 nexa infer ggml-org/Qwen3-1.7B-GGUF # Multimodal: drag images into the CLI nexa infer NexaAI/Qwen3-VL-4B-Instruct-GGUF # NPU (Windows arm64 with Snapdragon X Elite) nexa infer NexaAI/OmniNeural-4B ``` - **Models:** LLM, Multimodal, ASR, OCR, Rerank, Object Detection, Image Generation, Embedding - **Formats:** GGUF, MLX, NEXA - **NPU Models:** [Model Hub](https://sdk.nexa.ai/model) - 📖 [CLI Reference Docs](https://docs.nexa.ai/en/nexa-sdk-go/NexaCLI) --- ### 🐍 Python SDK ```bash pip install nexaai ``` ```python from nexaai import LLM, GenerationConfig, ModelConfig, LlmChatMessage llm = LLM.from_(model="NexaAI/Qwen3-0.6B-GGUF", config=ModelConfig()) conversation = [ LlmChatMessage(role="user", content="Hello, tell me a joke") ] prompt = llm.apply_chat_template(conversation) for token in llm.generate_stream(prompt, GenerationConfig(max_tokens=100)): print(token, end="", flush=True) ``` - **Models:** LLM, Multimodal, ASR, OCR, Rerank, Object Detection, Image Generation, Embedding - **Formats:** GGUF, MLX, NEXA - **NPU Models:** [Model Hub](https://sdk.nexa.ai/model) - 📖 [Python SDK Docs](https://docs.nexa.ai/en/nexa-sdk-python/quickstart) --- ### 🤖 Android SDK Add to your `app/AndroidManifest.xml` ```xml ``` Add to your `build.gradle.kts`: ```kotlin dependencies { implementation("ai.nexa:core:0.0.19") } ``` ```kotlin // Initialize SDK 
NexaSdk.getInstance().init(this) // Load and run model VlmWrapper.builder() .vlmCreateInput(VlmCreateInput( model_name = "omni-neural", model_path = "/data/data/your.app/files/models/OmniNeural-4B/files-1-1.nexa", plugin_id = "npu", config = ModelConfig() )) .build() .onSuccess { vlm -> vlm.generateStreamFlow("Hello!", GenerationConfig()).collect { print(it) } } ``` - **Requirements:** Android minSdk 27, Qualcomm Snapdragon 8 Gen 4 Chip - **Models:** LLM, Multimodal, ASR, OCR, Rerank, Embedding - **NPU Models:** [Supported Models](https://docs.nexa.ai/en/nexa-sdk-android/overview#supported-models) - 📖 [Android SDK Docs](https://docs.nexa.ai/en/nexa-sdk-android/quickstart) --- ### 🐳 Linux Docker ```bash docker pull nexa4ai/nexasdk:latest export NEXA_TOKEN="your_token_here" docker run --rm -it --privileged \ -e NEXA_TOKEN \ nexa4ai/nexasdk:latest infer NexaAI/Granite-4.0-h-350M-NPU ``` - **Requirements:** Qualcomm Dragonwing IQ9, ARM64 systems - **Models:** LLM, VLM, ASR, CV, Rerank, Embedding - **NPU Models:** [Supported Models](https://docs.nexa.ai/en/nexa-sdk-docker/overview#supported-models) - 📖 [Linux Docker Docs](https://docs.nexa.ai/en/nexa-sdk-docker/quickstart) --- ### 🍎 iOS SDK Download [NexaSdk.xcframework](https://nexa-model-hub-bucket.s3.us-west-1.amazonaws.com/public/ios/latest/NexaSdk.xcframework.zip) and add to your Xcode project. ```swift import NexaSdk // Example: Speech Recognition let asr = try Asr(plugin: .ane) try await asr.load(from: modelURL) let result = try await asr.transcribe(options: .init(audioPath: "audio.wav")) print(result.asrResult.transcript) ``` - **Requirements:** iOS 17.0+ / macOS 15.0+, Swift 5.9+ - **Models:** LLM, ASR, OCR, Rerank, Embedding - **ANE Models:** [Apple Neural Engine Models](https://huggingface.co/collections/NexaAI/apple-neural-engine) - 📖 [iOS SDK Docs](https://docs.nexa.ai/en/nexa-sdk-ios/quickstart) ## ⚙️ Features & Comparisons
| Features | **NexaSDK** | **Ollama** | **llama.cpp** | **LM Studio** | | ---------------------------------------- | ---------------------------------------------------------- | ---------- | ------------- | ------------- | | NPU support | ✅ NPU-first | ❌ | ❌ | ❌ | | Android/iOS SDK support | ✅ NPU/GPU/CPU support | ⚠️ | ⚠️ | ❌ | | Linux support (Docker image) | ✅ | ✅ | ✅ | ❌ | | Day-0 model support in GGUF, MLX, NEXA | ✅ | ❌ | ⚠️ | ❌ | | Full multimodality support | ✅ Image, Audio, Text, Embedding, Rerank, ASR, TTS | ⚠️ | ⚠️ | ⚠️ | | Cross-platform support | ✅ Desktop, Mobile (Android, iOS), Automotive, IoT (Linux) | ⚠️ | ⚠️ | ⚠️ | | One line of code to run | ✅ | ✅ | ⚠️ | ✅ | | OpenAI-compatible API + Function calling | ✅ | ✅ | ✅ | ✅ |

Legend: ✅ Supported   |   ⚠️ Partial or limited support   |   ❌ Not supported

## 🙏 Acknowledgements We would like to thank the following projects: - [ggml](https://github.com/ggml-org/ggml) - [mlx-lm](https://github.com/ml-explore/mlx-lm) - [mlx-vlm](https://github.com/Blaizzy/mlx-vlm) - [mlx-audio](https://github.com/Blaizzy/mlx-audio) ## 📄 License NexaSDK uses a dual licensing model: ### CPU/GPU Components Licensed under [Apache License 2.0](LICENSE). ### NPU Components - **Personal Use**: Free license key available from [Nexa AI Model Hub](https://sdk.nexa.ai/model). Each key activates 1 device for NPU usage. - **Commercial Use**: Contact [hello@nexa.ai](mailto:hello@nexa.ai) for licensing. ## 🤝 Contact & Community Support ### Business Inquiries For model launching partner, business inquiries, or any other questions, please schedule a call with us [here](https://nexa.ai/book-a-call). ### Community & Support Want more model support, backend support, device support or other features? We'd love to hear from you! Feel free to [submit an issue](https://github.com/NexaAI/nexa-sdk/issues) on our GitHub repository with your requests, suggestions, or feedback. Your input helps us prioritize what to build next. Join our community: - [Discord](https://discord.gg/thRu2HaK4D) - [Slack](https://join.slack.com/t/nexaai/shared_invite/zt-30a8yfv8k-1JqAXv~OjKJKLqvbKqHJxA) - **[Nexa Wishlist](https://sdk.nexa.ai/wishlist)** — Request and vote for the models you want to run on-device. ## 🏆 Nexa × Qualcomm On-Device Bounty Program Round 1: Build a working Android AI app that runs fully on-device on Qualcomm Hexagon NPU with NexaSDK. Timeline (PT): Jan 15 → Feb 15 Prizes: $6,500 cash prize, Qualcomm official spotlight, flagship Snapdragon device, expert mentorship, and more 👉 Join & details: [https://sdk.nexa.ai/bounty](https://sdk.nexa.ai/bounty) ================================================ FILE: README_zh.md ================================================
Nexa AI Banner

简体中文 | English

🤝 支持的芯片厂商

NexaSDK for Mobile - #1 Product of the Day NexaAI/nexa-sdk - #1 Repository of the Day

Documentation Vote for Next Models X account Join us on Discord Join us on Slack

# NexaSDK **NexaSDK 让你用极低能耗打造最快、最聪明的本地 AI。** 它是一套高性能本地推理框架,只需几行代码即可在 Android、Windows、Linux、macOS 与 iOS 的 NPU、GPU、CPU 上运行最新的多模态 AI 模型。 NexaSDK 往往能比其他人提前数周甚至数月支持最新模型 —— Qwen3-VL、DeepSeek-OCR、Gemma3n(视觉版)等。 > ⭐ **star本仓库**,及时获取最新的本地 AI 能力更新与发布。 ## 🏆 重要里程碑 - **Qualcomm** 官方博客中 3 次重点介绍我们。 - [在 Qualcomm Hexagon NPU 上创新多模态 AI](https://www.qualcomm.com/developer/blog/2025/09/omnineural-4b-nexaml-qualcomm-hexagon-npu)。 - [Qualcomm Hexagon NPU 在计算与移动平台(汽车与 IoT)上的首个 Day-0 模型支持](https://www.qualcomm.com/developer/blog/2025/10/granite-4-0-to-the-edge-on-device-ai-for-real-world-performance)。 - [在 Snapdragon 手机上体验端侧 AI 的简单方式](https://www.qualcomm.com/developer/blog/2025/11/nexa-ai-for-android-simple-way-to-bring-on-device-ai-to-smartphones-with-snapdragon) - **Qwen** 为我们在 NPU、GPU、CPU 上 [Day-0 支持 Qwen3-VL](https://x.com/Alibaba_Qwen/status/1978154384098754943) 做了官方推荐。我们在 GGUF 支持上领先 Ollama 和 llama.cpp 3 周,并且目前只有我们支持 NPU。 - **IBM** 在[官方博客](https://www.ibm.com/new/announcements/ibm-granite-4-0-hyper-efficient-high-performance-hybrid-models)中,将 NexaML 推理引擎与 vLLM、llama.cpp、MLX 并列介绍,感谢我们 Day-0 支持 Granite 4.0。 - **Google** 认可我们对 EmbeddingGemma Day-0 的 NPU 支持([官方致谢](https://x.com/googleaidevs/status/1969188152049889511))。 - **AMD** 在[官方博客](https://www.amd.com/en/developer/resources/technical-articles/2025/advancing-ai-with-nexa-ai--image-generation-on-amd-npu-with-sdxl.html)报道我们在 AMD NPU 上实现 SDXL-turbo 图像生成。 - **NVIDIA** 在[官方博客](https://blogs.nvidia.com/blog/rtx-ai-garage-nexa-hyperlink-local-agent/)中介绍了由 NexaSDK 支撑的火爆本地 AI 应用 Hyperlink。 - **Microsoft** 在 Microsoft Ignite 2025 上台展示了我们作为[官方合作伙伴](https://www.linkedin.com/posts/mixen_excited-to-celebrate-our-developer-partnerships-activity-7396601602327007232-AmCR?utm_source=share&utm_medium=member_desktop&rcm=ACoAAChXnS8B4gqbBLUlWfwt-ck0XAv472NzT4k)。 - **Intel** 
在[官方帖子](https://www.linkedin.com/posts/intel-software_ai-ondeviceai-nexasdk-activity-7376337062087667712-xw7i?utm_source=share&utm_medium=member_desktop&rcm=ACoAAChXnS8B4gqbBLUlWfwt-ck0XAv472NzT4k)中提到我们对 Intel NPU 的支持。 ## 🚀 快速开始 | 平台 | 链接 | | --------------- | ---------------------------------------------------------------------------------------------- | | 🖥️ CLI | [快速开始](#-cli) | [文档](https://docs.nexa.ai/en/nexa-sdk-go/NexaCLI) | | 🐍 Python | [快速开始](#-python-sdk) | [文档](https://docs.nexa.ai/en/nexa-sdk-python/overview) | | 🤖 Android | [快速开始](#-android-sdk) | [文档](https://docs.nexa.ai/en/nexa-sdk-android/overview) | | 🐳 Linux Docker | [快速开始](#-linux-docker) | [文档](https://docs.nexa.ai/en/nexa-sdk-docker/overview) | | 🍎 iOS | [快速开始](#-ios-sdk) | [文档](https://docs.nexa.ai/en/nexa-sdk-ios/overview) | --- ### 🖥️ CLI **下载:** | Windows | macOS | Linux | | -------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------- | | [arm64 (Qualcomm NPU)](https://public-storage.nexa4ai.com/nexa_sdk/downloads/nexa-cli_windows_arm64.exe) | [arm64 (Apple Silicon)](https://public-storage.nexa4ai.com/nexa_sdk/downloads/nexa-cli_macos_arm64.pkg) | [arm64](https://github.com/NexaAI/nexa-sdk/releases/latest/download/nexa-cli_linux_arm64.sh) | | [x64 (Intel/AMD NPU)](https://public-storage.nexa4ai.com/nexa_sdk/downloads/nexa-cli_windows_x86_64.exe) | [x64](https://public-storage.nexa4ai.com/nexa_sdk/downloads/nexa-cli_macos_x86_64.pkg) | [x64](https://github.com/NexaAI/nexa-sdk/releases/latest/download/nexa-cli_linux_x86_64.sh) | **运行你的第一个模型:** ```bash # 与 Qwen3 对话 nexa infer ggml-org/Qwen3-1.7B-GGUF # 多模态:在 CLI 中拖入图片 nexa infer NexaAI/Qwen3-VL-4B-Instruct-GGUF # NPU(Windows arm64,Snapdragon X Elite) nexa infer NexaAI/OmniNeural-4B ``` - **模型类型:** 
LLM、多模态、ASR、OCR、Rerank、目标检测、图像生成、Embedding - **格式:** GGUF、MLX、NEXA - **NPU 模型:** [Model Hub](https://sdk.nexa.ai/model) - 📖 [CLI 参考文档](https://docs.nexa.ai/en/nexa-sdk-go/NexaCLI) --- ### 🐍 Python SDK ```bash pip install nexaai ``` ```python from nexaai import LLM, GenerationConfig, ModelConfig, LlmChatMessage llm = LLM.from_(model="NexaAI/Qwen3-0.6B-GGUF", config=ModelConfig()) conversation = [ LlmChatMessage(role="user", content="Hello, tell me a joke") ] prompt = llm.apply_chat_template(conversation) for token in llm.generate_stream(prompt, GenerationConfig(max_tokens=100)): print(token, end="", flush=True) ``` - **模型类型:** LLM、多模态、ASR、OCR、Rerank、目标检测、图像生成、Embedding - **格式:** GGUF、MLX、NEXA - **NPU 模型:** [Model Hub](https://sdk.nexa.ai/model) - 📖 [Python SDK 文档](https://docs.nexa.ai/en/nexa-sdk-python/quickstart) --- ### 🤖 Android SDK 在 `app/AndroidManifest.xml` 中添加: ```xml ``` 在 `build.gradle.kts` 中添加: ```kotlin dependencies { implementation("ai.nexa:core:0.0.19") } ``` ```kotlin // 初始化 SDK NexaSdk.getInstance().init(this) // 加载并运行模型 VlmWrapper.builder() .vlmCreateInput(VlmCreateInput( model_name = "omni-neural", model_path = "/data/data/your.app/files/models/OmniNeural-4B/files-1-1.nexa", plugin_id = "npu", config = ModelConfig() )) .build() .onSuccess { vlm -> vlm.generateStreamFlow("Hello!", GenerationConfig()).collect { print(it) } } ``` - **要求:** Android minSdk 27,Qualcomm Snapdragon 8 Gen 4 芯片 - **模型类型:** LLM、多模态、ASR、OCR、Rerank、Embedding - **NPU 模型:** [Supported Models](https://docs.nexa.ai/en/nexa-sdk-android/overview#supported-models) - 📖 [Android SDK 文档](https://docs.nexa.ai/en/nexa-sdk-android/quickstart) --- ### 🐳 Linux Docker ```bash docker pull nexa4ai/nexasdk:latest export NEXA_TOKEN="your_token_here" docker run --rm -it --privileged \ -e NEXA_TOKEN \ nexa4ai/nexasdk:latest infer NexaAI/Granite-4.0-h-350M-NPU ``` - **要求:** Qualcomm Dragonwing IQ9,ARM64 系统 - **模型类型:** LLM、VLM、ASR、CV、Rerank、Embedding - **NPU 模型:** [Supported 
Models](https://docs.nexa.ai/en/nexa-sdk-docker/overview#supported-models) - 📖 [Linux Docker 文档](https://docs.nexa.ai/en/nexa-sdk-docker/quickstart) --- ### 🍎 iOS SDK 下载 [NexaSdk.xcframework](https://nexa-model-hub-bucket.s3.us-west-1.amazonaws.com/public/ios/latest/NexaSdk.xcframework.zip) 并添加到 Xcode 项目。 ```swift import NexaSdk // 示例:语音识别 let asr = try Asr(plugin: .ane) try await asr.load(from: modelURL) let result = try await asr.transcribe(options: .init(audioPath: "audio.wav")) print(result.asrResult.transcript) ``` - **要求:** iOS 17.0+ / macOS 15.0+,Swift 5.9+ - **模型类型:** LLM、ASR、OCR、Rerank、Embedding - **ANE 模型:** [Apple Neural Engine Models](https://huggingface.co/collections/NexaAI/apple-neural-engine) - 📖 [iOS SDK 文档](https://docs.nexa.ai/en/nexa-sdk-ios/quickstart) ## ⚙️ 功能与对比
| 功能 | **NexaSDK** | **Ollama** | **llama.cpp** | **LM Studio** | | -------------------------------------- | ---------------------------------------------------------- | ---------- | ------------- | ------------- | | NPU 支持 | ✅ NPU 优先 | ❌ | ❌ | ❌ | | Android/iOS SDK 支持 | ✅ NPU/GPU/CPU 支持 | ⚠️ | ⚠️ | ❌ | | Linux 支持(Docker 镜像) | ✅ | ✅ | ✅ | ❌ | | Day-0 支持 GGUF、MLX、NEXA | ✅ | ❌ | ⚠️ | ❌ | | 完整多模态支持 | ✅ 图像、音频、文本、Embedding、Rerank、ASR、TTS | ⚠️ | ⚠️ | ⚠️ | | 跨平台支持 | ✅ 桌面、移动(Android、iOS)、车载、IoT(Linux) | ⚠️ | ⚠️ | ⚠️ | | 一行代码即可运行 | ✅ | ✅ | ⚠️ | ✅ | | OpenAI 兼容 API + Function calling | ✅ | ✅ | ✅ | ✅ |

图例: ✅ 支持   |   ⚠️ 部分或有限支持   |   ❌ 不支持

## 🙏 致谢 我们感谢以下项目: - [ggml](https://github.com/ggml-org/ggml) - [mlx-lm](https://github.com/ml-explore/mlx-lm) - [mlx-vlm](https://github.com/Blaizzy/mlx-vlm) - [mlx-audio](https://github.com/Blaizzy/mlx-audio) ## 📄 许可证 NexaSDK 采用双重许可模式: ### CPU/GPU 组件 基于 [Apache License 2.0](LICENSE)。 ### NPU 组件 - **个人使用**:可从 [Nexa AI Model Hub](https://sdk.nexa.ai/model) 免费获取许可密钥。每个密钥激活 1 台设备的 NPU 使用。 - **商业使用**:联系 [hello@nexa.ai](mailto:hello@nexa.ai) 获取授权。 ## 🤝 联系与社区支持 ### 商务合作 如需模型发布合作、商务洽谈或其他问题,请在[此处](https://nexa.ai/book-a-call)安排会议。 ### 社区与支持 想要更多模型支持、后端支持、设备支持或新功能?我们很乐意听到你的声音! 欢迎在 GitHub [提交 issue](https://github.com/NexaAI/nexa-sdk/issues),提出你的需求、建议或反馈。你的意见帮助我们确定优先级。 加入社区: - [Discord](https://discord.gg/thRu2HaK4D) - [Slack](https://join.slack.com/t/nexaai/shared_invite/zt-30a8yfv8k-1JqAXv~OjKJKLqvbKqHJxA) - **[Nexa Wishlist](https://sdk.nexa.ai/wishlist)** —— 提交并为你想要运行在本地的模型投票。 ================================================ FILE: bindings/android/.gitignore ================================================ # Gradle files .gradle/ build/ arm64-v8a/ .DS_Store # Local configuration file (sdk path, etc) local.properties # Log/OS Files *.log # Android Studio generated files and folders captures/ .externalNativeBuild/ .cxx/ *.aab *.apk output-metadata.json # IntelliJ *.iml .idea/ misc.xml deploymentTargetDropDown.xml render.experimental.xml # Keystore files *.jks *.keystore # Google Services (e.g. 
APIs or Firebase) google-services.json # Android Profiling *.hprof app/libs/app-release.aar app/libs/app-debug.aar app/release/ repo/ ================================================ FILE: bindings/android/README.md ================================================ # Nexa Android SDK Demo App [![Product Hunt](https://api.producthunt.com/widgets/embed-image/v1/top-post-badge.svg?post_id=1049998&theme=dark&period=daily&t=1765991451976)](https://www.producthunt.com/products/nexasdk-for-mobile) > 📣 **NexaSDK for Android** is highlighted by [Qualcomm blog](https://www.qualcomm.com/developer/blog/2025/11/nexa-ai-for-android-simple-way-to-bring-on-device-ai-to-smartphones-with-snapdragon) as "a simple way to bring on-device AI to smartphones with Snapdragon" ## Overview The Nexa AI Android SDK enables on-device AI inference for Android applications with NPU acceleration. Run Large Language Models (LLMs), Vision-Language Models (VLMs), Embeddings, Speech Recognition (ASR), Reranking, and Computer Vision models on Android devices with support for NPU, GPU, and CPU inference. 📖 For full documentation, see [Android SDK Doc](https://docs.nexa.ai/en/nexa-sdk-android/overview). 
## Device Compatibility ### Supported Hardware | Component | Requirement | |-----------|-------------| | **NPU** | Qualcomm Snapdragon 8 Gen 4 (optimized) | | **GPU** | Qualcomm Adreno GPU | | **CPU** | ARM64-v8a | | **RAM** | 4GB+ recommended | | **Storage** | 100MB - 4GB (varies by model) | ### Minimum Requirements - Android API Level 27+ (Android 8.1 Oreo) - Architecture: ARM64-v8a ## Quick Start with APK ### Standard Demo App Download and install the pre-built APK: ```bash # Download: https://nexa-model-hub-bucket.s3.us-west-1.amazonaws.com/public/android-demo-release/nexaai-demo-app.apk adb install nexaai-demo-app.apk ``` ### GPT-OSS NPU Demo For running GPT-OSS model on Qualcomm NPU: ```bash # Download: https://nexa-model-hub-bucket.s3.us-west-1.amazonaws.com/public/nexa_sdk/huggingface-models/gpt-oss-android-demo/nexaai-gpt-oss-npu.apk adb install nexaai-gpt-oss-npu.apk ``` ## Build from Source > 🎬 Watch the [tutorial video](https://www.linkedin.com/feed/update/urn:li:activity:7394055404562935809) showing how to run the sample app in 40 seconds. ### Step-by-Step Instructions 1. **Clone the repository** ```bash git clone https://github.com/NexaAI/nexa-sdk/ ``` 2. **Open in Android Studio** Open the `bindings/android` folder in Android Studio. 3. **Download a model** Follow the [Android SDK Doc](https://docs.nexa.ai/en/nexa-sdk-android/overview) to download a model. Below are some examples to download: - [Granite-4.0-h-350M-NPU](https://huggingface.co/NexaAI/Granite-4.0-h-350M-NPU-mobile) - [Granite-4-Micro-NPU](https://huggingface.co/NexaAI/Granite-4-Micro-NPU-mobile) - [OmniNeural-4B](https://huggingface.co/NexaAI/OmniNeural-4B-mobile) Place the model in the app's data directory: ``` /data/data/com.nexa.demo/files/models/ ``` 4. **Build and run** the app in Android Studio ## Tutorial: LFM2-24B-A2B-Preview-GGUF This walkthrough uses the [LFM2-24B-A2B-Preview-GGUF](https://huggingface.co/NexaAI/LFM2-24B-A2B-Preview-GGUF) model in the demo app. 1. 
**Install the app** Install the demo app (APK or build from source as above). 2. **Select the model** Open the model selector (dropdown next to the model name) and choose **LFM2-24B-A2B-Preview-GGUF**. LFM2-24B-A2B-Preview-GGUF in demo app 3. **Download** Tap **Download** to fetch the model to your device. Wait until the download finishes. 4. **Load** Tap **Load**. A **load model config** dialog appears: choose **CPU**, **GPU**, or **NPU** (for Qualcomm NPU), then tap **SURE**. Once the model is loaded, the chat area becomes available. load model config 5. **Chat** Type your message in the input field at the bottom, then tap **Send** to get a response. Use **Clear** to clear the input or conversation as needed. Chat with model response ================================================ FILE: bindings/android/README_zh.md ================================================ # Nexa Android SDK 演示 App [![Product Hunt](https://api.producthunt.com/widgets/embed-image/v1/top-post-badge.svg?post_id=1049998&theme=dark&period=daily&t=1765991451976)](https://www.producthunt.com/products/nexasdk-for-mobile) > 📣 **NexaSDK for Android** 被 [Qualcomm 博客](https://www.qualcomm.com/developer/blog/2025/11/nexa-ai-for-android-simple-way-to-bring-on-device-ai-to-smartphones-with-snapdragon) 评价为"将端侧 AI 引入 Snapdragon 智能手机的简易方案" ## 概述 Nexa AI Android SDK 让 Android 应用可以在设备端使用 NPU 加速进行 AI 推理。支持在 Android 设备上运行大型语言模型(LLMs)、视觉语言模型(VLMs)、嵌入模型、语音识别(ASR)、重排序和计算机视觉模型,并可在 NPU、GPU 或 CPU 上进行推理。 📖 完整文档请参考 [Android SDK 文档](https://docs.nexa.ai/cn/nexa-sdk-android/overview)。 ## 设备兼容性 ### 支持的硬件 | 组件 | 要求 | |-----------|-------------| | **NPU** | 高通骁龙 8 Gen 4(已优化) | | **GPU** | 高通 Adreno GPU | | **CPU** | ARM64-v8a | | **RAM** | 推荐 4GB 及以上 | | **存储** | 100MB - 4GB(具体取决于模型) | ### 最低要求 - Android API 等级 27 及以上(Android 8.1 Oreo) - 架构:ARM64-v8a ## 快速开始(APK 安装) ### 标准演示 App 下载并安装预编译的 APK: ```bash # 下载地址: https://nexa-model-hub-bucket.s3.us-west-1.amazonaws.com/public/android-demo-release/nexaai-demo-app.apk adb 
install nexaai-demo-app.apk ``` ### GPT-OSS NPU 演示 在高通 NPU 上运行 GPT-OSS 模型: ```bash # 下载地址: https://nexa-model-hub-bucket.s3.us-west-1.amazonaws.com/public/nexa_sdk/huggingface-models/gpt-oss-android-demo/nexaai-gpt-oss-npu.apk adb install nexaai-gpt-oss-npu.apk ``` ## 从源码编译 > 🎬 观看 [教程视频](https://www.linkedin.com/feed/update/urn:li:activity:7394055404562935809),只需 40 秒即可体验如何运行示例 App。 ### 详细步骤 1. **克隆仓库** ```bash git clone https://github.com/NexaAI/nexa-sdk/ ``` 2. **用 Android Studio 打开** 用 Android Studio 打开 `bindings/android` 文件夹。 3. **下载模型** 按照 [Android SDK 文档](https://docs.nexa.ai/cn/nexa-sdk-android/overview) 的说明下载模型。以下是一些可下载的示例: - [Granite-4.0-h-350M-NPU](https://huggingface.co/NexaAI/Granite-4.0-h-350M-NPU-mobile) - [Granite-4-Micro-NPU](https://huggingface.co/NexaAI/Granite-4-Micro-NPU-mobile) - [OmniNeural-4B](https://huggingface.co/NexaAI/OmniNeural-4B-mobile) 将模型放到 App 的数据目录: ``` /data/data/com.nexa.demo/files/models/ ``` 4. **编译并运行** 在 Android Studio 中编译并运行应用 ================================================ FILE: bindings/android/app/.gitignore ================================================ /build /src/main/assets/model_list_backup.json /src/main/assets/model_list_local.json ================================================ FILE: bindings/android/app/build.gradle.kts ================================================ // Copyright 2024-2026 Nexa AI, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
plugins {
    alias(libs.plugins.android.application)
    alias(libs.plugins.kotlin.android)
    alias(libs.plugins.kotlin.compose)
    id("org.jetbrains.kotlin.plugin.serialization") version "1.9.23"
}

android {
    namespace = "com.nexa.demo"
    compileSdk = 36

    signingConfigs {
        create("release") {
            // Note: For production builds, use environment variables or local.properties
            // Example: storePassword = System.getenv("KEYSTORE_PASSWORD") ?: ""
            // The keystore is the file named "test" next to this script. Both passwords are read
            // from Gradle properties and fall back to the literal "123456" when the property is unset.
            storeFile = file("test")
            storePassword = project.findProperty("KEYSTORE_PASSWORD")?.toString() ?: "123456"
            keyAlias = "test"
            keyPassword = project.findProperty("KEY_PASSWORD")?.toString() ?: "123456"
        }
    }

    defaultConfig {
        applicationId = "com.nexa.demo"
        minSdk = 27
        targetSdk = 36
        versionCode = 1
        versionName = "1.0"

        testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
    }

    buildTypes {
        release {
            isMinifyEnabled = false
            proguardFiles(
                getDefaultProguardFile("proguard-android-optimize.txt"),
                "proguard-rules.pro"
            )
            signingConfig = signingConfigs.getByName("release")
        }
        debug {
            isMinifyEnabled = false
            proguardFiles(
                getDefaultProguardFile("proguard-android-optimize.txt"),
                "proguard-rules.pro"
            )
            // Debug builds are signed with the same "release" signing config defined above.
            signingConfig = signingConfigs.getByName("release")
        }
    }

    compileOptions {
        sourceCompatibility = JavaVersion.VERSION_11
        targetCompatibility = JavaVersion.VERSION_11
    }

    kotlinOptions {
        jvmTarget = "11"
    }

    // sourceSets {
    //     getByName("main") {
    //         jniLibs.srcDirs("src/main/jniLibs")
    //     }
    // }

    // `packaging` replaces the deprecated `packagingOptions` block (Android Gradle Plugin 8+).
    packaging {
        jniLibs {
            useLegacyPackaging = true
        }
    }

    buildFeatures {
        viewBinding = true
        dataBinding = true
        compose = true
        buildConfig = true
    }
}

// Read from the Gradle extra properties; the cast fails the build if the property was never set
// or is not a Boolean. The value is only logged here.
val bridgePathExist = gradle.extra["bridgePathExist"] as Boolean
println("bridgePathExist: $bridgePathExist")

dependencies {
    // ===== NEXA CLOUD SDK =====
    // Using cloud SDK instead of local bridge - latest version
    // NOTE: "+" is a dynamic version: Gradle resolves it to the newest published artifact at build
    // time, so two builds of the same commit can pick up different SDK versions. Pin an explicit
    // version when a reproducible build is required.
    implementation("ai.nexa:core:+")
    // ===== NEXA CLOUD SDK END =====

    implementation(project(":transform"))
    implementation(":okdownload-core@aar")
    implementation(":okdownload-sqlite@aar")
    implementation(":okdownload-okhttp@aar")
    implementation(":okdownload-ktx@aar")
    implementation(kotlin("reflect"))
    implementation(libs.glide)
    implementation(libs.gson)
    implementation(libs.markwon.core)
    implementation(libs.markwon.strikethrough)
    implementation(libs.markwon.tables)
    implementation(libs.markwon.linkify)
    implementation(libs.markwon.latex)
    implementation(libs.markwon.inline.parser)
    implementation(libs.recyclerview)
    implementation(libs.toaster)
    implementation(libs.material)
    implementation(libs.imm.bar)
    implementation(libs.imm.bar.ktx)
    implementation(libs.auto.size)
    implementation(libs.okhttp)
    implementation(libs.kotlinx.serialization.json)

    implementation(libs.androidx.core.ktx)
    implementation(libs.androidx.lifecycle.runtime.ktx)
    implementation(libs.androidx.activity.compose)
    implementation(platform(libs.androidx.compose.bom))
    implementation(libs.androidx.ui)
    implementation(libs.androidx.ui.graphics)
    implementation(libs.androidx.ui.tooling.preview)
    implementation(libs.androidx.material3)
    implementation(libs.androidx.appcompat)
    implementation(libs.androidx.activity)
    implementation(libs.androidx.constraintlayout)

    testImplementation(libs.junit)
    androidTestImplementation(libs.androidx.junit)
    androidTestImplementation(libs.androidx.espresso.core)
    androidTestImplementation(platform(libs.androidx.compose.bom))
    androidTestImplementation(libs.androidx.ui.test.junit4)
    debugImplementation(libs.androidx.ui.tooling)
    debugImplementation(libs.androidx.ui.test.manifest)
}

================================================
FILE: bindings/android/app/proguard-rules.pro
================================================
# Copyright 2024-2026 Nexa AI, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # Add project specific ProGuard rules here. # You can control the set of applied configuration files using the # proguardFiles setting in build.gradle. # # For more details, see # http://developer.android.com/guide/developing/tools/proguard.html # If your project uses WebView with JS, uncomment the following # and specify the fully qualified class name to the JavaScript interface # class: #-keepclassmembers class fqcn.of.javascript.interface.for.webview { # public *; #} # Uncomment this to preserve the line number information for # debugging stack traces. #-keepattributes SourceFile,LineNumberTable # If you keep the line number information, uncomment this to # hide the original source file name. #-renamesourcefileattribute SourceFile ================================================ FILE: bindings/android/app/src/androidTest/java/com/nexa/demo/ExampleInstrumentedTest.kt ================================================ // Copyright 2024-2026 Nexa AI, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
package com.nexa.demo

import androidx.test.platform.app.InstrumentationRegistry
import androidx.test.ext.junit.runners.AndroidJUnit4
import org.junit.Test
import org.junit.runner.RunWith
import org.junit.Assert.*

/**
 * Instrumented smoke test; it runs on an Android device or emulator.
 *
 * See [testing documentation](http://d.android.com/tools/testing).
 */
@RunWith(AndroidJUnit4::class)
class ExampleInstrumentedTest {
    /** Checks that the target context of the app under test reports the expected package name. */
    @Test
    fun useAppContext() {
        // Package name taken from the target context of the app under test.
        val targetPackageName = InstrumentationRegistry.getInstrumentation().targetContext.packageName
        assertEquals("com.nexa.demo", targetPackageName)
    }
}

================================================
FILE: bindings/android/app/src/main/AndroidManifest.xml
================================================

================================================
FILE: bindings/android/app/src/main/assets/model_list.json
================================================
[
  {
    "id": "Llama3.2-3B-NPU-Turbo-NPU",
    "displayName": "Llama3.2-3B-NPU-Turbo-NPU",
    "modelName": "files-1-2.nexa",
    "versionCode": 1,
    "baseUrl": "https://nexa-model-hub-bucket.s3.us-west-1.amazonaws.com/public/nexa_sdk/huggingface-models/Llama3.2-3B-NPU-Turbo-NPU-mobile/",
    "modelUrl": "files-1-2.nexa"
  },
  {
    "id": "Llama-3.2-3B-Instruct-GGUF",
    "displayName": "Llama-3.2-3B-Instruct-GGUF",
    "modelName": "Llama-3.2-3B-Instruct-Q4_0.gguf",
    "type": "chat",
    "versionCode": 1,
    "pluginIds": 17,
    "modelUrl": "https://huggingface.co/bartowski/Llama-3.2-3B-Instruct-GGUF/resolve/main/Llama-3.2-3B-Instruct-Q4_0.gguf"
  },
  {
    "id": "Qwen3-4B-Instruct-2507-npu",
    "displayName": "Qwen3-4B-Instruct-2507-npu",
    "modelName": "files-1-1.nexa",
    "versionCode": 1,
    "baseUrl": "https://nexa-model-hub-bucket.s3.us-west-1.amazonaws.com/public/nexa_sdk/huggingface-models/Qwen3-4B-Instruct-2507-npu-mobile/",
    "modelUrl": "files-1-1.nexa"
  },
  {
    "id": "gpt-oss-20b-GGUF",
    "displayName": "gpt-oss-20b-GGUF",
    "modelName": "gpt-oss-20b-Q4_0.gguf",
    "type": "chat",
    "versionCode": 1,
    "pluginIds": 17,
    "modelUrl":
"https://huggingface.co/unsloth/gpt-oss-20b-GGUF/resolve/main/gpt-oss-20b-Q4_0.gguf" }, { "id": "Qwen3-4B-GGUF", "displayName": "Qwen3-4B-GGUF", "modelName": "Qwen3-4B-Q4_0.gguf", "type": "chat", "versionCode": 1, "pluginIds": 17, "modelUrl": "https://huggingface.co/unsloth/Qwen3-4B-GGUF/resolve/main/Qwen3-4B-Q4_0.gguf" }, { "id": "Granite-4-Micro-NPU", "displayName": "Granite-4.0-Micro 3B", "modelName": "files-1-2.nexa", "versionCode": 1, "baseUrl": "https://nexa-model-hub-bucket.s3.us-west-1.amazonaws.com/public/nexa_sdk/huggingface-models/Granite-4-Micro-NPU-mobile/", "modelUrl": "files-1-2.nexa" }, { "id": "LFM2-1.2B-npu", "displayName": "LFM2-1.2B-npu", "modelName": "files-1-2.nexa", "versionCode": 1, "baseUrl": "https://nexa-model-hub-bucket.s3.us-west-1.amazonaws.com/public/nexa_sdk/huggingface-models/LFM2-1.2B-npu-mobile/", "modelUrl": "files-1-2.nexa" }, { "id": "LFM2.5-1.2B-mobile", "displayName": "LFM2.5-1.2B-npu", "modelName": "files-1-2.nexa", "versionCode": 1, "baseUrl": "https://nexa-model-hub-bucket.s3.us-west-1.amazonaws.com/public/nexa_sdk/huggingface-models/LFM2.5-1.2B-mobile/", "modelUrl": "files-1-2.nexa" }, { "id": "OmniNeural-4B", "displayName": "OmniNeural-4B", "modelName": "files-1-1.nexa", "versionCode": 1, "baseUrl": "https://nexa-model-hub-bucket.s3.us-west-1.amazonaws.com/public/nexa_sdk/huggingface-models/OmniNeural-4B-mobile/", "modelUrl": "files-1-1.nexa" }, { "id": "paddleocr-npu", "displayName": "paddleocr-npu", "modelName": "weights-1-1.nexa", "versionCode": 1, "baseUrl": "https://nexa-model-hub-bucket.s3.us-west-1.amazonaws.com/public/nexa_sdk/huggingface-models/paddleocr-npu-mobile/", "modelUrl": "weights-1-1.nexa" }, { "id": "yolo26x-npu", "displayName": "yolo26x-npu", "modelName": "weights-1-1.nexa", "versionCode": 1, "baseUrl": "https://nexa-model-hub-bucket.s3.us-west-1.amazonaws.com/public/nexa_sdk/huggingface-models/yolo26x-npu-mobile/", "modelUrl": "weights-1-1.nexa" }, { "id": "depth-anything-v2-npu", "displayName": 
"depth-anything-v2-npu", "modelName": "weights-1-1.nexa", "versionCode": 1, "baseUrl": "https://nexa-model-hub-bucket.s3.us-west-1.amazonaws.com/public/nexa_sdk/huggingface-models/depth-anything-v2-npu-mobile/", "modelUrl": "weights-1-1.nexa" }, { "id": "embeddinggemma-300m-npu", "displayName": "embeddinggemma-300m-npu", "modelName": "files-1-2.nexa", "versionCode": 1, "baseUrl": "https://nexa-model-hub-bucket.s3.us-west-1.amazonaws.com/public/nexa_sdk/huggingface-models/embeddinggemma-300m-npu-mobile/", "modelUrl": "files-1-2.nexa" }, { "id": "LFM2-1.2B-GGUF-GGUF", "displayName": "LFM2-1.2B-GGUF-GGUF", "modelName": "LFM2-1.2B-Q4_0.gguf", "type": "chat", "versionCode": 1, "pluginIds": 17, "modelUrl": "https://huggingface.co/LiquidAI/LFM2-1.2B-GGUF/resolve/main/LFM2-1.2B-Q4_0.gguf" }, { "id": "Llama-3.2-1B-Instruct-GGUF", "displayName": "Llama-3.2-1B-Instruct-GGUF", "modelName": "Llama-3.2-1B-Instruct-Q4_0.gguf", "type": "chat", "versionCode": 1, "pluginIds": 17, "modelUrl": "https://huggingface.co/bartowski/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct-Q4_0.gguf" }, { "id": "LFM2-24B-A2B-Preview-GGUF", "displayName": "LFM2-24B-A2B-Preview-GGUF", "modelName": "LFM2-24B-A2B-Preview-Q4_0.gguf", "type": "chat", "versionCode": 1, "pluginIds": 17, "modelUrl": "https://huggingface.co/NexaAI/LFM2-24B-A2B-Preview-GGUF/resolve/main/LFM2-24B-A2B-Preview-Q4_0.gguf" } ] ================================================ FILE: bindings/android/app/src/main/java/com/nexa/demo/ChatAdapter.kt ================================================ // Copyright 2024-2026 Nexa AI, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.nexa.demo

import android.content.Intent
import android.graphics.BitmapFactory
import android.net.Uri
import android.text.method.LinkMovementMethod
import android.view.LayoutInflater
import android.view.View
import android.view.ViewGroup
import android.widget.ImageView
import android.widget.LinearLayout
import android.widget.TextView
import androidx.recyclerview.widget.RecyclerView
import io.noties.markwon.Markwon
import io.noties.markwon.ext.latex.JLatexMathPlugin
import io.noties.markwon.ext.strikethrough.StrikethroughPlugin
import io.noties.markwon.ext.tables.TablePlugin
import io.noties.markwon.inlineparser.MarkwonInlineParserPlugin
import io.noties.markwon.linkify.LinkifyPlugin
import java.io.File

/**
 * One entry in the chat transcript.
 *
 * @property content text shown for the message
 * @property type selects which row layout [ChatAdapter] inflates
 * @property images image files rendered by [ChatAdapter.ImagesViewHolder]
 * @property audio audio attachments; not read anywhere in this file
 */
data class Message(
    val content: String,
    val type: MessageType,
    val images: List<File> = emptyList(),
    // NOTE(review): the element type was lost in extraction; File is assumed to mirror `images` - confirm.
    val audio: List<File> = emptyList()
)

/**
 * Kind of chat row. [value] doubles as the RecyclerView item view type.
 */
enum class MessageType(val value: Int) {
    USER(0),
    ASSISTANT(1),
    PROFILE(2),
    IMAGES(3),
    ASSISTANT_IMAGES(4);

    companion object {
        /** Maps a view-type integer back to its [MessageType]; unknown values fall back to [PROFILE]. */
        fun from(value: Int): MessageType =
            entries.firstOrNull { it.value == value } ?: PROFILE
    }
}

/**
 * RecyclerView adapter that renders [messages] with one row layout per [MessageType].
 *
 * The adapter only reads the list; callers mutate it and notify the adapter themselves.
 */
class ChatAdapter(private val messages: List<Message>) :
    RecyclerView.Adapter<RecyclerView.ViewHolder>() {

    override fun getItemViewType(position: Int): Int = messages[position].type.value

    override fun onCreateViewHolder(parent: ViewGroup, viewType: Int): RecyclerView.ViewHolder {
        val inflater = LayoutInflater.from(parent.context)
        // Exhaustive on purpose (no `else`): adding a MessageType forces a decision here.
        return when (MessageType.from(viewType)) {
            MessageType.USER ->
                UserViewHolder(inflater.inflate(R.layout.item_user_message, parent, false))

            MessageType.ASSISTANT ->
                AiViewHolder(inflater.inflate(R.layout.item_ai_message, parent, false))

            MessageType.IMAGES ->
                ImagesViewHolder(inflater.inflate(R.layout.item_image_message, parent, false))

            MessageType.ASSISTANT_IMAGES ->
                ImagesViewHolder(
                    inflater.inflate(R.layout.item_assistant_image_message, parent, false)
                )

            MessageType.PROFILE ->
                ProfileViewHolder(inflater.inflate(R.layout.item_profile_message, parent, false))
        }
    }

    override fun onBindViewHolder(holder: RecyclerView.ViewHolder, position: Int) {
        val message = messages[position]
        when (holder) {
            is UserViewHolder -> holder.bind(message)
            is AiViewHolder -> holder.bind(message)
            is ImagesViewHolder -> holder.bind(message)
            is ProfileViewHolder -> holder.bind(message)
        }
    }

    override fun getItemCount() = messages.size

    /** Row for a user message: shows [Message.content] as plain text. */
    class UserViewHolder(itemView: View) : RecyclerView.ViewHolder(itemView) {
        private val tvMessage: TextView = itemView.findViewById(R.id.tv_message)

        fun bind(message: Message) {
            tvMessage.text = message.content
        }
    }

    /** Row for an assistant message: renders [Message.content] as Markdown through Markwon. */
    class AiViewHolder(itemView: View) : RecyclerView.ViewHolder(itemView) {
        private val tvMessage: TextView = itemView.findViewById(R.id.tv_message)

        // Declared after tvMessage because the LaTeX plugin is sized from tvMessage.textSize.
        private val markwon: Markwon = Markwon.builder(itemView.context)
            .usePlugin(StrikethroughPlugin.create())
            .usePlugin(TablePlugin.create(itemView.context))
            .usePlugin(LinkifyPlugin.create())
            .usePlugin(MarkwonInlineParserPlugin.create())
            .usePlugin(JLatexMathPlugin.create(tvMessage.textSize) { builder ->
                builder.inlinesEnabled(true)
                builder.blocksEnabled(true)
            })
            .build()

        fun bind(message: Message) {
            markwon.setMarkdown(tvMessage, message.content.trim())
            tvMessage.movementMethod = LinkMovementMethod.getInstance()
        }
    }

    /** Row for a profile message: shows [Message.content] as plain text. */
    class ProfileViewHolder(itemView: View) : RecyclerView.ViewHolder(itemView) {
        private val tvMessage: TextView = itemView.findViewById(R.id.tv_message)

        fun bind(message: Message) {
            tvMessage.text = message.content
        }
    }

    /** Row that shows every file in [Message.images] as an image inside a horizontal container. */
    class ImagesViewHolder(itemView: View) : RecyclerView.ViewHolder(itemView) {
        private val imageContainer: LinearLayout = itemView.findViewById(R.id.image_container)

        fun bind(message: Message) {
            // Holders are recycled, so drop the thumbnails of the previously bound message first.
            imageContainer.removeAllViews()
            val inflater = LayoutInflater.from(itemView.context)
            for (file in message.images) {
                val imageItemView =
                    inflater.inflate(R.layout.item_image_item_message, imageContainer, false)
                val ivImage = imageItemView.findViewById<ImageView>(R.id.iv_image)
                // decodeFile returns null for unreadable files; the row is still added, just empty.
                BitmapFactory.decodeFile(file.absolutePath)?.let(ivImage::setImageBitmap)
                imageContainer.addView(imageItemView)
            }
        }
    }
}

================================================
FILE: bindings/android/app/src/main/java/com/nexa/demo/FileConfig.kt
================================================
// Copyright 2024-2026 Nexa AI, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.nexa.demo

import android.content.Context
import java.io.File

/** File-system locations used by the demo app. */
class FileConfig {
    companion object {
        /**
         * Returns the `models` directory under the app's private files dir,
         * creating it (and any missing parents) when it does not exist yet.
         */
        val modelsDir = fun(context: Context): File {
            return File(context.filesDir, "models").apply { if (!exists()) mkdirs() }
        }
    }
}

================================================
FILE: bindings/android/app/src/main/java/com/nexa/demo/GenerationConfigSample.kt
================================================
// Copyright 2024-2026 Nexa AI, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.nexa.demo

import com.nexa.sdk.bean.GenerationConfig
import com.nexa.sdk.bean.SamplerConfig

// Configuration sample for generation with defaults compatible with bridge
// maxTokens: 0 = no limit, generates until model's natural stopping point
data class GenerationConfigSample(
    var maxTokens: Int = 2048,
    var stopWords: List<String>? = null,
    var stopCount: Int = 0,
    var nPast: Int = 0,
    var imagePaths: List<String>? = null,
    var imageCount: Int = 0,
    var audioPaths: List<String>? = null,
    var audioCount: Int = 0,
    var systemPrompt: String = ""
) {
    /**
     * Converts this sample into the SDK's [GenerationConfig].
     *
     * The list properties are copied into arrays; a null list stays null.
     * No sampler configuration is passed: the `samplerConfig` argument below is
     * commented out, so [grammarString] is currently accepted but ignored.
     * [systemPrompt] is not forwarded either.
     *
     * @param grammarString reserved for the disabled sampler setup; has no effect today
     */
    fun toGenerationConfig(grammarString: String? = null): GenerationConfig {
        return GenerationConfig(
            maxTokens = this.maxTokens,
            stopWords = this.stopWords?.toTypedArray(),
            stopCount = this.stopCount,
            nPast = this.nPast,
            //samplerConfig = SamplerConfig(
            //    grammarString = grammarString
            //    // All other sampler parameters use bridge defaults
            //    // No temperature, topK, topP, penalties applied
            //),
            imagePaths = this.imagePaths?.toTypedArray(),
            imageCount = this.imageCount,
            audioPaths = this.audioPaths?.toTypedArray(),
            audioCount = this.audioCount
        )
    }
}

================================================
FILE: bindings/android/app/src/main/java/com/nexa/demo/MainActivity.kt
================================================
// Copyright 2024-2026 Nexa AI, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package com.nexa.demo import android.Manifest import android.app.Activity import android.content.Context import android.content.DialogInterface import android.content.Intent import android.content.SharedPreferences import android.content.pm.PackageManager import android.graphics.Bitmap import android.graphics.BitmapFactory import android.graphics.Canvas import android.graphics.Color import android.graphics.Paint import android.graphics.RectF import android.net.Uri import android.os.Bundle import android.os.Environment import android.provider.MediaStore import android.system.Os import android.util.Log import android.view.LayoutInflater import android.view.View import android.view.inputmethod.InputMethodManager import android.widget.AdapterView import android.widget.Button import android.widget.EditText import android.widget.HorizontalScrollView import android.widget.ImageButton import android.widget.ImageView import android.widget.LinearLayout import android.widget.PopupWindow import android.widget.ProgressBar import android.widget.SimpleAdapter import android.widget.Spinner import android.widget.TextView import android.widget.Toast import androidx.appcompat.app.AlertDialog import androidx.core.app.ActivityCompat import androidx.core.content.ContextCompat import androidx.core.content.FileProvider import androidx.fragment.app.FragmentActivity import androidx.recyclerview.widget.RecyclerView import com.google.android.material.bottomsheet.BottomSheetDialog import com.gyf.immersionbar.ktx.immersionBar import com.hjq.toast.Toaster import 
com.liulishuo.okdownload.DownloadContext import com.liulishuo.okdownload.DownloadTask import com.liulishuo.okdownload.OkDownload import com.liulishuo.okdownload.core.cause.EndCause import com.liulishuo.okdownload.core.connection.DownloadOkHttp3Connection import com.liulishuo.okdownload.kotlin.listener.createDownloadContextListener import com.liulishuo.okdownload.kotlin.listener.createListener1 import com.nexa.demo.bean.DownloadableFile import com.nexa.demo.bean.DownloadableFileWithFallback import com.nexa.demo.bean.DownloadState import com.nexa.demo.bean.ModelData import com.nexa.demo.bean.downloadableFiles import com.nexa.demo.bean.downloadableFilesWithFallback import com.nexa.demo.bean.getNexaManifest import com.nexa.demo.bean.getNonExistModelFile import com.nexa.demo.bean.getSupportPluginIds import com.nexa.demo.bean.isNpuModel import com.nexa.demo.bean.mmprojTokenFile import com.nexa.demo.bean.modelDir import com.nexa.demo.bean.modelFile import com.nexa.demo.bean.tokenFile import com.nexa.demo.bean.withFallbackUrls import com.nexa.demo.utils.ModelFileListingUtil import com.nexa.demo.databinding.ActivityMainBinding import com.nexa.demo.databinding.DialogSelectPluginIdBinding import com.nexa.demo.listeners.CustomDialogInterface import com.nexa.demo.utils.ExecShell import com.nexa.demo.utils.ImgUtil import com.nexa.demo.utils.WavRecorder import com.nexa.demo.utils.inflate import com.nexa.sdk.AsrWrapper import com.nexa.sdk.CvWrapper import com.nexa.sdk.EmbedderWrapper import com.nexa.sdk.LlmWrapper import com.nexa.sdk.NexaSdk import com.nexa.sdk.RerankerWrapper import com.nexa.sdk.VlmWrapper import com.nexa.sdk.bean.AsrCreateInput import com.nexa.sdk.bean.AsrTranscribeInput import com.nexa.sdk.bean.CVCapability import com.nexa.sdk.bean.CVCreateInput import com.nexa.sdk.bean.CVModelConfig import com.nexa.sdk.bean.ChatMessage import com.nexa.sdk.bean.EmbedderCreateInput import com.nexa.sdk.bean.EmbeddingConfig import com.nexa.sdk.bean.LlmCreateInput import 
com.nexa.sdk.bean.LlmStreamResult import com.nexa.sdk.bean.ModelConfig import com.nexa.sdk.bean.RerankConfig import com.nexa.sdk.bean.RerankerCreateInput import com.nexa.sdk.bean.VlmChatMessage import com.nexa.sdk.bean.VlmContent import com.nexa.sdk.bean.VlmCreateInput import com.nexa.sdk.bean.DeviceIdValue import kotlinx.coroutines.CoroutineScope import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.async import kotlinx.coroutines.awaitAll import kotlinx.coroutines.launch import kotlinx.serialization.json.Json import okhttp3.OkHttpClient import okhttp3.Request import java.io.File import java.io.FileNotFoundException import java.io.FileOutputStream import java.security.SecureRandom import java.security.cert.CertificateException import java.security.cert.X509Certificate import javax.net.ssl.SSLContext import javax.net.ssl.SSLSession import javax.net.ssl.SSLSocketFactory import javax.net.ssl.TrustManager import javax.net.ssl.X509TrustManager class MainActivity : FragmentActivity() { private val binding: ActivityMainBinding by inflate() private var downloadContext: DownloadContext? = null private var downloadState = DownloadState.IDLE private var downloadingModelData: ModelData? 
= null private lateinit var spDownloaded: SharedPreferences private lateinit var llDownloading: LinearLayout private lateinit var tvDownloadProgress: TextView private lateinit var pbDownloading: ProgressBar private lateinit var spModelList: Spinner private lateinit var btnDownload: Button private lateinit var btnLoadModel: Button private lateinit var btnUnloadModel: Button private lateinit var btnStop: Button private lateinit var etInput: EditText private lateinit var btnSend: Button private lateinit var btnClearHistory: Button private lateinit var btnAddImage: Button private lateinit var btnAudioRecord: Button private lateinit var recyclerView: RecyclerView private lateinit var adapter: ChatAdapter private lateinit var bottomPanel: LinearLayout private lateinit var btnAudioDone: Button private lateinit var btnAudioCancel: Button private lateinit var scrollImages: HorizontalScrollView private lateinit var topScrollContainer: LinearLayout private lateinit var llLoading: LinearLayout private lateinit var vTip: View private lateinit var llmWrapper: LlmWrapper private lateinit var vlmWrapper: VlmWrapper var embedderWrapper: EmbedderWrapper? = null private lateinit var rerankerWrapper: RerankerWrapper private lateinit var cvWrapper: CvWrapper private lateinit var asrWrapper: AsrWrapper private val modelScope = CoroutineScope(Dispatchers.IO) private val chatList = arrayListOf() private lateinit var llmSystemPrompt: ChatMessage private val vlmChatList = arrayListOf() private lateinit var vlmSystemPrompty: VlmChatMessage private lateinit var modelList: List private var selectModelId = "" // ADD: Track which model type is loaded private var isLoadLlmModel = false private var isLoadVlmModel = false private var isLoadEmbedderModel = false private var isLoadRerankerModel = false private var isLoadCVModel = false private var isLoadAsrModel = false private var enableThinking = false private var wavRecorder: WavRecorder? = null private var audioFile: File? 
= null private val savedImageFiles = mutableListOf() private val messages = arrayListOf() override fun onCreate(savedInstanceState: Bundle?) { super.onCreate(savedInstanceState) immersionBar { statusBarColorInt(Color.WHITE) statusBarDarkFont(true) } requestPermissions(arrayOf(Manifest.permission.RECORD_AUDIO), 1002) okdownload() initData() initView() setListeners() } private fun resetLoadState() { isLoadLlmModel = false isLoadVlmModel = false isLoadEmbedderModel = false isLoadRerankerModel = false isLoadCVModel = false isLoadAsrModel = false } private fun initView() { adapter = ChatAdapter(messages) binding.rvChat.adapter = adapter llDownloading = findViewById(R.id.ll_downloading) tvDownloadProgress = findViewById(R.id.tv_download_progress) pbDownloading = findViewById(R.id.pb_downloading) spModelList = findViewById(R.id.sp_model_list) spModelList.adapter = object : SimpleAdapter(this, modelList.map { val map = mutableMapOf() map["displayName"] = it.displayName map }, R.layout.item_model, arrayOf("displayName"), intArrayOf(R.id.tv_model_id)) { } spModelList.onItemSelectedListener = object : AdapterView.OnItemSelectedListener { override fun onItemSelected( parent: AdapterView<*>?, view: View?, position: Int, id: Long ) { selectModelId = modelList[position].id messages.clear() adapter.notifyDataSetChanged() binding.rvChat.scrollTo(0, 0) } override fun onNothingSelected(parent: AdapterView<*>?) 
{ selectModelId = "" } } btnDownload = findViewById(R.id.btn_download) btnLoadModel = findViewById(R.id.btn_load_model) btnUnloadModel = findViewById(R.id.btn_unload_model) btnStop = findViewById(R.id.btn_stop) etInput = findViewById(R.id.et_input) btnAddImage = findViewById(R.id.btn_add_image) btnAudioRecord = findViewById(R.id.btn_voice) bottomPanel = findViewById(R.id.bottom_panel) btnAudioCancel = findViewById(R.id.btn_audio_cancel) btnAudioDone = findViewById(R.id.btn_audio_done) btnSend = findViewById(R.id.btn_send) btnClearHistory = findViewById(R.id.btn_clear_history) scrollImages = findViewById(R.id.scroll_images) topScrollContainer = findViewById(R.id.ll_images_container) llLoading = findViewById(R.id.ll_loading) vTip = findViewById(R.id.v_tip) btnAudioCancel.setOnClickListener { stopRecord(true) } btnAudioDone.setOnClickListener { stopRecord(false) } findViewById
Hi!
How can I help?
================================================ FILE: cookbook/PC/function-calling/main.py ================================================ # Copyright 2024-2026 Nexa AI, Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. #!/usr/bin/env python3 """NexaAI VLM Function Call Demo with Google Calendar MCP""" import asyncio import json import os import sys import argparse import re from dataclasses import dataclass from typing import List, Dict, Any, Optional from nexaai import GenerationConfig, ModelConfig, VlmChatMessage, VlmContent, setup_logging from nexaai.vlm import VLM from mcp import ClientSession, StdioServerParameters from mcp.client.stdio import stdio_client def _convert_schema_property(prop_schema: Dict[str, Any]) -> Dict[str, Any]: """Recursively convert a schema property, handling nested objects.""" result = { "type": prop_schema.get("type", "string"), } if "description" in prop_schema: result["description"] = prop_schema["description"] # Handle nested objects if prop_schema.get("type") == "object" and "properties" in prop_schema: nested_props = {} nested_required = [] for nested_name, nested_schema in prop_schema["properties"].items(): nested_props[nested_name] = _convert_schema_property(nested_schema) if nested_name in prop_schema.get("required", []): nested_required.append(nested_name) result["properties"] = nested_props if nested_required: result["required"] = nested_required # Handle arrays of objects if prop_schema.get("type") == "array" and "items" in 
prop_schema: result["items"] = _convert_schema_property(prop_schema["items"]) return result def mcp_tool_to_openai_format(tool) -> Dict[str, Any]: """Convert MCP tool to OpenAI function calling format.""" properties = {} required = [] if tool.inputSchema and "properties" in tool.inputSchema: for prop_name, prop_schema in tool.inputSchema["properties"].items(): properties[prop_name] = _convert_schema_property(prop_schema) if tool.inputSchema.get("required") and prop_name in tool.inputSchema["required"]: required.append(prop_name) return { "type": "function", "function": { "name": tool.name, "description": tool.description or "", "parameters": { "type": "object", "properties": properties, "required": required, } } } async def get_mcp_tools(session: ClientSession) -> List[Dict[str, Any]]: """Get tools from MCP server and convert to OpenAI format.""" result = await session.list_tools() return [mcp_tool_to_openai_format(tool) for tool in result.tools] def normalize_tool_name(tool_name: str, available_tools: List[Dict[str, Any]]) -> str: """Normalize tool name to match available tools.""" name_mappings = { "create_calendar_event": "create-event", "create-event": "create-event", "list_calendar_events": "list-events", "list-events": "list-events", "update_calendar_event": "update-event", "update-event": "update-event", "delete_calendar_event": "delete-event", "delete-event": "delete-event", "get_current_time": "get-current-time", "get-current-time": "get-current-time", } tool_names = [t.get("function", {}).get("name", "") for t in available_tools] if tool_name in name_mappings: normalized = name_mappings[tool_name] if normalized in tool_names: return normalized if tool_name in tool_names: return tool_name normalized = tool_name.replace("_", "-") return normalized if normalized in tool_names else tool_name async def execute_mcp_tool(session: ClientSession, tool_name: str, arguments: Dict[str, Any], available_tools: Optional[List[Dict[str, Any]]] = None) -> str: """Execute a 
tool call via MCP server.""" try: if available_tools: tool_name = normalize_tool_name(tool_name, available_tools) result = await session.call_tool(tool_name, arguments=arguments) return result.model_dump_json(indent=2) except Exception as e: return f"Error: {str(e)}" def create_calendar_server(credentials: str) -> StdioServerParameters: """Create Google Calendar MCP server parameters.""" if not os.path.exists(credentials): raise FileNotFoundError( f"Credentials file not found: {credentials}\n" f"Please create the OAuth credentials file at: {os.path.abspath(credentials)}" ) return StdioServerParameters( command="npx", args=["-y", "@cocal/google-calendar-mcp"], env={"GOOGLE_OAUTH_CREDENTIALS": os.path.abspath(credentials)}, ) def extract_function_call(text: str): """Extract function call JSON from LLM response.""" if not text: return None text = re.sub(r"<\|[^|]+\|>", "", text.strip()) try: parsed = json.loads(text) if isinstance(parsed, dict) and "name" in parsed: return parsed.get("name"), parsed.get("arguments", {}) except json.JSONDecodeError: json_start = text.find('{') if json_start == -1: return None brace_count = 0 for i in range(json_start, len(text)): if text[i] == '{': brace_count += 1 elif text[i] == '}': brace_count -= 1 if brace_count == 0: json_str = text[json_start:i + 1] try: parsed = json.loads(json_str) if isinstance(parsed, dict) and "name" in parsed: return parsed.get("name"), parsed.get("arguments", {}) except json.JSONDecodeError: pass break return None def _format_nested_properties(props: Dict[str, Any], required: List[str], prefix: str = "", indent: int = 2) -> List[str]: """Recursively format nested object properties with required field indicators.""" param_list = [] indent_str = " " * indent for param_name, param_info in props.items(): param_type = param_info.get('type', 'string') param_desc = param_info.get('description', '') is_required = param_name in required full_param_name = f"{prefix}.{param_name}" if prefix else param_name # Handle 
nested objects if param_type == 'object' and 'properties' in param_info: nested_props = param_info.get('properties', {}) nested_required = param_info.get('required', []) req_mark = " (REQUIRED)" if is_required else "" if param_desc: param_list.append(f"{indent_str}{param_name} (object){req_mark}: {param_desc}") else: param_list.append(f"{indent_str}{param_name} (object){req_mark}") # Recursively format nested properties nested_params = _format_nested_properties( nested_props, nested_required, full_param_name, indent + 2 ) param_list.extend(nested_params) else: req_mark = " (REQUIRED)" if is_required else "" if param_desc: param_list.append(f"{indent_str}{param_name} ({param_type}){req_mark}: {param_desc}") else: param_list.append(f"{indent_str}{param_name} ({param_type}){req_mark}") return param_list def build_system_prompt(tools: list) -> str: """Build system prompt from tool schemas.""" tools_descriptions = [] for t in tools: func = t.get('function', {}) name = func.get('name', '') desc = func.get('description', '') params = func.get('parameters', {}) props = params.get('properties', {}) required = params.get('required', []) # Format parameters with nested object support param_list = _format_nested_properties(props, required) params_str = "\n".join(param_list) if param_list else " (no parameters)" # Highlight required parameters at the top required_params = [p for p in required] required_str = f"\n REQUIRED parameters: {', '.join(required_params)}" if required_params else "" tools_descriptions.append(f"{name}: {desc}{required_str}\n{params_str}") tools_list = "\n\n".join([f"{i+1}. {td}" for i, td in enumerate(tools_descriptions)]) # Add example for create-event example_json = """{ "name": "create-event", "arguments": { "calendarId": "primary", "summary": "Meeting", "start": "2025-01-01T10:00:00", "end": "2025-01-01T11:00:00" } }""" return f"""You are a calendar assistant. 
When the user requests calendar actions, respond with ONLY a JSON object in this format: {{"name": "function_name", "arguments": {{"param": "value"}}}} CRITICAL RULES: - You MUST include ALL required parameters (marked as REQUIRED) - For nested objects, ALL required fields within the object must be included - If a parameter is marked as REQUIRED, it cannot be omitted - Output ONLY valid JSON, no other text before or after - Use exact function and parameter names (case-sensitive) Example for create-event: {example_json} Available functions: {tools_list} IMPORTANT: Before creating events, you may need to call get-current-time first to get accurate date/time context. Always include calendarId="primary" for create-event unless specified otherwise. """ def _handle_function_call_error(error_text: str, func_args: Dict[str, Any]) -> bool: """Handle function call errors and auto-fix parameters. Returns True if should retry.""" # Auto-fix account errors if "Account" in error_text and "not found" in error_text and "Available accounts:" in error_text: match = re.search(r'Available accounts:\s*(\w+)', error_text) if match: func_args['account'] = match.group(1) return True elif 'account' in func_args: del func_args['account'] return True # Auto-fix eventId errors if "Invalid event ID" in error_text or ("event ID" in error_text.lower() and "invalid" in error_text.lower()): if 'eventId' in func_args: del func_args['eventId'] return True # Auto-remove optional parameters that cause errors if "validation error" in error_text.lower() or "invalid" in error_text.lower(): optional_params = ['account', 'eventId', 'timeZone', 'fields'] for param in optional_params: if param in func_args: del func_args[param] return True return False async def _execute_with_retry(session: ClientSession, func_name: str, func_args: Dict[str, Any], tools: List[Dict[str, Any]], max_retries: int = 3) -> str: """Execute function call with automatic error handling and retry.""" retry_count = 0 func_result = "" 
while retry_count <= max_retries: func_result = await execute_mcp_tool(session, func_name, func_args, tools) try: result_data = json.loads(func_result) if isinstance(func_result, str) else func_result if result_data.get('isError', False): error_text = "" if isinstance(result_data.get('content'), list): for item in result_data['content']: if item.get('type') == 'text': error_text = item.get('text', '') break if retry_count < max_retries and _handle_function_call_error(error_text, func_args): retry_count += 1 continue break except Exception: break return func_result or "" def init_vlm(tools: List[Dict[str, Any]]) -> VLM: """Initialize VLM with tools.""" system_prompt = build_system_prompt(tools) print('[debug] system_prompt:', system_prompt) return VLM.from_("NexaAI/OmniNeural-4B", config=ModelConfig( system_prompt=system_prompt, n_ctx=4096, n_threads=0, n_threads_batch=0, n_batch=0, n_ubatch=0, n_seq_max=0, n_gpu_layers=999 )) @dataclass class FunctionCallAgentResult: """Result of function call agent execution.""" func_name: Optional[str] func_result: Optional[str] response_text: str async def call_agent( vlm: VLM, session: ClientSession, tools: List[Dict[str, Any]], text: Optional[str] = None, image: Optional[str] = None, audio: Optional[str] = None ) -> FunctionCallAgentResult: if not text and not image and not audio: raise ValueError("At least one of text, image, or audio must be provided") contents = [] image_paths = [] audio_paths = [] if image: image_path = os.path.abspath(image) if not os.path.exists(image_path): raise FileNotFoundError(f"Image file not found: {image_path}") image_paths.append(image_path) contents.append(VlmContent(type="image", text=image_path)) if audio: audio_path = os.path.abspath(audio) if not os.path.exists(audio_path): raise FileNotFoundError(f"Audio file not found: {audio_path}") audio_paths.append(audio_path) contents.append(VlmContent(type="audio", text=audio_path)) if text: contents.append(VlmContent(type="text", text=text)) 
conversation = [VlmChatMessage(role="user", contents=contents)] # Generate initial response prompt = vlm.apply_chat_template(conversation) print('[debug] prompt:', prompt) print('[debug] generate_stream...') response_text = "" for token in vlm.generate_stream(prompt, config=GenerationConfig( max_tokens=2048, image_paths=image_paths or None, audio_paths=audio_paths or None, image_max_length=512 )): print(token, end="", flush=True) response_text += token print() print('[debug] response_text:', response_text) func_call = extract_function_call(response_text) if not func_call: print(f"[error] Failed to extract function call from response") return FunctionCallAgentResult( func_name=None, func_result=None, response_text=response_text ) func_name, func_args = func_call if func_name and isinstance(func_name, str): print('[debug] calling function:', func_name) func_result = await _execute_with_retry(session, func_name, func_args, tools) print('[debug] func_result:', func_result) # Parse function result to extract success/error message result_message = "" try: result_data = json.loads(func_result) if isinstance(func_result, str) else func_result if result_data.get('isError', False): # Extract error message if isinstance(result_data.get('content'), list): for item in result_data['content']: if item.get('type') == 'text': result_message = item.get('text', '') break else: # Extract success message or summary if isinstance(result_data.get('content'), list): for item in result_data['content']: if item.get('type') == 'text': result_message = item.get('text', '') break except Exception: result_message = str(func_result) followup = conversation + [ VlmChatMessage(role="assistant", contents=[VlmContent(type="text", text=response_text)]), VlmChatMessage(role="user", contents=[VlmContent(type="text", text=f"Function execution completed. Result: {result_message}\n\n" f"Now respond to the user in natural language. 
You are in RESPONSE MODE, not function calling mode.\n" f"- DO NOT output any JSON format\n" f"- DO NOT use {{}} brackets\n" f"- DO NOT call any function\n" f"- Just speak naturally like a helpful assistant\n" f"- Tell the user what happened with the calendar event in a friendly way")]) ] followup_response = "" for token in vlm.generate_stream( vlm.apply_chat_template(followup, enable_thinking=False), config=GenerationConfig(max_tokens=2048) ): followup_response += token return FunctionCallAgentResult( func_name=func_name, func_result=func_result, response_text=followup_response ) return FunctionCallAgentResult( func_name=None, func_result=None, response_text=response_text ) async def call_agent_wrapper( text: Optional[str] = None, image: Optional[str] = None, audio: Optional[str] = None, credentials: str = "gcp-oauth.keys.json" ) -> FunctionCallAgentResult: setup_logging() if not text and not image and not audio: raise ValueError("At least one of text, image, or audio must be provided") server = create_calendar_server(credentials) async with stdio_client(server) as (read, write): async with ClientSession(read, write) as session: await session.initialize() tools = await get_mcp_tools(session) tools = [t for t in tools if t.get('function', {}).get('name', '') in ['create-event', 'get-current-time']] vlm = init_vlm(tools) result = await call_agent(vlm, session, tools, text, image, audio) return result async def main(): """Command-line interface for the agent.""" setup_logging() parser = argparse.ArgumentParser() parser.add_argument("--credentials", default="gcp-oauth.keys.json") parser.add_argument("--text", help="Text input") parser.add_argument("--image", help="Image file path") parser.add_argument("--audio", help="Audio file path") args = parser.parse_args() if not args.text and not args.image and not args.audio: parser.print_help() return server = create_calendar_server(args.credentials) async with stdio_client(server) as (read, write): async with 
ClientSession(read, write) as session: await session.initialize() tools = await get_mcp_tools(session) tools = [t for t in tools if t.get('function', {}).get('name', '') in ['create-event', 'get-current-time']] vlm = init_vlm(tools) result = await call_agent(vlm, session, tools, args.text, args.image, args.audio) if result.response_text: print(result.response_text) if __name__ == "__main__": asyncio.run(main()) ================================================ FILE: cookbook/PC/function-calling/requirements.txt ================================================ mcp Flask[async] ================================================ FILE: cookbook/PC/live-translate/README.md ================================================ # NexaAI Live Translator Real-time speech recognition and language translation demo using NexaAI's ASR and LLM models. Supports multi-language translation with low-latency sentence-level translation. ## Features - 🎙️ **Real-time Speech Recognition**: Uses NexaAI's Parakeet ASR model for multi-language transcription - 🌐 **Instant Translation**: LLM provides fast, accurate translations - 📊 **Segment-based**: Automatic sentence/paragraph boundary detection for natural translations - 🖥️ **Modern Web UI**: Clean, responsive interface with live text display ## Requirements - Windows/Linux with Qualcomm NPU device. - Python 3.9+ ## Installation ### 1. Clone and Setup Environment ```bash cd cookbook/PC/live-translate python -m venv .venv # Windows .venv\Scripts\activate # Linux source .venv/bin/activate ``` ### 2. Install Dependencies ```bash pip install -r requirements.txt ``` ### 3. Download Models Download the required models using NexaAI CLI: ```bash nexa pull NexaAI/parakeet-tdt-0.6b-v3-npu nexa pull NexaAI/HY-MT1.5-1.8B-npu ``` ## Usage ### Quick Start ```bash python app.py ``` Then open your browser to: **http://localhost:5000** ### How to Use 1. 
**Select Target Language**: Choose your desired translation language from the dropdown (e.g., Chinese, English, French, etc.) 2. **Start Recording**: Click the "Start Recording" button 3. **Speak**: Speak naturally into your microphone 4. **Watch Real-time Translation**: - Left panel shows real-time transcription - Right panel shows real-time translation (auto-updated per sentence) 5. **Stop Recording**: Click "Stop Recording" to end ================================================ FILE: cookbook/PC/live-translate/app.py ================================================ import logging import atexit import threading import numpy as np from queue import Queue, Empty from typing import Optional from flask import Flask, jsonify, render_template, request from flask_cors import CORS from flask_socketio import SocketIO, emit from nexaai import LLM, GenerationConfig, SamplerConfig, ASR, setup_logging, LlmChatMessage from nexaai.asr import ASRStreamConfig logging.basicConfig(level=logging.INFO, format='[%(asctime)s] %(levelname)s: %(message)s', force=True) setup_logging(level=logging.DEBUG) logger = logging.getLogger(__name__) app = Flask(__name__, template_folder='.', static_folder='.') CORS(app) socketio = SocketIO(app, cors_allowed_origins='*', async_mode='threading', logger=False, engineio_logger=False) asr_model: Optional[ASR] = None llm_model: Optional[LLM] = None stream_managers = {} class TranslationStreamManager: def __init__(self, asr_model: ASR, llm_model: LLM, sid: str, target_language: str = 'zh'): self.asr = asr_model self.llm = llm_model self.sid = sid self.stream = None self.stream_context = None self.stream_active = False self.target_language = target_language self.audio_queue: Queue[bytes] = Queue(maxsize=50) self.audio_thread: Optional[threading.Thread] = None self.stop_event = threading.Event() self.translation_queue: Queue[Optional[str]] = Queue(maxsize=20) self.translation_thread: Optional[threading.Thread] = None self.translation_degraded = False 
self.last_committed_source = '' self.pending_text = '' self.stable_count = 0 def start_stream(self, target_language: str = 'zh'): self.target_language = target_language self.stream_active = True logger.info('Starting ASR stream') config = ASRStreamConfig( sample_rate=16000, chunk_duration=4.0, overlap_duration=3.5, max_queue_size=10, buffer_size=1024, timestamps='segment', beam_size=4, ) def on_transcription(text: str): try: if text and text.strip(): self.on_new_segment(text) except Exception as e: logger.error(f'Error in transcription callback: {e}', exc_info=True) socketio.emit('error', {'message': f'Transcription error: {e}'}, to=self.sid) try: self.stream_context = self.asr.stream(language='en', config=config) self.stream = self.stream_context.__enter__() self.stream.start(on_transcription=on_transcription) logger.info('ASR stream started successfully') self.stop_event.clear() self.audio_thread = threading.Thread( target=self._audio_worker, name=f'audio-worker-{self.sid}', daemon=True ) self.audio_thread.start() self.translation_thread = threading.Thread( target=self._translation_worker, name=f'translation-worker-{self.sid}', daemon=True ) self.translation_thread.start() except Exception as e: logger.error(f'Error starting stream: {e}', exc_info=True) if self.stream_context is not None: try: self.stream_context.__exit__(None, None, None) except Exception: pass self.stream = None self.stream_active = False socketio.emit('error', {'message': f'Failed to start stream: {e}'}, to=self.sid) def push_audio(self, audio_bytes: bytes): if not self.stream_active: return if len(audio_bytes) % 2 == 1: audio_bytes = audio_bytes[:-1] if not audio_bytes: return try: self.audio_queue.put_nowait(audio_bytes) except Exception as e: logger.error(f'Error enqueuing audio: {e}') socketio.emit('error', {'message': f'Audio queue error: {e}'}, to=self.sid) def _audio_worker(self): try: while not self.stop_event.is_set(): try: audio_bytes = self.audio_queue.get(timeout=0.5) except Empty: 
continue try: audio_array = np.frombuffer(audio_bytes, dtype=np.int16).astype(np.float32) / 32768.0 if self.stream and self.stream_active: self.stream.push_audio(audio_array.tolist()) except Exception as e: logger.error(f'Error pushing audio in worker: {e}') socketio.emit('error', {'message': f'Audio processing error: {e}'}, to=self.sid) finally: self.audio_queue.task_done() except Exception as e: logger.error(f'Audio worker crashed: {e}', exc_info=True) socketio.emit('error', {'message': f'Audio worker error: {e}'}, to=self.sid) def on_new_segment(self, segment_text: str): if not segment_text.strip(): return try: logger.info(f'New segment: {segment_text}') if segment_text == self.pending_text: self.stable_count += 1 else: self.pending_text = segment_text self.stable_count = 1 socketio.emit('transcription', {'original': segment_text}, to=self.sid) is_sentence_end = segment_text.rstrip().endswith(('.', '?', '!')) if self.stable_count < 3 and not is_sentence_end: logger.debug(f'Waiting for stabilization (count={self.stable_count}, sentence_end={is_sentence_end})') return if segment_text == self.last_committed_source: logger.debug('Skipping translation duplicate (unchanged source)') return self.last_committed_source = segment_text logger.info('Committing stabilized segment for translation') self._enqueue_translation(segment_text) except Exception as e: logger.error(f'Error processing segment: {e}', exc_info=True) socketio.emit('error', {'message': f'Segment processing error: {e}'}, to=self.sid) def _translate_text(self, text: str, target_lang: str) -> Optional[str]: try: prompt = f""" Translate the following segment into {target_lang}, without additional explanation.\n\n{text} """ prompt = self.llm.apply_chat_template([LlmChatMessage(role='user', content=prompt)], enable_thinking=False) logger.info(f'Translation prompt: {prompt}') result = self.llm.generate( prompt, GenerationConfig(max_tokens=256, sampler_config=SamplerConfig(temperature=0.3)), ) self.llm.reset() 
logger.info(f'Translation result: {result.full_text}') return result.full_text except Exception as e: logger.error(f'Translation error: {e}', exc_info=True) return None def _enqueue_translation(self, text: str): if self.translation_degraded: logger.error(f'Translation degraded; dropping text: {text}') return try: self.translation_queue.put_nowait(text) except Exception as e: logger.error(f'Failed to enqueue translation: {e}') def _translation_worker(self): while not self.stop_event.is_set(): try: text = self.translation_queue.get(timeout=0.5) except Empty: continue if text is None: self.translation_queue.task_done() break try: translated = self._translate_text(text, self.target_language) if translated: logger.info(f'Translated to: {translated}') socketio.emit( 'translation', {'translated': translated, 'original': text, 'language': self.target_language}, to=self.sid, ) else: socketio.emit('error', {'message': 'Translation failed'}, to=self.sid) except Exception as e: logger.error(f'Translation worker error: {e}', exc_info=True) finally: self.translation_queue.task_done() def stop_stream(self): self.stream_active = False self.stop_event.set() try: self.audio_queue.put_nowait(b'') except Exception: pass if self.audio_thread and self.audio_thread.is_alive(): self.audio_thread.join(timeout=2.0) self.audio_thread = None try: self.translation_queue.put_nowait(None) except Exception: pass if self.translation_thread and self.translation_thread.is_alive(): self.translation_thread.join(timeout=2.0) self.translation_thread = None if self.stream: try: self.stream.stop(graceful=True) self.stream = None except Exception as e: logger.error(f'Error stopping stream: {e}') if self.stream_context is not None: try: self.stream_context.__exit__(None, None, None) except Exception as e: logger.error(f'Error closing stream context: {e}') logger.info('ASR stream stopped') def initialize_models(): global asr_model, llm_model asr_model = ASR.from_(model='NexaAI/parakeet-tdt-0.6b-v3-npu') 
llm_model = LLM.from_(model='NexaAI/HY-MT1.5-1.8B-npu') logger.info('✓ LLM model loaded successfully') # ============================================================================ # HTTP Routes # ============================================================================ @app.route('/') def index(): return render_template('index.html') @app.route('/api/health', methods=['GET']) def health(): return jsonify( { 'status': 'ok', 'asr_loaded': asr_model is not None, 'llm_loaded': llm_model is not None, } ) @app.route('/api/translate-segment', methods=['POST']) def translate_segment(): try: data = request.get_json() text = data.get('text', '').strip() target_lang = data.get('target_lang', 'zh') if not text: return jsonify({'error': 'Empty text'}), 400 logger.info(f"REST API: Translating '{text}' to {target_lang}") if asr_model is None or llm_model is None: return jsonify({'error': 'Models not loaded'}), 500 manager = TranslationStreamManager(asr_model, llm_model, 'rest-api', target_lang) translated = manager._translate_text(text, target_lang) if translated is None: return jsonify({'error': 'Translation failed'}), 500 return jsonify( { 'original': text, 'translated': translated, 'target_lang': target_lang, } ) except Exception as e: logger.error(f'Error in translate_segment: {e}', exc_info=True) return jsonify({'error': str(e)}), 500 # ============================================================================ # WebSocket Events # ============================================================================ @socketio.on('connect') def handle_connect(): emit('connect', {'data': 'Connected to translation server'}) @socketio.on('disconnect') def handle_disconnect(): sid = request.sid logger.info(f'Client disconnected: {sid}') if sid in stream_managers: stream_managers[sid].stop_stream() del stream_managers[sid] @socketio.on('start_stream') def handle_start_stream(data): sid = request.sid if isinstance(data, dict): target_language = data.get('target_lang', 'zh') else: 
target_language = 'zh' try: logger.info(f'[{sid}] start_stream event received, target_language: {target_language}') logger.debug(f'[{sid}] asr_model loaded: {asr_model is not None}, llm_model loaded: {llm_model is not None}') if asr_model and llm_model: logger.info(f'[{sid}] Creating TranslationStreamManager...') manager = TranslationStreamManager(asr_model, llm_model, sid, target_language) logger.info(f'[{sid}] Starting ASR stream...') manager.start_stream(target_language) stream_managers[sid] = manager logger.info(f'[{sid}] Stream registered in stream_managers, total streams: {len(stream_managers)}') emit( 'stream_started', { 'status': 'ok', 'target_language': target_language, }, ) logger.info(f'[{sid}] stream_started event emitted') else: logger.error( f'[{sid}] Models not loaded! asr_model={asr_model is not None}, llm_model={llm_model is not None}' ) emit('error', {'message': 'Models not loaded'}) except Exception as e: logger.error(f'[{sid}] Error starting stream: {e}', exc_info=True) emit('error', {'message': f'Failed to start stream: {e}'}) @socketio.on('audio_chunk') def handle_audio_chunk(data): sid = request.sid if sid not in stream_managers: logger.warning( f'[{sid}] Received audio_chunk but stream not started. Active streams: {list(stream_managers.keys())}' ) emit('error', {'message': 'Stream not started. 
Click "Start Recording" first.'}) return try: if isinstance(data, (bytes, bytearray)): audio_bytes = bytes(data) else: raise ValueError(f'Unsupported audio payload type: {type(data)}') if not audio_bytes: raise ValueError('Empty audio payload') if len(audio_bytes) % 2 == 1: audio_bytes = audio_bytes[:-1] logger.debug(f'[{sid}] Received audio chunk: {len(audio_bytes)} bytes') stream_managers[sid].push_audio(audio_bytes) except Exception as e: logger.error(f'[{sid}] Error processing audio chunk: {e}', exc_info=True) emit('error', {'message': f'Audio processing error: {e}'}) @socketio.on('stop_stream') def handle_stop_stream(): sid = request.sid if sid and sid in stream_managers: stream_managers[sid].stop_stream() del stream_managers[sid] emit('stream_stopped', {'status': 'ok'}) logger.info(f'[{sid}] Stream stopped') def cleanup_models(): global asr_model, llm_model logger.info('Cleaning up models...') if asr_model: try: del asr_model asr_model = None except Exception as e: logger.error(f'Error cleaning up ASR model: {e}') if llm_model: try: del llm_model llm_model = None except Exception as e: logger.error(f'Error cleaning up LLM model: {e}') logger.info('Models cleaned up') if __name__ == '__main__': logger.info('=' * 80) logger.info('Starting NexaAI Live Translator...') logger.info('=' * 80) atexit.register(cleanup_models) initialize_models() logger.info('✓ Starting Flask+SocketIO server on http://127.0.0.1:5000') logger.info('=' * 80) socketio.run( app, host='127.0.0.1', port=5000, ) ================================================ FILE: cookbook/PC/live-translate/index.html ================================================ NexaAI Live Translator

🎙️ NexaAI Live Translator

Real-time speech recognition and translation

Original
Translation
================================================ FILE: cookbook/PC/live-translate/requirements.txt ================================================ nexaai>=v1.0.44-rc1 Flask>=2.3.0 flask-cors>=4.0.0 flask-socketio>=5.3.0 python-socketio>=5.9.0 numpy>=1.24.0 jsonify>=0.5 ================================================ FILE: cookbook/README.md ================================================ # Nexa SDK Cookbook This repository contains comprehensive demos and examples for Nexa SDK across different platforms: **PC (Python)**, **Android**, and **Linux**. --- ## 🖥️ PC Demos (Python Bindings) All PC demos are located in the [`PC/`](./PC) directory and run on **Windows (x64/ARM64 Snapdragon)**, **macOS**, and **Linux**. ### 🤖 Agent-Granite Lightweight on-device AI assistant with function calling (web search) using Granite-4-Micro model. Includes Gradio interface for local agentic workflows. - [Python-Binding-Example](./PC/Agent-Granite/Python-Binding-Example) - [Serve-Example](./PC/Agent-Granite/Serve-Example) ### 🔧 Function-Calling Function calling capabilities with NexaAI VLM model, integrated with Google Calendar via MCP protocol. Supports multi-modal input (text, image, audio) with Web UI and CLI interfaces. - [Demo](./PC/function-calling) ### 📚 RAG-LLM End-to-end Retrieval-Augmented Generation pipeline with embeddings, reranking, and generation models. Query your own documents (PDFs, Word, text) locally on device. - [Python-Binding-Example](./PC/RAG-LLM/Python-Binding-Example) - [Serve-Example](./PC/RAG-LLM/Serve-Example) ### 🖼️ Multimodal-Qwen3VL Vision-Language Model (VLM) with structured JSON outputs, multi-image input, and custom system prompt control. - [Python-Binding-Example](./PC/Multimodal-Qwen3VL/Python-Binding-Example) ### 🔍 RAG-VLM Lightweight RAG system with Qwen3VL multimodal model powered by Nexa Serve. Supports PDFs, Word docs, text files, and images. Includes CLI and Gradio UI. 
- [Demo](./PC/RAG-VLM) ### 🌐 Web-Agent-Qwen3VL Local multimodal LLM-driven browser automation using Qwen3-VL. Enables autonomous web navigation, searches, and complex web tasks. - [Demo](./PC/Web-Agent-Qwen3VL) --- ## 📱 Android Demos For Android SDK demos and examples, please refer to [`android/`](./android). --- ## 🐧 Linux Demos Linux-specific demos coming soon in the [`linux/`](./linux) directory. ================================================ FILE: cookbook/README_zh.md ================================================ # Nexa SDK 使用指南 本仓库包含 Nexa SDK 在不同平台的完整演示和示例:**PC (Python)**、**Android** 和 **Linux**。 --- ## 📁 目录结构 - **`PC/`** - 适用于 Windows、macOS 和 Linux 的 Python 绑定演示 - **`android/`** - Android 平台专属演示(即将推出,Android SDK 请参考 [bindings/android](../bindings/android)) - **`linux/`** - Linux 专属演示(即将推出) --- ## 🖥️ PC 演示(Python 绑定) 所有 PC 演示位于 [`PC/`](./PC) 目录,支持 **Windows (x64/ARM64 Snapdragon)**、**macOS** 和 **Linux**。 ### 🤖 Agent-Granite 轻量级本地 AI 助手,支持函数调用(网页搜索),基于 Granite-4-Micro 模型。包含 Gradio 界面,可实现本地 Agent 流程。 - [Python绑定示例](./PC/Agent-Granite/Python-Binding-Example) - [服务示例](./PC/Agent-Granite/Serve-Example) ### 🔧 函数调用 使用 NexaAI VLM 模型实现函数调用能力,并通过 MCP 协议集成 Google 日历。支持多模态输入(文本、图片、音频),包含 Web UI 和 CLI 界面。 - [演示](./PC/function-calling) ### 📚 RAG-LLM 端到端的检索增强生成(RAG)流程,集成嵌入、重排序与生成模型。可在本地查询 PDF、Word、文本等文档。 - [Python绑定示例](./PC/RAG-LLM/Python-Binding-Example) - [服务示例](./PC/RAG-LLM/Serve-Example) ### 🖼️ 多模态-Qwen3VL 视觉语言模型(VLM),支持结构化 JSON 输出、多图片输入及自定义系统提示。 - [Python绑定示例](./PC/Multimodal-Qwen3VL/Python-Binding-Example) ### 🔍 RAG-VLM 采用 Qwen3VL 多模态模型的轻量级 RAG 系统,由 Nexa Serve 驱动。支持 PDF、Word、文本、图片。带有 CLI 和 Gradio UI。 - [演示](./PC/RAG-VLM) ### 🌐 Web-Agent-Qwen3VL 本地多模态 LLM 驱动的网页自动化,基于 Qwen3-VL。支持自主网页导航、搜索及复杂网络任务。 - [演示](./PC/Web-Agent-Qwen3VL) --- ## 📱 Android 演示 Android SDK 演示和示例请参考 [`bindings/android`](../bindings/android)。 --- ## 🐧 Linux 演示 Linux 专属演示即将在 [`linux/`](./linux) 目录推出。 --- ## 🔒 隐私优先 **所有演示均在本地运行** — 数据不离开你的设备。 ================================================ FILE: 
cookbook/ag2/README.md ================================================ # AG2 + Nexa SDK Examples Practical examples of integrating Nexa SDK with AG2 (formerly AutoGen). ## Prerequisites - Python 3.10+ - [Nexa SDK installed](../../README.md#installation) ## Quick Start Simple agent creation and multi-agent conversation with Nexa SDK. 1. Run nexa serve in separate terminal. ```bash nexa pull NexaAI/Qwen3-4B-GGUF nexa serve ``` 2. Install dependencies. ``` pip install -r requirements.txt ``` 3. Run the example script. ```bash python example.py ``` ================================================ FILE: cookbook/ag2/example.py ================================================ #!/usr/bin/env python3 """ Basic agent example with Nexa SDK and AG2. Demonstrates: - Configuring AG2 with Nexa SDK's OpenAI-compatible endpoint - Creating a single conversable agent - Running a two-agent conversation """ from autogen import ConversableAgent, LLMConfig def create_llm_config(): """Create LLM config pointing to Nexa's OpenAI-compatible endpoint.""" return LLMConfig({ "model": "NexaAI/Qwen3-4B-GGUF", "base_url": "http://localhost:18181/v1", "api_type": "openai", "api_key": "not-needed", }) def single_agent_example(): """Example of a single conversable agent.""" print("=" * 50) print("Single Agent Example") print("=" * 50) llm_config = create_llm_config() agent = ConversableAgent( name="helpful_agent", system_message="You are a helpful AI assistant that gives concise answers.", llm_config=llm_config, ) print("\nRunning agent...") response = agent.run( message="Write a Python function to calculate the factorial of a number.", max_turns=3, ) response.process() for msg in response.messages: print(f"\n[{msg.get('role', 'unknown')}]: {msg.get('content', '')}") def two_agent_example(): """Example of a two-agent conversation.""" print("\n" + "=" * 50) print("Two-Agent Conversation Example") print("=" * 50) llm_config = create_llm_config() # Create a coding assistant coder = ConversableAgent( 
name="coder", system_message=( "You are a Python developer. Write clean, well-documented code. " "Reply TERMINATE when the task is complete." ), llm_config=llm_config, ) # Create a code reviewer reviewer = ConversableAgent( name="reviewer", system_message=( "You are a code reviewer. Review the code for correctness, style, " "and best practices. Reply TERMINATE when the review is done." ), llm_config=llm_config, ) print("\nStarting two-agent conversation...") result = coder.initiate_chat( recipient=reviewer, message="Write a Python function to check if a string is a palindrome.", max_turns=3, ) print(f"\nConversation summary:\n{result.summary}") def main(): """Run all agent examples.""" print("\n" + "=" * 50) print("AG2 + Nexa SDK Basic Agent Examples") print("=" * 50 + "\n") single_agent_example() two_agent_example() print("\n" + "=" * 50) print("All examples completed!") print("=" * 50) if __name__ == "__main__": main() ================================================ FILE: cookbook/ag2/requirements.txt ================================================ ag2[openai]>=0.8.0 ================================================ FILE: cookbook/android/RAG-LLM/.gitignore ================================================ # Gradle files .gradle/ build/ arm64-v8a/ .DS_Store # Local configuration file (sdk path, etc) local.properties # Log/OS Files *.log # Android Studio generated files and folders captures/ .externalNativeBuild/ .cxx/ *.aab *.apk output-metadata.json # IntelliJ *.iml .idea/ misc.xml deploymentTargetDropDown.xml render.experimental.xml # Keystore files *.jks *.keystore # Google Services (e.g. 
APIs or Firebase) google-services.json # Android Profiling *.hprof app/libs/app-release.aar app/libs/app-debug.aar app/release/ repo/ ================================================ FILE: cookbook/android/RAG-LLM/README.md ================================================ # Nexa Android SDK Demo App ## Overview The Nexa AI Android SDK enables on-device AI inference for Android applications with NPU acceleration. Run Large Language Models (LLMs), Vision-Language Models (VLMs), Embeddings, Speech Recognition (ASR), Reranking, and Computer Vision models on Android devices with support for NPU, GPU, and CPU inference. See the [Android SDK Doc](https://docs.nexa.ai/en/nexa-sdk-android/overview) for more details. This folder contains the demo app for the Android SDK. ## Device Compatibility ### Supported Hardware - **NPU**: Qualcomm Snapdragon 8 Gen 4 (optimized) - **GPU**: Qualcomm Adreno GPU - **CPU**: ARM64-v8a - **RAM**: 4GB+ recommended - **Storage**: 100MB - 4GB (varies by model) ### Minimum Requirements - Android API Level 27+ (Android 8.1 Oreo) - **Architecture**: ARM64-v8a - **Android SDK Version**: 27+ ## Build and Run 1. Clone the repository ```bash git clone https://github.com/NexaAI/nexa-sdk/ ``` 2. Open this folder (`cookbook/android/RAG-LLM`) in Android Studio 3. Follow the instructions in the [Android SDK Doc](https://docs.nexa.ai/en/nexa-sdk-android/overview), download a model (for example, [`OmniNeural-4B`](https://huggingface.co/NexaAI/OmniNeural-4B)) and put it in the app's files directory on the device (for example, `/data/data/com.nexa.demo.rag/files/models/omni-neural-4b`). 4.
Build and run the app

================================================
FILE: cookbook/android/RAG-LLM/app/.gitignore
================================================
/build
/src/main/assets/model_list_backup.json
/src/main/assets/model_list_local.json

================================================
FILE: cookbook/android/RAG-LLM/app/build.gradle.kts
================================================
// Copyright 2024-2026 Nexa AI, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

plugins {
    alias(libs.plugins.android.application)
    alias(libs.plugins.kotlin.android)
    alias(libs.plugins.kotlin.compose)
    id("org.jetbrains.kotlin.plugin.serialization") version "1.9.23"
}

android {
    namespace = "com.nexa.demo"
    compileSdk = 36

    signingConfigs {
        // NOTE(review): keystore path and passwords are committed in plain text.
        // Acceptable only for a throw-away demo key; do not reuse for a real release.
        create("release") {
            storeFile = file("test")
            storePassword = "123456"
            keyAlias = "test"
            keyPassword = "123456"
        }
    }

    defaultConfig {
        applicationId = "com.nexa.demo"
        minSdk = 27
        targetSdk = 36
        versionCode = 1
        versionName = "1.0"

        testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
    }

    buildTypes {
        // Both build types append ".rag", so the installed package is
        // "com.nexa.demo.rag", and both are signed with the demo "release" key.
        release {
            isMinifyEnabled = false
            applicationIdSuffix = ".rag"
            proguardFiles(
                getDefaultProguardFile("proguard-android-optimize.txt"),
                "proguard-rules.pro"
            )
            signingConfig = signingConfigs.getByName("release")
        }
        debug {
            isMinifyEnabled = false
            applicationIdSuffix = ".rag"
            proguardFiles(
                getDefaultProguardFile("proguard-android-optimize.txt"),
                "proguard-rules.pro"
            )
            signingConfig = signingConfigs.getByName("release")
        }
    }

    compileOptions {
        sourceCompatibility = JavaVersion.VERSION_11
        targetCompatibility = JavaVersion.VERSION_11
    }
    kotlinOptions {
        jvmTarget = "11"
    }

//    sourceSets {
//        getByName("main") {
//            jniLibs.srcDirs("src/main/jniLibs")
//        }
//    }

    // `packagingOptions` is the deprecated name of this block; `packaging` is its
    // replacement in current Android Gradle Plugin versions.
    packaging {
        jniLibs {
            useLegacyPackaging = true
        }
    }

    buildFeatures {
        viewBinding = true
        dataBinding = true
        compose = true
        buildConfig = true
    }
}

// Read from the Gradle extra properties; only printed here.
val bridgePathExist = gradle.extra["bridgePathExist"] as Boolean
print("bridgePathExist: $bridgePathExist\n")

dependencies {
    // ===== NEXA CLOUD SDK =====
    implementation("ai.nexa:core:0.0.22")
    // ===== NEXA CLOUD SDK END =====

    implementation(project(":transform"))
    implementation(":okdownload-core@aar")
    implementation(":okdownload-sqlite@aar")
    implementation(":okdownload-okhttp@aar")
    implementation(":okdownload-ktx@aar")
    implementation(kotlin("reflect"))
    implementation(libs.glide)
    implementation(libs.gson)
    implementation(libs.markwon.core)
    implementation(libs.markwon.strikethrough)
    implementation(libs.markwon.tables)
    implementation(libs.markwon.linkify)
    implementation(libs.recyclerview)
    implementation(libs.toaster)
    implementation(libs.material)
    implementation(libs.imm.bar)
    implementation(libs.imm.bar.ktx)
    implementation(libs.auto.size)
    implementation(libs.okhttp)
    implementation(libs.kotlinx.serialization.json)

    implementation(libs.androidx.core.ktx)
    implementation(libs.androidx.lifecycle.runtime.ktx)
    implementation(libs.androidx.activity.compose)
    implementation(platform(libs.androidx.compose.bom))
    implementation(libs.androidx.ui)
    implementation(libs.androidx.ui.graphics)
    implementation(libs.androidx.ui.tooling.preview)
    implementation(libs.androidx.material3)
    implementation(libs.androidx.appcompat)
    implementation(libs.androidx.activity)
    implementation(libs.androidx.constraintlayout)

    testImplementation(libs.junit)
    androidTestImplementation(libs.androidx.junit)
    androidTestImplementation(libs.androidx.espresso.core)
    androidTestImplementation(platform(libs.androidx.compose.bom))
    androidTestImplementation(libs.androidx.ui.test.junit4)
    debugImplementation(libs.androidx.ui.tooling)
    debugImplementation(libs.androidx.ui.test.manifest)
}

================================================
FILE: cookbook/android/RAG-LLM/app/proguard-rules.pro
================================================
# Copyright 2024-2026 Nexa AI, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Add project specific ProGuard rules here.
# You can control the set of applied configuration files using the
# proguardFiles setting in build.gradle.
#
# For more details, see
#   http://developer.android.com/guide/developing/tools/proguard.html

# If your project uses WebView with JS, uncomment the following
# and specify the fully qualified class name to the JavaScript interface
# class:
#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
#   public *;
#}

# Uncomment this to preserve the line number information for
# debugging stack traces.
#-keepattributes SourceFile,LineNumberTable

# If you keep the line number information, uncomment this to
# hide the original source file name.
#-renamesourcefileattribute SourceFile

================================================
FILE: cookbook/android/RAG-LLM/app/src/androidTest/java/com/nexa/demo/ExampleInstrumentedTest.kt
================================================
// Copyright 2024-2026 Nexa AI, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
/**
 * Instrumented smoke test that runs on an Android device or emulator.
 *
 * See [testing documentation](http://d.android.com/tools/testing).
 */
@RunWith(AndroidJUnit4::class)
class ExampleInstrumentedTest {
    @Test
    fun useAppContext() {
        // Package name reported by the context of the app under test.
        val targetPackage = InstrumentationRegistry.getInstrumentation()
            .targetContext
            .packageName
        assertEquals("com.nexa.demo", targetPackage)
    }
}
"https://nexa-model-hub-bucket.s3.us-west-1.amazonaws.com/public/nexa_sdk/huggingface-models/Llama-3.2-3B-Instruct-GGUF/Llama-3.2-3B-Instruct-Q4_0.gguf" }, { "id": "Qwen3-4B-Instruct-2507-npu", "displayName": "Qwen3-4B", "modelName": "files-1-2.nexa", "versionCode": 1, "pluginIds": 273, "dependencies": ["Qwen3-4B-Q4_0"], "baseUrl": "https://nexa-model-hub-bucket.s3.us-west-1.amazonaws.com/public/nexa_sdk/huggingface-models/Qwen3-4B-Instruct-2507-npu-mobile/", "modelUrl": "files-1-2.nexa" }, { "id": "Qwen3-4B-Q4_0", "displayName": "Qwen3-4B Q40 GGUF", "modelName": "Qwen3-4B-Q4_0.gguf", "type": "chat", "versionCode": 1, "pluginIds": 17, "show": false, "dependencies": ["embeddinggemma-300m-npu-mobile"], "modelUrl": "https://nexa-model-hub-bucket.s3.us-west-1.amazonaws.com/public/nexa_sdk/huggingface-models/Qwen3-4B-GGUF/Qwen3-4B-Q4_0.gguf" }, { "id": "Granite-4-Micro-NPU-mobile", "displayName": "Granite4-Micro(3B)", "modelName": "files-1-2.nexa", "versionCode": 1, "pluginIds": 273, "dependencies": ["granite-4.0-micro-Q4_0"], "baseUrl": "https://nexa-model-hub-bucket.s3.us-west-1.amazonaws.com/public/nexa_sdk/huggingface-models/Granite-4-Micro-NPU-mobile/", "modelUrl": "files-1-2.nexa" }, { "id": "granite-4.0-micro-Q4_0", "displayName": "Granite4-Micro(3B) Q40 GGUF", "modelName": "granite-4.0-micro-Q4_0.gguf", "type": "chat", "versionCode": 1, "show": false, "dependencies": ["embeddinggemma-300m-npu-mobile"], "modelUrl": "https://nexa-model-hub-bucket.s3.us-west-1.amazonaws.com/public/nexa_sdk/huggingface-models/granite-4.0-micro-GGUF/granite-4.0-micro-Q4_0.gguf" }, { "id": "Granite-4.0-h-350M-NPU-mobile", "displayName": "Granite4-350M", "modelName": "files-1-2.nexa", "versionCode": 1, "pluginIds": 273, "dependencies": ["granite-4.0-350m-Q4_0"], "baseUrl": "https://nexa-model-hub-bucket.s3.us-west-1.amazonaws.com/public/nexa_sdk/huggingface-models/Granite-4.0-h-350M-NPU-mobile/", "modelUrl": "files-1-2.nexa" }, { "id": "granite-4.0-350m-Q4_0", "displayName": 
"Granite4-350M Q40 GGUF", "modelName": "granite-4.0-350m-Q4_0.gguf", "type": "chat", "versionCode": 1, "show": false, "dependencies": ["embeddinggemma-300m-npu-mobile"], "modelUrl": "https://nexa-model-hub-bucket.s3.us-west-1.amazonaws.com/public/nexa_sdk/huggingface-models/granite-4.0-350m-GGUF/granite-4.0-350m-Q4_0.gguf" }, { "id": "LFM2-1.2B-npu-mobile", "displayName": "LFM2-1.2B", "modelName": "files-1-2.nexa", "versionCode": 1, "pluginIds": 273, "dependencies": ["LFM2-1.2B-Q4_0"], "baseUrl": "https://nexa-model-hub-bucket.s3.us-west-1.amazonaws.com/public/nexa_sdk/huggingface-models/LFM2-1.2B-npu-mobile/", "modelUrl": "files-1-2.nexa" }, { "id": "LFM2-1.2B-Q4_0", "displayName": "LFM2-1.2B Q40 GGUF", "modelName": "LFM2-1.2B-Q4_0.gguf", "type": "chat", "versionCode": 1, "pluginIds": 17, "show": false, "dependencies": ["embeddinggemma-300m-npu-mobile"], "modelUrl": "https://nexa-model-hub-bucket.s3.us-west-1.amazonaws.com/public/nexa_sdk/huggingface-models/LFM2-1.2B-GGUF/LFM2-1.2B-Q4_0.gguf" }, { "id": "embeddinggemma-300m-npu-mobile", "displayName": "embed-gemma", "modelName": "files-1-2.nexa", "versionCode": 1, "show": false, "baseUrl": "https://nexa-model-hub-bucket.s3.us-west-1.amazonaws.com/public/nexa_sdk/huggingface-models/embeddinggemma-300m-npu-mobile/", "modelUrl": "files-1-2.nexa" } ] ================================================ FILE: cookbook/android/RAG-LLM/app/src/main/java/com/nexa/demo/ChatAdapter.kt ================================================ // Copyright 2024-2026 Nexa AI, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
// See the License for the specific language governing permissions and // limitations under the License. package com.nexa.demo import android.content.Intent import android.net.Uri import android.text.TextUtils import android.text.method.LinkMovementMethod import android.view.LayoutInflater import android.view.View import android.view.ViewGroup import android.widget.Button import android.widget.ImageView import android.widget.LinearLayout import android.widget.TextView import androidx.recyclerview.widget.RecyclerView import com.nexa.demo.activity.FileContentActivity import com.nexa.demo.bean.EmbedResultBean import io.noties.markwon.Markwon import io.noties.markwon.ext.strikethrough.StrikethroughPlugin import io.noties.markwon.ext.tables.TablePlugin import io.noties.markwon.linkify.LinkifyPlugin import java.io.File data class Message( val content: String, val type: MessageType, val images: List = emptyList(), val audio: List = emptyList(), val embedResultBean: EmbedResultBean? = null, // Deprecated, keeping for compatibility val retrievedChunks: List = emptyList(), // Store multiple retrieved chunks val formattedPrompt: String? 
/**
 * RecyclerView adapter that renders the chat transcript.
 *
 * The view type of a row is the [MessageType.value] of its [Message], so each message kind
 * gets its own layout and view holder.
 *
 * @param messages backing list; the adapter reads it directly and never copies it.
 * @param onCitationsClick callback fired when the citations button of a profile row is tapped.
 */
class ChatAdapter(
    private val messages: List<Message>,
    private val onCitationsClick: OnCitationsClick
) : RecyclerView.Adapter<RecyclerView.ViewHolder>() {

    override fun getItemViewType(position: Int): Int = messages[position].type.value

    override fun onCreateViewHolder(parent: ViewGroup, viewType: Int): RecyclerView.ViewHolder {
        val inflater = LayoutInflater.from(parent.context)
        // Deliberately exhaustive (no `else`): adding a MessageType entry becomes a compile
        // error here instead of silently falling back to the profile layout.
        // Unknown integer view types are still mapped to PROFILE by MessageType.from().
        return when (MessageType.from(viewType)) {
            MessageType.USER ->
                UserViewHolder(inflater.inflate(R.layout.item_user_message, parent, false))
            MessageType.ASSISTANT ->
                AiViewHolder(inflater.inflate(R.layout.item_ai_message, parent, false))
            MessageType.IMAGES ->
                ImagesViewHolder(inflater.inflate(R.layout.item_image_message, parent, false))
            MessageType.PROFILE ->
                ProfileViewHolder(inflater.inflate(R.layout.item_profile_message, parent, false))
        }
    }

    override fun onBindViewHolder(holder: RecyclerView.ViewHolder, position: Int) {
        val message = messages[position]
        when (holder) {
            is UserViewHolder -> holder.bind(message)
            is AiViewHolder -> holder.bind(message)
            is ImagesViewHolder -> holder.bind(message)
            is ProfileViewHolder -> holder.bind(position, message, onCitationsClick)
        }
    }

    override fun getItemCount() = messages.size

    /** Plain-text bubble for a user message. */
    class UserViewHolder(itemView: View) : RecyclerView.ViewHolder(itemView) {
        private val tvMessage: TextView = itemView.findViewById(R.id.tv_message)

        fun bind(message: Message) {
            tvMessage.text = message.content
        }
    }

    /** Assistant bubble; the message content is rendered as Markdown. */
    class AiViewHolder(itemView: View) : RecyclerView.ViewHolder(itemView) {
        private val tvMessage: TextView = itemView.findViewById(R.id.tv_message)

        // Built once per holder with the strikethrough, table and linkify plugins.
        private val markwon: Markwon = Markwon.builder(itemView.context)
            .usePlugin(StrikethroughPlugin.create())
            .usePlugin(TablePlugin.create(itemView.context))
            .usePlugin(LinkifyPlugin.create())
            .build()

        fun bind(message: Message) {
            markwon.setMarkdown(tvMessage, message.content)
            // Set after the markdown so rendered links are clickable.
            tvMessage.movementMethod = LinkMovementMethod.getInstance()
        }
    }

    /** Profile row: message text plus optional "Citations" and "view prompt" buttons. */
    class ProfileViewHolder(itemView: View) : RecyclerView.ViewHolder(itemView) {
        private val tvMessage: TextView = itemView.findViewById(R.id.tv_message)
        private val btnCitations: Button = itemView.findViewById(R.id.btn_citations)
        private val btnViewPrompt: Button = itemView.findViewById(R.id.btn_view_prompt)

        /**
         * Binds [message] to this row.
         *
         * @param position adapter position at bind time; used only as a fallback when the
         *   holder can no longer report its current position.
         * @param message message to display.
         * @param onCitationsClick receives the row position and the message's retrieved chunks.
         */
        fun bind(position: Int, message: Message, onCitationsClick: OnCitationsClick) {
            tvMessage.text = message.content

            // Citations button is shown only when the message carries retrieved chunks.
            val chunks = message.retrievedChunks
            if (chunks.isNotEmpty()) {
                btnCitations.text = "${chunks.size} Citations"
                btnCitations.visibility = View.VISIBLE
                btnCitations.setOnClickListener {
                    // Resolve the position at click time: the bind-time value goes stale
                    // when rows are inserted or removed without a rebind.
                    val current = bindingAdapterPosition
                    val reported = if (current != RecyclerView.NO_POSITION) current else position
                    onCitationsClick.onClick(reported, chunks)
                }
            } else {
                btnCitations.visibility = View.GONE
            }

            // "View prompt" button is shown only when a formatted prompt was recorded.
            val prompt = message.formattedPrompt
            if (prompt.isNullOrEmpty()) {
                btnViewPrompt.visibility = View.GONE
            } else {
                btnViewPrompt.setOnClickListener { view ->
                    val intent = Intent(view.context, FileContentActivity::class.java)
                        .putExtra(FileContentActivity.KEY_PROMPT_CONTENT, prompt)
                    view.context.startActivity(intent)
                }
                btnViewPrompt.visibility = View.VISIBLE
            }
        }
    }

    /** Row that shows every image attached to a message inside one container. */
    class ImagesViewHolder(itemView: View) : RecyclerView.ViewHolder(itemView) {
        private val imageContainer: LinearLayout = itemView.findViewById(R.id.image_container)

        fun bind(message: Message) {
            // Holders are recycled, so drop the thumbnails of the previous message first.
            imageContainer.removeAllViews()
            val inflater = LayoutInflater.from(itemView.context)
            for (file in message.images) {
                val imageItem =
                    inflater.inflate(R.layout.item_image_item_message, imageContainer, false)
                imageItem.findViewById<ImageView>(R.id.iv_image).setImageURI(Uri.fromFile(file))
                imageContainer.addView(imageItem)
            }
        }
    }
}
/**
 * Mutable holder for the generation parameters used by the demo, convertible to the SDK's
 * [GenerationConfig] through [toGenerationConfig].
 *
 * Per the original note, `maxTokens = 0` means "no limit" (generate until the model's natural
 * stopping point); the default declared here is 2048, not 0.
 *
 * [systemPrompt] is stored on this class but is not passed on by [toGenerationConfig].
 */
// Configuration sample for generation with defaults compatible with bridge
data class GenerationConfigSample(
    var maxTokens: Int = 2048,
    var stopWords: List? = null,
    var stopCount: Int = 0,
    var nPast: Int = 0,
    var imagePaths: List? = null,
    var imageCount: Int = 0,
    var audioPaths: List? = null,
    var audioCount: Int = 0,
    var systemPrompt: String = ""
) {
    /**
     * Builds a [GenerationConfig] from the current field values.
     *
     * List-valued fields are converted to arrays; null lists stay null.
     *
     * @param grammarString currently unused: the only reference to it is inside the
     *   commented-out `samplerConfig` argument below, so no sampler configuration is passed.
     * @return a new [GenerationConfig] carrying the token, stop-word, nPast, image and audio
     *   settings of this sample.
     */
    // Convert to GenerationConfig with minimal sampler setup for bridge compatibility
    // Sampler config uses bridge defaults (no custom parameters applied)
    fun toGenerationConfig(grammarString:String? = null): GenerationConfig {
        return GenerationConfig(
            maxTokens = this.maxTokens,
            stopWords = this.stopWords?.toTypedArray(),
            stopCount = this.stopCount,
            nPast = this.nPast,
            //samplerConfig = SamplerConfig(
            //    grammarString = grammarString
            // All other sampler parameters use bridge defaults
            // No temperature, topK, topP, penalties applied
            //),
            imagePaths = this.imagePaths?.toTypedArray(),
            imageCount = this.imageCount,
            audioPaths = this.audioPaths?.toTypedArray(),
            audioCount = this.audioCount
        )
    }
}
// See the License for the specific language governing permissions and // limitations under the License. package com.nexa.demo import android.Manifest import android.app.Activity import android.content.Context import android.content.DialogInterface import android.content.Intent import android.content.SharedPreferences import android.content.pm.PackageManager import android.graphics.Bitmap import android.graphics.BitmapFactory import android.graphics.Color import android.net.Uri import android.os.Bundle import android.os.Environment import android.provider.MediaStore import android.system.Os import android.text.Editable import android.text.TextUtils import android.text.TextWatcher import android.util.Log import android.view.KeyEvent import android.view.LayoutInflater import android.view.View import android.view.inputmethod.InputMethodManager import android.widget.AdapterView import android.widget.Button import android.widget.EditText import android.widget.HorizontalScrollView import android.widget.ImageButton import android.widget.ImageView import android.widget.LinearLayout import android.widget.PopupWindow import android.widget.ProgressBar import android.widget.SeekBar import android.widget.BaseAdapter import android.widget.SimpleAdapter import android.widget.Spinner import android.widget.TextView import android.widget.Toast import android.window.OnBackInvokedDispatcher import androidx.activity.ComponentActivity import androidx.activity.OnBackPressedCallback import androidx.activity.result.ActivityResultLauncher import androidx.activity.result.contract.ActivityResultContracts import androidx.appcompat.app.AlertDialog import androidx.compose.ui.unit.TextUnit import androidx.core.app.ActivityCompat import androidx.core.content.ContextCompat import androidx.core.content.FileProvider import androidx.core.view.ViewCompat import androidx.core.view.WindowInsetsCompat import androidx.fragment.app.Fragment import androidx.fragment.app.FragmentActivity import 
androidx.recyclerview.widget.RecyclerView import com.google.android.material.bottomsheet.BottomSheetDialog import com.gyf.immersionbar.ktx.immersionBar import com.hjq.toast.Toaster import com.liulishuo.okdownload.DownloadContext import com.liulishuo.okdownload.DownloadTask import com.liulishuo.okdownload.OkDownload import com.liulishuo.okdownload.core.cause.EndCause import com.liulishuo.okdownload.core.connection.DownloadOkHttp3Connection import com.liulishuo.okdownload.kotlin.listener.createDownloadContextListener import com.liulishuo.okdownload.kotlin.listener.createListener1 import com.nexa.demo.activity.FolderActivity import com.nexa.demo.adapter.ChunkAdapter import com.nexa.demo.bean.DownloadState import com.nexa.demo.bean.EmbedResultBean import com.nexa.demo.bean.ModelData import com.nexa.demo.bean.DownloadableFileWithFallback import com.nexa.demo.bean.downloadableFiles import com.nexa.demo.bean.downloadableFilesWithFallback import com.nexa.demo.bean.downloadableFilesWithNpuList import com.nexa.demo.bean.getNexaManifest import com.nexa.demo.bean.getNonExistModelFile import com.nexa.demo.bean.getSupportPluginIds import com.nexa.demo.bean.allModelFilesExist import com.nexa.demo.bean.isDownloaded import com.nexa.demo.bean.isNpuModel import com.nexa.demo.bean.withFallbackUrls import com.nexa.demo.utils.ModelFileListingUtil import com.nexa.demo.bean.mmprojTokenFile import com.nexa.demo.bean.modelDir import com.nexa.demo.bean.modelFile import com.nexa.demo.bean.tokenFile import com.nexa.demo.databinding.ActivityMainBinding import com.nexa.demo.databinding.DialogConfigBinding import com.nexa.demo.databinding.DialogSelectPluginIdBinding import com.nexa.demo.databinding.DialogTopkConfigBinding import com.nexa.demo.fragments.IndexFragment import com.nexa.demo.listeners.CustomDialogInterface import com.nexa.demo.utils.DensityUtil import com.nexa.demo.utils.ExecShell import com.nexa.demo.utils.GenerateEmbedStringsUtil import com.nexa.demo.utils.ImgUtil import 
com.nexa.demo.utils.PermissionUtil import com.nexa.demo.utils.SharePreferenceKeys import com.nexa.demo.utils.WavRecorder import com.nexa.demo.utils.inflate import com.nexa.sdk.AsrWrapper import com.nexa.sdk.CvWrapper import com.nexa.sdk.EmbedderWrapper import com.nexa.sdk.LlmWrapper import com.nexa.sdk.NexaSdk import com.nexa.sdk.RerankerWrapper import com.nexa.sdk.VlmWrapper import com.nexa.sdk.bean.AsrCreateInput import com.nexa.sdk.bean.AsrTranscribeInput import com.nexa.sdk.bean.CVCapability import com.nexa.sdk.bean.CVCreateInput import com.nexa.sdk.bean.CVModelConfig import com.nexa.sdk.bean.ChatMessage import com.nexa.sdk.bean.DeviceIdValue import com.nexa.sdk.bean.EmbedderCreateInput import com.nexa.sdk.bean.EmbeddingConfig import com.nexa.sdk.bean.LlmCreateInput import com.nexa.sdk.bean.LlmStreamResult import com.nexa.sdk.bean.ModelConfig import com.nexa.sdk.bean.RerankConfig import com.nexa.sdk.bean.RerankerCreateInput import com.nexa.sdk.bean.VlmChatMessage import com.nexa.sdk.bean.VlmContent import com.nexa.sdk.bean.VlmCreateInput import kotlinx.coroutines.CoroutineScope import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.async import kotlinx.coroutines.awaitAll import kotlinx.coroutines.launch import kotlinx.serialization.json.Json import okhttp3.OkHttpClient import okhttp3.Request import java.io.File import java.io.FileNotFoundException import java.io.FileOutputStream import java.security.SecureRandom import java.security.cert.CertificateException import java.security.cert.X509Certificate import javax.net.ssl.SSLContext import javax.net.ssl.SSLSession import javax.net.ssl.SSLSocketFactory import javax.net.ssl.TrustManager import javax.net.ssl.X509TrustManager import androidx.core.content.edit import com.nexa.demo.bean.DownloadableFile import kotlinx.serialization.StringFormat class MainActivity : FragmentActivity() { private val binding: ActivityMainBinding by inflate() private var downloadContext: DownloadContext? 
= null private var downloadState = DownloadState.IDLE private var downloadingModelData: ModelData? = null private lateinit var spDownloaded: SharedPreferences private lateinit var llDownloading: LinearLayout private lateinit var tvDownloadProgress: TextView private lateinit var pbDownloading: ProgressBar // Inline download progress views private lateinit var llControlButtonsRow: LinearLayout private lateinit var llDownloadProgress: LinearLayout private lateinit var pbDownloadProgress: ProgressBar private lateinit var tvDownloadPercent: TextView private lateinit var tvDownloadFailed: TextView private lateinit var btnCancelDownloadInline: Button private lateinit var btnRetryDownloadInline: Button private var downloadFailed = false private lateinit var spModelList: Spinner private lateinit var btnDownload: Button private lateinit var btnLoadModel: Button private lateinit var btnUnloadModel: Button private lateinit var btnStop: Button private lateinit var etInput: EditText private lateinit var btnSend: Button private lateinit var btnClearHistory: Button private lateinit var btnAddImage: Button private lateinit var btnAudioRecord: Button private lateinit var recyclerView: RecyclerView private lateinit var adapter: ChatAdapter private lateinit var bottomPanel: LinearLayout private lateinit var btnAudioDone: Button private lateinit var btnAudioCancel: Button private lateinit var scrollImages: HorizontalScrollView private lateinit var topScrollContainer: LinearLayout private lateinit var llLoading: LinearLayout private lateinit var vTip: View private lateinit var llmWrapper: LlmWrapper private lateinit var vlmWrapper: VlmWrapper var embedderWrapper: EmbedderWrapper? 
/**
 * Activity entry point: configures the status bar, requests the microphone permission and
 * then runs the activity's setup steps in a fixed order.
 */
override fun onCreate(savedInstanceState: Bundle?) {
    super.onCreate(savedInstanceState)
    // Status bar setup via ImmersionBar: transparent, with the dark-font option turned off.
    immersionBar {
        transparentStatusBar()
        statusBarDarkFont(false)
    }
    // Requested unconditionally on every creation; 1002 is the request code for this prompt.
    requestPermissions(arrayOf(Manifest.permission.RECORD_AUDIO), 1002)
    okdownload()
    // Order matters: initView() reads the lateinit `modelList`, which is presumably
    // populated by initData() (not visible in this chunk; confirm before reordering).
    initData()
    initView()
    // Runs last; presumably wires listeners onto the views that initView() looked up.
    setListeners()
}
/**
 * Registers the context message built from [embedString] and makes sure [msgList] contains
 * every registered context message.
 *
 * A new message is added to `embedMsgList` only when no existing entry has the same
 * content. Afterwards each entry of `embedMsgList` that is missing from [msgList] is
 * appended to it, in `embedMsgList` order.
 *
 * @param embedString retrieved text to wrap through [createEmbedChatMessage].
 * @param msgList conversation list that is mutated in place.
 */
private fun addEmbedChatMessage(embedString: String, msgList: ArrayList<ChatMessage>) {
    val candidate = createEmbedChatMessage(embedString)
    // `none` stops at the first match. The previous forEach with `return@forEach` only
    // skipped to the next element, so it always scanned the whole list.
    if (embedMsgList.none { it.content == candidate.content }) {
        embedMsgList.add(candidate)
    }
    embedMsgList.forEach { cached ->
        if (cached !in msgList) {
            msgList.add(cached)
        }
    }
}

/**
 * Summarizes the indexed content for display.
 *
 * @return `"Files: <distinct paths> | Chunks: <entries>"` computed from `embedResultList`;
 *   an empty list yields `"Files: 0 | Chunks: 0"`.
 */
private fun formatFilesInSearch(): String {
    // Only the number of distinct paths is displayed, so there is no need to group, count
    // and sort the entries per file as before.
    val fileCount = embedResultList.distinctBy { it.path }.size
    val totalChunks = embedResultList.size
    return "Files: $fileCount | Chunks: $totalChunks"
}
ViewCompat.setOnApplyWindowInsetsListener(binding.root) { v, insets -> val navBottom = insets.getInsets(WindowInsetsCompat.Type.navigationBars()).bottom val imeBottom = insets.getInsets(WindowInsetsCompat.Type.ime()).bottom val bottomPadding = if (imeBottom > 0) imeBottom else navBottom v.setPadding(v.paddingLeft, v.paddingTop, v.paddingRight, bottomPadding) insets } llDownloading = findViewById(R.id.ll_downloading) tvDownloadProgress = findViewById(R.id.tv_download_progress) pbDownloading = findViewById(R.id.pb_downloading) // Initialize inline download progress views llControlButtonsRow = findViewById(R.id.ll_control_buttons_row) llDownloadProgress = findViewById(R.id.ll_download_progress) pbDownloadProgress = findViewById(R.id.pb_download_progress) tvDownloadPercent = findViewById(R.id.tv_download_percent) tvDownloadFailed = findViewById(R.id.tv_download_failed) btnCancelDownloadInline = findViewById(R.id.btn_cancel_download_inline) btnRetryDownloadInline = findViewById(R.id.btn_retry_download_inline) spModelList = findViewById(R.id.sp_model_list) spModelList.dropDownVerticalOffset = DensityUtil.dpToPx(this, 40f) spModelList.post { // Set dropdown width to match spinner width for symmetric padding spModelList.dropDownWidth = spModelList.width } val spinnerData = modelList.filter { it.show }.map { val map = mutableMapOf() map["modelId"] = it.id map["displayName"] = it.displayName map } // Set custom view for spinner selected item spModelList.adapter = object : BaseAdapter() { override fun getCount(): Int = spinnerData.size override fun getItem(position: Int): Any = spinnerData[position] override fun getItemId(position: Int): Long = position.toLong() override fun getView(position: Int, convertView: View?, parent: android.view.ViewGroup?): View { val view = convertView ?: LayoutInflater.from(this@MainActivity) .inflate(R.layout.spinner_custom, parent, false) val textView = view.findViewById(android.R.id.text1) textView.text = spinnerData[position]["displayName"] 
return view } override fun getDropDownView(position: Int, convertView: View?, parent: android.view.ViewGroup?): View { val view = convertView ?: LayoutInflater.from(this@MainActivity) .inflate(R.layout.item_spinner_dropdown, parent, false) val textView = view.findViewById(android.R.id.text1) val checkIcon = view.findViewById(R.id.iv_check) textView.text = spinnerData[position]["displayName"] // Show check icon for selected item val isSelected = position == spModelList.selectedItemPosition checkIcon.visibility = if (isSelected) View.VISIBLE else View.GONE return view } } spModelList.onItemSelectedListener = object : AdapterView.OnItemSelectedListener { override fun onItemSelected( parent: AdapterView<*>?, view: View?, position: Int, id: Long ) { selectModelId = spinnerData[position].get("modelId") ?: "" spinnerText = spinnerData[position].get("displayName") ?: "" Log.d(TAG, "spinnerText:$spinnerText") // Use actual file existence check (not just SharedPreferences) val selectedModel = modelList.first { it.id == selectModelId } val filesExist = isModelDownloaded(selectedModel) == null // Check if this model is currently downloading val isDownloading = downloadState == DownloadState.DOWNLOADING && downloadingModelData?.displayName == spinnerText if (isDownloading && !filesExist) { // Show progress row for current downloading model llControlButtonsRow.visibility = View.GONE llDownloadProgress.visibility = View.VISIBLE } else { changeOperationUI( if (filesExist) { OperationState.DOWNLOADED } else { OperationState.DEFAULT } ) } messages.clear() adapter.notifyDataSetChanged() binding.rvChat.scrollTo(0, 0) } override fun onNothingSelected(parent: AdapterView<*>?) 
{ selectModelId = "" } } btnDownload = findViewById(R.id.btn_download) btnLoadModel = findViewById(R.id.btn_load_model) btnUnloadModel = findViewById(R.id.btn_unload_model) btnStop = findViewById(R.id.btn_stop) etInput = findViewById(R.id.et_input) btnAddImage = findViewById(R.id.btn_add_image) btnAudioRecord = findViewById(R.id.btn_voice) bottomPanel = findViewById(R.id.bottom_panel) btnAudioCancel = findViewById(R.id.btn_audio_cancel) btnAudioDone = findViewById(R.id.btn_audio_done) btnSend = findViewById(R.id.btn_send) btnClearHistory = findViewById(R.id.btn_clear_history) scrollImages = findViewById(R.id.scroll_images) topScrollContainer = findViewById(R.id.ll_images_container) llLoading = findViewById(R.id.ll_loading) vTip = findViewById(R.id.v_tip) btnAudioCancel.setOnClickListener { stopRecord(true) } btnAudioDone.setOnClickListener { stopRecord(false) } findViewById