[
  {
    "path": ".github/workflows/ci.yml",
    "content": "name: CI\n\non:\n  push:\n  pull_request:\n\njobs:\n  mock-test:\n    runs-on: ubuntu-latest\n    timeout-minutes: 15\n    strategy:\n      matrix:\n        python-version: ['3.10', '3.11', '3.12', '3.13', '3.14']\n    env:\n      PLAMO_TRANSLATE_CLI_USE_MOCK_SERVER: '1'\n      PLAMO_TRANSLATE_CLI_TEST_TIMEOUT_SECONDS: '20'\n      PLAMO_TRANSLATE_CLI_TEST_SERVER_STARTUP_TIMEOUT_SECONDS: '10'\n      UV_CACHE_DIR: .cache/uv\n    steps:\n      - uses: actions/checkout@v4\n      - name: Set up Python ${{ matrix.python-version }}\n        uses: actions/setup-python@v5\n        with:\n          python-version: ${{ matrix.python-version }}\n      - name: Cache uv downloads\n        uses: actions/cache@v4\n        with:\n          path: ${{ env.UV_CACHE_DIR }}\n          key: ${{ runner.os }}-uv-py${{ matrix.python-version }}-${{ hashFiles('uv.lock') }}\n          restore-keys: |\n            ${{ runner.os }}-uv-py${{ matrix.python-version }}-\n            ${{ runner.os }}-uv-\n      - name: Install uv\n        run: curl -LsSf https://astral.sh/uv/install.sh | sh\n      - name: Add uv to PATH\n        run: echo \"$HOME/.cargo/bin\" >> $GITHUB_PATH\n      - name: Sync dependencies\n        run: uv sync\n      - name: Run tests\n        run: uv run pytest -s tests/test_cli.py\n\n  mlx-integration-test:\n    if: github.event_name == 'push' && github.ref == 'refs/heads/main'\n    runs-on: macos-latest\n    timeout-minutes: 45\n    env:\n      HF_HOME: .cache/hf\n      HUGGINGFACE_HUB_CACHE: .cache/hf/hub\n      PLAMO_TRANSLATE_CLI_MODEL_NAME: mlx-community/plamo-2-translate\n      PLAMO_TRANSLATE_CLI_MODEL_CACHE_KEY: plamo-2-translate\n      PLAMO_TRANSLATE_CLI_USE_MOCK_SERVER: '0'\n      PLAMO_TRANSLATE_CLI_TEST_TIMEOUT_SECONDS: '900'\n      PLAMO_TRANSLATE_CLI_TEST_SERVER_STARTUP_TIMEOUT_SECONDS: '900'\n      UV_CACHE_DIR: .cache/uv\n    steps:\n      - uses: actions/checkout@v4\n      - name: Set up Python 3.13\n        uses: actions/setup-python@v5\n        
with:\n          python-version: '3.13'\n      - name: Cache uv downloads\n        uses: actions/cache@v4\n        with:\n          path: ${{ env.UV_CACHE_DIR }}\n          key: ${{ runner.os }}-uv-py3.13-${{ hashFiles('uv.lock') }}\n          restore-keys: |\n            ${{ runner.os }}-uv-py3.13-\n            ${{ runner.os }}-uv-\n      - name: Cache Hugging Face models\n        uses: actions/cache@v4\n        with:\n          path: ${{ env.HUGGINGFACE_HUB_CACHE }}\n          key: ${{ runner.os }}-hf-${{ env.PLAMO_TRANSLATE_CLI_MODEL_CACHE_KEY }}-v1\n          restore-keys: |\n            ${{ runner.os }}-hf-${{ env.PLAMO_TRANSLATE_CLI_MODEL_CACHE_KEY }}-\n            ${{ runner.os }}-hf-\n      - name: Install uv\n        run: curl -LsSf https://astral.sh/uv/install.sh | sh\n      - name: Add uv to PATH\n        run: echo \"$HOME/.cargo/bin\" >> $GITHUB_PATH\n      - name: Sync dependencies\n        run: uv sync\n      - name: Run integration tests\n        run: uv run pytest -s tests/test_cli_integration.py\n"
  },
  {
    "path": ".gitignore",
    "content": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packaging\n.Python\nbuild/\ndevelop-eggs/\ndist/\ndownloads/\neggs/\n.eggs/\nlib/\nlib64/\nparts/\nsdist/\nvar/\nwheels/\nshare/python-wheels/\n*.egg-info/\n.installed.cfg\n*.egg\nMANIFEST\n\n# PyInstaller\n#  Usually these files are written by a python script from a template\n#  before PyInstaller builds the exe, so as to inject date/other infos into it.\n*.manifest\n*.spec\n\n# Installer logs\npip-log.txt\npip-delete-this-directory.txt\n\n# Unit test / coverage reports\nhtmlcov/\n.tox/\n.nox/\n.coverage\n.coverage.*\n.cache\nnosetests.xml\ncoverage.xml\n*.cover\n*.py,cover\n.hypothesis/\n.pytest_cache/\ncover/\n\n# Translations\n*.mo\n*.pot\n\n# Django stuff:\n*.log\nlocal_settings.py\ndb.sqlite3\ndb.sqlite3-journal\n\n# Flask stuff:\ninstance/\n.webassets-cache\n\n# Scrapy stuff:\n.scrapy\n\n# Sphinx documentation\ndocs/_build/\n\n# PyBuilder\n.pybuilder/\ntarget/\n\n# Jupyter Notebook\n.ipynb_checkpoints\n\n# IPython\nprofile_default/\nipython_config.py\n\n# pyenv\n#   For a library or package, you might want to ignore these files since the code is\n#   intended to run in multiple environments; otherwise, check them in:\n# .python-version\n\n# pipenv\n#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.\n#   However, in case of collaboration, if having platform-specific dependencies or dependencies\n#   having no cross-platform support, pipenv may install dependencies that don't work, or not\n#   install all needed dependencies.\n#Pipfile.lock\n\n# UV\n#   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.\n#   This is especially recommended for binary packages to ensure reproducibility, and is more\n#   commonly ignored for libraries.\n#uv.lock\n\n# poetry\n#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in 
version control.\n#   This is especially recommended for binary packages to ensure reproducibility, and is more\n#   commonly ignored for libraries.\n#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control\n#poetry.lock\n\n# pdm\n#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.\n#pdm.lock\n#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it\n#   in version control.\n#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control\n.pdm.toml\n.pdm-python\n.pdm-build/\n\n# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm\n__pypackages__/\n\n# Celery stuff\ncelerybeat-schedule\ncelerybeat.pid\n\n# SageMath parsed files\n*.sage.py\n\n# Environments\n.env\n.venv\nenv/\nvenv/\nENV/\nenv.bak/\nvenv.bak/\n\n# Spyder project settings\n.spyderproject\n.spyproject\n\n# Rope project settings\n.ropeproject\n\n# mkdocs documentation\n/site\n\n# mypy\n.mypy_cache/\n.dmypy.json\ndmypy.json\n\n# Pyre type checker\n.pyre/\n\n# pytype static type analyzer\n.pytype/\n\n# Cython debug symbols\ncython_debug/\n\n# PyCharm\n#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can\n#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore\n#  and can be added to the global gitignore or merged into this file.  For a more nuclear\n#  option (not recommended) you can uncomment the following to ignore the entire idea folder.\n#.idea/\n\n# Ruff stuff:\n.ruff_cache/\n\n# PyPI configuration file\n.pypirc\n"
  },
  {
    "path": "LICENSE",
    "content": "                                 Apache License\n                           Version 2.0, January 2004\n                        http://www.apache.org/licenses/\n\n   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n\n   1. Definitions.\n\n      \"License\" shall mean the terms and conditions for use, reproduction,\n      and distribution as defined by Sections 1 through 9 of this document.\n\n      \"Licensor\" shall mean the copyright owner or entity authorized by\n      the copyright owner that is granting the License.\n\n      \"Legal Entity\" shall mean the union of the acting entity and all\n      other entities that control, are controlled by, or are under common\n      control with that entity. For the purposes of this definition,\n      \"control\" means (i) the power, direct or indirect, to cause the\n      direction or management of such entity, whether by contract or\n      otherwise, or (ii) ownership of fifty percent (50%) or more of the\n      outstanding shares, or (iii) beneficial ownership of such entity.\n\n      \"You\" (or \"Your\") shall mean an individual or Legal Entity\n      exercising permissions granted by this License.\n\n      \"Source\" form shall mean the preferred form for making modifications,\n      including but not limited to software source code, documentation\n      source, and configuration files.\n\n      \"Object\" form shall mean any form resulting from mechanical\n      transformation or translation of a Source form, including but\n      not limited to compiled object code, generated documentation,\n      and conversions to other media types.\n\n      \"Work\" shall mean the work of authorship, whether in Source or\n      Object form, made available under the License, as indicated by a\n      copyright notice that is included in or attached to the work\n      (an example is provided in the Appendix below).\n\n      \"Derivative Works\" shall mean any work, whether in Source or Object\n      
form, that is based on (or derived from) the Work and for which the\n      editorial revisions, annotations, elaborations, or other modifications\n      represent, as a whole, an original work of authorship. For the purposes\n      of this License, Derivative Works shall not include works that remain\n      separable from, or merely link (or bind by name) to the interfaces of,\n      the Work and Derivative Works thereof.\n\n      \"Contribution\" shall mean any work of authorship, including\n      the original version of the Work and any modifications or additions\n      to that Work or Derivative Works thereof, that is intentionally\n      submitted to Licensor for inclusion in the Work by the copyright owner\n      or by an individual or Legal Entity authorized to submit on behalf of\n      the copyright owner. For the purposes of this definition, \"submitted\"\n      means any form of electronic, verbal, or written communication sent\n      to the Licensor or its representatives, including but not limited to\n      communication on electronic mailing lists, source code control systems,\n      and issue tracking systems that are managed by, or on behalf of, the\n      Licensor for the purpose of discussing and improving the Work, but\n      excluding communication that is conspicuously marked or otherwise\n      designated in writing by the copyright owner as \"Not a Contribution.\"\n\n      \"Contributor\" shall mean Licensor and any individual or Legal Entity\n      on behalf of whom a Contribution has been received by Licensor and\n      subsequently incorporated within the Work.\n\n   2. Grant of Copyright License. 
Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      copyright license to reproduce, prepare Derivative Works of,\n      publicly display, publicly perform, sublicense, and distribute the\n      Work and such Derivative Works in Source or Object form.\n\n   3. Grant of Patent License. Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      (except as stated in this section) patent license to make, have made,\n      use, offer to sell, sell, import, and otherwise transfer the Work,\n      where such license applies only to those patent claims licensable\n      by such Contributor that are necessarily infringed by their\n      Contribution(s) alone or by combination of their Contribution(s)\n      with the Work to which such Contribution(s) was submitted. If You\n      institute patent litigation against any entity (including a\n      cross-claim or counterclaim in a lawsuit) alleging that the Work\n      or a Contribution incorporated within the Work constitutes direct\n      or contributory patent infringement, then any patent licenses\n      granted to You under this License for that Work shall terminate\n      as of the date such litigation is filed.\n\n   4. Redistribution. 
You may reproduce and distribute copies of the\n      Work or Derivative Works thereof in any medium, with or without\n      modifications, and in Source or Object form, provided that You\n      meet the following conditions:\n\n      (a) You must give any other recipients of the Work or\n          Derivative Works a copy of this License; and\n\n      (b) You must cause any modified files to carry prominent notices\n          stating that You changed the files; and\n\n      (c) You must retain, in the Source form of any Derivative Works\n          that You distribute, all copyright, patent, trademark, and\n          attribution notices from the Source form of the Work,\n          excluding those notices that do not pertain to any part of\n          the Derivative Works; and\n\n      (d) If the Work includes a \"NOTICE\" text file as part of its\n          distribution, then any Derivative Works that You distribute must\n          include a readable copy of the attribution notices contained\n          within such NOTICE file, excluding those notices that do not\n          pertain to any part of the Derivative Works, in at least one\n          of the following places: within a NOTICE text file distributed\n          as part of the Derivative Works; within the Source form or\n          documentation, if provided along with the Derivative Works; or,\n          within a display generated by the Derivative Works, if and\n          wherever such third-party notices normally appear. The contents\n          of the NOTICE file are for informational purposes only and\n          do not modify the License. 
You may add Your own attribution\n          notices within Derivative Works that You distribute, alongside\n          or as an addendum to the NOTICE text from the Work, provided\n          that such additional attribution notices cannot be construed\n          as modifying the License.\n\n      You may add Your own copyright statement to Your modifications and\n      may provide additional or different license terms and conditions\n      for use, reproduction, or distribution of Your modifications, or\n      for any such Derivative Works as a whole, provided Your use,\n      reproduction, and distribution of the Work otherwise complies with\n      the conditions stated in this License.\n\n   5. Submission of Contributions. Unless You explicitly state otherwise,\n      any Contribution intentionally submitted for inclusion in the Work\n      by You to the Licensor shall be under the terms and conditions of\n      this License, without any additional terms or conditions.\n      Notwithstanding the above, nothing herein shall supersede or modify\n      the terms of any separate license agreement you may have executed\n      with Licensor regarding such Contributions.\n\n   6. Trademarks. This License does not grant permission to use the trade\n      names, trademarks, service marks, or product names of the Licensor,\n      except as required for reasonable and customary use in describing the\n      origin of the Work and reproducing the content of the NOTICE file.\n\n   7. Disclaimer of Warranty. Unless required by applicable law or\n      agreed to in writing, Licensor provides the Work (and each\n      Contributor provides its Contributions) on an \"AS IS\" BASIS,\n      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n      implied, including, without limitation, any warranties or conditions\n      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n      PARTICULAR PURPOSE. 
You are solely responsible for determining the\n      appropriateness of using or redistributing the Work and assume any\n      risks associated with Your exercise of permissions under this License.\n\n   8. Limitation of Liability. In no event and under no legal theory,\n      whether in tort (including negligence), contract, or otherwise,\n      unless required by applicable law (such as deliberate and grossly\n      negligent acts) or agreed to in writing, shall any Contributor be\n      liable to You for damages, including any direct, indirect, special,\n      incidental, or consequential damages of any character arising as a\n      result of this License or out of the use or inability to use the\n      Work (including but not limited to damages for loss of goodwill,\n      work stoppage, computer failure or malfunction, or any and all\n      other commercial damages or losses), even if such Contributor\n      has been advised of the possibility of such damages.\n\n   9. Accepting Warranty or Additional Liability. While redistributing\n      the Work or Derivative Works thereof, You may choose to offer,\n      and charge a fee for, acceptance of support, warranty, indemnity,\n      or other liability obligations and/or rights consistent with this\n      License. However, in accepting such obligations, You may act only\n      on Your own behalf and on Your sole responsibility, not on behalf\n      of any other Contributor, and only if You agree to indemnify,\n      defend, and hold each Contributor harmless for any liability\n      incurred by, or claims asserted against, such Contributor by reason\n      of your accepting any such warranty or additional liability.\n\n   END OF TERMS AND CONDITIONS\n\n   APPENDIX: How to apply the Apache License to your work.\n\n      To apply the Apache License to your work, attach the following\n      boilerplate notice, with the fields enclosed by brackets \"[]\"\n      replaced with your own identifying information. 
(Don't include\n      the brackets!)  The text should be enclosed in the appropriate\n      comment syntax for the file format. We also recommend that a\n      file or class name and description of purpose be included on the\n      same \"printed page\" as the copyright notice for easier\n      identification within third-party archives.\n\n   Copyright [yyyy] [name of copyright owner]\n\n   Licensed under the Apache License, Version 2.0 (the \"License\");\n   you may not use this file except in compliance with the License.\n   You may obtain a copy of the License at\n\n       http://www.apache.org/licenses/LICENSE-2.0\n\n   Unless required by applicable law or agreed to in writing, software\n   distributed under the License is distributed on an \"AS IS\" BASIS,\n   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n   See the License for the specific language governing permissions and\n   limitations under the License.\n"
  },
  {
    "path": "README.md",
    "content": "# plamo-translate-cli\n\nA command-line interface for translation using the plamo-2-translate model with local execution.\n\n## Features\n\n- Translate text between 16+ languages including Japanese, English, Chinese, Korean, and more\n- Simple command-line interface for easy integration into scripts and workflows\n- Supports various server backends (MLX, with planned support for Ollama and vLLM)\n  - Currently, optimized for macOS with Apple Silicon using MLX framework\n\n## Installation\n\n### For macOS\n\n`plamo-translate` currently installs on Python 3.10 through 3.14 on macOS.\nNo additional workaround is required for `sentencepiece` on Python 3.13 or 3.14 with current upstream releases.\n\n```sh\npip install plamo-translate\n```\n\n#### [`uv tool`](https://docs.astral.sh/uv/concepts/tools/)\n\nIf you use [`uv`](https://github.com/astral-sh/uv) as a package manager rather than `pip`, you can install `plamo-translate` into an isolated environment:\n\n```sh\nuv tool install -p 3.14 plamo-translate\n```\n\n## Development\n\n```sh\nuv sync\nsource .venv/bin/activate\n```\n\n## Requirements\n\n- Python 3.10 through 3.14\n  - Common dependencies:\n    - mcp[cli]\n    - numba\n  - On macOS:\n    - mlx-lm\n\n## Usage\n\n### Basic usage\n\nYou can specify the input and output language by giving `--from` and `--to` options.\nIf you don't specify them, the input/output language will be automatically selected from English or Japanese.\n\n#### Interactive mode\n\n```sh\n$ plamo-translate\nLoading models...done!\nInteractive mode enabled. Type your input below (Ctrl+D to exit).\n> こんにちは、お元気ですか？\nHello, how are you?\n> 「お腹減った〜何食べたい？」「私はうなぎ！」\n\"I'm hungry! What do you want to eat?\" \"I want eel!\"\n> You translate ambiguous expression in Japanese into English very well.\nあなたは日本語の曖昧な表現を英語に翻訳するのがとても上手です。\n```\n\n#### Pipe mode\n\n```sh\n$ cat file.txt | plamo-translate\nThe virtual worlds of the internet have experienced remarkable technological advancement. 
Meanwhile, the real world still contains numerous areas where technology has yet to make significant inroads, with many inefficient manual tasks and dangerous work still requiring human intervention. This situation stems from the fact that conventional technology has struggled to adapt to the dynamic changes and diverse conditions of the real world.\n\nPFN's core strengths lie in machine learning and deep learning technologies, which demonstrate exceptional flexibility in handling uncertainty and have the potential to create significant impact in the real world. For example, by applying deep learning technologies to robots that excel at repetitive tasks, we can enable them to make more human-like flexible judgments and perform complex tasks.\n\nTo create meaningful impact in the real world, it's essential to push the boundaries of cutting-edge technology and research application domains where technological innovation can create tangible change. For these purposes, PFN assembles a team of exceptionally talented professionals with diverse expertise.\n```\n\n#### Server mode\n\nFirst, launch the server:\n\n```sh\n$ plamo-translate server\n```\n\nThen, use the client mode:\n\n```sh\n$ plamo-translate --input '家計は火の車だ'\nOur household is in financial trouble.\n```\n\nYou can also use the interactive mode with the server:\n\n```sh\n$ plamo-translate\nLoading models...done!\nInteractive mode enabled. Type your input below (Ctrl+D to exit).\n> 家計は火の車だ\nOur household is in financial trouble.\n```\n\nIt can skip the loading time of the model, so it is useful when you want to use this tool frequently.\n\n### Using from MCP Client\n\nThe `plamo-translate server` command starts an MCP (Model Context Protocol) server. This allows `plamo-translate` to be used as a tool in other applications that support MCP, such as Claude Desktop.\n\nHere, we introduce how to use `plamo-translate` with Claude Desktop, which is a popular MCP client.\n\n1.  
Start the `plamo-translate` server:\n    ```sh\n    plamo-translate server\n    ```\n2.  In a new terminal, run the following command to display the MCP configuration for Claude Desktop:\n    ```sh\n    plamo-translate show-claude-config\n    ```\n    and you will see the configuration in JSON format as follows:\n    ```json\n    {\n      \"mcpServers\": {\n        \"plamo-translate\": {\n          \"command\": \"/Users/shunta/.linuxbrew/bin/npx\",\n          \"args\": [\n            \"-y\",\n            \"mcp-remote\",\n            \"http://localhost:8000/mcp\",\n            \"--allow-http\",\n            \"--transport\",\n            \"http-only\"\n          ],\n          \"env\": {\n            \"PATH\": \"[THE SAME STRING AS YOUR CURRENT PATH ENVIRONMENT VARIABLE]\"\n          }\n        }\n      }\n    }\n    ```\n3.  Copy the outputted configuration.\n4.  Paste this configuration into your Claude Desktop's MCP configuration file (on macOS, this is typically located at `~/Library/Application Support/Claude/claude_desktop_config.json`).\n\nOnce configured, you can use `plamo-translate` directly from Claude Desktop.\n\n#### Select precision of the model weight\n\nYou can specify the precision of the model weight by giving a `--precision` option.\n\n```sh\n$ plamo-translate server --precision 8bit\n```\n\n## Supported Languages\n\n- Japanese\n- Japanese(easy)\n- English\n\n### Experimentally Supported Languages\n\n- Chinese\n- Taiwanese\n- Korean\n- Arabic\n- Italian\n- Indonesian\n- Dutch\n- Spanish\n- Thai\n- German\n- French\n- Vietnamese\n- Russian\n\n## Server Backends\n\n- mlx: Optimized for macOS with Apple Silicon (default on macOS)\n\n## Options\n\n- --input TEXT Input text to translate\n- --from TEXT Input language for translation (default: English)\n- --to TEXT Output language for translation (default: Japanese)\n- --precision Model weight precision to use. 
You can select from: [4bit, 8bit, bf16] (default: 4bit)\n\n## Configuration\n\nYou can configure the following parameters using environment variables:\n\n- `PLAMO_TRANSLATE_CLI_SERVER_START_PORT`: Specifies the starting port number for the server.\n- `PLAMO_TRANSLATE_CLI_SERVER_END_PORT`: Specifies the ending port number for the server.\n- `PLAMO_TRANSLATE_CLI_TEMP`: Sets the temperature for text generation.\n- `PLAMO_TRANSLATE_CLI_TOP_P`: Sets the top-p (nucleus) sampling probability.\n- `PLAMO_TRANSLATE_CLI_TOP_K`: Sets the top-k sampling number.\n- `PLAMO_TRANSLATE_CLI_REPETITION_PENALTY`: Sets the repetition penalty.\n- `PLAMO_TRANSLATE_CLI_REPETITION_CONTEXT_SIZE`: Sets the context size for repetition penalty.\n\n## Deploy\n\n```sh\nbash scripts/deploy.sh\n```\n"
  },
  {
    "path": "pyproject.toml",
    "content": "[project]\nname = \"plamo-translate\"\nversion = \"1.0.5\"\ndescription = \"A command-line interface for translation using the plamo-2-translate model with local execution.\"\nreadme = \"README.md\"\nauthors = [\n    { name = \"Shunta Saito\", email = \"shunta@preferred.jp\" },\n]\nrequires-python = \">=3.10\"\nkeywords = [\n    \"machine translation\",\n    \"transformer\",\n    \"nlp\",\n    \"natural language processing\",\n    \"deep learning\",\n    \"mlx\",\n    \"mlx-lm\",\n    \"sentencepiece\",\n    \"plamo\",\n    \"plamo-translate\",\n    \"plamo-translate-cli\",\n]\nlicense = \"Apache-2.0\"\nclassifiers = [\n    \"Development Status :: 5 - Production/Stable\",\n    \"Intended Audience :: Developers\",\n    \"License :: OSI Approved :: Apache Software License\",\n    \"Programming Language :: Python :: 3\",\n    \"Programming Language :: Python :: 3.10\",\n    \"Programming Language :: Python :: 3.11\",\n    \"Programming Language :: Python :: 3.12\",\n    \"Programming Language :: Python :: 3.13\",\n    \"Programming Language :: Python :: 3.14\",\n    \"Operating System :: MacOS\",\n    \"Topic :: Utilities\",\n    \"Environment :: MacOS X\",\n    \"Environment :: Console\",\n]\ndependencies = [\n    \"mcp[cli]>=1.9.2\",\n    \"numba>=0.60.0\",\n    \"mlx-lm>=0.26.3 ; sys_platform == 'darwin'\",\n]\n\n[tool.uv]\noverride-dependencies = [\"transformers<5\"]\n\n[build-system]\nrequires = [\"hatchling\"]\nbuild-backend = \"hatchling.build\"\n\n[tool.hatch.build]\nexclude = [\"tests\", \"docs\", \"examples\", \"scripts\", \"notebooks\", \"data\", \"tmp\"]\n\n[tool.hatch.build.targets.wheel]\ncompression = \"stored\"\n\n[tool.hatch.build.targets.sdist]\ncompress-level = 0\n\n[project.scripts]\nplamo-translate = \"plamo_translate.main:main\"\n\n[tool.ruff]\nline-length = 120\n\n[dependency-groups]\ndev = [\n    \"build>=1.2.2.post1\",\n    \"ipdb>=0.13.13\",\n    \"pytest>=8.4.0\",\n    \"ruff>=0.11.12\",\n    \"twine>=6.1.0\",\n    
\"wheel>=0.45.1\",\n]\n"
  },
  {
    "path": "scripts/convert_dataset.py",
    "content": "import argparse\nimport json\nfrom pathlib import Path\n\nfrom datasets import Dataset\nfrom jinja2 import Template\nfrom mlx_lm.tokenizer_utils import load_tokenizer\nfrom mlx_lm.tuner import datasets\n\nif __name__ == \"__main__\":\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\"--tokenizer-path\", type=str)\n    parser.add_argument(\"--dataset-jsonl-path\", type=str)\n    parser.add_argument(\"--chat-template-path\", type=str)\n    parser.add_argument(\"--pack-length\", type=int, default=640)\n    args = parser.parse_args()\n\n    tokenizer_path = args.tokenizer_path\n    dataset_jsonl_path = args.dataset_jsonl_path\n    chat_template_path = args.chat_template_path\n\n    tokenizer = load_tokenizer(Path(tokenizer_path))\n\n    with open(chat_template_path, \"r\") as f:\n        chat_template = Template(f.read())\n\n    with open(dataset_jsonl_path, \"r\") as f:\n        lines = [json.loads(line) for line in f.readlines()]\n\n    dataset = []\n    prompts = []\n    n_toks = []\n    current_n_toks = 0\n    for line in lines:\n        for input_text, output_text in zip(line[\"input\"][\"content\"], line[\"output\"][\"content\"]):\n            try:\n                if \"\\n\" in input_text:\n                    input_text_str = input_text.split(\"\\n\")[1].strip()\n                else:\n                    input_text_str = input_text.strip()\n            except Exception:\n                print(input_text)\n                import ipdb\n\n                ipdb.set_trace()\n            try:\n                if \"\\n\" in output_text:\n                    output_text_str = output_text.split(\"\\n\")[1].strip()\n                else:\n                    output_text_str = output_text.strip()\n            except Exception:\n                print(output_text)\n                import ipdb\n\n                ipdb.set_trace()  # fmt: skip\n            prompt = chat_template.render(\n                messages=[\n                    {\"role\": 
\"user\", \"content\": f\"input lang={line['input']['lang']}\\n{input_text_str}\"},\n                    {\"role\": \"user\", \"content\": f\"output lang={line['output']['lang']}\\n{output_text_str}\"},\n                ]\n            )\n\n            n_tok = len(tokenizer.encode(prompt.strip()))\n            n_toks.append(n_tok)\n            if current_n_toks + n_tok + 1 > args.pack_length:\n                text = \"<|plamo:bos|>\".join(prompts) + \"<|plamo:bos|>\"\n                n_pad = args.pack_length - len(tokenizer.encode(text))\n                if n_pad > 0:\n                    text += \"<|plamo:pad|>\" * n_pad\n                dataset.append({\"text\": text})\n                prompts = [prompt.strip()]\n                current_n_toks = n_tok\n            else:\n                prompts.append(prompt.strip())\n                current_n_toks += n_tok\n\n    print(f\"Max tokens in a batch: {max(n_toks)}\")\n    dataset = Dataset.from_list(dataset)\n    dataset.save_to_disk(\"tmp/calibration_dataset\")\n"
  },
  {
    "path": "scripts/deploy.sh",
    "content": "rm -rf dist\nARCHFLAGS=\"-arch arm64\" MACOSX_DEPLOYMENT_TARGET=\"11.0\" \\\nuv build --wheel\n\nWHEEL_FILENAME=$(ls dist/plamo_translate-*.whl)\nuv run -m wheel tags \\\n    --python-tag py3 \\\n    --abi-tag none \\\n    --platform-tag macosx_11_0_arm64 \\\n    ${WHEEL_FILENAME}\nrm -rf ${WHEEL_FILENAME}\n\nuv run twine upload dist/*\n"
  },
  {
    "path": "src/plamo_translate/__init__.py",
    "content": "\"\"\"PLaMo Translate CLI package.\"\"\"\n\n__version__ = \"1.0.5\"\n"
  },
  {
    "path": "src/plamo_translate/assets/chat_template.jinja2",
    "content": "{{- \"<|plamo:op|>dataset\\ntranslation\\n\" -}}\n{% for message in messages %}\n    {{- '<|plamo:op|>' + message['content']}}\n    {%- if not loop.last %}\n        {{- '\\n'}}\n    {%- endif %}\n{% endfor %}"
  },
  {
    "path": "src/plamo_translate/clients/__init__.py",
    "content": ""
  },
  {
    "path": "src/plamo_translate/clients/translate.py",
    "content": "import asyncio\nimport logging\nfrom typing import AsyncGenerator, Dict, List\nfrom urllib.parse import urlunparse\n\nimport mcp.types as types\nfrom mcp.client.session import ClientSession\nfrom mcp.client.streamable_http import streamablehttp_client\nfrom mcp.shared.session import RequestResponder\nfrom mcp.types import TextContent\n\nfrom plamo_translate.servers.utils import Message, TranslateRequest, update_config\n\nlogger = logging.getLogger(__name__)\n\n\nasync def message_handler(\n    message: RequestResponder[types.ServerRequest, types.ClientResult] | types.ServerNotification | Exception,\n) -> None:\n    if isinstance(message, Exception):\n        logger.error(\"Error: %s\", message)\n        return\n\n\nclass MCPClient:\n    def __init__(self, stream: bool) -> None:\n        \"\"\"Initialize the MCP client.\n\n        Args:\n            stream (bool): Whether to stream the translation results.\n        \"\"\"\n        self.stream = stream\n        self.config = update_config()\n\n        port = self.config.get(\"port\", None)\n        if port is None:\n            raise ValueError(\"Port is not set in the configuration. Please start the MCP server first.\")\n        self.url = urlunparse((\"http\", f\"127.0.0.1:{port}\", \"mcp\", \"\", \"\", \"\"))\n\n    async def translate(self, messages: List[Dict[str, str]]) -> AsyncGenerator[str, None]:\n        \"\"\"Translate messages. 
If stream=True, yields chunks as they arrive.\"\"\"\n        async with streamablehttp_client(self.url) as (\n            read_stream,\n            write_stream,\n            get_session_id_callback,\n        ):\n            async with ClientSession(\n                read_stream=read_stream,\n                write_stream=write_stream,\n                message_handler=message_handler,\n            ) as session:\n                await session.initialize()\n\n                messages_obj = [Message(**message) for message in messages]\n                request = TranslateRequest(messages=messages_obj, source_language=\"\", target_language=\"\")\n\n                if self.stream:\n                    # For streaming, we'll need to handle the response differently\n                    # This will yield chunks as they arrive\n                    async for chunk in self._translate_stream(session, request):\n                        yield chunk\n                else:\n                    # The messages should already have source and target languages, so omit to specify them again\n                    response = await session.call_tool(\n                        \"plamo-translate\",\n                        arguments={\n                            \"request\": request,\n                            \"stream\": False,\n                        },\n                    )\n\n                    # Extract text from response content\n                    if response.content and len(response.content) > 0:\n                        content = response.content[0]\n                        if isinstance(content, TextContent):\n                            yield content.text\n                        else:\n                            raise ValueError(f\"Unexpected content type: {type(content)}\")\n                    else:\n                        raise ValueError(\"Empty response from translation tool\")\n\n    async def _translate_stream(self, session: ClientSession, request: 
TranslateRequest):\n        \"\"\"Handle streaming translation responses.\"\"\"\n        # Use a queue to pass messages from progress_handler to the generator\n        message_queue: asyncio.Queue[str] = asyncio.Queue()\n        call_complete = asyncio.Event()\n\n        async def progress_handler(progress: float, total: float | None, message: str | None) -> None:\n            \"\"\"Handle progress updates which might contain partial translations.\"\"\"\n            if message:\n                await message_queue.put(message)\n\n        async def call_tool_wrapper():\n            \"\"\"Wrapper to call the tool and signal completion\"\"\"\n            try:\n                response = await session.call_tool(\n                    \"plamo-translate\",\n                    arguments={\n                        \"request\": request,\n                        \"stream\": True,\n                    },\n                    progress_callback=progress_handler,\n                )\n                # Put the final response in the queue if needed\n                if response.content and len(response.content) > 0:\n                    content = response.content[0]\n                    if isinstance(content, TextContent):\n                        await message_queue.put(content.text)\n            finally:\n                call_complete.set()\n\n        # Start the tool call in the background\n        asyncio.create_task(call_tool_wrapper())\n\n        # Yield messages as they arrive\n        chunks = []\n        while not call_complete.is_set() or not message_queue.empty():\n            try:\n                message = await asyncio.wait_for(message_queue.get(), timeout=0.1)\n                chunks.append(message)\n                yield message\n            except asyncio.TimeoutError:\n                # No message available, continue waiting\n                continue\n"
  },
  {
    "path": "src/plamo_translate/main.py",
    "content": "#!/usr/bin/env python3\nimport argparse\nimport asyncio\nimport atexit\nimport json\nimport logging\nimport multiprocessing\nimport os\nimport readline\nimport signal\nimport subprocess\nimport sys\nimport time\nfrom pathlib import Path\nfrom typing import Dict, List\n\nfrom plamo_translate import __version__\nfrom plamo_translate.clients import translate\nfrom plamo_translate.servers.utils import (\n    PLAMO_TRANSLATE_CLI_REPETITION_CONTEXT_SIZE,\n    PLAMO_TRANSLATE_CLI_REPETITION_PENALTY,\n    SUPPORTED_LANGUAGES,\n    update_config,\n    verify_mcp_server_ready,\n)\n\nos.environ[\"HF_HUB_DISABLE_PROGRESS_BARS\"] = \"1\"\n\nlogger = logging.getLogger(__name__)\n\n\ndef start_mcp_server(backend_type: str, log_level: str, show_progress: bool = False) -> None:\n    # To avoid showing warnings related to resource_tracker\n    signal.signal(signal.SIGTERM, lambda _signal_number, _frame: exit(0))\n    if os.environ.get(\"PLAMO_TRANSLATE_CLI_USE_MOCK_SERVER\") == \"1\":\n        from plamo_translate.servers.mock import server as mock_server\n\n        server = mock_server.PLaMoTranslateServer(log_level=log_level, show_progress=show_progress)\n        try:\n            server.run(transport=\"streamable-http\")\n        except Exception as e:\n            print(f\"Error during server running: {e}\")\n    elif backend_type == \"mlx\":\n        from plamo_translate.servers.mlx import server as mlx_server\n\n        server = mlx_server.PLaMoTranslateServer(log_level=log_level, show_progress=show_progress)\n        try:\n            server.run(transport=\"streamable-http\")\n        except Exception as e:\n            print(f\"Error during server running: {e}\")\n    else:\n        raise ValueError(f\"Unsupported backend type: {backend_type}\")\n\n\ndef check_server_running() -> bool:\n    config = update_config()\n    if \"port\" not in config:\n        return False\n    port = config[\"port\"]\n    tools = asyncio.run(verify_mcp_server_ready(port))\n    if 
\"plamo-translate\" in tools:\n        return True\n    return False\n\n\ndef wait_for_server_ready() -> None:\n    while not check_server_running():\n        time.sleep(0.1)\n\n\nasync def print_translation(\n    client: translate.MCPClient, messages: List[Dict[str, str]], stream: bool\n) -> List[Dict[str, str]]:\n    async for result in client.translate(messages):\n        if not stream:\n            print(result, end=\"\", flush=True)\n        else:\n            messages[-1][\"content\"] += result\n            print(result, end=\"\", flush=True)\n\n    return messages\n\n\ndef run_translate(args: argparse.Namespace) -> None:\n    from_lang = args.from_lang\n    if from_lang != \"\":\n        from_lang = f\" lang={from_lang}\"\n\n    to = args.to\n    if to != \"\":\n        to = f\" lang={to}\"\n\n    backend_type = args.backend_type\n    stream = args.stream\n\n    if args.input is None and not args.interactive:\n        input_text = sys.stdin.read()\n        args.input = input_text\n    else:\n        input_text = args.input\n\n    messages: List[Dict[str, str]] = []\n\n    if not check_server_running():\n        if args.interactive:\n            show_progress = True\n        else:\n            show_progress = False\n        server = multiprocessing.Process(\n            target=start_mcp_server,\n            args=(backend_type, \"CRITICAL\", show_progress),\n            daemon=True,\n        )\n        server.start()\n        wait_for_server_ready()\n\n    client = translate.MCPClient(stream=stream)\n\n    try:\n        if args.interactive:\n            history_file = Path.home() / \".plamo_translate_history\"\n            if not history_file.exists():\n                history_file.touch()\n            try:\n                readline.read_history_file(history_file)\n                readline.set_history_length(-1)\n            except Exception:\n                print(f\"History file {history_file} not found. 
Starting a new history file.\")\n            atexit.register(readline.write_history_file, history_file)\n            print(\"Interactive mode enabled. Type your input below (Ctrl+D to exit).\")\n\n            while True:\n                try:\n                    input_text = input(\"> \")\n                    if input_text.strip() == \"\":\n                        continue\n\n                    messages.append(\n                        {\n                            \"role\": \"user\",\n                            \"content\": f\"input{from_lang}\\n{input_text}\",\n                        },\n                    )\n                    messages.append(\n                        {\n                            \"role\": \"user\",\n                            \"content\": f\"output{to}\\n\",\n                        },\n                    )\n                    messages = asyncio.run(print_translation(client, messages, stream=args.stream))\n\n                except KeyboardInterrupt:\n                    print(\"\\nTranslation interrupted by user (Ctrl+C).\")\n                    sys.exit(0)\n                    break\n                except EOFError:\n                    print(\"\\nCtrl+D received. 
Exiting.\")\n                    sys.exit(0)\n                    break\n\n        else:\n            # Non-interactive mode: translate the input once\n            messages.append(\n                {\n                    \"role\": \"user\",\n                    \"content\": f\"input{from_lang}\\n{input_text}\",\n                },\n            )\n            messages.append(\n                {\n                    \"role\": \"user\",\n                    \"content\": f\"output{to}\\n\",\n                },\n            )\n            asyncio.run(print_translation(client, messages, stream=args.stream))\n\n    except Exception as e:\n        raise e\n\n    finally:\n        sys.exit(0)\n\n\ndef main() -> None:\n    global_parser = argparse.ArgumentParser(add_help=False)\n    global_parser.add_argument(\n        \"--version\",\n        \"-v\",\n        action=\"version\",\n        version=\"%(prog)s {version}\".format(version=__version__),\n        help=\"Show program's version number and exit.\",\n    )\n\n    # Add arguments for the default command (translate)\n    # These will be used if no subcommand is provided\n    global_parser.add_argument(\"--input\", type=str, help=\"Input text to translate\", default=None)\n    global_parser.add_argument(\n        \"--from\",\n        type=str,\n        help=\"Input language for translation\",\n        default=\"English|Japanese\",\n        choices=SUPPORTED_LANGUAGES,\n        dest=\"from_lang\",\n    )\n    global_parser.add_argument(\n        \"--to\",\n        type=str,\n        help=\"Output language for translation\",\n        default=\"\",\n        choices=SUPPORTED_LANGUAGES + [\"\"],\n    )\n    global_parser.add_argument(\n        \"--backend-type\",\n        type=str,\n        default=\"mlx\",\n        choices=[\"mlx\"],\n        help=\"Server backend to use (default: mlx on macOS, transformers elsewhere)\",\n    )\n    global_parser.add_argument(\n        \"--precision\",\n        \"-p\",\n        type=str,\n    
    default=\"4bit\",\n        choices=[\"4bit\", \"8bit\", \"bf16\"],\n        help=\"Model parameter's precision to use (default: 4bit)\",\n    )\n    global_parser.add_argument(\n        \"--no-stream\",\n        action=\"store_true\",\n        help=\"Enable batch processing mode for translation\",\n    )\n    global_parser.add_argument(\n        \"--interactive\",\n        \"-i\",\n        action=\"store_true\",\n        help=\"Enable interactive mode for translation\",\n    )\n\n    # Create the parser for the \"server\" command\n    parser = argparse.ArgumentParser(description=\"PLaMo Translate CLI\", parents=[global_parser])\n\n    subparsers = parser.add_subparsers(dest=\"command\", help=\"Command to run\")\n    _ = subparsers.add_parser(\"server\", help=\"Run the server\", parents=[global_parser])\n    _ = subparsers.add_parser(\n        \"show-claude-config\", help=\"Show the MCP server config for Claude Desktop\", parents=[global_parser]\n    )\n\n    args = parser.parse_args()\n\n    # Route to appropriate command handler\n    if hasattr(args, \"version\") and args.version:\n        # The version action should have already exited, but as a fallback:\n        sys.exit(0)\n\n    if PLAMO_TRANSLATE_CLI_REPETITION_PENALTY is not None and PLAMO_TRANSLATE_CLI_REPETITION_CONTEXT_SIZE is None:\n        raise ValueError(\n            \"If PLAMO_TRANSLATE_CLI_REPETITION_PENALTY is set, \"\n            \"PLAMO_TRANSLATE_CLI_REPETITION_CONTEXT_SIZE must also be set.\"\n        )\n    elif PLAMO_TRANSLATE_CLI_REPETITION_PENALTY is None and PLAMO_TRANSLATE_CLI_REPETITION_CONTEXT_SIZE is not None:\n        raise ValueError(\n            \"If PLAMO_TRANSLATE_CLI_REPETITION_CONTEXT_SIZE is set, \"\n            \"PLAMO_TRANSLATE_CLI_REPETITION_PENALTY must also be set.\"\n        )\n\n    if args.input is None and (args.interactive or sys.stdin.isatty()):\n        args.interactive = True\n        logging.basicConfig(level=logging.ERROR)\n        
os.environ[\"PLAMO_TRANSLATE_CLI_SERVER_LOG_LEVEL\"] = \"CRITICAL\"\n    else:\n        args.interactive = False\n        logging.basicConfig(level=logging.CRITICAL)\n        os.environ[\"PLAMO_TRANSLATE_CLI_SERVER_LOG_LEVEL\"] = \"CRITICAL\"\n\n    args.stream = not args.no_stream\n    if args.backend_type == \"mlx\":\n        if args.precision == \"4bit\":\n            model_name = \"mlx-community/plamo-2-translate\"\n        elif args.precision == \"8bit\":\n            model_name = \"mlx-community/plamo-2-translate-8bit\"\n        elif args.precision == \"bf16\":\n            model_name = \"mlx-community/plamo-2-translate-bf16\"\n\n    update_config(backend_type=args.backend_type, model_name=model_name)\n\n    if \"PLAMO_TRANSLATE_CLI_MODEL_NAME\" not in os.environ:\n        os.environ[\"PLAMO_TRANSLATE_CLI_MODEL_NAME\"] = model_name\n\n    if args.command == \"server\":\n        logging.basicConfig(level=logging.INFO)\n        if check_server_running():\n            print(\"MCP server is already running. Skipping server start.\")\n            sys.exit(0)\n        while not check_server_running():\n            try:\n                logger.info(\"Starting server...\")\n                start_mcp_server(args.backend_type, \"INFO\", True)\n                logger.info(\"The server is running (Ctrl+C to stop)\")\n            except KeyboardInterrupt:\n                logger.error(\"\\nCtrl+C received. Exiting.\")\n                break\n            except EOFError:\n                logger.error(\"\\nCtrl+D received. Exiting.\")\n                break\n            except Exception as e:\n                logger.error(f\"An error occurred: {str(e)}: {e}. Restarting server...\")\n\n    elif args.command == \"show-claude-config\":\n        cmd = subprocess.run([\"which\", \"npx\"], check=True, capture_output=True, text=True)\n        if cmd.returncode != 0:\n            logger.error(\"npx command not found. 
Please install Node.js and npx.\")\n            exit(1)\n        npx_path = cmd.stdout.strip()\n        config = update_config()\n        print(\n            json.dumps(\n                {\n                    \"mcpServers\": {\n                        \"plamo-translate\": {\n                            \"command\": npx_path,\n                            \"args\": [\n                                \"-y\",\n                                \"mcp-remote\",\n                                f\"http://localhost:{config['port']}/mcp\",\n                                \"--allow-http\",\n                                \"--transport\",\n                                \"http-only\",\n                            ],\n                            \"env\": {\"PATH\": os.environ[\"PATH\"]},\n                        }\n                    }\n                },\n                indent=2,\n                ensure_ascii=False,\n            )\n        )\n    else:\n        run_translate(args)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "src/plamo_translate/servers/__init__.py",
    "content": ""
  },
  {
    "path": "src/plamo_translate/servers/mlx/__init__.py",
    "content": ""
  },
  {
    "path": "src/plamo_translate/servers/mlx/server.py",
    "content": "import asyncio\nimport contextlib\nimport importlib.resources\nimport logging\nimport os\nimport subprocess\nimport sys\nfrom typing import Callable, Tuple\n\nimport mlx.core as mx\nimport mlx.nn as nn\nfrom mcp.server.fastmcp import Context, FastMCP\nfrom mlx_lm.generate import stream_generate\nfrom mlx_lm.sample_utils import make_logits_processors, make_sampler\nfrom mlx_lm.tokenizer_utils import TokenizerWrapper\nfrom mlx_lm.utils import load\n\nfrom plamo_translate.servers.utils import (\n    INSTRUCTION,\n    PLAMO_MAX_TOKENS,\n    PLAMO_TRANSLATE_CLI_MODEL_NAME,\n    PLAMO_TRANSLATE_CLI_REPETITION_CONTEXT_SIZE,\n    PLAMO_TRANSLATE_CLI_REPETITION_PENALTY,\n    PLAMO_TRANSLATE_CLI_TEMP,\n    PLAMO_TRANSLATE_CLI_TOP_K,\n    PLAMO_TRANSLATE_CLI_TOP_P,\n    TranslateRequest,\n    construct_llm_input,\n    find_free_port,\n    update_config,\n)\nfrom plamo_translate.servers.warnings import (\n    build_optional_gpu_dependency_warning_options,\n    suppress_optional_gpu_dependency_warnings,\n)\n\nlogger = logging.getLogger(__name__)\n\n\nclass PLaMoTranslateServer(FastMCP):\n    \"\"\"PLaMo Translate Server using FastMCP.\"\"\"\n\n    def __init__(self, log_level: str, show_progress: bool = False) -> None:\n        super().__init__(\n            name=\"plamo-translate\",\n            instructions=INSTRUCTION,\n            log_level=log_level,\n            stateless_http=False,\n            host=\"127.0.0.1\",\n            port=find_free_port(),\n            lifespan=self.lifespan,\n        )\n\n        # Set environment variables to switch if it shows progress bars for loading models or not\n        self.show_progress = show_progress\n\n        model, tokenizer, sampler, logits_processors = self.load_model()\n        self.model = model\n        self.tokenizer = tokenizer\n        self.sampler = sampler\n        self.logits_processors = logits_processors\n\n        self.add_tool(\n            fn=self.translate,\n            
name=\"plamo-translate\",\n            description=INSTRUCTION,\n        )\n\n    @contextlib.asynccontextmanager\n    async def lifespan(self, server: FastMCP):\n        try:\n            async with contextlib.AsyncExitStack() as stack:\n                # Pre-processings before a request is processed\n                yield\n                # Post-processings after a request is processed\n        except Exception as e:\n            logger.error(f\"Error during lifespan: {str(e)} {e}\")\n            await stack.aclose()\n\n    def load_model(self) -> Tuple[nn.Module, TokenizerWrapper, Callable[..., mx.array], list]:\n        \"\"\"Load the MLX model if not already loaded.\"\"\"\n        try:\n            ref = importlib.resources.files(\"plamo_translate.assets\").joinpath(\"chat_template.jinja2\")\n            chat_template = ref.read_text(encoding=\"utf-8\")\n        except FileNotFoundError:\n            raise RuntimeError(\"chat_template.jinja2 not found in assets directory\")\n\n        model_name = os.getenv(\"PLAMO_TRANSLATE_CLI_MODEL_NAME\", PLAMO_TRANSLATE_CLI_MODEL_NAME)\n        update_config(model_name=model_name)\n\n        # Run a throwaway one-token mlx_lm generation in a subprocess with progress bars enabled so the model download shows progress\n        if self.show_progress:\n            envs = os.environ.copy()\n            envs[\"HF_HUB_DISABLE_PROGRESS_BARS\"] = \"0\"\n            subprocess.run(\n                [\n                    sys.executable,\n                    *build_optional_gpu_dependency_warning_options(),\n                    \"-m\",\n                    \"mlx_lm\",\n                    \"generate\",\n                    \"--model\",\n                    model_name,\n                    \"--max-tokens\",\n                    \"1\",\n                    \"--trust-remote-code\",\n                ],\n                env=envs,\n                stdout=subprocess.DEVNULL,\n            )\n\n        with suppress_optional_gpu_dependency_warnings():\n            model, tokenizer = 
load(\n                model_name,\n                model_config={\"trust_remote_code\": True},\n                tokenizer_config={\n                    \"trust_remote_code\": True,\n                    \"chat_template\": chat_template,\n                },\n            )\n        tokenizer.add_eos_token(\"<|plamo:op|>\")\n\n        sampler = make_sampler(\n            temp=float(PLAMO_TRANSLATE_CLI_TEMP),\n            top_p=float(PLAMO_TRANSLATE_CLI_TOP_P),\n            top_k=int(PLAMO_TRANSLATE_CLI_TOP_K),\n        )\n\n        logits_processors = make_logits_processors(\n            repetition_penalty=(\n                float(PLAMO_TRANSLATE_CLI_REPETITION_PENALTY)\n                if PLAMO_TRANSLATE_CLI_REPETITION_PENALTY is not None\n                else None\n            ),\n            repetition_context_size=(\n                int(PLAMO_TRANSLATE_CLI_REPETITION_CONTEXT_SIZE)\n                if PLAMO_TRANSLATE_CLI_REPETITION_CONTEXT_SIZE is not None\n                else None\n            ),\n        )\n\n        return model, tokenizer, sampler, logits_processors\n\n    async def translate(self, request: TranslateRequest, stream: bool, context: Context) -> str:\n        \"\"\"Run the translation tool\"\"\"\n        logger.info(f\"Received translation request: {context.request_id}\")\n        try:\n            messages = construct_llm_input(request)\n            prompt = self.tokenizer.apply_chat_template(messages, add_generation_prompt=False)  # type:ignore[call-arg]\n\n            # Generate translation\n            translation = \"\"\n            segments_count = 0\n\n            for segment in stream_generate(\n                model=self.model,\n                tokenizer=self.tokenizer,\n                prompt=prompt,\n                sampler=self.sampler,\n                logits_processors=self.logits_processors,\n                max_tokens=int(PLAMO_MAX_TOKENS),\n            ):\n                translation += segment.text\n                
segments_count += 1\n\n                if stream:\n                    # Send progress notification with the new segment\n                    await context.report_progress(\n                        progress=segments_count,\n                        total=None,  # We don't know the total in advance\n                        message=segment.text,  # Send the segment as the message\n                    )\n\n                    # Small delay to ensure progress is sent\n                    await asyncio.sleep(0)\n\n            if not stream:\n                return translation\n            else:\n                return \"\"\n\n        except Exception as e:\n            logger.error(f\"Translation error: {str(e)}\")\n            raise e\n"
  },
  {
    "path": "src/plamo_translate/servers/mock/__init__.py",
    "content": "from .server import PLaMoTranslateServer\n\n__all__ = [\"PLaMoTranslateServer\"]\n"
  },
  {
    "path": "src/plamo_translate/servers/mock/server.py",
    "content": "import asyncio\nimport logging\nfrom typing import Dict\n\nfrom mcp.server.fastmcp import Context, FastMCP\n\nfrom plamo_translate.servers.utils import INSTRUCTION, TranslateRequest, find_free_port, update_config\n\nlogger = logging.getLogger(__name__)\n\nMOCK_TRANSLATIONS: Dict[str, str] = {\n    \"Proud, but humble\": \"誇り高いが、謙虚です。\",\n    \"Boldly do what no one has done before\": \"誰もしたことがないことを大胆にやりなさい。\",\n}\n\n\ndef _extract_input_text(request: TranslateRequest) -> str:\n    for message in reversed(request.messages):\n        if not message.content.startswith(\"input\"):\n            continue\n\n        _, _, input_text = message.content.partition(\"\\n\")\n        return input_text.strip().lstrip(\">\").strip()\n\n    return request.messages[-1].content.strip().lstrip(\">\").strip()\n\n\nclass PLaMoTranslateServer(FastMCP):\n    \"\"\"Lightweight MCP server used by the test suite.\"\"\"\n\n    def __init__(self, log_level: str, show_progress: bool = False) -> None:\n        super().__init__(\n            name=\"plamo-translate\",\n            instructions=INSTRUCTION,\n            log_level=log_level,\n            stateless_http=False,\n            host=\"127.0.0.1\",\n            port=find_free_port(),\n        )\n        update_config(model_name=\"mock\")\n        self.show_progress = show_progress\n        self.add_tool(\n            fn=self.translate,\n            name=\"plamo-translate\",\n            description=INSTRUCTION,\n        )\n\n    async def translate(self, request: TranslateRequest, stream: bool, context: Context) -> str:\n        input_text = _extract_input_text(request)\n        translation = next(\n            (candidate for source, candidate in MOCK_TRANSLATIONS.items() if source in input_text),\n            f\"[mock translation] {input_text}\",\n        )\n\n        if not stream:\n            return translation\n\n        for index, chunk in enumerate([translation], start=1):\n            await 
context.report_progress(progress=index, total=1, message=chunk)\n            await asyncio.sleep(0)\n\n        return \"\"\n"
  },
  {
    "path": "src/plamo_translate/servers/utils.py",
    "content": "import asyncio\nimport json\nimport logging\nimport os\nimport socket\nimport textwrap\nfrom contextlib import closing\nfrom tempfile import NamedTemporaryFile\nfrom pathlib import Path\nfrom typing import Any, Dict, List, Optional\n\nfrom mcp import ClientSession\nfrom mcp.client.streamable_http import streamablehttp_client\nfrom pydantic import BaseModel, Field\n\nlogger = logging.getLogger(__name__)\n\nSUPPORTED_LANGUAGES = [\n    \"Japanese\",\n    \"Japanese(easy)\",\n    \"English\",\n    \"Chinese\",\n    \"Taiwanese\",\n    \"Korean\",\n    \"Arabic\",\n    \"Italian\",\n    \"Indonesian\",\n    \"Dutch\",\n    \"Spanish\",\n    \"Thai\",\n    \"German\",\n    \"French\",\n    \"Vietnamese\",\n    \"Russian\",\n    \"English|Japanese\",\n]\n\nPLAMO_TRANSLATE_CLI_MODEL_NAME = os.environ.get(\"PLAMO_TRANSLATE_CLI_MODEL_NAME\", \"mlx-community/plamo-2-translate\")\nPLAMO_TRANSLATE_CLI_SERVER_START_PORT = int(os.environ.get(\"PLAMO_TRANSLATE_CLI_SERVER_START_PORT\", 30000))\nPLAMO_TRANSLATE_CLI_SERVER_END_PORT = int(os.environ.get(\"PLAMO_TRANSLATE_CLI_SERVER_END_PORT\", 30099))\nPLAMO_TRANSLATE_CLI_SERVER_LOG_LEVEL = os.environ.get(\"PLAMO_TRANSLATE_CLI_SERVER_LOG_LEVEL\", \"INFO\")\nPLAMO_TRANSLATE_CLI_TEMP = os.environ.get(\"PLAMO_TRANSLATE_CLI_TEMP\", \"0.0\")\nPLAMO_TRANSLATE_CLI_TOP_P = os.environ.get(\"PLAMO_TRANSLATE_CLI_TOP_P\", \"0.98\")\nPLAMO_TRANSLATE_CLI_TOP_K = os.environ.get(\"PLAMO_TRANSLATE_CLI_TOP_K\", \"0\")\nPLAMO_TRANSLATE_CLI_REPETITION_PENALTY = os.environ.get(\"PLAMO_TRANSLATE_CLI_REPETITION_PENALTY\", None)\nPLAMO_TRANSLATE_CLI_REPETITION_CONTEXT_SIZE = os.environ.get(\"PLAMO_TRANSLATE_CLI_REPETITION_CONTEXT_SIZE\", None)\nPLAMO_MAX_TOKENS = os.environ.get(\"PLAMO_MAX_TOKENS\", \"32768\")\nSUPPORTED_LANGUAGES_LIST_STR = \"\\n-\".join(SUPPORTED_LANGUAGES)\nINSTRUCTION = textwrap.dedent(\n    f\"\"\"Use the `plamo-translate` tool to translate text between multiple languages.\n    Supported languages include:\n\n    - 
{SUPPORTED_LANGUAGES_LIST_STR}\n\n    Use the tool by specifying the text and the source and target languages.\n    \"\"\"\n)\n\n\nasync def verify_mcp_server_ready(port: int) -> List[str]:\n    \"\"\"Verify if the MCP server is ready to accept connections.\"\"\"\n    try:\n        url = f\"http://127.0.0.1:{port}/mcp\"\n        async with streamablehttp_client(url) as (\n            read_stream,\n            write_stream,\n            get_session_id_callback,\n        ):\n            async with ClientSession(\n                read_stream=read_stream,\n                write_stream=write_stream,\n            ) as session:\n                await session.initialize()\n                tools = await session.list_tools()\n                return [tool.name for tool in tools.tools]\n    except Exception:\n        return []\n\n\ndef find_free_port(\n    start_port: int = PLAMO_TRANSLATE_CLI_SERVER_START_PORT,\n    end_port: int = PLAMO_TRANSLATE_CLI_SERVER_END_PORT,\n) -> int:\n    \"\"\"\n    Find a port in the range [start_port, end_port].\n    \"\"\"\n    config = update_config()\n\n    # Phase 1: Check for existing MCP server with 'plamo-translate' tool\n    if \"port\" in config:\n        port = config[\"port\"]\n\n        try:\n            tools = asyncio.run(verify_mcp_server_ready(port))\n        except Exception as e:\n            logger.info(f\"Failed to connect to MCP server on port {port}: {e}\")\n            tools = []\n\n        if \"plamo-translate\" in tools:\n            logger.info(f\"Found existing MCP server with 'plamo-translate' tool on port {port}.\")\n            return port\n\n        previous_port = port\n    else:\n        previous_port = None\n\n    # Phase 2: If no suitable MCP server found, find any free port in the range\n    for port in range(start_port, end_port + 1):\n        with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:\n            sock.settimeout(0.1)  # Short timeout for connection attempt\n            try:\n  
              result = sock.connect_ex((\"127.0.0.1\", port))\n                if result != 0:\n                    # If connection failed (errno != 0), port is likely free\n                    logger.info(f\"Found free port: {port}\")\n                    if previous_port is not None and previous_port != port:\n                        logger.info(f\"Updating MCP server port from {previous_port} to {port}.\")\n                    update_config(port=port)\n                    return port\n            except Exception:\n                # This can happen if e.g. sock.connect_ex itself has issues, or port is restricted\n                pass  # Try next port\n\n    raise RuntimeError(\n        \"Could not find a suitable MCP server with 'plamo-translate' tool \"\n        f\"or a free port in the range {start_port}-{end_port}.\"\n    )\n\n\ndef update_config(**kwargs) -> Dict[str, Any]:\n    tmp_dir = os.environ.get(\"TMPDIR\", None)\n    if tmp_dir is None:\n        raise ValueError(\"TMPDIR environment variable is not set. Please set it to a valid directory.\")\n    tmp_config_path = Path(tmp_dir) / \"plamo-translate-config.json\"\n\n    if not tmp_config_path.exists():\n        if not kwargs:\n            return {}\n        config = kwargs\n        _write_config(tmp_config_path, config, indent=4)\n        logger.info(\n            f\"Created new temporary config file at {tmp_config_path} with initial values: \"\n            f\"{json.dumps(config, indent=4, ensure_ascii=False)}\"\n        )\n        return config\n\n    with tmp_config_path.open(\"r\") as f:\n        try:\n            config = json.load(f)\n        except json.JSONDecodeError:\n            logger.warning(f\"Config file {tmp_config_path} is corrupted. 
Recreating it.\")\n            config = {}\n\n    if not kwargs:\n        return config\n\n    for key, value in kwargs.items():\n        config[key] = value\n\n    _write_config(tmp_config_path, config)\n\n    return config\n\n\ndef _write_config(path: Path, config: Dict[str, Any], *, indent: int | None = None) -> None:\n    path.parent.mkdir(parents=True, exist_ok=True)\n    with NamedTemporaryFile(\"w\", dir=path.parent, delete=False, encoding=\"utf-8\") as tmp_file:\n        json.dump(config, tmp_file, indent=indent)\n        tmp_file.flush()\n        os.fsync(tmp_file.fileno())\n        tmp_path = Path(tmp_file.name)\n    tmp_path.replace(path)\n\n\nclass Message(BaseModel):\n    \"\"\"Model for messages in translation request\"\"\"\n\n    role: str = Field(..., description=\"Role of the message sender (e.g., 'user', 'assistant')\")\n    content: str = Field(..., description=\"Content of the message\")\n\n\nclass TranslateRequest(BaseModel):\n    \"\"\"Request model for translation\"\"\"\n\n    messages: List[Message] = Field(..., description=\"List of messages for translation\")\n    source_language: Optional[str] = Field(\n        \"\",\n        description=(\n            \"Source language that is one of the followings: \"\n            f\"{', '.join(SUPPORTED_LANGUAGES)}. \"\n            \"Note that 'English|Japanese' is used to detect the input language automatically.\"\n        ),\n    )\n    target_language: Optional[str] = Field(\n        \"\",\n        description=(\n            \"Target language that is one of the followings: \"\n            f\"{', '.join(SUPPORTED_LANGUAGES)}. 
\"\n            \"This can be empty when the source language is 'English|Japanese'.\"\n        ),\n    )\n\n\ndef construct_llm_input(request: TranslateRequest) -> List[Message]:\n    \"\"\"Construct the input for the LLM from messages and languages\"\"\"\n\n    # If it has already been constructed messages with lang=* part, return it as is\n    if request.source_language == \"\" and request.target_language == \"\":\n        return request.messages\n\n    if request.source_language != \"\":\n        source_text = request.messages[-1].content.strip()\n        request.messages[-1].content = f\"input lang={request.source_language}\\n\" + source_text\n    if request.target_language != \"\":\n        request.messages.append(Message(role=\"user\", content=f\"output lang={request.target_language}\\n\"))\n    else:\n        request.messages.append(Message(role=\"user\", content=\"output\\n\"))\n\n    return request.messages\n"
  },
  {
    "path": "src/plamo_translate/servers/warnings.py",
    "content": "import contextlib\nimport re\nimport warnings\nfrom collections.abc import Iterator\n\nOPTIONAL_GPU_DEPENDENCY_WARNING_MESSAGES = (\n    \"mamba_ssm could not be imported\",\n    \"causal_conv1d could not be imported\",\n)\n\n\n@contextlib.contextmanager\ndef suppress_optional_gpu_dependency_warnings() -> Iterator[None]:\n    \"\"\"Hide known optional dependency warnings emitted by remote model code.\"\"\"\n    with warnings.catch_warnings():\n        for message in OPTIONAL_GPU_DEPENDENCY_WARNING_MESSAGES:\n            warnings.filterwarnings(\n                action=\"ignore\",\n                message=rf\"^{re.escape(message)}$\",\n                category=UserWarning,\n            )\n        yield\n\n\ndef build_optional_gpu_dependency_warning_options() -> list[str]:\n    \"\"\"Build `python -W` options that suppress known optional dependency warnings.\"\"\"\n    options: list[str] = []\n    for message in OPTIONAL_GPU_DEPENDENCY_WARNING_MESSAGES:\n        options.extend([\"-W\", f\"ignore:{message}:UserWarning\"])\n    return options\n"
  },
  {
    "path": "tests/test_cli.py",
    "content": "import http.server\nimport multiprocessing\nimport os\nimport socket\nimport socketserver\nimport subprocess\nimport time\n\nimport pytest\n\nfrom plamo_translate.main import check_server_running\nfrom plamo_translate.servers.utils import PLAMO_TRANSLATE_CLI_SERVER_START_PORT, update_config\n\nCLI_TIMEOUT_SECONDS = int(os.environ.get(\"PLAMO_TRANSLATE_CLI_TEST_TIMEOUT_SECONDS\", \"20\"))\nSERVER_STARTUP_TIMEOUT_SECONDS = int(os.environ.get(\"PLAMO_TRANSLATE_CLI_TEST_SERVER_STARTUP_TIMEOUT_SECONDS\", \"10\"))\n\n\n@pytest.fixture(autouse=True)\ndef isolated_test_environment(monkeypatch, tmp_path):\n    monkeypatch.setenv(\n        \"PLAMO_TRANSLATE_CLI_USE_MOCK_SERVER\",\n        os.environ.get(\"PLAMO_TRANSLATE_CLI_USE_MOCK_SERVER\", \"1\"),\n    )\n    monkeypatch.setenv(\"TMPDIR\", str(tmp_path))\n\n\ndef wait_for_server_ready(timeout: int = SERVER_STARTUP_TIMEOUT_SECONDS) -> None:\n    deadline = time.monotonic() + timeout\n    while time.monotonic() < deadline:\n        if check_server_running():\n            return\n        time.sleep(0.1)\n\n    raise AssertionError(\"Timed out waiting for the MCP server to become ready.\")\n\n\ndef wait_for_port_in_use(port: int, timeout: int = SERVER_STARTUP_TIMEOUT_SECONDS) -> None:\n    deadline = time.monotonic() + timeout\n    while time.monotonic() < deadline:\n        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:\n            if sock.connect_ex((\"127.0.0.1\", port)) == 0:\n                return\n        time.sleep(0.1)\n\n    raise AssertionError(f\"Timed out waiting for port {port} to start accepting connections.\")\n\n\ndef test_update_config_without_kwargs_is_read_only(tmp_path):\n    config_path = tmp_path / \"plamo-translate-config.json\"\n\n    assert update_config() == {}\n    assert not config_path.exists(), \"Read-only access should not create the config file\"\n\n    initial_config = {\"port\": PLAMO_TRANSLATE_CLI_SERVER_START_PORT}\n    update_config(**initial_config)\n   
 initial_contents = config_path.read_text()\n\n    assert update_config() == initial_config\n    assert config_path.read_text() == initial_contents, \"Read-only access should not rewrite the config file\"\n\n\ndef stop_subprocess(process: subprocess.Popen[str] | None) -> None:\n    if process is None:\n        return\n\n    process.terminate()\n    try:\n        process.wait(timeout=5)\n    except subprocess.TimeoutExpired:\n        process.kill()\n        process.wait(timeout=5)\n\n\ndef stop_multiprocess(process: multiprocessing.Process | None) -> None:\n    if process is None:\n        return\n\n    process.terminate()\n    process.join(timeout=5)\n    if process.is_alive():\n        process.kill()\n        process.join(timeout=5)\n\n\ndef test_plamo_translate_without_server():\n    text_to_translate = \"Proud, but humble\"\n    command = [\"plamo-translate\", \"--from\", \"English\", \"--to\", \"Japanese\", \"--input\", text_to_translate]\n    result = subprocess.run(command, capture_output=True, text=True, timeout=CLI_TIMEOUT_SECONDS)\n    assert result.returncode == 0\n    assert \"誇り高\" in result.stdout and \"謙虚\" in result.stdout\n\n\ndef test_plamo_translate_server_simple_use():\n    first_process = None\n    try:\n        command = [\"plamo-translate\", \"server\"]\n        first_process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)\n        wait_for_server_ready()\n\n        config = update_config()\n        print(f\"Server started with config: {config}\")\n        assert \"port\" in config, \"Server configuration should include a port\"\n        port = config[\"port\"]\n        assert port == PLAMO_TRANSLATE_CLI_SERVER_START_PORT, f\"Expected server port to be {PLAMO_TRANSLATE_CLI_SERVER_START_PORT}, got {port}\"\n\n        text_to_translate = \"Proud, but humble\"\n        result = subprocess.run(\n            [\"plamo-translate\", \"--input\", text_to_translate, \"--from\", \"English\", \"--to\", \"Japanese\"],\n            
capture_output=True,\n            text=True,\n            timeout=CLI_TIMEOUT_SECONDS,\n        )\n        assert \"誇り高い\" in result.stdout and \"謙虚\" in result.stdout\n\n        result = subprocess.run(\n            [\"plamo-translate\", \"--from\", \"English\", \"--to\", \"Japanese\"],\n            input=text_to_translate,\n            capture_output=True,\n            text=True,\n            timeout=CLI_TIMEOUT_SECONDS,\n        )\n        assert \"誇り高い\" in result.stdout and \"謙虚\" in result.stdout\n    finally:\n        stop_subprocess(first_process)\n\n\ndef test_plamo_translate_server_already_running():\n    first_process = None\n    second_process = None\n    try:\n        command = [\"plamo-translate\", \"server\"]\n        first_process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)\n        print(\"Starting first plamo-translate server process...\")\n        wait_for_server_ready()\n        print(\"First server process started successfully.\")\n\n        # If the server is already running, the further call of `plamo-translate server` should not start a new server.\n        second_process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)\n        stdout, _ = second_process.communicate(timeout=CLI_TIMEOUT_SECONDS)\n        print(stdout.strip())\n        assert \"MCP server is already running\" in stdout\n        config = update_config()\n        print(f\"Server started with config: {config}\")\n        assert \"port\" in config, \"Server configuration should include a port\"\n        port = config[\"port\"]\n        assert port == PLAMO_TRANSLATE_CLI_SERVER_START_PORT, f\"Expected server port to be {PLAMO_TRANSLATE_CLI_SERVER_START_PORT}, got {port}\"\n    finally:\n        stop_subprocess(first_process)\n        stop_subprocess(second_process)\n\n\ndef start_http_server():\n    port = PLAMO_TRANSLATE_CLI_SERVER_START_PORT\n    handler = http.server.SimpleHTTPRequestHandler\n    with 
socketserver.TCPServer((\"127.0.0.1\", port), handler) as httpd:\n        httpd.serve_forever()\n\n\ndef test_plamo_translate_server_find_new_port():\n    http_server_process = None\n    mcp_server_process = None\n    try:\n        http_server_process = multiprocessing.Process(target=start_http_server, daemon=True)\n        http_server_process.start()\n        print(f\"HTTP server started on port {PLAMO_TRANSLATE_CLI_SERVER_START_PORT}\")\n        wait_for_port_in_use(PLAMO_TRANSLATE_CLI_SERVER_START_PORT)\n\n        # The default port is used by the HTTP server, so the MCP server should use a different port\n        command = [\"plamo-translate\", \"server\"]\n        mcp_server_process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)\n        print(\"Starting plamo-translate server...\")\n        wait_for_server_ready()\n        stop_subprocess(mcp_server_process)\n        mcp_server_process = None\n\n        config = update_config()\n        print(f\"Server started with config: {config}\")\n        assert \"port\" in config, \"Server configuration should include a port\"\n        port = config[\"port\"]\n        assert port == PLAMO_TRANSLATE_CLI_SERVER_START_PORT + 1, (\n            f\"Expected server port to be {PLAMO_TRANSLATE_CLI_SERVER_START_PORT + 1}, got {port}\"\n        )\n    finally:\n        stop_multiprocess(http_server_process)\n        stop_subprocess(mcp_server_process)\n\n\ndef test_plamo_translate_server_interactive():\n    mcp_server_process = None\n    client_process = None\n    try:\n        command = [\"plamo-translate\", \"server\"]\n        mcp_server_process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)\n        wait_for_server_ready()\n        config = update_config()\n        print(f\"Server started with config: {config}\")\n        assert \"port\" in config, \"Server configuration should include a port\"\n        port = config[\"port\"]\n        assert port 
== PLAMO_TRANSLATE_CLI_SERVER_START_PORT, (\n            f\"Expected server port to be {PLAMO_TRANSLATE_CLI_SERVER_START_PORT}, got {port}\"\n        )\n\n        client_command = [\"plamo-translate\", \"-i\", \"--from\", \"English\", \"--to\", \"Japanese\"]\n        client_process = subprocess.Popen(\n            client_command,\n            stdin=subprocess.PIPE,\n            stdout=subprocess.PIPE,\n            stderr=subprocess.PIPE,\n            text=True,\n        )\n\n        all_inputs = \"\\n\".join([\"Proud, but humble\", \"Boldly do what no one has done before\"]) + \"\\n\"\n\n        stdout, stderr = client_process.communicate(input=all_inputs, timeout=CLI_TIMEOUT_SECONDS)\n        assert \"誇り高\" in stdout and \"謙虚\" in stdout\n        assert \"大胆に\" in stdout\n    finally:\n        stop_subprocess(mcp_server_process)\n        stop_subprocess(client_process)\n"
  },
  {
    "path": "tests/test_cli_integration.py",
    "content": "import os\nimport subprocess\nimport time\n\nimport pytest\n\nfrom plamo_translate.main import check_server_running\nfrom plamo_translate.servers.utils import PLAMO_TRANSLATE_CLI_SERVER_START_PORT, update_config\n\nCLI_TIMEOUT_SECONDS = int(os.environ.get(\"PLAMO_TRANSLATE_CLI_TEST_TIMEOUT_SECONDS\", \"900\"))\nSERVER_STARTUP_TIMEOUT_SECONDS = int(os.environ.get(\"PLAMO_TRANSLATE_CLI_TEST_SERVER_STARTUP_TIMEOUT_SECONDS\", \"900\"))\n\n\n@pytest.fixture(autouse=True)\ndef integration_test_environment(monkeypatch, tmp_path):\n    monkeypatch.setenv(\n        \"PLAMO_TRANSLATE_CLI_USE_MOCK_SERVER\",\n        os.environ.get(\"PLAMO_TRANSLATE_CLI_USE_MOCK_SERVER\", \"0\"),\n    )\n    monkeypatch.setenv(\"TMPDIR\", str(tmp_path))\n\n\ndef wait_for_server_ready(timeout: int = SERVER_STARTUP_TIMEOUT_SECONDS) -> None:\n    deadline = time.monotonic() + timeout\n    while time.monotonic() < deadline:\n        if check_server_running():\n            return\n        time.sleep(0.5)\n\n    raise AssertionError(\"Timed out waiting for the MCP server to become ready.\")\n\n\ndef stop_subprocess(process: subprocess.Popen[str] | None) -> None:\n    if process is None:\n        return\n\n    process.terminate()\n    try:\n        process.wait(timeout=5)\n    except subprocess.TimeoutExpired:\n        process.kill()\n        process.wait(timeout=5)\n\n\ndef test_plamo_translate_server_roundtrip_with_real_model():\n    server_process = None\n    try:\n        server_process = subprocess.Popen(\n            [\"plamo-translate\", \"server\"],\n            stdout=subprocess.PIPE,\n            stderr=subprocess.PIPE,\n            text=True,\n        )\n        wait_for_server_ready()\n\n        config = update_config()\n        assert config.get(\"port\") == PLAMO_TRANSLATE_CLI_SERVER_START_PORT\n\n        text_to_translate = \"Proud, but humble\"\n        result = subprocess.run(\n            [\"plamo-translate\", \"--input\", text_to_translate, \"--from\", \"English\", 
\"--to\", \"Japanese\"],\n            capture_output=True,\n            text=True,\n            timeout=CLI_TIMEOUT_SECONDS,\n        )\n        assert result.returncode == 0\n        assert \"誇り高\" in result.stdout and \"謙虚\" in result.stdout\n\n        result = subprocess.run(\n            [\"plamo-translate\", \"--from\", \"English\", \"--to\", \"Japanese\"],\n            input=text_to_translate,\n            capture_output=True,\n            text=True,\n            timeout=CLI_TIMEOUT_SECONDS,\n        )\n        assert result.returncode == 0\n        assert \"誇り高\" in result.stdout and \"謙虚\" in result.stdout\n    finally:\n        stop_subprocess(server_process)\n"
  },
  {
    "path": "tests/test_warning_filters.py",
    "content": "import warnings\n\nfrom plamo_translate.servers.warnings import (\n    OPTIONAL_GPU_DEPENDENCY_WARNING_MESSAGES,\n    build_optional_gpu_dependency_warning_options,\n    suppress_optional_gpu_dependency_warnings,\n)\n\n\ndef test_build_optional_gpu_dependency_warning_options():\n    assert build_optional_gpu_dependency_warning_options() == [\n        \"-W\",\n        \"ignore:mamba_ssm could not be imported:UserWarning\",\n        \"-W\",\n        \"ignore:causal_conv1d could not be imported:UserWarning\",\n    ]\n\n\ndef test_suppress_optional_gpu_dependency_warnings_only_hides_known_messages():\n    with warnings.catch_warnings(record=True) as captured:\n        warnings.simplefilter(\"always\")\n        with suppress_optional_gpu_dependency_warnings():\n            for message in OPTIONAL_GPU_DEPENDENCY_WARNING_MESSAGES:\n                warnings.warn(message, UserWarning, stacklevel=1)\n            warnings.warn(\"unexpected warning\", UserWarning, stacklevel=1)\n\n    assert [str(item.message) for item in captured] == [\"unexpected warning\"]\n"
  }
]