Repository: pfnet/plamo-translate-cli Branch: main Commit: 840de559ee22 Files: 22 Total size: 73.5 KB Directory structure: gitextract_mzvrmcnd/ ├── .github/ │ └── workflows/ │ └── ci.yml ├── .gitignore ├── LICENSE ├── README.md ├── pyproject.toml ├── scripts/ │ ├── convert_dataset.py │ └── deploy.sh ├── src/ │ └── plamo_translate/ │ ├── __init__.py │ ├── assets/ │ │ └── chat_template.jinja2 │ ├── clients/ │ │ ├── __init__.py │ │ └── translate.py │ ├── main.py │ └── servers/ │ ├── __init__.py │ ├── mlx/ │ │ ├── __init__.py │ │ └── server.py │ ├── mock/ │ │ ├── __init__.py │ │ └── server.py │ ├── utils.py │ └── warnings.py └── tests/ ├── test_cli.py ├── test_cli_integration.py └── test_warning_filters.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/workflows/ci.yml ================================================ name: CI on: push: pull_request: jobs: mock-test: runs-on: ubuntu-latest timeout-minutes: 15 strategy: matrix: python-version: ['3.10', '3.11', '3.12', '3.13', '3.14'] env: PLAMO_TRANSLATE_CLI_USE_MOCK_SERVER: '1' PLAMO_TRANSLATE_CLI_TEST_TIMEOUT_SECONDS: '20' PLAMO_TRANSLATE_CLI_TEST_SERVER_STARTUP_TIMEOUT_SECONDS: '10' UV_CACHE_DIR: .cache/uv steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Cache uv downloads uses: actions/cache@v4 with: path: ${{ env.UV_CACHE_DIR }} key: ${{ runner.os }}-uv-py${{ matrix.python-version }}-${{ hashFiles('uv.lock') }} restore-keys: | ${{ runner.os }}-uv-py${{ matrix.python-version }}- ${{ runner.os }}-uv- - name: Install uv run: curl -LsSf https://astral.sh/uv/install.sh | sh - name: Add uv to PATH run: echo "$HOME/.cargo/bin" >> $GITHUB_PATH - name: Sync dependencies run: uv sync - name: Run tests run: uv run pytest -s tests/test_cli.py mlx-integration-test: if: 
github.event_name == 'push' && github.ref == 'refs/heads/main' runs-on: macos-latest timeout-minutes: 45 env: HF_HOME: .cache/hf HUGGINGFACE_HUB_CACHE: .cache/hf/hub PLAMO_TRANSLATE_CLI_MODEL_NAME: mlx-community/plamo-2-translate PLAMO_TRANSLATE_CLI_MODEL_CACHE_KEY: plamo-2-translate PLAMO_TRANSLATE_CLI_USE_MOCK_SERVER: '0' PLAMO_TRANSLATE_CLI_TEST_TIMEOUT_SECONDS: '900' PLAMO_TRANSLATE_CLI_TEST_SERVER_STARTUP_TIMEOUT_SECONDS: '900' UV_CACHE_DIR: .cache/uv steps: - uses: actions/checkout@v4 - name: Set up Python 3.13 uses: actions/setup-python@v5 with: python-version: '3.13' - name: Cache uv downloads uses: actions/cache@v4 with: path: ${{ env.UV_CACHE_DIR }} key: ${{ runner.os }}-uv-py3.13-${{ hashFiles('uv.lock') }} restore-keys: | ${{ runner.os }}-uv-py3.13- ${{ runner.os }}-uv- - name: Cache Hugging Face models uses: actions/cache@v4 with: path: ${{ env.HUGGINGFACE_HUB_CACHE }} key: ${{ runner.os }}-hf-${{ env.PLAMO_TRANSLATE_CLI_MODEL_CACHE_KEY }}-v1 restore-keys: | ${{ runner.os }}-hf-${{ env.PLAMO_TRANSLATE_CLI_MODEL_CACHE_KEY }}- ${{ runner.os }}-hf- - name: Install uv run: curl -LsSf https://astral.sh/uv/install.sh | sh - name: Add uv to PATH run: echo "$HOME/.cargo/bin" >> $GITHUB_PATH - name: Sync dependencies run: uv sync - name: Run integration tests run: uv run pytest -s tests/test_cli_integration.py ================================================ FILE: .gitignore ================================================ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. 
*.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover *.py,cover .hypothesis/ .pytest_cache/ cover/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 db.sqlite3-journal # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder .pybuilder/ target/ # Jupyter Notebook .ipynb_checkpoints # IPython profile_default/ ipython_config.py # pyenv # For a library or package, you might want to ignore these files since the code is # intended to run in multiple environments; otherwise, check them in: # .python-version # pipenv # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. # However, in case of collaboration, if having platform-specific dependencies or dependencies # having no cross-platform support, pipenv may install dependencies that don't work, or not # install all needed dependencies. #Pipfile.lock # UV # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. # This is especially recommended for binary packages to ensure reproducibility, and is more # commonly ignored for libraries. #uv.lock # poetry # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. # This is especially recommended for binary packages to ensure reproducibility, and is more # commonly ignored for libraries. # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control #poetry.lock # pdm # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. #pdm.lock # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it # in version control. 
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control .pdm.toml .pdm-python .pdm-build/ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm __pypackages__/ # Celery stuff celerybeat-schedule celerybeat.pid # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Pyre type checker .pyre/ # pytype static type analyzer .pytype/ # Cython debug symbols cython_debug/ # PyCharm # JetBrains specific template is maintained in a separate JetBrains.gitignore that can # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ # Ruff stuff: .ruff_cache/ # PyPI configuration file .pypirc ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. 
For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. 
For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. 
You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: README.md ================================================ # plamo-translate-cli A command-line interface for translation using the plamo-2-translate model with local execution. ## Features - Translate text between 16+ languages including Japanese, English, Chinese, Korean, and more - Simple command-line interface for easy integration into scripts and workflows - Supports various server backends (MLX, with planned support for Ollama and vLLM) - Currently, optimized for macOS with Apple Silicon using MLX framework ## Installation ### For macOS `plamo-translate` currently installs on Python 3.10 through 3.14 on macOS. No additional workaround is required for `sentencepiece` on Python 3.13 or 3.14 with current upstream releases. ```sh pip install plamo-translate ``` #### [`uv tool`](https://docs.astral.sh/uv/concepts/tools/) If you use [`uv`](https://github.com/astral-sh/uv) as a package manager rather than `pip`, you can install `plamo-translate` into an isolated environment: ```sh uv tool install -p 3.14 plamo-translate ``` ## Development ```sh uv sync source .venv/bin/activate ``` ## Requirements - Python 3.10 through 3.14 - Common dependencies: - mcp[cli] - numba - On macOS: - mlx-lm ## Usage ### Basic usage You can specify the input and output language by giving `--from` and `--to` options. 
If you don't specify them, the input/output language will be automatically selected from English or Japanese. #### Interactive mode ```sh $ plamo-translate Loading models...done! Interactive mode enabled. Type your input below (Ctrl+D to exit). > こんにちは、お元気ですか? Hello, how are you? > 「お腹減った〜何食べたい?」「私はうなぎ!」 "I'm hungry! What do you want to eat?" "I want eel!" > You translate ambiguous expression in Japanese into English very well. あなたは日本語の曖昧な表現を英語に翻訳するのがとても上手です。 ``` #### Pipe mode ```sh $ cat file.txt | plamo-translate The virtual worlds of the internet have experienced remarkable technological advancement. Meanwhile, the real world still contains numerous areas where technology has yet to make significant inroads, with many inefficient manual tasks and dangerous work still requiring human intervention. This situation stems from the fact that conventional technology has struggled to adapt to the dynamic changes and diverse conditions of the real world. PFN's core strengths lie in machine learning and deep learning technologies, which demonstrate exceptional flexibility in handling uncertainty and have the potential to create significant impact in the real world. For example, by applying deep learning technologies to robots that excel at repetitive tasks, we can enable them to make more human-like flexible judgments and perform complex tasks. To create meaningful impact in the real world, it's essential to push the boundaries of cutting-edge technology and research application domains where technological innovation can create tangible change. For these purposes, PFN assembles a team of exceptionally talented professionals with diverse expertise. ``` #### Server mode First, launch the server: ```sh $ plamo-translate server ``` Then, use the client mode: ```sh $ plamo-translate --input '家計は火の車だ' Our household is in financial trouble. ``` You can also use the interactive mode with the server: ```sh $ plamo-translate Loading models...done! Interactive mode enabled. 
Type your input below (Ctrl+D to exit). > 家計は火の車だ Our household is in financial trouble. ``` It can skip the loading time of the model, so it is useful when you want to use this tool frequently. ### Using from MCP Client The `plamo-translate server` command starts an MCP (Model Context Protocol) server. This allows `plamo-translate` to be used as a tool in other applications that support MCP, such as Claude Desktop. Here, we introduce how to use `plamo-translate` with Claude Desktop, which is a popular MCP client. 1. Start the `plamo-translate` server: ```sh plamo-translate server ``` 2. In a new terminal, run the following command to display the MCP configuration for Claude Desktop: ```sh plamo-translate show-claude-config ``` and you will see the configuration in JSON format as follows: ```json { "mcpServers": { "plamo-translate": { "command": "/Users/shunta/.linuxbrew/bin/npx", "args": [ "-y", "mcp-remote", "http://localhost:8000/mcp", "--allow-http", "--transport", "http-only" ], "env": { "PATH": "[THE SAME STRING AS YOUR CURRENT PATH ENVIRONMENT VARIABLE]", } } } } ``` 3. Copy the outputted configuration. 4. Paste this configuration into your Claude Desktop's MCP configuration file (on macOS, this is typically located at `~/Library/Application Support/Claude/claude_desktop_config.json`). Once configured, you can use `plamo-translate` directly from Claude Desktop. #### Select precision of the model weight You can specify the precision of the model weight by giving a `--precision` option. 
```sh $ plamo-translate server --precision 8bit ``` ## Supported Languages - Japanese - Japanese(easy) - English ### Experimentally Supported Languages - Chinese - Taiwanese - Korean - Arabic - Italian - Indonesian - Dutch - Spanish - Thai - German - French - Vietnamese - Russian ## Server Backends - mlx: Optimized for macOS with Apple Silicon (default on macOS) ## Options - --input TEXT Input text to translate - --from TEXT Input language for translation (default: English) - --to TEXT Output language for translation (default: Japanese) - --precision Model weight precision to use. You can select from: [4bit, 8bit, bf16] (default: 4bit) ## Configuration You can configure the following parameters using environment variables: - `PLAMO_TRANSLATE_CLI_SERVER_START_PORT`: Specifies the starting port number for the server. - `PLAMO_TRANSLATE_CLI_SERVER_END_PORT`: Specifies the ending port number for the server. - `PLAMO_TRANSLATE_CLI_TEMP`: Sets the temperature for text generation. - `PLAMO_TRANSLATE_CLI_TOP_P`: Sets the top-p (nucleus) sampling probability. - `PLAMO_TRANSLATE_CLI_TOP_K`: Sets the top-k sampling number. - `PLAMO_TRANSLATE_CLI_REPETITION_PENALTY`: Sets the repetition penalty. - `PLAMO_TRANSLATE_CLI_REPETITION_CONTEXT_SIZE`: Sets the context size for repetition penalty. ## Deploy ```sh bash scripts/deploy.sh ``` ================================================ FILE: pyproject.toml ================================================ [project] name = "plamo-translate" version = "1.0.5" description = "A command-line interface for translation using the plamo-2-translate model with local execution." 
readme = "README.md" authors = [ { name = "Shunta Saito", email = "shunta@preferred.jp" }, ] requires-python = ">=3.10" keywords = [ "machine translation", "transformer", "nlp", "natural language processing", "deep learning", "mlx", "mlx-lm", "sentencepiece", "plamo", "plamo-translate", "plamo-translate-cli", ] license = "Apache-2.0" classifiers = [ "Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", "License :: OSI Approved :: Apache Software License", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", "Programming Language :: Python :: 3.14", "Operating System :: MacOS", "Topic :: Utilities", "Environment :: MacOS X", "Environment :: Console", ] dependencies = [ "mcp[cli]>=1.9.2", "numba>=0.60.0", "mlx-lm>=0.26.3 ; sys_platform == 'darwin'", ] [tool.uv] override-dependencies = ["transformers<5"] [build-system] requires = ["hatchling"] build-backend = "hatchling.build" [tool.hatch.build] exclude = ["tests", "docs", "examples", "scripts", "notebooks", "data", "tmp"] [tool.hatch.build.targets.wheel] compression = "stored" [tool.hatch.build.targets.sdist] compress-level = 0 [project.scripts] plamo-translate = "plamo_translate.main:main" [tool.ruff] line-length = 120 [dependency-groups] dev = [ "build>=1.2.2.post1", "ipdb>=0.13.13", "pytest>=8.4.0", "ruff>=0.11.12", "twine>=6.1.0", "wheel>=0.45.1", ] ================================================ FILE: scripts/convert_dataset.py ================================================ import argparse import json from pathlib import Path from datasets import Dataset from jinja2 import Template from mlx_lm.tokenizer_utils import load_tokenizer from mlx_lm.tuner import datasets if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--tokenizer-path", type=str) parser.add_argument("--dataset-jsonl-path", type=str) 
parser.add_argument("--chat-template-path", type=str) parser.add_argument("--pack-length", type=int, default=640) args = parser.parse_args() tokenizer_path = args.tokenizer_path dataset_jsonl_path = args.dataset_jsonl_path chat_template_path = args.chat_template_path tokenizer = load_tokenizer(Path(tokenizer_path)) with open(chat_template_path, "r") as f: chat_template = Template(f.read()) with open(dataset_jsonl_path, "r") as f: lines = [json.loads(line) for line in f.readlines()] dataset = [] prompts = [] n_toks = [] current_n_toks = 0 for line in lines: for input_text, output_text in zip(line["input"]["content"], line["output"]["content"]): try: if "\n" in input_text: input_text_str = input_text.split("\n")[1].strip() else: input_text_str = input_text.strip() except Exception: print(input_text) import ipdb ipdb.set_trace() try: if "\n" in output_text: output_text_str = output_text.split("\n")[1].strip() else: output_text_str = output_text.strip() except Exception: print(output_text) import ipdb ipdb.set_trace() # fmt: skip prompt = chat_template.render( messages=[ {"role": "user", "content": f"input lang={line['input']['lang']}\n{input_text_str}"}, {"role": "user", "content": f"output lang={line['output']['lang']}\n{output_text_str}"}, ] ) n_tok = len(tokenizer.encode(prompt.strip())) n_toks.append(n_tok) if current_n_toks + n_tok + 1 > args.pack_length: text = "<|plamo:bos|>".join(prompts) + "<|plamo:bos|>" n_pad = args.pack_length - len(tokenizer.encode(text)) if n_pad > 0: text += "<|plamo:pad|>" * n_pad dataset.append({"text": text}) prompts = [prompt.strip()] current_n_toks = n_tok else: prompts.append(prompt.strip()) current_n_toks += n_tok print(f"Max tokens in a batch: {max(n_toks)}") dataset = Dataset.from_list(dataset) dataset.save_to_disk("tmp/calibration_dataset") ================================================ FILE: scripts/deploy.sh ================================================ rm -rf dist ARCHFLAGS="-arch arm64" MACOSX_DEPLOYMENT_TARGET="11.0" 
\ uv build --wheel WHEEL_FILENAME=$(ls dist/plamo_translate-*.whl) uv run -m wheel tags \ --python-tag py3 \ --abi-tag none \ --platform-tag macosx_11_0_arm64 \ ${WHEEL_FILENAME} rm -rf ${WHEEL_FILENAME} uv run twine upload dist/* ================================================ FILE: src/plamo_translate/__init__.py ================================================ """PLaMo Translate CLI package.""" __version__ = "1.0.5" ================================================ FILE: src/plamo_translate/assets/chat_template.jinja2 ================================================ {{- "<|plamo:op|>dataset\ntranslation\n" -}} {% for message in messages %} {{- '<|plamo:op|>' + message['content']}} {%- if not loop.last %} {{- '\n'}} {%- endif %} {% endfor %} ================================================ FILE: src/plamo_translate/clients/__init__.py ================================================ ================================================ FILE: src/plamo_translate/clients/translate.py ================================================ import asyncio import logging from typing import AsyncGenerator, Dict, List from urllib.parse import urlunparse import mcp.types as types from mcp.client.session import ClientSession from mcp.client.streamable_http import streamablehttp_client from mcp.shared.session import RequestResponder from mcp.types import TextContent from plamo_translate.servers.utils import Message, TranslateRequest, update_config logger = logging.getLogger(__name__) async def message_handler( message: RequestResponder[types.ServerRequest, types.ClientResult] | types.ServerNotification | Exception, ) -> None: if isinstance(message, Exception): logger.error("Error: %s", message) return class MCPClient: def __init__(self, stream: bool) -> None: """Initialize the MCP client. Args: stream (bool): Whether to stream the translation results. 
""" self.stream = stream self.config = update_config() port = self.config.get("port", None) if port is None: raise ValueError("Port is not set in the configuration. Please start the MCP server first.") self.url = urlunparse(("http", f"127.0.0.1:{port}", "mcp", "", "", "")) async def translate(self, messages: List[Dict[str, str]]) -> AsyncGenerator[str, None]: """Translate messages. If stream=True, yields chunks as they arrive.""" async with streamablehttp_client(self.url) as ( read_stream, write_stream, get_session_id_callback, ): async with ClientSession( read_stream=read_stream, write_stream=write_stream, message_handler=message_handler, ) as session: await session.initialize() messages_obj = [Message(**message) for message in messages] request = TranslateRequest(messages=messages_obj, source_language="", target_language="") if self.stream: # For streaming, we'll need to handle the response differently # This will yield chunks as they arrive async for chunk in self._translate_stream(session, request): yield chunk else: # The messages should already have source and target languages, so omit to specify them again response = await session.call_tool( "plamo-translate", arguments={ "request": request, "stream": False, }, ) # Extract text from response content if response.content and len(response.content) > 0: content = response.content[0] if isinstance(content, TextContent): yield content.text else: raise ValueError(f"Unexpected content type: {type(content)}") else: raise ValueError("Empty response from translation tool") async def _translate_stream(self, session: ClientSession, request: TranslateRequest): """Handle streaming translation responses.""" # Use a queue to pass messages from progress_handler to the generator message_queue: asyncio.Queue[str] = asyncio.Queue() call_complete = asyncio.Event() async def progress_handler(progress: float, total: float | None, message: str | None) -> None: """Handle progress updates which might contain partial translations.""" 
if message: await message_queue.put(message) async def call_tool_wrapper(): """Wrapper to call the tool and signal completion""" try: response = await session.call_tool( "plamo-translate", arguments={ "request": request, "stream": True, }, progress_callback=progress_handler, ) # Put the final response in the queue if needed if response.content and len(response.content) > 0: content = response.content[0] if isinstance(content, TextContent): await message_queue.put(content.text) finally: call_complete.set() # Start the tool call in the background asyncio.create_task(call_tool_wrapper()) # Yield messages as they arrive chunks = [] while not call_complete.is_set() or not message_queue.empty(): try: message = await asyncio.wait_for(message_queue.get(), timeout=0.1) chunks.append(message) yield message except asyncio.TimeoutError: # No message available, continue waiting continue ================================================ FILE: src/plamo_translate/main.py ================================================ #!/usr/bin/env python3 import argparse import asyncio import atexit import json import logging import multiprocessing import os import readline import signal import subprocess import sys import time from pathlib import Path from typing import Dict, List from plamo_translate import __version__ from plamo_translate.clients import translate from plamo_translate.servers.utils import ( PLAMO_TRANSLATE_CLI_REPETITION_CONTEXT_SIZE, PLAMO_TRANSLATE_CLI_REPETITION_PENALTY, SUPPORTED_LANGUAGES, update_config, verify_mcp_server_ready, ) os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "1" logger = logging.getLogger(__name__) def start_mcp_server(backend_type: str, log_level: str, show_progress: bool = False) -> None: # To avoid showing warnings related to resource_tracker signal.signal(signal.SIGTERM, lambda _signal_number, _frame: exit(0)) if os.environ.get("PLAMO_TRANSLATE_CLI_USE_MOCK_SERVER") == "1": from plamo_translate.servers.mock import server as mock_server server = 
def check_server_running() -> bool:
    """Return True when a reachable MCP server exposes the 'plamo-translate' tool."""
    config = update_config()
    if "port" not in config:
        return False
    port = config["port"]
    tools = asyncio.run(verify_mcp_server_ready(port))
    # The server is only "running" for our purposes if our tool is registered.
    return "plamo-translate" in tools


def wait_for_server_ready(timeout: float | None = None) -> None:
    """Block until the MCP server answers with the 'plamo-translate' tool.

    Args:
        timeout: Maximum number of seconds to wait. ``None`` (the default,
            matching the historical behavior) waits indefinitely.

    Raises:
        TimeoutError: If the server is not ready within ``timeout`` seconds.
    """
    deadline = None if timeout is None else time.monotonic() + timeout
    while not check_server_running():
        if deadline is not None and time.monotonic() > deadline:
            raise TimeoutError("Timed out waiting for the MCP server to become ready.")
        time.sleep(0.1)


async def print_translation(
    client: translate.MCPClient, messages: List[Dict[str, str]], stream: bool
) -> List[Dict[str, str]]:
    """Print translation output as it arrives and return the updated history.

    In streaming mode each chunk is also appended to the last message so the
    conversation history stays usable for follow-up turns in interactive mode.
    """
    async for result in client.translate(messages):
        if not stream:
            print(result, end="", flush=True)
        else:
            messages[-1]["content"] += result
            print(result, end="", flush=True)
    return messages
def main() -> None:
    """CLI entry point: parse arguments and dispatch to the translate,
    `server`, or `show-claude-config` command."""
    global_parser = argparse.ArgumentParser(add_help=False)
    global_parser.add_argument(
        "--version",
        "-v",
        action="version",
        version="%(prog)s {version}".format(version=__version__),
        help="Show program's version number and exit.",
    )
    # Add arguments for the default command (translate).
    # These will be used if no subcommand is provided.
    global_parser.add_argument("--input", type=str, help="Input text to translate", default=None)
    global_parser.add_argument(
        "--from",
        type=str,
        help="Input language for translation",
        default="English|Japanese",
        choices=SUPPORTED_LANGUAGES,
        dest="from_lang",
    )
    global_parser.add_argument(
        "--to",
        type=str,
        help="Output language for translation",
        default="",
        choices=SUPPORTED_LANGUAGES + [""],
    )
    global_parser.add_argument(
        "--backend-type",
        type=str,
        default="mlx",
        choices=["mlx"],
        help="Server backend to use (default: mlx on macOS, transformers elsewhere)",
    )
    global_parser.add_argument(
        "--precision",
        "-p",
        type=str,
        default="4bit",
        choices=["4bit", "8bit", "bf16"],
        help="Model parameter's precision to use (default: 4bit)",
    )
    global_parser.add_argument(
        "--no-stream",
        action="store_true",
        help="Enable batch processing mode for translation",
    )
    global_parser.add_argument(
        "--interactive",
        "-i",
        action="store_true",
        help="Enable interactive mode for translation",
    )

    # Create the parser for the subcommands; they inherit the global options.
    parser = argparse.ArgumentParser(description="PLaMo Translate CLI", parents=[global_parser])
    subparsers = parser.add_subparsers(dest="command", help="Command to run")
    _ = subparsers.add_parser("server", help="Run the server", parents=[global_parser])
    _ = subparsers.add_parser(
        "show-claude-config", help="Show the MCP server config for Claude Desktop", parents=[global_parser]
    )
    args = parser.parse_args()

    if hasattr(args, "version") and args.version:
        # The version action should have already exited, but as a fallback:
        sys.exit(0)

    # The repetition penalty and its context size must be configured together.
    if PLAMO_TRANSLATE_CLI_REPETITION_PENALTY is not None and PLAMO_TRANSLATE_CLI_REPETITION_CONTEXT_SIZE is None:
        raise ValueError(
            "If PLAMO_TRANSLATE_CLI_REPETITION_PENALTY is set, "
            "PLAMO_TRANSLATE_CLI_REPETITION_CONTEXT_SIZE must also be set."
        )
    elif PLAMO_TRANSLATE_CLI_REPETITION_PENALTY is None and PLAMO_TRANSLATE_CLI_REPETITION_CONTEXT_SIZE is not None:
        raise ValueError(
            "If PLAMO_TRANSLATE_CLI_REPETITION_CONTEXT_SIZE is set, "
            "PLAMO_TRANSLATE_CLI_REPETITION_PENALTY must also be set."
        )

    # Interactive mode kicks in when no --input was given and stdin is a TTY.
    if args.input is None and (args.interactive or sys.stdin.isatty()):
        args.interactive = True
        logging.basicConfig(level=logging.ERROR)
        os.environ["PLAMO_TRANSLATE_CLI_SERVER_LOG_LEVEL"] = "CRITICAL"
    else:
        args.interactive = False
        logging.basicConfig(level=logging.CRITICAL)
        os.environ["PLAMO_TRANSLATE_CLI_SERVER_LOG_LEVEL"] = "CRITICAL"

    args.stream = not args.no_stream

    if args.backend_type == "mlx":
        # Map the requested precision onto the published model variants.
        if args.precision == "4bit":
            model_name = "mlx-community/plamo-2-translate"
        elif args.precision == "8bit":
            model_name = "mlx-community/plamo-2-translate-8bit"
        elif args.precision == "bf16":
            model_name = "mlx-community/plamo-2-translate-bf16"
        update_config(backend_type=args.backend_type, model_name=model_name)
        if "PLAMO_TRANSLATE_CLI_MODEL_NAME" not in os.environ:
            os.environ["PLAMO_TRANSLATE_CLI_MODEL_NAME"] = model_name

    if args.command == "server":
        logging.basicConfig(level=logging.INFO)
        if check_server_running():
            print("MCP server is already running. Skipping server start.")
            sys.exit(0)
        # Keep restarting the server on unexpected errors until interrupted.
        while not check_server_running():
            try:
                logger.info("Starting server...")
                start_mcp_server(args.backend_type, "INFO", True)
                logger.info("The server is running (Ctrl+C to stop)")
            except KeyboardInterrupt:
                logger.error("\nCtrl+C received. Exiting.")
                break
            except EOFError:
                logger.error("\nCtrl+D received. Exiting.")
                break
            except Exception as e:
                logger.error(f"An error occurred: {str(e)}: {e}. Restarting server...")
    elif args.command == "show-claude-config":
        # BUG FIX: check=True made subprocess.run raise CalledProcessError on a
        # nonzero exit, so the returncode branch below was unreachable and a
        # missing npx crashed with a traceback instead of the friendly message.
        cmd = subprocess.run(["which", "npx"], check=False, capture_output=True, text=True)
        if cmd.returncode != 0:
            logger.error("npx command not found. Please install Node.js and npx.")
            sys.exit(1)
        npx_path = cmd.stdout.strip()
        config = update_config()
        print(
            json.dumps(
                {
                    "mcpServers": {
                        "plamo-translate": {
                            "command": npx_path,
                            "args": [
                                "-y",
                                "mcp-remote",
                                f"http://localhost:{config['port']}/mcp",
                                "--allow-http",
                                "--transport",
                                "http-only",
                            ],
                            "env": {"PATH": os.environ["PATH"]},
                        }
                    }
                },
                indent=2,
                ensure_ascii=False,
            )
        )
    else:
        run_translate(args)
name="plamo-translate", instructions=INSTRUCTION, log_level=log_level, stateless_http=False, host="127.0.0.1", port=find_free_port(), lifespan=self.lifespan, ) # Set environment variables to switch if it shows progress bars for loading models or not self.show_progress = show_progress model, tokenizer, sampler, logits_processors = self.load_model() self.model = model self.tokenizer = tokenizer self.sampler = sampler self.logits_processors = logits_processors self.add_tool( fn=self.translate, name="plamo-translate", description=INSTRUCTION, ) @contextlib.asynccontextmanager async def lifespan(self, server: FastMCP): try: async with contextlib.AsyncExitStack() as stack: # Pre-processings before a request is processed yield # Post-processings after a request is processed except Exception as e: logger.error(f"Error during lifespan: {str(e)} {e}") await stack.aclose() def load_model(self) -> Tuple[nn.Module, TokenizerWrapper, Callable[..., mx.array], list]: """Load the MLX model if not already loaded.""" try: ref = importlib.resources.files("plamo_translate.assets").joinpath("chat_template.jinja2") chat_template = ref.read_text(encoding="utf-8") except FileNotFoundError: raise RuntimeError("chat_template.jinja2 not found in assets directory") model_name = os.getenv("PLAMO_TRANSLATE_CLI_MODEL_NAME", PLAMO_TRANSLATE_CLI_MODEL_NAME) update_config(model_name=model_name) # Reload mlx_lm.utils here to refleect the environment variables for progress bars if self.show_progress: envs = os.environ.copy() envs["HF_HUB_DISABLE_PROGRESS_BARS"] = "0" subprocess.run( [ sys.executable, *build_optional_gpu_dependency_warning_options(), "-m", "mlx_lm", "generate", "--model", model_name, "--max-tokens", "1", "--trust-remote-code", ], env=envs, stdout=subprocess.DEVNULL, ) with suppress_optional_gpu_dependency_warnings(): model, tokenizer = load( model_name, model_config={"trust_remote_code": True}, tokenizer_config={ "trust_remote_code": True, "chat_template": chat_template, }, ) 
tokenizer.add_eos_token("<|plamo:op|>") sampler = make_sampler( temp=float(PLAMO_TRANSLATE_CLI_TEMP), top_p=float(PLAMO_TRANSLATE_CLI_TOP_P), top_k=int(PLAMO_TRANSLATE_CLI_TOP_K), ) logits_processors = make_logits_processors( repetition_penalty=( float(PLAMO_TRANSLATE_CLI_REPETITION_PENALTY) if PLAMO_TRANSLATE_CLI_REPETITION_PENALTY is not None else None ), repetition_context_size=( int(PLAMO_TRANSLATE_CLI_REPETITION_CONTEXT_SIZE) if PLAMO_TRANSLATE_CLI_REPETITION_CONTEXT_SIZE is not None else None ), ) return model, tokenizer, sampler, logits_processors async def translate(self, request: TranslateRequest, stream: bool, context: Context) -> str: """Run the translation tool""" logger.info(f"Received translation request: {context.request_id}") try: messages = construct_llm_input(request) prompt = self.tokenizer.apply_chat_template(messages, add_generation_prompt=False) # type:ignore[call-arg] # Generate translation translation = "" segments_count = 0 for segment in stream_generate( model=self.model, tokenizer=self.tokenizer, prompt=prompt, sampler=self.sampler, logits_processors=self.logits_processors, max_tokens=int(PLAMO_MAX_TOKENS), ): translation += segment.text segments_count += 1 if stream: # Send progress notification with the new segment await context.report_progress( progress=segments_count, total=None, # We don't know the total in advance message=segment.text, # Send the segment as the message ) # Small delay to ensure progress is sent await asyncio.sleep(0) if not stream: return translation else: return "" except Exception as e: logger.error(f"Translation error: {str(e)}") raise e ================================================ FILE: src/plamo_translate/servers/mock/__init__.py ================================================ from .server import PLaMoTranslateServer __all__ = ["PLaMoTranslateServer"] ================================================ FILE: src/plamo_translate/servers/mock/server.py ================================================ import 
asyncio import logging from typing import Dict from mcp.server.fastmcp import Context, FastMCP from plamo_translate.servers.utils import INSTRUCTION, TranslateRequest, find_free_port, update_config logger = logging.getLogger(__name__) MOCK_TRANSLATIONS: Dict[str, str] = { "Proud, but humble": "誇り高いが、謙虚です。", "Boldly do what no one has done before": "誰もしたことがないことを大胆にやりなさい。", } def _extract_input_text(request: TranslateRequest) -> str: for message in reversed(request.messages): if not message.content.startswith("input"): continue _, _, input_text = message.content.partition("\n") return input_text.strip().lstrip(">").strip() return request.messages[-1].content.strip().lstrip(">").strip() class PLaMoTranslateServer(FastMCP): """Lightweight MCP server used by the test suite.""" def __init__(self, log_level: str, show_progress: bool = False) -> None: super().__init__( name="plamo-translate", instructions=INSTRUCTION, log_level=log_level, stateless_http=False, host="127.0.0.1", port=find_free_port(), ) update_config(model_name="mock") self.show_progress = show_progress self.add_tool( fn=self.translate, name="plamo-translate", description=INSTRUCTION, ) async def translate(self, request: TranslateRequest, stream: bool, context: Context) -> str: input_text = _extract_input_text(request) translation = next( (candidate for source, candidate in MOCK_TRANSLATIONS.items() if source in input_text), f"[mock translation] {input_text}", ) if not stream: return translation for index, chunk in enumerate([translation], start=1): await context.report_progress(progress=index, total=1, message=chunk) await asyncio.sleep(0) return "" ================================================ FILE: src/plamo_translate/servers/utils.py ================================================ import asyncio import json import logging import os import socket import textwrap from contextlib import closing from tempfile import NamedTemporaryFile from pathlib import Path from typing import Any, Dict, List, Optional 
from mcp import ClientSession from mcp.client.streamable_http import streamablehttp_client from pydantic import BaseModel, Field logger = logging.getLogger(__name__) SUPPORTED_LANGUAGES = [ "Japanese", "Japanese(easy)", "English", "Chinese", "Taiwanese", "Korean", "Arabic", "Italian", "Indonesian", "Dutch", "Spanish", "Thai", "German", "French", "Vietnamese", "Russian", "English|Japanese", ] PLAMO_TRANSLATE_CLI_MODEL_NAME = os.environ.get("PLAMO_TRANSLATE_CLI_MODEL_NAME", "mlx-community/plamo-2-translate") PLAMO_TRANSLATE_CLI_SERVER_START_PORT = int(os.environ.get("PLAMO_TRANSLATE_CLI_SERVER_START_PORT", 30000)) PLAMO_TRANSLATE_CLI_SERVER_END_PORT = int(os.environ.get("PLAMO_TRANSLATE_CLI_SERVER_END_PORT", 30099)) PLAMO_TRANSLATE_CLI_SERVER_LOG_LEVEL = os.environ.get("PLAMO_TRANSLATE_CLI_SERVER_LOG_LEVEL", "INFO") PLAMO_TRANSLATE_CLI_TEMP = os.environ.get("PLAMO_TRANSLATE_CLI_TEMP", "0.0") PLAMO_TRANSLATE_CLI_TOP_P = os.environ.get("PLAMO_TRANSLATE_CLI_TOP_P", "0.98") PLAMO_TRANSLATE_CLI_TOP_K = os.environ.get("PLAMO_TRANSLATE_CLI_TOP_K", "0") PLAMO_TRANSLATE_CLI_REPETITION_PENALTY = os.environ.get("PLAMO_TRANSLATE_CLI_REPETITION_PENALTY", None) PLAMO_TRANSLATE_CLI_REPETITION_CONTEXT_SIZE = os.environ.get("PLAMO_TRANSLATE_CLI_REPETITION_CONTEXT_SIZE", None) PLAMO_MAX_TOKENS = os.environ.get("PLAMO_MAX_TOKENS", "32768") SUPPORTED_LANGUAGES_LIST_STR = "\n-".join(SUPPORTED_LANGUAGES) INSTRUCTION = textwrap.dedent( f"""Use the `plamo-translate` tool to translate text between multiple languages. Supported languages include: - {SUPPORTED_LANGUAGES_LIST_STR} Use the tool by specifying the text and the source and target languages. 
""" ) async def verify_mcp_server_ready(port: int) -> List[str]: """Verify if the MCP server is ready to accept connections.""" try: url = f"http://127.0.0.1:{port}/mcp" async with streamablehttp_client(url) as ( read_stream, write_stream, get_session_id_callback, ): async with ClientSession( read_stream=read_stream, write_stream=write_stream, ) as session: await session.initialize() tools = await session.list_tools() return [tool.name for tool in tools.tools] except Exception: return [] def find_free_port( start_port: int = PLAMO_TRANSLATE_CLI_SERVER_START_PORT, end_port: int = PLAMO_TRANSLATE_CLI_SERVER_END_PORT, ) -> int: """ Find a port in the range [start_port, end_port]. """ config = update_config() # Phase 1: Check for existing MCP server with 'plamo-translate' tool if "port" in config: port = config["port"] try: tools = asyncio.run(verify_mcp_server_ready(port)) except Exception as e: logger.info(f"Failed to connect to MCP server on port {port}: {e}") tools = [] if "plamo-translate" in tools: logger.info(f"Found existing MCP server with 'plamo-translate' tool on port {port}.") return port previous_port = port else: previous_port = None # Phase 2: If no suitable MCP server found, find any free port in the range for port in range(start_port, end_port + 1): with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock: sock.settimeout(0.1) # Short timeout for connection attempt try: result = sock.connect_ex(("127.0.0.1", port)) if result != 0: # If connection failed (errno != 0), port is likely free logger.info(f"Found free port: {port}") if previous_port is not None and previous_port != port: logger.info(f"Updating MCP server port from {previous_port} to {port}.") update_config(port=port) return port except Exception: # This can happen if e.g. 
sock.connect_ex itself has issues, or port is restricted pass # Try next port raise RuntimeError( "Could not find a suitable MCP server with 'plamo-translate' tool " f"or a free port in the range {start_port}-{end_port}." ) def update_config(**kwargs) -> Dict[str, Any]: tmp_dir = os.environ.get("TMPDIR", None) if tmp_dir is None: raise ValueError("TMPDIR environment variable is not set. Please set it to a valid directory.") tmp_config_path = Path(tmp_dir) / "plamo-translate-config.json" if not tmp_config_path.exists(): if not kwargs: return {} config = kwargs _write_config(tmp_config_path, config, indent=4) logger.info( f"Created new temporary config file at {tmp_config_path} with initial values: " f"{json.dumps(config, indent=4, ensure_ascii=False)}" ) return config with tmp_config_path.open("r") as f: try: config = json.load(f) except json.JSONDecodeError: logger.warning(f"Config file {tmp_config_path} is corrupted. Recreating it.") config = {} if not kwargs: return config for key, value in kwargs.items(): config[key] = value _write_config(tmp_config_path, config) return config def _write_config(path: Path, config: Dict[str, Any], *, indent: int | None = None) -> None: path.parent.mkdir(parents=True, exist_ok=True) with NamedTemporaryFile("w", dir=path.parent, delete=False, encoding="utf-8") as tmp_file: json.dump(config, tmp_file, indent=indent) tmp_file.flush() os.fsync(tmp_file.fileno()) tmp_path = Path(tmp_file.name) tmp_path.replace(path) class Message(BaseModel): """Model for messages in translation request""" role: str = Field(..., description="Role of the message sender (e.g., 'user', 'assistant')") content: str = Field(..., description="Content of the message") class TranslateRequest(BaseModel): """Request model for translation""" messages: List[Message] = Field(..., description="List of messages for translation") source_language: Optional[str] = Field( "", description=( "Source language that is one of the followings: " f"{', 
# Warning messages emitted by remote model code when optional GPU-only
# dependencies are absent; these are expected on Apple Silicon.
OPTIONAL_GPU_DEPENDENCY_WARNING_MESSAGES = (
    "mamba_ssm could not be imported",
    "causal_conv1d could not be imported",
)


@contextlib.contextmanager
def suppress_optional_gpu_dependency_warnings() -> Iterator[None]:
    """Temporarily silence the known optional-GPU-dependency warnings.

    Only exact matches of the known messages (UserWarning) are suppressed;
    all other warnings pass through unchanged.
    """
    with warnings.catch_warnings():
        for known_message in OPTIONAL_GPU_DEPENDENCY_WARNING_MESSAGES:
            warnings.filterwarnings(
                action="ignore",
                message=rf"^{re.escape(known_message)}$",
                category=UserWarning,
            )
        yield


def build_optional_gpu_dependency_warning_options() -> list[str]:
    """Return ``python -W`` flag pairs that suppress the known warnings."""
    return [
        flag
        for known_message in OPTIONAL_GPU_DEPENDENCY_WARNING_MESSAGES
        for flag in ("-W", f"ignore:{known_message}:UserWarning")
    ]
== initial_config assert config_path.read_text() == initial_contents, "Read-only access should not rewrite the config file" def stop_subprocess(process: subprocess.Popen[str] | None) -> None: if process is None: return process.terminate() try: process.wait(timeout=5) except subprocess.TimeoutExpired: process.kill() process.wait(timeout=5) def stop_multiprocess(process: multiprocessing.Process | None) -> None: if process is None: return process.terminate() process.join(timeout=5) if process.is_alive(): process.kill() process.join(timeout=5) def test_plamo_translate_without_server(): text_to_translate = "Proud, but humble" command = ["plamo-translate", "--from", "English", "--to", "Japanese", "--input", text_to_translate] result = subprocess.run(command, capture_output=True, text=True, timeout=CLI_TIMEOUT_SECONDS) assert result.returncode == 0 assert "誇り高" in result.stdout and "謙虚" in result.stdout def test_plamo_translate_server_simple_use(): first_process = None try: command = ["plamo-translate", "server"] first_process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) wait_for_server_ready() config = update_config() print(f"Server started with config: {config}") assert "port" in config, "Server configuration should include a port" port = config["port"] assert port == PLAMO_TRANSLATE_CLI_SERVER_START_PORT, f"Expected server port to be 8000, got {port}" text_to_translate = "Proud, but humble" result = subprocess.run( ["plamo-translate", "--input", text_to_translate, "--from", "English", "--to", "Japanese"], capture_output=True, text=True, timeout=CLI_TIMEOUT_SECONDS, ) assert "誇り高い" in result.stdout and "謙虚" in result.stdout result = subprocess.run( ["plamo-translate", "--from", "English", "--to", "Japanese"], input=text_to_translate, capture_output=True, text=True, timeout=CLI_TIMEOUT_SECONDS, ) assert "誇り高い" in result.stdout and "謙虚" in result.stdout finally: stop_subprocess(first_process) def 
test_plamo_translate_server_already_running(): first_process = None second_process = None try: command = ["plamo-translate", "server"] first_process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) print("Starting first plamo-translate server process...") wait_for_server_ready() print("First server process started successfully.") # If the server is already running, the further call of `plamo-translate server` should not start a new server. second_process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) stdout, _ = second_process.communicate(timeout=CLI_TIMEOUT_SECONDS) print(stdout.strip()) assert "MCP server is already running" in stdout config = update_config() print(f"Server started with config: {config}") assert "port" in config, "Server configuration should include a port" port = config["port"] assert port == PLAMO_TRANSLATE_CLI_SERVER_START_PORT, f"Expected server port to be 8000, got {port}" finally: stop_subprocess(first_process) stop_subprocess(second_process) def start_http_server(): port = PLAMO_TRANSLATE_CLI_SERVER_START_PORT handler = http.server.SimpleHTTPRequestHandler with socketserver.TCPServer(("127.0.0.1", port), handler) as httpd: httpd.serve_forever() def test_plamo_translate_server_find_new_port(): http_server_process = None mcp_server_process = None try: http_server_process = multiprocessing.Process(target=start_http_server, daemon=True) http_server_process.start() print(f"HTTP server started on port {PLAMO_TRANSLATE_CLI_SERVER_START_PORT}") wait_for_port_in_use(PLAMO_TRANSLATE_CLI_SERVER_START_PORT) # The default port is used by the HTTP server, so the MCP server should use a different port command = ["plamo-translate", "server"] mcp_server_process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) print("Starting plamo-translate server...") wait_for_server_ready() stop_subprocess(mcp_server_process) mcp_server_process = None 
config = update_config() print(f"Server started with config: {config}") assert "port" in config, "Server configuration should include a port" port = config["port"] assert port == PLAMO_TRANSLATE_CLI_SERVER_START_PORT + 1, ( f"Expected server port to be {PLAMO_TRANSLATE_CLI_SERVER_START_PORT + 1}, got {port}" ) finally: stop_multiprocess(http_server_process) stop_subprocess(mcp_server_process) def test_plamo_translate_server_interactive(): mcp_server_process = None client_process = None try: command = ["plamo-translate", "server"] mcp_server_process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) wait_for_server_ready() config = update_config() print(f"Server started with config: {config}") assert "port" in config, "Server configuration should include a port" port = config["port"] assert port == PLAMO_TRANSLATE_CLI_SERVER_START_PORT, ( f"Expected server port to be {PLAMO_TRANSLATE_CLI_SERVER_START_PORT}, got {port}" ) client_command = ["plamo-translate", "-i", "--from", "English", "--to", "Japanese"] client_process = subprocess.Popen( client_command, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, ) all_inputs = "\n".join(["Proud, but humble", "Boldly do what no one has done before"]) + "\n" stdout, stderr = client_process.communicate(input=all_inputs, timeout=CLI_TIMEOUT_SECONDS) assert "誇り高" in stdout and "謙虚" in stdout assert "大胆に" in stdout finally: stop_subprocess(mcp_server_process) stop_subprocess(client_process) ================================================ FILE: tests/test_cli_integration.py ================================================ import os import subprocess import time import pytest from plamo_translate.main import check_server_running from plamo_translate.servers.utils import PLAMO_TRANSLATE_CLI_SERVER_START_PORT, update_config CLI_TIMEOUT_SECONDS = int(os.environ.get("PLAMO_TRANSLATE_CLI_TEST_TIMEOUT_SECONDS", "900")) SERVER_STARTUP_TIMEOUT_SECONDS = 
int(os.environ.get("PLAMO_TRANSLATE_CLI_TEST_SERVER_STARTUP_TIMEOUT_SECONDS", "900")) @pytest.fixture(autouse=True) def integration_test_environment(monkeypatch, tmp_path): monkeypatch.setenv( "PLAMO_TRANSLATE_CLI_USE_MOCK_SERVER", os.environ.get("PLAMO_TRANSLATE_CLI_USE_MOCK_SERVER", "0"), ) monkeypatch.setenv("TMPDIR", str(tmp_path)) def wait_for_server_ready(timeout: int = SERVER_STARTUP_TIMEOUT_SECONDS) -> None: deadline = time.monotonic() + timeout while time.monotonic() < deadline: if check_server_running(): return time.sleep(0.5) raise AssertionError("Timed out waiting for the MCP server to become ready.") def stop_subprocess(process: subprocess.Popen[str] | None) -> None: if process is None: return process.terminate() try: process.wait(timeout=5) except subprocess.TimeoutExpired: process.kill() process.wait(timeout=5) def test_plamo_translate_server_roundtrip_with_real_model(): server_process = None try: server_process = subprocess.Popen( ["plamo-translate", "server"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, ) wait_for_server_ready() config = update_config() assert config.get("port") == PLAMO_TRANSLATE_CLI_SERVER_START_PORT text_to_translate = "Proud, but humble" result = subprocess.run( ["plamo-translate", "--input", text_to_translate, "--from", "English", "--to", "Japanese"], capture_output=True, text=True, timeout=CLI_TIMEOUT_SECONDS, ) assert result.returncode == 0 assert "誇り高" in result.stdout and "謙虚" in result.stdout result = subprocess.run( ["plamo-translate", "--from", "English", "--to", "Japanese"], input=text_to_translate, capture_output=True, text=True, timeout=CLI_TIMEOUT_SECONDS, ) assert result.returncode == 0 assert "誇り高" in result.stdout and "謙虚" in result.stdout finally: stop_subprocess(server_process) ================================================ FILE: tests/test_warning_filters.py ================================================ import warnings from plamo_translate.servers.warnings import ( 
OPTIONAL_GPU_DEPENDENCY_WARNING_MESSAGES, build_optional_gpu_dependency_warning_options, suppress_optional_gpu_dependency_warnings, ) def test_build_optional_gpu_dependency_warning_options(): assert build_optional_gpu_dependency_warning_options() == [ "-W", "ignore:mamba_ssm could not be imported:UserWarning", "-W", "ignore:causal_conv1d could not be imported:UserWarning", ] def test_suppress_optional_gpu_dependency_warnings_only_hides_known_messages(): with warnings.catch_warnings(record=True) as captured: warnings.simplefilter("always") with suppress_optional_gpu_dependency_warnings(): for message in OPTIONAL_GPU_DEPENDENCY_WARNING_MESSAGES: warnings.warn(message, UserWarning, stacklevel=1) warnings.warn("unexpected warning", UserWarning, stacklevel=1) assert [str(item.message) for item in captured] == ["unexpected warning"]