Repository: pfnet/plamo-translate-cli Branch: main Commit: 840de559ee22 Files: 22 Total size: 73.5 KB Directory structure: gitextract_mzvrmcnd/ ├── .github/ │ └── workflows/ │ └── ci.yml ├── .gitignore ├── LICENSE ├── README.md ├── pyproject.toml ├── scripts/ │ ├── convert_dataset.py │ └── deploy.sh ├── src/ │ └── plamo_translate/ │ ├── __init__.py │ ├── assets/ │ │ └── chat_template.jinja2 │ ├── clients/ │ │ ├── __init__.py │ │ └── translate.py │ ├── main.py │ └── servers/ │ ├── __init__.py │ ├── mlx/ │ │ ├── __init__.py │ │ └── server.py │ ├── mock/ │ │ ├── __init__.py │ │ └── server.py │ ├── utils.py │ └── warnings.py └── tests/ ├── test_cli.py ├── test_cli_integration.py └── test_warning_filters.py ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/workflows/ci.yml ================================================ name: CI on: push: pull_request: jobs: mock-test: runs-on: ubuntu-latest timeout-minutes: 15 strategy: matrix: python-version: ['3.10', '3.11', '3.12', '3.13', '3.14'] env: PLAMO_TRANSLATE_CLI_USE_MOCK_SERVER: '1' PLAMO_TRANSLATE_CLI_TEST_TIMEOUT_SECONDS: '20' PLAMO_TRANSLATE_CLI_TEST_SERVER_STARTUP_TIMEOUT_SECONDS: '10' UV_CACHE_DIR: .cache/uv steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Cache uv downloads uses: actions/cache@v4 with: path: ${{ env.UV_CACHE_DIR }} key: ${{ runner.os }}-uv-py${{ matrix.python-version }}-${{ hashFiles('uv.lock') }} restore-keys: | ${{ runner.os }}-uv-py${{ matrix.python-version }}- ${{ runner.os }}-uv- - name: Install uv run: curl -LsSf https://astral.sh/uv/install.sh | sh - name: Add uv to PATH run: echo "$HOME/.cargo/bin" >> $GITHUB_PATH - name: Sync dependencies run: uv sync - name: Run tests run: uv run pytest -s tests/test_cli.py mlx-integration-test: if: 
github.event_name == 'push' && github.ref == 'refs/heads/main' runs-on: macos-latest timeout-minutes: 45 env: HF_HOME: .cache/hf HUGGINGFACE_HUB_CACHE: .cache/hf/hub PLAMO_TRANSLATE_CLI_MODEL_NAME: mlx-community/plamo-2-translate PLAMO_TRANSLATE_CLI_MODEL_CACHE_KEY: plamo-2-translate PLAMO_TRANSLATE_CLI_USE_MOCK_SERVER: '0' PLAMO_TRANSLATE_CLI_TEST_TIMEOUT_SECONDS: '900' PLAMO_TRANSLATE_CLI_TEST_SERVER_STARTUP_TIMEOUT_SECONDS: '900' UV_CACHE_DIR: .cache/uv steps: - uses: actions/checkout@v4 - name: Set up Python 3.13 uses: actions/setup-python@v5 with: python-version: '3.13' - name: Cache uv downloads uses: actions/cache@v4 with: path: ${{ env.UV_CACHE_DIR }} key: ${{ runner.os }}-uv-py3.13-${{ hashFiles('uv.lock') }} restore-keys: | ${{ runner.os }}-uv-py3.13- ${{ runner.os }}-uv- - name: Cache Hugging Face models uses: actions/cache@v4 with: path: ${{ env.HUGGINGFACE_HUB_CACHE }} key: ${{ runner.os }}-hf-${{ env.PLAMO_TRANSLATE_CLI_MODEL_CACHE_KEY }}-v1 restore-keys: | ${{ runner.os }}-hf-${{ env.PLAMO_TRANSLATE_CLI_MODEL_CACHE_KEY }}- ${{ runner.os }}-hf- - name: Install uv run: curl -LsSf https://astral.sh/uv/install.sh | sh - name: Add uv to PATH run: echo "$HOME/.cargo/bin" >> $GITHUB_PATH - name: Sync dependencies run: uv sync - name: Run integration tests run: uv run pytest -s tests/test_cli_integration.py ================================================ FILE: .gitignore ================================================ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. 
*.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover *.py,cover .hypothesis/ .pytest_cache/ cover/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 db.sqlite3-journal # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder .pybuilder/ target/ # Jupyter Notebook .ipynb_checkpoints # IPython profile_default/ ipython_config.py # pyenv # For a library or package, you might want to ignore these files since the code is # intended to run in multiple environments; otherwise, check them in: # .python-version # pipenv # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. # However, in case of collaboration, if having platform-specific dependencies or dependencies # having no cross-platform support, pipenv may install dependencies that don't work, or not # install all needed dependencies. #Pipfile.lock # UV # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. # This is especially recommended for binary packages to ensure reproducibility, and is more # commonly ignored for libraries. #uv.lock # poetry # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. # This is especially recommended for binary packages to ensure reproducibility, and is more # commonly ignored for libraries. # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control #poetry.lock # pdm # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. #pdm.lock # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it # in version control. 
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control .pdm.toml .pdm-python .pdm-build/ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm __pypackages__/ # Celery stuff celerybeat-schedule celerybeat.pid # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Pyre type checker .pyre/ # pytype static type analyzer .pytype/ # Cython debug symbols cython_debug/ # PyCharm # JetBrains specific template is maintained in a separate JetBrains.gitignore that can # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ # Ruff stuff: .ruff_cache/ # PyPI configuration file .pypirc ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. 
For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. 
For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. 
You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: README.md ================================================ # plamo-translate-cli A command-line interface for translation using the plamo-2-translate model with local execution. ## Features - Translate text between 16+ languages including Japanese, English, Chinese, Korean, and more - Simple command-line interface for easy integration into scripts and workflows - Supports various server backends (MLX, with planned support for Ollama and vLLM) - Currently, optimized for macOS with Apple Silicon using MLX framework ## Installation ### For macOS `plamo-translate` currently installs on Python 3.10 through 3.14 on macOS. No additional workaround is required for `sentencepiece` on Python 3.13 or 3.14 with current upstream releases. ```sh pip install plamo-translate ``` #### [`uv tool`](https://docs.astral.sh/uv/concepts/tools/) If you use [`uv`](https://github.com/astral-sh/uv) as a package manager rather than `pip`, you can install `plamo-translate` into an isolated environment: ```sh uv tool install -p 3.14 plamo-translate ``` ## Development ```sh uv sync source .venv/bin/activate ``` ## Requirements - Python 3.10 through 3.14 - Common dependencies: - mcp[cli] - numba - On macOS: - mlx-lm ## Usage ### Basic usage You can specify the input and output language by giving `--from` and `--to` options. 
If you don't specify them, the input/output language will be automatically selected from English or Japanese. #### Interactive mode ```sh $ plamo-translate Loading models...done! Interactive mode enabled. Type your input below (Ctrl+D to exit). > こんにちは、お元気ですか? Hello, how are you? > 「お腹減った〜何食べたい?」「私はうなぎ!」 "I'm hungry! What do you want to eat?" "I want eel!" > You translate ambiguous expression in Japanese into English very well. あなたは日本語の曖昧な表現を英語に翻訳するのがとても上手です。 ``` #### Pipe mode ```sh $ cat file.txt | plamo-translate The virtual worlds of the internet have experienced remarkable technological advancement. Meanwhile, the real world still contains numerous areas where technology has yet to make significant inroads, with many inefficient manual tasks and dangerous work still requiring human intervention. This situation stems from the fact that conventional technology has struggled to adapt to the dynamic changes and diverse conditions of the real world. PFN's core strengths lie in machine learning and deep learning technologies, which demonstrate exceptional flexibility in handling uncertainty and have the potential to create significant impact in the real world. For example, by applying deep learning technologies to robots that excel at repetitive tasks, we can enable them to make more human-like flexible judgments and perform complex tasks. To create meaningful impact in the real world, it's essential to push the boundaries of cutting-edge technology and research application domains where technological innovation can create tangible change. For these purposes, PFN assembles a team of exceptionally talented professionals with diverse expertise. ``` #### Server mode First, launch the server: ```sh $ plamo-translate server ``` Then, use the client mode: ```sh $ plamo-translate --input '家計は火の車だ' Our household is in financial trouble. ``` You can also use the interactive mode with the server: ```sh $ plamo-translate Loading models...done! Interactive mode enabled. 
Type your input below (Ctrl+D to exit). > 家計は火の車だ Our household is in financial trouble. ``` It can skip the loading time of the model, so it is useful when you want to use this tool frequently. ### Using from MCP Client The `plamo-translate server` command starts an MCP (Model Context Protocol) server. This allows `plamo-translate` to be used as a tool in other applications that support MCP, such as Claude Desktop. Here, we introduce how to use `plamo-translate` with Claude Desktop, which is a popular MCP client. 1. Start the `plamo-translate` server: ```sh plamo-translate server ``` 2. In a new terminal, run the following command to display the MCP configuration for Claude Desktop: ```sh plamo-translate show-claude-config ``` and you will see the configuration in JSON format as follows: ```json { "mcpServers": { "plamo-translate": { "command": "/Users/shunta/.linuxbrew/bin/npx", "args": [ "-y", "mcp-remote", "http://localhost:8000/mcp", "--allow-http", "--transport", "http-only" ], "env": { "PATH": "[THE SAME STRING AS YOUR CURRENT PATH ENVIRONMENT VARIABLE]", } } } } ``` 3. Copy the outputted configuration. 4. Paste this configuration into your Claude Desktop's MCP configuration file (on macOS, this is typically located at `~/Library/Application Support/Claude/claude_desktop_config.json`). Once configured, you can use `plamo-translate` directly from Claude Desktop. #### Select precision of the model weight You can specify the precision of the model weight by giving a `--precision` option. 
```sh $ plamo-translate server --precision 8bit ``` ## Supported Languages - Japanese - Japanese(easy) - English ### Experimentally Supported Languages - Chinese - Taiwanese - Korean - Arabic - Italian - Indonesian - Dutch - Spanish - Thai - German - French - Vietnamese - Russian ## Server Backends - mlx: Optimized for macOS with Apple Silicon (default on macOS) ## Options - --input TEXT Input text to translate - --from TEXT Input language for translation (default: English) - --to TEXT Output language for translation (default: Japanese) - --precision Model weight precision to use. You can select from: [4bit, 8bit, bf16] (default: 4bit) ## Configuration You can configure the following parameters using environment variables: - `PLAMO_TRANSLATE_CLI_SERVER_START_PORT`: Specifies the starting port number for the server. - `PLAMO_TRANSLATE_CLI_SERVER_END_PORT`: Specifies the ending port number for the server. - `PLAMO_TRANSLATE_CLI_TEMP`: Sets the temperature for text generation. - `PLAMO_TRANSLATE_CLI_TOP_P`: Sets the top-p (nucleus) sampling probability. - `PLAMO_TRANSLATE_CLI_TOP_K`: Sets the top-k sampling number. - `PLAMO_TRANSLATE_CLI_REPETITION_PENALTY`: Sets the repetition penalty. - `PLAMO_TRANSLATE_CLI_REPETITION_CONTEXT_SIZE`: Sets the context size for repetition penalty. ## Deploy ```sh bash scripts/deploy.sh ``` ================================================ FILE: pyproject.toml ================================================ [project] name = "plamo-translate" version = "1.0.5" description = "A command-line interface for translation using the plamo-2-translate model with local execution." 
readme = "README.md" authors = [ { name = "Shunta Saito", email = "shunta@preferred.jp" }, ] requires-python = ">=3.10" keywords = [ "machine translation", "transformer", "nlp", "natural language processing", "deep learning", "mlx", "mlx-lm", "sentencepiece", "plamo", "plamo-translate", "plamo-translate-cli", ] license = "Apache-2.0" classifiers = [ "Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", "License :: OSI Approved :: Apache Software License", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", "Programming Language :: Python :: 3.14", "Operating System :: MacOS", "Topic :: Utilities", "Environment :: MacOS X", "Environment :: Console", ] dependencies = [ "mcp[cli]>=1.9.2", "numba>=0.60.0", "mlx-lm>=0.26.3 ; sys_platform == 'darwin'", ] [tool.uv] override-dependencies = ["transformers<5"] [build-system] requires = ["hatchling"] build-backend = "hatchling.build" [tool.hatch.build] exclude = ["tests", "docs", "examples", "scripts", "notebooks", "data", "tmp"] [tool.hatch.build.targets.wheel] compression = "stored" [tool.hatch.build.targets.sdist] compress-level = 0 [project.scripts] plamo-translate = "plamo_translate.main:main" [tool.ruff] line-length = 120 [dependency-groups] dev = [ "build>=1.2.2.post1", "ipdb>=0.13.13", "pytest>=8.4.0", "ruff>=0.11.12", "twine>=6.1.0", "wheel>=0.45.1", ] ================================================ FILE: scripts/convert_dataset.py ================================================ import argparse import json from pathlib import Path from datasets import Dataset from jinja2 import Template from mlx_lm.tokenizer_utils import load_tokenizer from mlx_lm.tuner import datasets if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--tokenizer-path", type=str) parser.add_argument("--dataset-jsonl-path", type=str) 
parser.add_argument("--chat-template-path", type=str) parser.add_argument("--pack-length", type=int, default=640) args = parser.parse_args() tokenizer_path = args.tokenizer_path dataset_jsonl_path = args.dataset_jsonl_path chat_template_path = args.chat_template_path tokenizer = load_tokenizer(Path(tokenizer_path)) with open(chat_template_path, "r") as f: chat_template = Template(f.read()) with open(dataset_jsonl_path, "r") as f: lines = [json.loads(line) for line in f.readlines()] dataset = [] prompts = [] n_toks = [] current_n_toks = 0 for line in lines: for input_text, output_text in zip(line["input"]["content"], line["output"]["content"]): try: if "\n" in input_text: input_text_str = input_text.split("\n")[1].strip() else: input_text_str = input_text.strip() except Exception: print(input_text) import ipdb ipdb.set_trace() try: if "\n" in output_text: output_text_str = output_text.split("\n")[1].strip() else: output_text_str = output_text.strip() except Exception: print(output_text) import ipdb ipdb.set_trace() # fmt: skip prompt = chat_template.render( messages=[ {"role": "user", "content": f"input lang={line['input']['lang']}\n{input_text_str}"}, {"role": "user", "content": f"output lang={line['output']['lang']}\n{output_text_str}"}, ] ) n_tok = len(tokenizer.encode(prompt.strip())) n_toks.append(n_tok) if current_n_toks + n_tok + 1 > args.pack_length: text = "<|plamo:bos|>".join(prompts) + "<|plamo:bos|>" n_pad = args.pack_length - len(tokenizer.encode(text)) if n_pad > 0: text += "<|plamo:pad|>" * n_pad dataset.append({"text": text}) prompts = [prompt.strip()] current_n_toks = n_tok else: prompts.append(prompt.strip()) current_n_toks += n_tok print(f"Max tokens in a batch: {max(n_toks)}") dataset = Dataset.from_list(dataset) dataset.save_to_disk("tmp/calibration_dataset") ================================================ FILE: scripts/deploy.sh ================================================ rm -rf dist ARCHFLAGS="-arch arm64" MACOSX_DEPLOYMENT_TARGET="11.0" 
\ uv build --wheel WHEEL_FILENAME=$(ls dist/plamo_translate-*.whl) uv run -m wheel tags \ --python-tag py3 \ --abi-tag none \ --platform-tag macosx_11_0_arm64 \ ${WHEEL_FILENAME} rm -rf ${WHEEL_FILENAME} uv run twine upload dist/* ================================================ FILE: src/plamo_translate/__init__.py ================================================ """PLaMo Translate CLI package.""" __version__ = "1.0.5" ================================================ FILE: src/plamo_translate/assets/chat_template.jinja2 ================================================ {{- "<|plamo:op|>dataset\ntranslation\n" -}} {% for message in messages %} {{- '<|plamo:op|>' + message['content']}} {%- if not loop.last %} {{- '\n'}} {%- endif %} {% endfor %} ================================================ FILE: src/plamo_translate/clients/__init__.py ================================================ ================================================ FILE: src/plamo_translate/clients/translate.py ================================================ import asyncio import logging from typing import AsyncGenerator, Dict, List from urllib.parse import urlunparse import mcp.types as types from mcp.client.session import ClientSession from mcp.client.streamable_http import streamablehttp_client from mcp.shared.session import RequestResponder from mcp.types import TextContent from plamo_translate.servers.utils import Message, TranslateRequest, update_config logger = logging.getLogger(__name__) async def message_handler( message: RequestResponder[types.ServerRequest, types.ClientResult] | types.ServerNotification | Exception, ) -> None: if isinstance(message, Exception): logger.error("Error: %s", message) return class MCPClient: def __init__(self, stream: bool) -> None: """Initialize the MCP client. Args: stream (bool): Whether to stream the translation results. 
""" self.stream = stream self.config = update_config() port = self.config.get("port", None) if port is None: raise ValueError("Port is not set in the configuration. Please start the MCP server first.") self.url = urlunparse(("http", f"127.0.0.1:{port}", "mcp", "", "", "")) async def translate(self, messages: List[Dict[str, str]]) -> AsyncGenerator[str, None]: """Translate messages. If stream=True, yields chunks as they arrive.""" async with streamablehttp_client(self.url) as ( read_stream, write_stream, get_session_id_callback, ): async with ClientSession( read_stream=read_stream, write_stream=write_stream, message_handler=message_handler, ) as session: await session.initialize() messages_obj = [Message(**message) for message in messages] request = TranslateRequest(messages=messages_obj, source_language="", target_language="") if self.stream: # For streaming, we'll need to handle the response differently # This will yield chunks as they arrive async for chunk in self._translate_stream(session, request): yield chunk else: # The messages should already have source and target languages, so omit to specify them again response = await session.call_tool( "plamo-translate", arguments={ "request": request, "stream": False, }, ) # Extract text from response content if response.content and len(response.content) > 0: content = response.content[0] if isinstance(content, TextContent): yield content.text else: raise ValueError(f"Unexpected content type: {type(content)}") else: raise ValueError("Empty response from translation tool") async def _translate_stream(self, session: ClientSession, request: TranslateRequest): """Handle streaming translation responses.""" # Use a queue to pass messages from progress_handler to the generator message_queue: asyncio.Queue[str] = asyncio.Queue() call_complete = asyncio.Event() async def progress_handler(progress: float, total: float | None, message: str | None) -> None: """Handle progress updates which might contain partial translations.""" 
if message: await message_queue.put(message) async def call_tool_wrapper(): """Wrapper to call the tool and signal completion""" try: response = await session.call_tool( "plamo-translate", arguments={ "request": request, "stream": True, }, progress_callback=progress_handler, ) # Put the final response in the queue if needed if response.content and len(response.content) > 0: content = response.content[0] if isinstance(content, TextContent): await message_queue.put(content.text) finally: call_complete.set() # Start the tool call in the background asyncio.create_task(call_tool_wrapper()) # Yield messages as they arrive chunks = [] while not call_complete.is_set() or not message_queue.empty(): try: message = await asyncio.wait_for(message_queue.get(), timeout=0.1) chunks.append(message) yield message except asyncio.TimeoutError: # No message available, continue waiting continue ================================================ FILE: src/plamo_translate/main.py ================================================ #!/usr/bin/env python3 import argparse import asyncio import atexit import json import logging import multiprocessing import os import readline import signal import subprocess import sys import time from pathlib import Path from typing import Dict, List from plamo_translate import __version__ from plamo_translate.clients import translate from plamo_translate.servers.utils import ( PLAMO_TRANSLATE_CLI_REPETITION_CONTEXT_SIZE, PLAMO_TRANSLATE_CLI_REPETITION_PENALTY, SUPPORTED_LANGUAGES, update_config, verify_mcp_server_ready, ) os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "1" logger = logging.getLogger(__name__) def start_mcp_server(backend_type: str, log_level: str, show_progress: bool = False) -> None: # To avoid showing warnings related to resource_tracker signal.signal(signal.SIGTERM, lambda _signal_number, _frame: exit(0)) if os.environ.get("PLAMO_TRANSLATE_CLI_USE_MOCK_SERVER") == "1": from plamo_translate.servers.mock import server as mock_server server = 
def check_server_running() -> bool:
    """Return True when a reachable MCP server exposes the 'plamo-translate' tool."""
    config = update_config()
    if "port" not in config:
        return False
    port = config["port"]
    tools = asyncio.run(verify_mcp_server_ready(port))
    # The server is only "running" for our purposes if our tool is registered.
    return "plamo-translate" in tools


def wait_for_server_ready(timeout: float | None = None) -> None:
    """Block until the MCP server answers with the 'plamo-translate' tool.

    Args:
        timeout: Maximum number of seconds to wait. ``None`` (the default,
            matching the historical behavior) waits indefinitely.

    Raises:
        TimeoutError: If the server is not ready within ``timeout`` seconds.
    """
    deadline = None if timeout is None else time.monotonic() + timeout
    while not check_server_running():
        if deadline is not None and time.monotonic() > deadline:
            raise TimeoutError("Timed out waiting for the MCP server to become ready.")
        time.sleep(0.1)


async def print_translation(
    client: translate.MCPClient, messages: List[Dict[str, str]], stream: bool
) -> List[Dict[str, str]]:
    """Print translation output as it arrives and return the updated history.

    In streaming mode each chunk is also appended to the last message so the
    conversation history stays usable for follow-up turns in interactive mode.
    """
    async for result in client.translate(messages):
        if not stream:
            print(result, end="", flush=True)
        else:
            messages[-1]["content"] += result
            print(result, end="", flush=True)
    return messages
def main() -> None:
    """CLI entry point: parse arguments and dispatch to the translate,
    `server`, or `show-claude-config` command."""
    global_parser = argparse.ArgumentParser(add_help=False)
    global_parser.add_argument(
        "--version",
        "-v",
        action="version",
        version="%(prog)s {version}".format(version=__version__),
        help="Show program's version number and exit.",
    )
    # Add arguments for the default command (translate).
    # These will be used if no subcommand is provided.
    global_parser.add_argument("--input", type=str, help="Input text to translate", default=None)
    global_parser.add_argument(
        "--from",
        type=str,
        help="Input language for translation",
        default="English|Japanese",
        choices=SUPPORTED_LANGUAGES,
        dest="from_lang",
    )
    global_parser.add_argument(
        "--to",
        type=str,
        help="Output language for translation",
        default="",
        choices=SUPPORTED_LANGUAGES + [""],
    )
    global_parser.add_argument(
        "--backend-type",
        type=str,
        default="mlx",
        choices=["mlx"],
        help="Server backend to use (default: mlx on macOS, transformers elsewhere)",
    )
    global_parser.add_argument(
        "--precision",
        "-p",
        type=str,
        default="4bit",
        choices=["4bit", "8bit", "bf16"],
        help="Model parameter's precision to use (default: 4bit)",
    )
    global_parser.add_argument(
        "--no-stream",
        action="store_true",
        help="Enable batch processing mode for translation",
    )
    global_parser.add_argument(
        "--interactive",
        "-i",
        action="store_true",
        help="Enable interactive mode for translation",
    )

    # Create the parser for the subcommands; they inherit the global options.
    parser = argparse.ArgumentParser(description="PLaMo Translate CLI", parents=[global_parser])
    subparsers = parser.add_subparsers(dest="command", help="Command to run")
    _ = subparsers.add_parser("server", help="Run the server", parents=[global_parser])
    _ = subparsers.add_parser(
        "show-claude-config", help="Show the MCP server config for Claude Desktop", parents=[global_parser]
    )
    args = parser.parse_args()

    if hasattr(args, "version") and args.version:
        # The version action should have already exited, but as a fallback:
        sys.exit(0)

    # The repetition penalty and its context size must be configured together.
    if PLAMO_TRANSLATE_CLI_REPETITION_PENALTY is not None and PLAMO_TRANSLATE_CLI_REPETITION_CONTEXT_SIZE is None:
        raise ValueError(
            "If PLAMO_TRANSLATE_CLI_REPETITION_PENALTY is set, "
            "PLAMO_TRANSLATE_CLI_REPETITION_CONTEXT_SIZE must also be set."
        )
    elif PLAMO_TRANSLATE_CLI_REPETITION_PENALTY is None and PLAMO_TRANSLATE_CLI_REPETITION_CONTEXT_SIZE is not None:
        raise ValueError(
            "If PLAMO_TRANSLATE_CLI_REPETITION_CONTEXT_SIZE is set, "
            "PLAMO_TRANSLATE_CLI_REPETITION_PENALTY must also be set."
        )

    # Interactive mode kicks in when no --input was given and stdin is a TTY.
    if args.input is None and (args.interactive or sys.stdin.isatty()):
        args.interactive = True
        logging.basicConfig(level=logging.ERROR)
        os.environ["PLAMO_TRANSLATE_CLI_SERVER_LOG_LEVEL"] = "CRITICAL"
    else:
        args.interactive = False
        logging.basicConfig(level=logging.CRITICAL)
        os.environ["PLAMO_TRANSLATE_CLI_SERVER_LOG_LEVEL"] = "CRITICAL"

    args.stream = not args.no_stream

    if args.backend_type == "mlx":
        # Map the requested precision onto the published model variants.
        if args.precision == "4bit":
            model_name = "mlx-community/plamo-2-translate"
        elif args.precision == "8bit":
            model_name = "mlx-community/plamo-2-translate-8bit"
        elif args.precision == "bf16":
            model_name = "mlx-community/plamo-2-translate-bf16"
        update_config(backend_type=args.backend_type, model_name=model_name)
        if "PLAMO_TRANSLATE_CLI_MODEL_NAME" not in os.environ:
            os.environ["PLAMO_TRANSLATE_CLI_MODEL_NAME"] = model_name

    if args.command == "server":
        logging.basicConfig(level=logging.INFO)
        if check_server_running():
            print("MCP server is already running. Skipping server start.")
            sys.exit(0)
        # Keep restarting the server on unexpected errors until interrupted.
        while not check_server_running():
            try:
                logger.info("Starting server...")
                start_mcp_server(args.backend_type, "INFO", True)
                logger.info("The server is running (Ctrl+C to stop)")
            except KeyboardInterrupt:
                logger.error("\nCtrl+C received. Exiting.")
                break
            except EOFError:
                logger.error("\nCtrl+D received. Exiting.")
                break
            except Exception as e:
                logger.error(f"An error occurred: {str(e)}: {e}. Restarting server...")
    elif args.command == "show-claude-config":
        # BUG FIX: check=True made subprocess.run raise CalledProcessError on a
        # nonzero exit, so the returncode branch below was unreachable and a
        # missing npx crashed with a traceback instead of the friendly message.
        cmd = subprocess.run(["which", "npx"], check=False, capture_output=True, text=True)
        if cmd.returncode != 0:
            logger.error("npx command not found. Please install Node.js and npx.")
            sys.exit(1)
        npx_path = cmd.stdout.strip()
        config = update_config()
        print(
            json.dumps(
                {
                    "mcpServers": {
                        "plamo-translate": {
                            "command": npx_path,
                            "args": [
                                "-y",
                                "mcp-remote",
                                f"http://localhost:{config['port']}/mcp",
                                "--allow-http",
                                "--transport",
                                "http-only",
                            ],
                            "env": {"PATH": os.environ["PATH"]},
                        }
                    }
                },
                indent=2,
                ensure_ascii=False,
            )
        )
    else:
        run_translate(args)
name="plamo-translate", instructions=INSTRUCTION, log_level=log_level, stateless_http=False, host="127.0.0.1", port=find_free_port(), lifespan=self.lifespan, ) # Set environment variables to switch if it shows progress bars for loading models or not self.show_progress = show_progress model, tokenizer, sampler, logits_processors = self.load_model() self.model = model self.tokenizer = tokenizer self.sampler = sampler self.logits_processors = logits_processors self.add_tool( fn=self.translate, name="plamo-translate", description=INSTRUCTION, ) @contextlib.asynccontextmanager async def lifespan(self, server: FastMCP): try: async with contextlib.AsyncExitStack() as stack: # Pre-processings before a request is processed yield # Post-processings after a request is processed except Exception as e: logger.error(f"Error during lifespan: {str(e)} {e}") await stack.aclose() def load_model(self) -> Tuple[nn.Module, TokenizerWrapper, Callable[..., mx.array], list]: """Load the MLX model if not already loaded.""" try: ref = importlib.resources.files("plamo_translate.assets").joinpath("chat_template.jinja2") chat_template = ref.read_text(encoding="utf-8") except FileNotFoundError: raise RuntimeError("chat_template.jinja2 not found in assets directory") model_name = os.getenv("PLAMO_TRANSLATE_CLI_MODEL_NAME", PLAMO_TRANSLATE_CLI_MODEL_NAME) update_config(model_name=model_name) # Reload mlx_lm.utils here to refleect the environment variables for progress bars if self.show_progress: envs = os.environ.copy() envs["HF_HUB_DISABLE_PROGRESS_BARS"] = "0" subprocess.run( [ sys.executable, *build_optional_gpu_dependency_warning_options(), "-m", "mlx_lm", "generate", "--model", model_name, "--max-tokens", "1", "--trust-remote-code", ], env=envs, stdout=subprocess.DEVNULL, ) with suppress_optional_gpu_dependency_warnings(): model, tokenizer = load( model_name, model_config={"trust_remote_code": True}, tokenizer_config={ "trust_remote_code": True, "chat_template": chat_template, }, ) 
tokenizer.add_eos_token("<|plamo:op|>") sampler = make_sampler( temp=float(PLAMO_TRANSLATE_CLI_TEMP), top_p=float(PLAMO_TRANSLATE_CLI_TOP_P), top_k=int(PLAMO_TRANSLATE_CLI_TOP_K), ) logits_processors = make_logits_processors( repetition_penalty=( float(PLAMO_TRANSLATE_CLI_REPETITION_PENALTY) if PLAMO_TRANSLATE_CLI_REPETITION_PENALTY is not None else None ), repetition_context_size=( int(PLAMO_TRANSLATE_CLI_REPETITION_CONTEXT_SIZE) if PLAMO_TRANSLATE_CLI_REPETITION_CONTEXT_SIZE is not None else None ), ) return model, tokenizer, sampler, logits_processors async def translate(self, request: TranslateRequest, stream: bool, context: Context) -> str: """Run the translation tool""" logger.info(f"Received translation request: {context.request_id}") try: messages = construct_llm_input(request) prompt = self.tokenizer.apply_chat_template(messages, add_generation_prompt=False) # type:ignore[call-arg] # Generate translation translation = "" segments_count = 0 for segment in stream_generate( model=self.model, tokenizer=self.tokenizer, prompt=prompt, sampler=self.sampler, logits_processors=self.logits_processors, max_tokens=int(PLAMO_MAX_TOKENS), ): translation += segment.text segments_count += 1 if stream: # Send progress notification with the new segment await context.report_progress( progress=segments_count, total=None, # We don't know the total in advance message=segment.text, # Send the segment as the message ) # Small delay to ensure progress is sent await asyncio.sleep(0) if not stream: return translation else: return "" except Exception as e: logger.error(f"Translation error: {str(e)}") raise e ================================================ FILE: src/plamo_translate/servers/mock/__init__.py ================================================ from .server import PLaMoTranslateServer __all__ = ["PLaMoTranslateServer"] ================================================ FILE: src/plamo_translate/servers/mock/server.py ================================================ import 
asyncio import logging from typing import Dict from mcp.server.fastmcp import Context, FastMCP from plamo_translate.servers.utils import INSTRUCTION, TranslateRequest, find_free_port, update_config logger = logging.getLogger(__name__) MOCK_TRANSLATIONS: Dict[str, str] = { "Proud, but humble": "誇り高いが、謙虚です。", "Boldly do what no one has done before": "誰もしたことがないことを大胆にやりなさい。", } def _extract_input_text(request: TranslateRequest) -> str: for message in reversed(request.messages): if not message.content.startswith("input"): continue _, _, input_text = message.content.partition("\n") return input_text.strip().lstrip(">").strip() return request.messages[-1].content.strip().lstrip(">").strip() class PLaMoTranslateServer(FastMCP): """Lightweight MCP server used by the test suite.""" def __init__(self, log_level: str, show_progress: bool = False) -> None: super().__init__( name="plamo-translate", instructions=INSTRUCTION, log_level=log_level, stateless_http=False, host="127.0.0.1", port=find_free_port(), ) update_config(model_name="mock") self.show_progress = show_progress self.add_tool( fn=self.translate, name="plamo-translate", description=INSTRUCTION, ) async def translate(self, request: TranslateRequest, stream: bool, context: Context) -> str: input_text = _extract_input_text(request) translation = next( (candidate for source, candidate in MOCK_TRANSLATIONS.items() if source in input_text), f"[mock translation] {input_text}", ) if not stream: return translation for index, chunk in enumerate([translation], start=1): await context.report_progress(progress=index, total=1, message=chunk) await asyncio.sleep(0) return "" ================================================ FILE: src/plamo_translate/servers/utils.py ================================================ import asyncio import json import logging import os import socket import textwrap from contextlib import closing from tempfile import NamedTemporaryFile from pathlib import Path from typing import Any, Dict, List, Optional 
from mcp import ClientSession from mcp.client.streamable_http import streamablehttp_client from pydantic import BaseModel, Field logger = logging.getLogger(__name__) SUPPORTED_LANGUAGES = [ "Japanese", "Japanese(easy)", "English", "Chinese", "Taiwanese", "Korean", "Arabic", "Italian", "Indonesian", "Dutch", "Spanish", "Thai", "German", "French", "Vietnamese", "Russian", "English|Japanese", ] PLAMO_TRANSLATE_CLI_MODEL_NAME = os.environ.get("PLAMO_TRANSLATE_CLI_MODEL_NAME", "mlx-community/plamo-2-translate") PLAMO_TRANSLATE_CLI_SERVER_START_PORT = int(os.environ.get("PLAMO_TRANSLATE_CLI_SERVER_START_PORT", 30000)) PLAMO_TRANSLATE_CLI_SERVER_END_PORT = int(os.environ.get("PLAMO_TRANSLATE_CLI_SERVER_END_PORT", 30099)) PLAMO_TRANSLATE_CLI_SERVER_LOG_LEVEL = os.environ.get("PLAMO_TRANSLATE_CLI_SERVER_LOG_LEVEL", "INFO") PLAMO_TRANSLATE_CLI_TEMP = os.environ.get("PLAMO_TRANSLATE_CLI_TEMP", "0.0") PLAMO_TRANSLATE_CLI_TOP_P = os.environ.get("PLAMO_TRANSLATE_CLI_TOP_P", "0.98") PLAMO_TRANSLATE_CLI_TOP_K = os.environ.get("PLAMO_TRANSLATE_CLI_TOP_K", "0") PLAMO_TRANSLATE_CLI_REPETITION_PENALTY = os.environ.get("PLAMO_TRANSLATE_CLI_REPETITION_PENALTY", None) PLAMO_TRANSLATE_CLI_REPETITION_CONTEXT_SIZE = os.environ.get("PLAMO_TRANSLATE_CLI_REPETITION_CONTEXT_SIZE", None) PLAMO_MAX_TOKENS = os.environ.get("PLAMO_MAX_TOKENS", "32768") SUPPORTED_LANGUAGES_LIST_STR = "\n-".join(SUPPORTED_LANGUAGES) INSTRUCTION = textwrap.dedent( f"""Use the `plamo-translate` tool to translate text between multiple languages. Supported languages include: - {SUPPORTED_LANGUAGES_LIST_STR} Use the tool by specifying the text and the source and target languages. 
""" ) async def verify_mcp_server_ready(port: int) -> List[str]: """Verify if the MCP server is ready to accept connections.""" try: url = f"http://127.0.0.1:{port}/mcp" async with streamablehttp_client(url) as ( read_stream, write_stream, get_session_id_callback, ): async with ClientSession( read_stream=read_stream, write_stream=write_stream, ) as session: await session.initialize() tools = await session.list_tools() return [tool.name for tool in tools.tools] except Exception: return [] def find_free_port( start_port: int = PLAMO_TRANSLATE_CLI_SERVER_START_PORT, end_port: int = PLAMO_TRANSLATE_CLI_SERVER_END_PORT, ) -> int: """ Find a port in the range [start_port, end_port]. """ config = update_config() # Phase 1: Check for existing MCP server with 'plamo-translate' tool if "port" in config: port = config["port"] try: tools = asyncio.run(verify_mcp_server_ready(port)) except Exception as e: logger.info(f"Failed to connect to MCP server on port {port}: {e}") tools = [] if "plamo-translate" in tools: logger.info(f"Found existing MCP server with 'plamo-translate' tool on port {port}.") return port previous_port = port else: previous_port = None # Phase 2: If no suitable MCP server found, find any free port in the range for port in range(start_port, end_port + 1): with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock: sock.settimeout(0.1) # Short timeout for connection attempt try: result = sock.connect_ex(("127.0.0.1", port)) if result != 0: # If connection failed (errno != 0), port is likely free logger.info(f"Found free port: {port}") if previous_port is not None and previous_port != port: logger.info(f"Updating MCP server port from {previous_port} to {port}.") update_config(port=port) return port except Exception: # This can happen if e.g. 
sock.connect_ex itself has issues, or port is restricted pass # Try next port raise RuntimeError( "Could not find a suitable MCP server with 'plamo-translate' tool " f"or a free port in the range {start_port}-{end_port}." ) def update_config(**kwargs) -> Dict[str, Any]: tmp_dir = os.environ.get("TMPDIR", None) if tmp_dir is None: raise ValueError("TMPDIR environment variable is not set. Please set it to a valid directory.") tmp_config_path = Path(tmp_dir) / "plamo-translate-config.json" if not tmp_config_path.exists(): if not kwargs: return {} config = kwargs _write_config(tmp_config_path, config, indent=4) logger.info( f"Created new temporary config file at {tmp_config_path} with initial values: " f"{json.dumps(config, indent=4, ensure_ascii=False)}" ) return config with tmp_config_path.open("r") as f: try: config = json.load(f) except json.JSONDecodeError: logger.warning(f"Config file {tmp_config_path} is corrupted. Recreating it.") config = {} if not kwargs: return config for key, value in kwargs.items(): config[key] = value _write_config(tmp_config_path, config) return config def _write_config(path: Path, config: Dict[str, Any], *, indent: int | None = None) -> None: path.parent.mkdir(parents=True, exist_ok=True) with NamedTemporaryFile("w", dir=path.parent, delete=False, encoding="utf-8") as tmp_file: json.dump(config, tmp_file, indent=indent) tmp_file.flush() os.fsync(tmp_file.fileno()) tmp_path = Path(tmp_file.name) tmp_path.replace(path) class Message(BaseModel): """Model for messages in translation request""" role: str = Field(..., description="Role of the message sender (e.g., 'user', 'assistant')") content: str = Field(..., description="Content of the message") class TranslateRequest(BaseModel): """Request model for translation""" messages: List[Message] = Field(..., description="List of messages for translation") source_language: Optional[str] = Field( "", description=( "Source language that is one of the followings: " f"{', 
# Warning messages emitted by remote model code when optional GPU-only
# dependencies are absent; these are expected on Apple Silicon.
OPTIONAL_GPU_DEPENDENCY_WARNING_MESSAGES = (
    "mamba_ssm could not be imported",
    "causal_conv1d could not be imported",
)


@contextlib.contextmanager
def suppress_optional_gpu_dependency_warnings() -> Iterator[None]:
    """Temporarily silence the known optional-GPU-dependency warnings.

    Only exact matches of the known messages (UserWarning) are suppressed;
    all other warnings pass through unchanged.
    """
    with warnings.catch_warnings():
        for known_message in OPTIONAL_GPU_DEPENDENCY_WARNING_MESSAGES:
            warnings.filterwarnings(
                action="ignore",
                message=rf"^{re.escape(known_message)}$",
                category=UserWarning,
            )
        yield


def build_optional_gpu_dependency_warning_options() -> list[str]:
    """Return ``python -W`` flag pairs that suppress the known warnings."""
    return [
        flag
        for known_message in OPTIONAL_GPU_DEPENDENCY_WARNING_MESSAGES
        for flag in ("-W", f"ignore:{known_message}:UserWarning")
    ]
== initial_config assert config_path.read_text() == initial_contents, "Read-only access should not rewrite the config file" def stop_subprocess(process: subprocess.Popen[str] | None) -> None: if process is None: return process.terminate() try: process.wait(timeout=5) except subprocess.TimeoutExpired: process.kill() process.wait(timeout=5) def stop_multiprocess(process: multiprocessing.Process | None) -> None: if process is None: return process.terminate() process.join(timeout=5) if process.is_alive(): process.kill() process.join(timeout=5) def test_plamo_translate_without_server(): text_to_translate = "Proud, but humble" command = ["plamo-translate", "--from", "English", "--to", "Japanese", "--input", text_to_translate] result = subprocess.run(command, capture_output=True, text=True, timeout=CLI_TIMEOUT_SECONDS) assert result.returncode == 0 assert "誇り高" in result.stdout and "謙虚" in result.stdout def test_plamo_translate_server_simple_use(): first_process = None try: command = ["plamo-translate", "server"] first_process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) wait_for_server_ready() config = update_config() print(f"Server started with config: {config}") assert "port" in config, "Server configuration should include a port" port = config["port"] assert port == PLAMO_TRANSLATE_CLI_SERVER_START_PORT, f"Expected server port to be 8000, got {port}" text_to_translate = "Proud, but humble" result = subprocess.run( ["plamo-translate", "--input", text_to_translate, "--from", "English", "--to", "Japanese"], capture_output=True, text=True, timeout=CLI_TIMEOUT_SECONDS, ) assert "誇り高い" in result.stdout and "謙虚" in result.stdout result = subprocess.run( ["plamo-translate", "--from", "English", "--to", "Japanese"], input=text_to_translate, capture_output=True, text=True, timeout=CLI_TIMEOUT_SECONDS, ) assert "誇り高い" in result.stdout and "謙虚" in result.stdout finally: stop_subprocess(first_process) def 
test_plamo_translate_server_already_running(): first_process = None second_process = None try: command = ["plamo-translate", "server"] first_process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) print("Starting first plamo-translate server process...") wait_for_server_ready() print("First server process started successfully.") # If the server is already running, the further call of `plamo-translate server` should not start a new server. second_process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) stdout, _ = second_process.communicate(timeout=CLI_TIMEOUT_SECONDS) print(stdout.strip()) assert "MCP server is already running" in stdout config = update_config() print(f"Server started with config: {config}") assert "port" in config, "Server configuration should include a port" port = config["port"] assert port == PLAMO_TRANSLATE_CLI_SERVER_START_PORT, f"Expected server port to be 8000, got {port}" finally: stop_subprocess(first_process) stop_subprocess(second_process) def start_http_server(): port = PLAMO_TRANSLATE_CLI_SERVER_START_PORT handler = http.server.SimpleHTTPRequestHandler with socketserver.TCPServer(("127.0.0.1", port), handler) as httpd: httpd.serve_forever() def test_plamo_translate_server_find_new_port(): http_server_process = None mcp_server_process = None try: http_server_process = multiprocessing.Process(target=start_http_server, daemon=True) http_server_process.start() print(f"HTTP server started on port {PLAMO_TRANSLATE_CLI_SERVER_START_PORT}") wait_for_port_in_use(PLAMO_TRANSLATE_CLI_SERVER_START_PORT) # The default port is used by the HTTP server, so the MCP server should use a different port command = ["plamo-translate", "server"] mcp_server_process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) print("Starting plamo-translate server...") wait_for_server_ready() stop_subprocess(mcp_server_process) mcp_server_process = None 
config = update_config() print(f"Server started with config: {config}") assert "port" in config, "Server configuration should include a port" port = config["port"] assert port == PLAMO_TRANSLATE_CLI_SERVER_START_PORT + 1, ( f"Expected server port to be {PLAMO_TRANSLATE_CLI_SERVER_START_PORT + 1}, got {port}" ) finally: stop_multiprocess(http_server_process) stop_subprocess(mcp_server_process) def test_plamo_translate_server_interactive(): mcp_server_process = None client_process = None try: command = ["plamo-translate", "server"] mcp_server_process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) wait_for_server_ready() config = update_config() print(f"Server started with config: {config}") assert "port" in config, "Server configuration should include a port" port = config["port"] assert port == PLAMO_TRANSLATE_CLI_SERVER_START_PORT, ( f"Expected server port to be {PLAMO_TRANSLATE_CLI_SERVER_START_PORT}, got {port}" ) client_command = ["plamo-translate", "-i", "--from", "English", "--to", "Japanese"] client_process = subprocess.Popen( client_command, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, ) all_inputs = "\n".join(["Proud, but humble", "Boldly do what no one has done before"]) + "\n" stdout, stderr = client_process.communicate(input=all_inputs, timeout=CLI_TIMEOUT_SECONDS) assert "誇り高" in stdout and "謙虚" in stdout assert "大胆に" in stdout finally: stop_subprocess(mcp_server_process) stop_subprocess(client_process) ================================================ FILE: tests/test_cli_integration.py ================================================ import os import subprocess import time import pytest from plamo_translate.main import check_server_running from plamo_translate.servers.utils import PLAMO_TRANSLATE_CLI_SERVER_START_PORT, update_config CLI_TIMEOUT_SECONDS = int(os.environ.get("PLAMO_TRANSLATE_CLI_TEST_TIMEOUT_SECONDS", "900")) SERVER_STARTUP_TIMEOUT_SECONDS = 
int(os.environ.get("PLAMO_TRANSLATE_CLI_TEST_SERVER_STARTUP_TIMEOUT_SECONDS", "900")) @pytest.fixture(autouse=True) def integration_test_environment(monkeypatch, tmp_path): monkeypatch.setenv( "PLAMO_TRANSLATE_CLI_USE_MOCK_SERVER", os.environ.get("PLAMO_TRANSLATE_CLI_USE_MOCK_SERVER", "0"), ) monkeypatch.setenv("TMPDIR", str(tmp_path)) def wait_for_server_ready(timeout: int = SERVER_STARTUP_TIMEOUT_SECONDS) -> None: deadline = time.monotonic() + timeout while time.monotonic() < deadline: if check_server_running(): return time.sleep(0.5) raise AssertionError("Timed out waiting for the MCP server to become ready.") def stop_subprocess(process: subprocess.Popen[str] | None) -> None: if process is None: return process.terminate() try: process.wait(timeout=5) except subprocess.TimeoutExpired: process.kill() process.wait(timeout=5) def test_plamo_translate_server_roundtrip_with_real_model(): server_process = None try: server_process = subprocess.Popen( ["plamo-translate", "server"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, ) wait_for_server_ready() config = update_config() assert config.get("port") == PLAMO_TRANSLATE_CLI_SERVER_START_PORT text_to_translate = "Proud, but humble" result = subprocess.run( ["plamo-translate", "--input", text_to_translate, "--from", "English", "--to", "Japanese"], capture_output=True, text=True, timeout=CLI_TIMEOUT_SECONDS, ) assert result.returncode == 0 assert "誇り高" in result.stdout and "謙虚" in result.stdout result = subprocess.run( ["plamo-translate", "--from", "English", "--to", "Japanese"], input=text_to_translate, capture_output=True, text=True, timeout=CLI_TIMEOUT_SECONDS, ) assert result.returncode == 0 assert "誇り高" in result.stdout and "謙虚" in result.stdout finally: stop_subprocess(server_process) ================================================ FILE: tests/test_warning_filters.py ================================================ import warnings from plamo_translate.servers.warnings import ( 
OPTIONAL_GPU_DEPENDENCY_WARNING_MESSAGES, build_optional_gpu_dependency_warning_options, suppress_optional_gpu_dependency_warnings, ) def test_build_optional_gpu_dependency_warning_options(): assert build_optional_gpu_dependency_warning_options() == [ "-W", "ignore:mamba_ssm could not be imported:UserWarning", "-W", "ignore:causal_conv1d could not be imported:UserWarning", ] def test_suppress_optional_gpu_dependency_warnings_only_hides_known_messages(): with warnings.catch_warnings(record=True) as captured: warnings.simplefilter("always") with suppress_optional_gpu_dependency_warnings(): for message in OPTIONAL_GPU_DEPENDENCY_WARNING_MESSAGES: warnings.warn(message, UserWarning, stacklevel=1) warnings.warn("unexpected warning", UserWarning, stacklevel=1) assert [str(item.message) for item in captured] == ["unexpected warning"]