Repository: HKUDS/AnyTool
Branch: main
Commit: 506430fec133
Files: 132
Total size: 1.1 MB
Directory structure:
gitextract_sqfmt1l8/
├── .gitignore
├── COMMUNICATION.md
├── LICENSE
├── README.md
├── anytool/
│ ├── __init__.py
│ ├── __main__.py
│ ├── agents/
│ │ ├── __init__.py
│ │ ├── base.py
│ │ └── grounding_agent.py
│ ├── config/
│ │ ├── __init__.py
│ │ ├── config_agents.json
│ │ ├── config_dev.json.example
│ │ ├── config_grounding.json
│ │ ├── config_mcp.json.example
│ │ ├── config_security.json
│ │ ├── constants.py
│ │ ├── grounding.py
│ │ ├── loader.py
│ │ └── utils.py
│ ├── grounding/
│ │ ├── backends/
│ │ │ ├── __init__.py
│ │ │ ├── gui/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── anthropic_client.py
│ │ │ │ ├── anthropic_utils.py
│ │ │ │ ├── config.py
│ │ │ │ ├── provider.py
│ │ │ │ ├── session.py
│ │ │ │ ├── tool.py
│ │ │ │ └── transport/
│ │ │ │ ├── actions.py
│ │ │ │ ├── connector.py
│ │ │ │ └── local_connector.py
│ │ │ ├── mcp/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── client.py
│ │ │ │ ├── config.py
│ │ │ │ ├── installer.py
│ │ │ │ ├── provider.py
│ │ │ │ ├── session.py
│ │ │ │ ├── tool_cache.py
│ │ │ │ ├── tool_converter.py
│ │ │ │ └── transport/
│ │ │ │ ├── connectors/
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── base.py
│ │ │ │ │ ├── http.py
│ │ │ │ │ ├── sandbox.py
│ │ │ │ │ ├── stdio.py
│ │ │ │ │ ├── utils.py
│ │ │ │ │ └── websocket.py
│ │ │ │ └── task_managers/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── sse.py
│ │ │ │ ├── stdio.py
│ │ │ │ ├── streamable_http.py
│ │ │ │ └── websocket.py
│ │ │ ├── shell/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── provider.py
│ │ │ │ ├── session.py
│ │ │ │ └── transport/
│ │ │ │ ├── connector.py
│ │ │ │ └── local_connector.py
│ │ │ └── web/
│ │ │ ├── __init__.py
│ │ │ ├── provider.py
│ │ │ └── session.py
│ │ └── core/
│ │ ├── exceptions.py
│ │ ├── grounding_client.py
│ │ ├── provider.py
│ │ ├── quality/
│ │ │ ├── __init__.py
│ │ │ ├── manager.py
│ │ │ ├── store.py
│ │ │ └── types.py
│ │ ├── search_tools.py
│ │ ├── security/
│ │ │ ├── __init__.py
│ │ │ ├── e2b_sandbox.py
│ │ │ ├── policies.py
│ │ │ └── sandbox.py
│ │ ├── session.py
│ │ ├── system/
│ │ │ ├── __init__.py
│ │ │ ├── provider.py
│ │ │ └── tool.py
│ │ ├── tool/
│ │ │ ├── __init__.py
│ │ │ ├── base.py
│ │ │ ├── local_tool.py
│ │ │ └── remote_tool.py
│ │ ├── transport/
│ │ │ ├── connectors/
│ │ │ │ ├── __init__.py
│ │ │ │ ├── aiohttp_connector.py
│ │ │ │ └── base.py
│ │ │ └── task_managers/
│ │ │ ├── __init__.py
│ │ │ ├── aiohttp_connection_manager.py
│ │ │ ├── async_ctx.py
│ │ │ ├── base.py
│ │ │ ├── noop.py
│ │ │ └── placeholder.py
│ │ └── types.py
│ ├── llm/
│ │ ├── __init__.py
│ │ └── client.py
│ ├── local_server/
│ │ ├── README.md
│ │ ├── __init__.py
│ │ ├── config.json
│ │ ├── feature_checker.py
│ │ ├── health_checker.py
│ │ ├── main.py
│ │ ├── platform_adapters/
│ │ │ ├── __init__.py
│ │ │ ├── linux_adapter.py
│ │ │ ├── macos_adapter.py
│ │ │ ├── pyxcursor.py
│ │ │ └── windows_adapter.py
│ │ ├── requirements.txt
│ │ ├── run.sh
│ │ └── utils/
│ │ ├── __init__.py
│ │ ├── accessibility.py
│ │ └── screenshot.py
│ ├── platform/
│ │ ├── __init__.py
│ │ ├── config.py
│ │ ├── recording.py
│ │ ├── screenshot.py
│ │ └── system_info.py
│ ├── prompts/
│ │ ├── __init__.py
│ │ └── grounding_agent_prompts.py
│ ├── recording/
│ │ ├── __init__.py
│ │ ├── action_recorder.py
│ │ ├── manager.py
│ │ ├── recorder.py
│ │ ├── utils.py
│ │ ├── video.py
│ │ └── viewer.py
│ ├── tool_layer.py
│ └── utils/
│ ├── cli_display.py
│ ├── display.py
│ ├── logging.py
│ ├── telemetry/
│ │ ├── __init__.py
│ │ ├── events.py
│ │ ├── telemetry.py
│ │ └── utils.py
│ ├── ui.py
│ └── ui_integration.py
├── pyproject.toml
└── requirements.txt
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# OS files
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db
Desktop.ini
# IDE files
.vscode/
.idea/
.pytest_cache/
# Distribution / packaging
dist/
build/
*.egg-info/
*.egg
# Environment files
.env
# MCP files
anytool/config/config_mcp.json
# Logs
logs/
# Embedding cache
.anytool/
embedding_cache/
tool_quality/
# MCP tool cache
mcp_tool_cache.json
mcp_tool_cache_sanitized.json
# Config files
anytool/config/config_dev.json
# LLM keys
anytool/llm/remote_client/
# Local server temp files
anytool/local_server/temp/
examples/
================================================
FILE: COMMUNICATION.md
================================================
We provide QR codes for joining the HKUDS discussion groups on **WeChat** and **Feishu**.
You can join by scanning the QR codes below:
================================================
FILE: LICENSE
================================================
MIT License
Copyright (c) 2025 HKUDS
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: README.md
================================================
❤️ Thanks for visiting ✨ AnyTool!
.*?)```",
re.S | re.I,
)
def __init__(
    self,
    session: "ShellSession",
    client_password: str = "",
    max_steps: int = 5,
    security_manager: SecurityPolicyManager = None,
    default_working_dir: str = None,
    default_env: dict = None,
    default_conda_env: str = None
):
    """Set up the shell agent together with its Python and Bash helper tools.

    Args:
        session: Shell session; its connector is what ultimately runs scripts.
        client_password: Password interpolated into the sudo hint of the system prompt.
        max_steps: Upper bound on the number of LLM/execute iterations per task.
        security_manager: Stored on the instance; not consulted by this constructor.
        default_working_dir: Working directory forwarded to both helper tools.
        default_env: Extra environment variables forwarded to both helper tools.
        default_conda_env: Conda environment name forwarded to both helper tools.
    """
    # Both helpers get the caller's raw values (default_env may still be None
    # for them); the agent itself keeps a normalised, never-None copy.
    helper_defaults = dict(
        default_working_dir=default_working_dir,
        default_env=default_env,
        default_conda_env=default_conda_env,
    )

    self._session = session
    self._llm = LLMClient()
    self.client_password = client_password
    self.max_steps = max_steps
    self._system_info = None  # filled in lazily by _get_system_info
    self.security_manager = security_manager
    self._default_working_dir = default_working_dir
    self._default_env = {} if not default_env else default_env
    self._default_conda_env = default_conda_env
    self._py_tool = PythonScriptTool(session, **helper_defaults)
    self._bash_tool = BashScriptTool(session, **helper_defaults)
    super().__init__()
async def _get_system_info(self):
"""
Get system information for shell agent.
First tries to get comprehensive info from local server's /platform endpoint.
Falls back to simple bash commands if that fails.
Returns:
Dict with at least 'platform' and 'username' keys
"""
if self._system_info is None:
try:
# Try to get system info from server via HTTP API
try:
from anytool.platform import SystemInfoClient
# Get base_url from connector
base_url = self._session.connector.base_url
# Create temporary client
async with SystemInfoClient(base_url=base_url, timeout=5) as client:
info = await client.get_system_info(use_cache=False)
if info:
# Use comprehensive info from server
self._system_info = {
"platform": info.get("system", "Linux"),
"username": info.get("username", "user"),
"machine": info.get("machine"),
"release": info.get("release"),
"full_info": info # Keep full info for reference
}
logger.debug(f"Got system info from server: {info.get('system')}")
return self._system_info
except ImportError:
logger.debug("SystemInfoClient not available, using bash commands")
# Fallback: use simple bash commands (original method)
platform_result = await self._session.connector.run_bash_script("uname -s", timeout=5)
username_result = await self._session.connector.run_bash_script("whoami", timeout=5)
platform = self._extract_output(platform_result).strip()
username = self._extract_output(username_result).strip()
self._system_info = {
"platform": platform,
"username": username
}
logger.debug(f"Got system info from bash: {platform}")
except Exception as e:
logger.warning(f"Failed to get system info: {e}, using defaults")
self._system_info = {"platform": "Linux", "username": "user"}
return self._system_info
async def _arun(self, task: str, timeout: int = 300):
    """Run ``task`` through an iterative "LLM writes code -> execute -> feedback" loop.

    Each step sends the conversation so far to the LLM, executes the first code
    block of the reply, and then either finishes (completion/failure marker or a
    repeated error) or appends the execution feedback and continues, up to
    ``self.max_steps`` steps.

    Args:
        task: Natural-language task description placed into the system prompt.
        timeout: Accepted for interface compatibility; not referenced in this body.

    Returns:
        ToolResult with SUCCESS or ERROR status; ``metadata["code_history"]``
        lists the code blocks that were executed.
    """
    from anytool.grounding.core.types import ToolResult, ToolStatus

    sys_info = await self._get_system_info()
    # record the code history
    code_history = []

    def _finish(status, message):
        # Every exit path returns the same metadata shape.
        return ToolResult(
            status=status,
            content=message,
            metadata={"tool": self._name, "code_history": code_history}
        )

    # Build environment context
    env_context = []
    if self._default_working_dir:
        env_context.append(f"Working Directory: {self._default_working_dir}")
    if self._default_conda_env:
        env_context.append(f"Conda Environment: {self._default_conda_env}")
    if self._default_env:
        # Only the first three variables are spelled out in the prompt.
        shown_pairs = list(self._default_env.items())[:3]
        env_vars = ", ".join([f"{k}={v}" for k, v in shown_pairs])
        if len(self._default_env) > 3:
            env_vars += f", ... (+{len(self._default_env)-3} more)"
        env_context.append(f"Custom Environment Variables: {env_vars}")
    env_section = "\n".join([f"# {ctx}" for ctx in env_context]) if env_context else ""

    system_prompt = f"""You are an expert system administrator and programmer focused on executing tasks efficiently.
# System: {sys_info["platform"]}, User: {sys_info["username"]}
{env_section}
# Your task: {task}
# IMPORTANT: You MUST provide exactly ONE code block in EVERY response
# Either ```bash or ```python - never respond without code
# Available actions:
1. Execute bash commands: ```bash ```
2. Write Python code: ```python ```
# Rules:
- ALWAYS include a code block in your response
- Write EXACTLY ONE code block per response
- If you need to understand the current environment, start with bash commands like: pwd, ls, ps, df, etc.
- If you get errors, analyze and fix them in the next iteration
- For sudo: use 'echo {self.client_password} | sudo -S '
- The environment (working directory, conda env) is managed automatically
# CRITICAL: Avoid quote escaping errors in bash:
- For complex string operations (JSON, multi-line text, special chars): ALWAYS use Python with heredoc
- Good: ```python ```
- Bad: bash commands with nested quotes like: echo "$(cat 'file' | grep "pattern")"
- When reading/writing files with complex content: prefer Python over bash
- When processing JSON: ALWAYS use Python's json module, never bash string manipulation
# Before executing, check if task output already exists:
- Use 'ls -la ' to check for existing files
- If files exist, read and verify them first before recreating
- Avoid redundant work - reuse existing valid outputs
# Task completion marking:
When you believe the task is COMPLETED, end your response with:
[TASK_COMPLETED: brief explanation of what was accomplished]
When you encounter an UNRECOVERABLE error that you cannot fix, end your response with:
[TASK_FAILED: brief explanation of why it cannot be completed]"""

    history = [{"role": "system", "content": system_prompt}]
    missing_code_streak = 0  # consecutive replies without a usable code block
    previous_error = None
    step = 0

    while step < self.max_steps:
        step += 1
        logger.info(f"[ShellAgent] Step {step}/{self.max_steps}: Processing task")
        try:
            prompt_text = LLMClient.format_messages_to_text(history)
            llm_reply = await self._llm.complete(prompt_text)
            assistant_content = llm_reply["message"]["content"]
            logger.debug(f"[ShellAgent] Step {step} LLM response: {assistant_content[:200]}...")

            # extract and execute the code, and track the code block
            executed, execution_result = await self._execute_code_from_response(assistant_content)
            if executed:
                code_history.append(executed)
            logger.info(f"[ShellAgent] Step {step} execution result: {execution_result[:100]}...")

            if execution_result != "ERROR: No valid code block found":
                missing_code_streak = 0
            else:
                missing_code_streak += 1
                if missing_code_streak >= 3:
                    return _finish(
                        ToolStatus.ERROR,
                        f"Task failed after {step} steps: LLM failed to provide code blocks repeatedly",
                    )

            verdict = self._check_task_status(assistant_content, execution_result, previous_error)

            if verdict["completed"]:
                divider = f"\n{'='*60}"
                report = [
                    f"Task completed successfully after {step} steps",
                    divider,
                    "\nFinal Result:",
                    execution_result,
                ]
                # A per-step summary is only worth printing for multi-step runs.
                if len(code_history) > 1:
                    report.append(divider)
                    report.append(f"\nExecution Summary ({len(code_history)} steps):")
                    for idx, entry in enumerate(code_history, 1):
                        lang = entry.get("language", "unknown")
                        output = entry.get("output", "")
                        preview = output[:200].replace('\n', ' ')
                        if len(output) > 200:
                            preview += "..."
                        report.append(f"\n Step {idx} [{lang}]: {preview}")
                report.append(divider)
                report.append(f"\nSummary: {verdict['reason']}")
                return _finish(ToolStatus.SUCCESS, "\n".join(report))

            if verdict["failed"]:
                return _finish(
                    ToolStatus.ERROR,
                    f"Task failed after {step} steps: {verdict['reason']}\nLast result: {execution_result}",
                )

            # Not finished: hand the execution outcome back to the LLM.
            feedback = self._generate_feedback(execution_result, step, previous_error)
            history.append({"role": "assistant", "content": assistant_content})
            history.append({"role": "user", "content": feedback})
            previous_error = execution_result if "ERROR" in execution_result else None
        except Exception as e:
            return _finish(ToolStatus.ERROR, f"Tool execution failed at step {step}: {str(e)}")

    return _finish(
        ToolStatus.ERROR,
        f"Reached maximum steps ({self.max_steps}). Task may be too complex or impossible.",
    )
async def _execute_code_from_response(self, response: str):
"""
execute the code and track the code block
Returns:
Tuple[Optional[Dict], str]: (code_info, execution_result)
- code_info: {"lang": "python/bash", "code": "...", "status": "success/error"}
- execution_result: the execution result string
"""
matches = list(self._CODE_RGX.finditer(response))
if not matches:
return None, "ERROR: No valid code block found"
lang, code = matches[0]["lang"].lower(), matches[0]["code"].strip()
# standardize the language name
lang_normalized = "python" if lang in ["python", "py"] else "bash"
code_info = {
"lang": lang_normalized,
"code": code,
}
# Security check is only done at the Connector layer to avoid duplicate prompts
try:
if lang in ["python", "py"]:
helper = self._py_tool
result = await helper._arun(code)
elif lang in ["bash", "shell", "sh"]:
helper = self._bash_tool
result = await helper._arun(code)
else:
execution_result = f"ERROR: Unsupported language: {lang}"
code_info["status"] = "error"
return code_info, execution_result
execution_result = self._extract_output(result)
code_info["status"] = "success" if "ERROR" not in execution_result else "error"
return code_info, execution_result
except Exception as e:
execution_result = f"EXECUTION ERROR: {str(e)}"
code_info["status"] = "error"
return code_info, execution_result
def _generate_feedback(self, result: str, iteration: int, last_error: str) -> str:
feedback = f"Step {iteration} result:\n{result}\n\n"
if "ERROR" in result:
if last_error and last_error == result:
feedback += "Same error as previous step. Try a different approach.\n"
else:
feedback += "Error occurred. Analyze the error and fix it.\n"
else:
feedback += "Execution successful. Continue to next step if needed.\n"
feedback += "\nWhat's your next action? (Remember: provide exactly ONE code block)"
return feedback
def _extract_output(self, result):
if isinstance(result, dict):
# Check for execution errors
stderr = result.get("error") or result.get("stderr") or ""
returncode = result.get("returncode", 0)
stdout = result.get("content") or result.get("output") or result.get("stdout") or ""
# If there's a non-zero return code or stderr with actual errors, report it
if returncode != 0 or (stderr and len(stderr.strip()) > 0):
error_msg = f"EXECUTION ERROR (exit code {returncode}):\n"
if stderr:
error_msg += f"stderr: {stderr}\n"
if stdout:
error_msg += f"stdout: {stdout}"
return error_msg
return stdout or str(result)
return str(result)
def _check_task_status(self, response: str, execution_result: str, last_error: str) -> dict:
if "[TASK_COMPLETED:" in response:
reason = response.split("[TASK_COMPLETED:")[1].split("]")[0].strip()
return {"completed": True, "failed": False, "reason": reason}
if "[TASK_FAILED:" in response:
reason = response.split("[TASK_FAILED:")[1].split("]")[0].strip()
return {"completed": False, "failed": True, "reason": reason}
# Extended error pattern detection
error_patterns = [
"ERROR:",
"EXECUTION ERROR:",
"CommandNotFoundError",
"Traceback (most recent call last)",
"Exception:",
"PermissionError",
"FileNotFoundError",
"SyntaxError:",
"ImportError:",
"ModuleNotFoundError",
"No such file or directory",
"command not found",
]
has_error = any(pattern in execution_result for pattern in error_patterns)
if has_error:
if last_error and last_error == execution_result:
return {"completed": False, "failed": True, "reason": "Same error repeated - unable to resolve"}
return {"completed": False, "failed": False, "reason": "Execution error occurred"}
return {"completed": False, "failed": False, "reason": "Task in progress"}
================================================
FILE: anytool/grounding/backends/shell/transport/connector.py
================================================
import asyncio
from typing import Any, Optional, Dict
from anytool.grounding.core.transport.connectors import AioHttpConnector
from anytool.grounding.core.security import SecurityPolicyManager
from anytool.utils.logging import Logger
logger = Logger.get_logger(__name__)
class ShellConnector(AioHttpConnector):
    """
    Shell backend HTTP connector

    Basic routes:
        POST /run_python {"code": str}
        POST /run_bash_script {"script": str, "timeout": int, "working_dir": str | None}
    """

    def __init__(
        self,
        vm_ip: str,
        port: int = 5000,
        *,
        retry_times: int = 3,
        retry_interval: float = 5,
        security_manager: "SecurityPolicyManager | None" = None,
    ) -> None:
        """
        Args:
            vm_ip: Host of the shell server; combined with ``port`` into the base URL.
            port: TCP port of the shell server.
            retry_times: Maximum number of attempts per request.
            retry_interval: Seconds to sleep between attempts.
            security_manager: Optional policy manager consulted before every execution.
        """
        base_url = f"http://{vm_ip}:{port}"
        super().__init__(base_url)
        self.retry_times = retry_times
        self.retry_interval = retry_interval
        self._security_manager = security_manager

    async def _ensure_allowed(self, command: str, what: str) -> None:
        """Raise PermissionError if the security manager rejects ``command``.

        Args:
            command: Code or script text submitted for the policy check.
            what: Human-readable kind used in messages ("python code" / "bash script").
        """
        if not self._security_manager:
            return
        from anytool.grounding.core.types import BackendType

        allowed = await self._security_manager.check_command_allowed(BackendType.SHELL, command)
        if not allowed:
            logger.error("SecurityPolicy blocked %s execution", what)
            raise PermissionError(f"SecurityPolicy: {what} execution blocked")

    @staticmethod
    def _log_execution(
        kind: str,
        timeout: int,
        working_dir: Optional[str],
        env: Optional[Dict[str, str]],
        conda_env: Optional[str],
    ) -> None:
        """Log the execution parameters; only the env variable names are logged, not values."""
        logger.info(
            "Executing %s script with timeout=%d seconds%s%s%s",
            kind,
            timeout,
            f", working_dir={working_dir}" if working_dir else "",
            f", env={list(env.keys())}" if env else "",
            f", conda_env={conda_env}" if conda_env else ""
        )

    async def _retry_invoke(
        self,
        name: str,
        payload: Dict[str, Any],
        script_timeout: int,
        *,
        break_on_timeout: bool = False
    ):
        """
        Execute HTTP request and retry

        Args:
            name: RPC method name
            payload: Request payload
            script_timeout: Script execution timeout
            break_on_timeout: Whether to exit immediately on timeout (default False)

        Returns:
            Server response result

        Raises:
            RuntimeError: A timeout occurred and ``break_on_timeout`` is set
            Exception: Last exception thrown after all retries fail
        """
        last_exc: Exception | None = None
        # NOTE(review): the original computed ``script_timeout + 60`` as an HTTP
        # timeout but never passed it anywhere; the dead assignment was dropped.
        # Confirm that the underlying HTTP client timeout exceeds script_timeout.
        for attempt in range(1, self.retry_times + 1):
            try:
                # Pass timeout parameter to server
                result = await self.invoke(name, payload | {"timeout": script_timeout})
                logger.info("%s executed successfully (attempt %d/%d)", name, attempt, self.retry_times)
                return result
            except asyncio.TimeoutError as exc:
                # Timeout exception usually does not need to be retried (script execution time too long)
                if break_on_timeout:
                    logger.error("%s timed out after %d seconds, aborting retry", name, script_timeout)
                    raise RuntimeError(
                        f"Script execution timed out after {script_timeout} seconds"
                    ) from exc
                last_exc = exc
                if attempt == self.retry_times:
                    break
                logger.warning(
                    "%s timed out (attempt %d/%d), retrying in %.1f seconds...",
                    name, attempt, self.retry_times, self.retry_interval
                )
                await asyncio.sleep(self.retry_interval)
            except Exception as exc:
                last_exc = exc
                if attempt == self.retry_times:
                    break
                logger.warning(
                    "%s failed (attempt %d/%d): %s, retrying in %.1f seconds...",
                    name, attempt, self.retry_times, exc, self.retry_interval
                )
                await asyncio.sleep(self.retry_interval)

        error_msg = f"{name} failed after {self.retry_times} retries"
        logger.error(error_msg)
        raise last_exc or RuntimeError(error_msg)

    async def run_python_script(
        self,
        code: str,
        *,
        timeout: int = 90,
        working_dir: Optional[str] = None,
        env: Optional[Dict[str, str]] = None,
        conda_env: Optional[str] = None
    ) -> Any:
        """
        Execute Python script on remote server

        Args:
            code: Python code string
            timeout: Execution timeout in seconds (default 90 seconds)
            working_dir: Working directory for script execution (optional)
            env: Environment variables for script execution (optional)
            conda_env: Conda environment name to activate (optional)

        Returns:
            Server response result

        Raises:
            PermissionError: Security policy blocked execution
            RuntimeError: Execution failed or timed out
        """
        await self._ensure_allowed(code, "python code")
        payload = {"code": code, "working_dir": working_dir, "env": env, "conda_env": conda_env}
        self._log_execution("python", timeout, working_dir, env, conda_env)
        # Python script timed out, exit immediately without retry (timeout usually means script logic problem)
        return await self._retry_invoke(
            "POST /run_python",
            payload,
            timeout,
            break_on_timeout=True
        )

    async def run_bash_script(
        self,
        script: str,
        *,
        timeout: int = 90,
        working_dir: Optional[str] = None,
        env: Optional[Dict[str, str]] = None,
        conda_env: Optional[str] = None
    ) -> Any:
        """
        Execute Bash script on remote server

        Args:
            script: Bash script content (can be multi-line)
            timeout: Execution timeout in seconds (default 90 seconds)
            working_dir: Working directory for script execution (optional)
            env: Environment variables for script execution (optional)
            conda_env: Conda environment name to activate (optional)

        Returns:
            Server response result, containing status, output, error, returncode, etc.

        Raises:
            PermissionError: Security policy blocked execution
            RuntimeError: Execution failed or timed out
        """
        await self._ensure_allowed(script, "bash script")
        payload = {"script": script, "working_dir": working_dir, "env": env, "conda_env": conda_env}
        self._log_execution("bash", timeout, working_dir, env, conda_env)
        # Bash script timed out, exit immediately without retry (timeout usually means script logic problem)
        result = await self._retry_invoke(
            "POST /run_bash_script",
            payload,
            timeout,
            break_on_timeout=True
        )
        # Record execution result
        if isinstance(result, dict) and "returncode" in result:
            # %s rather than %d: a non-integer return code (e.g. None) must not
            # break the log call.
            logger.info("Bash script executed with return code: %s", result.get("returncode", -1))
        return result
================================================
FILE: anytool/grounding/backends/shell/transport/local_connector.py
================================================
"""
Local Shell Connector — execute Python / Bash scripts directly via subprocess.
This connector has the **same public API** as ShellConnector (HTTP version)
but runs everything in-process, removing the need for a local_server.
Return format is kept identical so that ShellSession / ShellAgentTool
work without any changes.
"""
import asyncio
import os
import platform
import tempfile
import uuid
from typing import Any, Optional, Dict
from anytool.grounding.core.transport.connectors.base import BaseConnector
from anytool.grounding.core.transport.task_managers.noop import NoOpConnectionManager
from anytool.grounding.core.security import SecurityPolicyManager
from anytool.utils.logging import Logger
logger = Logger.get_logger(__name__)
platform_name = platform.system()
# ---------------------------------------------------------------------------
# Conda helpers (mirrored from local_server/main.py)
# ---------------------------------------------------------------------------
def _get_conda_activation_prefix(conda_env: str | None) -> str:
"""Generate platform-specific conda activation prefix."""
if not conda_env:
return ""
if platform_name == "Windows":
conda_paths = [
os.path.expandvars(r"%USERPROFILE%\miniconda3\Scripts\activate.bat"),
os.path.expandvars(r"%USERPROFILE%\anaconda3\Scripts\activate.bat"),
r"C:\ProgramData\Miniconda3\Scripts\activate.bat",
r"C:\ProgramData\Anaconda3\Scripts\activate.bat",
]
for p in conda_paths:
if os.path.exists(p):
return f'call "{p}" {conda_env} && '
return f"conda activate {conda_env} && "
else:
conda_paths = [
os.path.expanduser("~/miniconda3/etc/profile.d/conda.sh"),
os.path.expanduser("~/anaconda3/etc/profile.d/conda.sh"),
"/opt/conda/etc/profile.d/conda.sh",
"/usr/local/miniconda3/etc/profile.d/conda.sh",
"/usr/local/anaconda3/etc/profile.d/conda.sh",
]
for p in conda_paths:
if os.path.exists(p):
return f'source "{p}" && conda activate {conda_env} && '
return f"conda activate {conda_env} && "
def _wrap_script_with_conda(script: str, conda_env: str | None) -> str:
"""Wrap bash script with conda activation if needed."""
if not conda_env:
return script
if platform_name == "Windows":
prefix = _get_conda_activation_prefix(conda_env)
return f"{prefix}{script}"
else:
conda_paths = [
os.path.expanduser("~/miniconda3/etc/profile.d/conda.sh"),
os.path.expanduser("~/anaconda3/etc/profile.d/conda.sh"),
os.path.expanduser("~/opt/anaconda3/etc/profile.d/conda.sh"),
"/opt/conda/etc/profile.d/conda.sh",
]
conda_sh = None
for p in conda_paths:
if os.path.exists(p):
conda_sh = p
break
if conda_sh:
return (
f'#!/bin/bash\n'
f'if [ -f "{conda_sh}" ]; then\n'
f' . "{conda_sh}"\n'
f' conda activate {conda_env} 2>/dev/null || true\n'
f'fi\n\n'
f'{script}\n'
)
else:
logger.warning(
"Conda environment '%s' requested but conda not found. "
"Executing with system Python.", conda_env
)
return script
class LocalShellConnector(BaseConnector[Any]):
"""
Shell connector that runs scripts **locally** using asyncio subprocesses,
bypassing the Flask local_server entirely.
Public API is compatible with ``ShellConnector`` so that ``ShellSession``
works without modification.
"""
def __init__(
    self,
    *,
    retry_times: int = 3,
    retry_interval: float = 5,
    security_manager: "SecurityPolicyManager | None" = None,
) -> None:
    """Create a local connector backed by a no-op connection manager.

    Args:
        retry_times: Stored on the instance; not used in this constructor.
        retry_interval: Stored on the instance; not used in this constructor.
        security_manager: Optional policy manager consulted before executing code.
    """
    super().__init__(NoOpConnectionManager())

    # Provide base_url = None so ShellSession._get_system_info falls back
    # to bash-based detection instead of HTTP.
    self.base_url: str | None = None

    self._security_manager = security_manager
    self.retry_interval = retry_interval
    self.retry_times = retry_times
# ------------------------------------------------------------------
# connect / disconnect (mostly no-ops for local execution)
# ------------------------------------------------------------------
async def connect(self) -> None:
    """Mark the connector as ready; local mode has no real connection to open.

    Does nothing when the connector is already flagged as connected.
    """
    if not self._connected:
        await super().connect()
        logger.info("LocalShellConnector: ready (local mode, no server required)")
# ------------------------------------------------------------------
# Core execution helpers
# ------------------------------------------------------------------
async def _run_subprocess(
self,
cmd: list[str],
*,
timeout: int = 90,
working_dir: str | None = None,
env: dict[str, str] | None = None,
) -> Dict[str, Any]:
"""Run a command via asyncio subprocess and return a result dict
matching the format returned by the local_server endpoints."""
exec_env = os.environ.copy()
if env:
exec_env.update(env)
cwd = working_dir or os.getcwd()
try:
proc = await asyncio.create_subprocess_exec(
*cmd,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
cwd=cwd,
env=exec_env,
)
stdout_b, stderr_b = await asyncio.wait_for(
proc.communicate(), timeout=timeout
)
stdout = stdout_b.decode("utf-8", errors="replace") if stdout_b else ""
stderr = stderr_b.decode("utf-8", errors="replace") if stderr_b else ""
returncode = proc.returncode or 0
return {
"status": "success" if returncode == 0 else "error",
"output": stdout,
"content": stdout or "Code executed successfully (no output)",
"error": stderr,
"returncode": returncode,
}
except asyncio.TimeoutError:
return {
"status": "error",
"output": f"Execution timed out after {timeout} seconds",
"content": f"Execution timed out after {timeout} seconds",
"error": "",
"returncode": -1,
}
except Exception as e:
return {
"status": "error",
"output": "",
"content": "",
"error": str(e),
"returncode": -1,
}
async def _run_shell_command(
self,
shell_cmd: str,
*,
timeout: int = 90,
working_dir: str | None = None,
env: dict[str, str] | None = None,
) -> Dict[str, Any]:
"""Run a shell command string (used for conda-wrapped scripts)."""
exec_env = os.environ.copy()
if env:
exec_env.update(env)
cwd = working_dir or os.getcwd()
try:
proc = await asyncio.create_subprocess_shell(
shell_cmd,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.STDOUT,
cwd=cwd,
env=exec_env,
)
stdout_b, _ = await asyncio.wait_for(
proc.communicate(), timeout=timeout
)
stdout = stdout_b.decode("utf-8", errors="replace") if stdout_b else ""
returncode = proc.returncode or 0
return {
"status": "success" if returncode == 0 else "error",
"output": stdout,
"content": stdout or "Code executed successfully (no output)",
"error": "",
"returncode": returncode,
}
except asyncio.TimeoutError:
return {
"status": "error",
"output": f"Script execution timed out after {timeout} seconds",
"content": f"Script execution timed out after {timeout} seconds",
"error": "",
"returncode": -1,
}
except Exception as e:
return {
"status": "error",
"output": "",
"content": "",
"error": str(e),
"returncode": -1,
}
# ------------------------------------------------------------------
# Public API (same signatures as ShellConnector)
# ------------------------------------------------------------------
async def run_python_script(
    self,
    code: str,
    *,
    timeout: int = 90,
    working_dir: Optional[str] = None,
    env: Optional[Dict[str, str]] = None,
    conda_env: Optional[str] = None,
) -> Any:
    """Execute a Python script locally.

    The code is written to a uniquely named temporary file, run with the
    platform's ``python``/``python3`` command (through a conda activation
    prefix when ``conda_env`` is given), and the file is removed afterwards.

    Return format matches the server's ``/run_python`` endpoint.

    Args:
        code: Python source to execute.
        timeout: Execution timeout in seconds.
        working_dir: Working directory for the script (optional).
        env: Extra environment variables for the script (optional).
        conda_env: Conda environment name to activate (optional).

    Raises:
        PermissionError: The security manager rejected the code.
    """
    # Security check
    if self._security_manager:
        from anytool.grounding.core.types import BackendType
        allowed = await self._security_manager.check_command_allowed(
            BackendType.SHELL, code
        )
        if not allowed:
            logger.error("SecurityPolicy blocked python code execution")
            raise PermissionError("SecurityPolicy: python code execution blocked")

    # Write code to temp file (same as local_server)
    suffix = uuid.uuid4().hex
    if platform_name == "Windows":
        temp_filename = os.path.join(tempfile.gettempdir(), f"python_exec_{suffix}.py")
    else:
        temp_filename = f"/tmp/python_exec_{suffix}.py"

    try:
        # Python reads source files as UTF-8 by default, so write UTF-8
        # explicitly instead of relying on the locale's preferred encoding.
        with open(temp_filename, "w", encoding="utf-8") as f:
            f.write(code)

        logger.info(
            "Executing python script locally with timeout=%d seconds%s%s%s",
            timeout,
            f", working_dir={working_dir}" if working_dir else "",
            f", env={list(env.keys())}" if env else "",
            f", conda_env={conda_env}" if conda_env else "",
        )

        python_cmd = "python" if platform_name == "Windows" else "python3"
        activation = _get_conda_activation_prefix(conda_env) if conda_env else ""
        if activation:
            # The activation prefix is shell syntax, so it needs a shell.
            full_cmd = f'{activation}{python_cmd} "{temp_filename}"'
            return await self._run_shell_command(
                full_cmd, timeout=timeout, working_dir=working_dir, env=env
            )
        return await self._run_subprocess(
            [python_cmd, temp_filename],
            timeout=timeout,
            working_dir=working_dir,
            env=env,
        )
    finally:
        if os.path.exists(temp_filename):
            os.remove(temp_filename)
async def run_bash_script(
    self,
    script: str,
    *,
    timeout: int = 90,
    working_dir: Optional[str] = None,
    env: Optional[Dict[str, str]] = None,
    conda_env: Optional[str] = None,
) -> Any:
    """Execute a Bash script locally.

    The script (optionally wrapped for a conda environment) is written to a
    uniquely named temporary file, made executable, run with bash, and the
    temporary file is removed afterwards.

    Return format matches the server's ``/run_bash_script`` endpoint.

    Raises:
        PermissionError: If a security manager is configured and rejects the script.
    """
    # Security check
    if self._security_manager:
        from anytool.grounding.core.types import BackendType

        allowed = await self._security_manager.check_command_allowed(
            BackendType.SHELL, script
        )
        if not allowed:
            logger.error("SecurityPolicy blocked bash script execution")
            raise PermissionError("SecurityPolicy: bash script execution blocked")

    # Wrap with conda if needed
    final_script = _wrap_script_with_conda(script, conda_env)

    # Write to temp file (same as local_server)
    suffix = uuid.uuid4().hex
    if platform_name == "Windows":
        temp_filename = os.path.join(tempfile.gettempdir(), f"bash_exec_{suffix}.sh")
    else:
        temp_filename = f"/tmp/bash_exec_{suffix}.sh"

    try:
        # UTF-8 keeps non-ASCII script text intact regardless of locale;
        # newline="\n" stops Windows text mode from emitting CRLF, which
        # bash would treat as part of each command.
        with open(temp_filename, "w", encoding="utf-8", newline="\n") as f:
            f.write(final_script)
        os.chmod(temp_filename, 0o755)

        logger.info(
            "Executing bash script locally with timeout=%d seconds%s%s%s",
            timeout,
            f", working_dir={working_dir}" if working_dir else "",
            f", env={list(env.keys())}" if env else "",
            f", conda_env={conda_env}" if conda_env else "",
        )

        shell_cmd = ["bash", temp_filename] if platform_name == "Windows" else ["/bin/bash", temp_filename]
        return await self._run_subprocess(
            shell_cmd,
            timeout=timeout,
            working_dir=working_dir,
            env=env,
        )
    finally:
        if os.path.exists(temp_filename):
            os.unlink(temp_filename)
# ------------------------------------------------------------------
# BaseConnector abstract methods
# ------------------------------------------------------------------
async def invoke(self, name: str, params: dict[str, Any]) -> Any:
    """Route a named call to the matching local script runner.

    The name is matched case-insensitively by substring: anything containing
    ``/RUN_PYTHON`` goes to ``run_python_script`` and anything containing
    ``/RUN_BASH_SCRIPT`` goes to ``run_bash_script``.

    Raises:
        NotImplementedError: If the name matches neither route.
    """
    route = name.strip().upper()
    wants_python = "/RUN_PYTHON" in route
    if not wants_python and "/RUN_BASH_SCRIPT" not in route:
        raise NotImplementedError(f"LocalShellConnector does not support: {name}")

    if wants_python:
        payload = params.get("code", "")
        runner = self.run_python_script
    else:
        payload = params.get("script", "")
        runner = self.run_bash_script

    # Both runners accept the same execution options.
    return await runner(
        payload,
        timeout=params.get("timeout", 90),
        working_dir=params.get("working_dir"),
        env=params.get("env"),
        conda_env=params.get("conda_env"),
    )
async def request(self, *args: Any, **kwargs: Any) -> Any:
    """Raw HTTP requests are not used in local mode; this always raises."""
    reason = "LocalShellConnector does not support raw HTTP requests"
    raise NotImplementedError(reason)
================================================
FILE: anytool/grounding/backends/web/__init__.py
================================================
from .provider import WebProvider
from .session import WebSession
__all__ = [
"WebProvider",
"WebSession"
]
================================================
FILE: anytool/grounding/backends/web/provider.py
================================================
from typing import Dict, Any
from anytool.grounding.core.types import BackendType, SessionConfig
from anytool.grounding.core.provider import Provider
from .session import WebSession
from anytool.utils.logging import Logger
logger = Logger.get_logger(__name__)
class WebProvider(Provider[WebSession]):
    """Provider for the web (knowledge research) backend."""

    DEFAULT_SID = BackendType.WEB.value

    def __init__(self, config: Dict[str, Any] = None):
        super().__init__(BackendType.WEB, config)

    async def initialize(self) -> None:
        """Create the default session on first use, then mark the provider initialized."""
        if self.is_initialized:
            return

        logger.info("Initializing Web provider (Knowledge Research)")
        # The default session is created automatically.
        default_config = SessionConfig(
            session_name=self.DEFAULT_SID,
            backend_type=BackendType.WEB,
            connection_params={}
        )
        await self.create_session(default_config)
        self.is_initialized = True

    async def create_session(self, session_config: SessionConfig) -> WebSession:
        """Return the session named in ``session_config``, creating it if needed."""
        sid = session_config.session_name
        if sid not in self._sessions:
            # New sessions are built with auto-connect and auto-initialize enabled.
            self._sessions[sid] = WebSession(
                session_id=sid,
                config=session_config,
                auto_connect=True,
                auto_initialize=True
            )
            logger.info(f"Created Web session (Knowledge Research): {sid}")
        else:
            logger.warning(f"Session {sid} already exists, returning existing session")
        return self._sessions[sid]

    async def close_session(self, session_name: str) -> None:
        """Disconnect and forget the named session; unknown names are ignored."""
        session = self._sessions.pop(session_name, None)
        if not session:
            return
        await session.disconnect()
        logger.info(f"Closed Web session: {session_name}")
================================================
FILE: anytool/grounding/backends/web/session.py
================================================
import os
from typing import Dict, Any, Optional
from anytool.grounding.core.session import BaseSession
from anytool.grounding.core.types import BackendType, SessionConfig
from anytool.grounding.core.tool import BaseTool
from anytool.grounding.core.transport.connectors import BaseConnector
from anytool.llm import LLMClient
from anytool.utils.logging import Logger
from dotenv import load_dotenv
load_dotenv()
logger = Logger.get_logger(__name__)
try:
from openai import AsyncOpenAI
OPENAI_AVAILABLE = True
except ImportError:
OPENAI_AVAILABLE = False
class WebConnector(BaseConnector):
    """Connector that wraps an ``AsyncOpenAI`` client pointed at ``base_url``."""

    def __init__(self, api_key: str, base_url: str):
        self.api_key = api_key
        self.base_url = base_url
        self.client: Optional[AsyncOpenAI] = None
        self._connected = False

    async def connect(self) -> None:
        """Create the client; a no-op when already connected.

        Raises:
            RuntimeError: If the openai package is missing or no API key is set.
        """
        if self._connected:
            return

        if not OPENAI_AVAILABLE:
            raise RuntimeError(
                "OpenAI library not available. Install with: pip install openai"
            )
        if not self.api_key:
            missing_key = (
                "API key not provided. Set OPENROUTER_API_KEY environment variable "
                "or provide deep_research_api_key in config."
            )
            raise RuntimeError(missing_key)

        self.client = AsyncOpenAI(base_url=self.base_url, api_key=self.api_key)
        self._connected = True
        logger.info(f"Web connector connected to {self.base_url}")

    async def disconnect(self) -> None:
        """Drop the client; a no-op when not connected."""
        if self._connected:
            self.client = None
            self._connected = False
            logger.info("Web connector disconnected")

    @property
    def is_connected(self) -> bool:
        return self._connected

    async def invoke(self, name: str, params: dict) -> Any:
        """Run ``chat_completion`` with ``params``; any other name is rejected."""
        if name != "chat_completion":
            raise NotImplementedError(f"Unknown method: {name}")
        if not self.client:
            raise RuntimeError("Client not connected")
        return await self.client.chat.completions.create(**params)

    async def request(self, *args: Any, **kwargs: Any) -> Any:
        raise NotImplementedError("Web backend uses invoke() instead of request()")
class WebSession(BaseSession):
    """Session for the web backend; exposes the deep research tool."""

    backend_type = BackendType.WEB

    def __init__(
        self,
        *,
        session_id: str,
        config: SessionConfig,
        deep_research_api_key: Optional[str] = None,
        deep_research_base_url: str = "https://openrouter.ai/api/v1",
        auto_connect: bool = True,
        auto_initialize: bool = True
    ):
        # Explicit key wins, then the environment; an empty string is passed
        # through so the connector raises when connect() is attempted.
        resolved_key = deep_research_api_key or os.getenv("OPENROUTER_API_KEY") or ""
        super().__init__(
            connector=WebConnector(api_key=resolved_key, base_url=deep_research_base_url),
            session_id=session_id,
            backend_type=BackendType.WEB,
            auto_connect=auto_connect,
            auto_initialize=auto_initialize
        )
        self.config = config

    @property
    def web_connector(self) -> WebConnector:
        return self.connector

    async def initialize(self) -> Dict[str, Any]:
        """Ensure the connector is connected and the tool list is populated.

        The connection is established explicitly here so that the research
        tool never sees a connector whose ``client`` is still None when the
        session was instantiated directly rather than entered as a context
        manager.

        Returns:
            A dict with the tool names and the backend identifier.
        """
        if not self.is_connected:
            try:
                await self.connect()
            except Exception as e:
                logger.error(f"Failed to connect WebSession {self.session_id}: {e}")
                raise

        if self.tools:
            logger.debug(f"Web session {self.session_id} already initialized, skipping")
        else:
            self.tools = [DeepResearchTool(session=self)]
            logger.info(f"Initialized Web session {self.session_id} with AI Deep Research tool")

        return {
            "tools": [t.name for t in self.tools],
            "backend": BackendType.WEB.value
        }
class DeepResearchTool(BaseTool):
    # Tool that sends a query to a deep-research model through the session's
    # web connector and then condenses the answer with LLMClient.
    backend_type = BackendType.WEB
    _name = "deep_research_agent"
    # Tool description text (runtime string; keep it verbatim).
    _description = """Knowledge Research Tool - Primary tool for acquiring external knowledge
PURPOSE:
Acquires comprehensive knowledge from the web through deep research and analysis.
Powered by Perplexity AI's sonar-deep-research model, then post-processed to extract
actionable insights and concise summaries. The main tool for gathering information
beyond existing knowledge base.
WHEN TO USE:
- Information needed on professional/technical topics
- Research on technical problems, concepts, or implementations
- Understanding of latest developments, trends, or news
- Comparison of different approaches, tools, or solutions
- Factual information, definitions, or explanations required
- Synthesis from multiple authoritative sources needed
HOW IT WORKS:
1. Conducts deep web search using Perplexity's sonar-deep-research
2. Analyzes and synthesizes information from multiple sources
3. Post-processes to distill knowledge-dense summary retaining critical details
4. Returns comprehensive summary ready for immediate use
RETURNS:
Knowledge-dense comprehensive summary (400-600 words) that:
- Retains important details and technical specifics
- Focuses on substantive knowledge without losing critical information
- Organized and structured for clarity
- Directly usable by agents for decision-making and task execution
NOT DESIGNED FOR:
- Tasks requiring browser interaction or UI manipulation
- Direct file downloads or web scraping operations
- Real-time system operations or executions
USAGE GUIDELINES:
- Frame clear, specific questions (e.g., "Explain the architecture of Transformer models")
- Specify context when needed (e.g., "Compare PostgreSQL vs MySQL for high-concurrency scenarios")
- Suitable for any knowledge or information acquisition needs
"""
    def __init__(
        self,
        session: WebSession
    ):
        super().__init__()
        # Session whose connector client performs the research request.
        self._session = session
        # Separate LLM client used only for the summarisation step.
        self._llm = LLMClient()
    async def _arun(self, query: str) -> str:
        # Runs the two-step pipeline (research, then summary) and returns the
        # summary text. Failures are not raised: every exception is logged and
        # converted into an "ERROR: ..." string, as is an empty query.
        if not query:
            return "ERROR: Missing required parameter: query"
        try:
            # Step 1: Deep research
            logger.info(f"Start deep research: {query}")
            completion = await self._session.web_connector.client.chat.completions.create(
                model="perplexity/sonar-deep-research",
                messages=[{"role": "user", "content": query}]
            )
            full_answer = completion.choices[0].message.content
            logger.info(f"Research completed, length: {len(full_answer)} characters")
            # Step 2: Use LLMClient to generate summary and distill key points
            logger.info(f"Begin to distill key points...")
            SUMMARY_AGENT_PROMPT = f"""Please distill the following deep research results into a knowledge-dense summary. Requirements:
Provide a comprehensive yet concise summary (400-600 words):
- Focus on SUBSTANTIVE knowledge and key information
- Retain important details, technical specifics, and concrete facts
- Do NOT sacrifice critical information for brevity
- Organize information clearly and logically with proper structure
- Remove only redundancy and verbose explanations
- Include actionable insights and decision-relevant information
- Make it directly usable for task execution and decision-making
Output ONLY the summary text, no additional formatting or JSON structure needed.
Deep Research Results:
{full_answer}
"""
            summary_response = await self._llm.complete(SUMMARY_AGENT_PROMPT)
            # The response is indexed as a mapping: ["message"]["content"].
            summary = summary_response["message"]["content"].strip()
            logger.info(f"Summary generation completed")
            return summary
        except Exception as e:
            # Covers a missing client, an empty answer, and LLM failures alike.
            logger.error(f"Deep research failed: {e}")
            return f"ERROR: AI research failed: {e}"
================================================
FILE: anytool/grounding/core/exceptions.py
================================================
"""
Unified exception & error-code definitions for the grounding framework
"""
from enum import Enum, auto
from typing import Any, Dict
class ErrorCode(str, Enum):
    # Error categories carried by GroundingError.
    # Member values are generated by auto(), so they follow declaration order.
    # NOTE(review): avoid reordering members if any consumer persists the
    # values - confirm before changing.
    # generic
    UNKNOWN = auto()
    CONFIG_INVALID = auto()
    # provider / session / connector
    PROVIDER_ERROR = auto()
    SESSION_NOT_FOUND = auto()
    # connection
    CONNECTION_FAILED = auto()
    CONNECTION_TIMEOUT = auto()
    # tool
    TOOL_NOT_FOUND = auto()
    TOOL_EXECUTION_FAIL = auto()
    AMBIGUOUS_TOOL = auto()
class GroundingError(Exception):
    """
    Base exception for the grounding framework.

    Parameters
    ----------
    message : str
        Human readable description of what went wrong.
    code : ErrorCode
        Category of the failure; defaults to ``ErrorCode.UNKNOWN``.
    retryable : bool
        True when the caller may safely retry the operation automatically.
    context : Dict[str, Any]
        Arbitrary extra keyword arguments (e.g. tool_name, session_id) kept
        for logging / metrics.
    """

    __slots__ = ("message", "code", "retryable", "context")

    def __init__(
        self,
        message: str,
        *,
        code: ErrorCode = ErrorCode.UNKNOWN,
        retryable: bool = False,
        **context: Any,
    ):
        rendered = f"[{code}] {message}"
        super().__init__(rendered)
        self.message = message
        self.code = code
        self.retryable = retryable
        self.context = context

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict form suitable for structured logs or JSON."""
        payload: Dict[str, Any] = {}
        payload["code"] = self.code.value
        payload["message"] = self.message
        payload["retryable"] = self.retryable
        payload["context"] = self.context
        return payload

    def __str__(self) -> str:
        code, message = self.code, self.message
        return f"[{code}] {message}"

    def __repr__(self) -> str:
        code, message = self.code, self.message
        return f"GroundingError(code={code}, msg={message!r})"
================================================
FILE: anytool/grounding/core/grounding_client.py
================================================
import asyncio
import time
from collections import OrderedDict
from datetime import datetime
from typing import Any, Dict, List, Optional
from .types import BackendType, SessionConfig, SessionInfo, SessionStatus, ToolResult
from .exceptions import ErrorCode, GroundingError
from .tool import BaseTool
from .provider import Provider, ProviderRegistry
from .session import BaseSession
from .search_tools import SearchCoordinator
from anytool.config import GroundingConfig, get_config
from anytool.config.utils import get_config_value
from anytool.utils.logging import Logger
import importlib
class GroundingClient:
"""
Global Entry, Facing Agent/Application, only concerned with Provider & Session
"""
def __init__(self, config: Optional[GroundingConfig] = None, recording_manager=None) -> None:
    """Build the client state and register providers.

    Args:
        config: Grounding configuration; when omitted, ``get_config()`` is used.
        recording_manager: Optional recording manager stored for later
            injection into GUI sessions.
    """
    # Initialize logger first (needed by other initialization steps)
    self._logger = Logger.get_logger(__name__)
    self._config: GroundingConfig = config or get_config()
    self._registry: ProviderRegistry = ProviderRegistry()
    # Register providers from config (instantiation only; see the method)
    self._register_providers_from_config()
    # Session bookkeeping: live session objects and their monitoring info
    self._sessions: Dict[str, BaseSession] = {}
    self._session_info: Dict[str, SessionInfo] = {}
    self._server_session_map: dict[tuple[BackendType, str], str] = {}   # (backend, server) -> session_name
    # Tool cache: scope -> (tools, timestamp); ordered so the oldest entry can be evicted
    self._tool_cache: "OrderedDict[str, tuple[List[BaseTool], float]]" = OrderedDict()
    self._tool_cache_ttl: int = get_config_value(self._config, "tool_cache_ttl", 300)
    self._tool_cache_maxsize: int = get_config_value(self._config, "tool_cache_maxsize", 300)
    # Concurrent control: one lock for session state, one for the tool cache
    self._lock = asyncio.Lock()
    self._cache_lock = asyncio.Lock()
    # Tool search coordinator (created lazily by search_tools)
    self._search_coordinator: Optional[SearchCoordinator] = None
    # Recording manager (optional, for GUI intermediate step recording)
    self._recording_manager = recording_manager
    # Tool quality manager (None when disabled or when initialization fails)
    self._quality_manager = self._init_quality_manager()
    # Register SystemProvider last: it receives this GroundingClient instance
    self._register_system_provider()
def _register_providers_from_config(self) -> None:
    """
    Register a Provider instance for every entry in
    ``GroundingConfig.enabled_backends``.

    Providers are only *instantiated* here; ``initialize()`` is not awaited,
    so each provider is initialized lazily on first use.

    Entries that cannot be used (missing fields, unknown backend name,
    provider class that cannot be imported) are logged and skipped so that a
    single bad entry does not prevent the client from being constructed.

    Note: the system backend is skipped here and registered separately in
    ``_register_system_provider()`` because it requires a GroundingClient
    instance.
    """
    if not self._config.enabled_backends:
        self._logger.warning("No enabled_backends defined in config")
        return

    for item in self._config.enabled_backends:
        be_name: str | None = item.get("name")
        cls_path: str | None = item.get("provider_cls")
        if not (be_name and cls_path):
            self._logger.warning("Invalid backend entry: %s", item)
            continue

        # An unrecognised name makes the enum lookup raise ValueError; treat
        # it like any other invalid entry instead of aborting construction.
        try:
            backend = BackendType(be_name.lower())
        except ValueError:
            self._logger.warning("Unknown backend name '%s' in entry: %s", be_name, item)
            continue

        # Skip system backend - it will be registered separately
        if backend == BackendType.SYSTEM:
            self._logger.debug("Skipping system backend in config registration (will be registered separately)")
            continue

        if backend in self._registry.list():
            continue  # Already registered

        # Dynamically import Provider class. ImportError also covers a module
        # whose own optional dependency is missing; ValueError covers a
        # provider_cls without a module part (empty module name).
        try:
            module_path, _, cls_name = cls_path.rpartition(".")
            module = importlib.import_module(module_path)
            prov_cls = getattr(module, cls_name)
        except (ImportError, AttributeError, ValueError) as e:
            self._logger.error("Import provider failed: %s (%s)", cls_path, e)
            continue

        backend_cfg = self._config.get_backend_config(be_name)
        provider: Provider = prov_cls(backend_cfg)
        self._registry.register(provider)
def _register_system_provider(self) -> None:
"""
Register SystemProvider separately because it requires GroundingClient instance.
SystemProvider provides meta-level tools for querying system state (list providers, tools, etc.)
and is always available regardless of configuration.
"""
try:
from .system import SystemProvider
system_provider = SystemProvider(self)
self._registry.register(system_provider)
self._logger.debug("SystemProvider registered successfully")
except Exception as e:
self._logger.warning(f"Failed to register SystemProvider: {e}")
def _init_quality_manager(self):
"""Initialize tool quality manager based on config."""
try:
# Check if quality tracking is enabled in config
quality_config = getattr(self._config, 'tool_quality', None)
if not quality_config or not getattr(quality_config, 'enabled', True):
self._logger.debug("Tool quality tracking disabled")
return None
from .quality import ToolQualityManager, set_quality_manager
from pathlib import Path
cache_dir = getattr(quality_config, 'cache_dir', None)
if cache_dir:
cache_dir = Path(cache_dir)
manager = ToolQualityManager(
cache_dir=cache_dir,
enable_persistence=getattr(quality_config, 'enable_persistence', True),
auto_save=True,
evolve_interval=getattr(quality_config, 'evolve_interval', 5),
)
# Set as global manager for BaseTool access
set_quality_manager(manager)
self._logger.info(
f"ToolQualityManager initialized "
f"(records={len(manager._records)})"
)
return manager
except Exception as e:
self._logger.warning(f"Failed to initialize ToolQualityManager: {e}")
return None
@property
def quality_manager(self):
    """The tool quality manager held by this client (None when not available)."""
    manager = self._quality_manager
    return manager
# Quality API for Upper Layer
def get_quality_report(self) -> Dict[str, Any]:
    """
    Return the quality manager's report, or a "disabled" status dict when no
    quality manager is present.
    """
    if self._quality_manager:
        return self._quality_manager.get_quality_report()
    return {"status": "disabled", "message": "Quality tracking not enabled"}
async def evolve_quality(self) -> Dict[str, Any]:
    """
    Run one quality self-evolution cycle over every currently listed tool.

    The full tool list is fetched via ``list_tools()`` and handed to the
    quality manager's ``evolve``; its result is returned unchanged. When no
    quality manager is present a "disabled" status dict is returned instead.

    Intended to be called periodically or after the tool set changes.
    """
    if self._quality_manager:
        discovered = await self.list_tools()
        return await self._quality_manager.evolve(discovered)
    return {"status": "disabled"}
def get_tool_insights(self, tool: BaseTool) -> Dict[str, Any]:
    """
    Return the quality manager's insights for ``tool``, or a "disabled"
    status dict when no quality manager is present.
    """
    if self._quality_manager:
        return self._quality_manager.get_tool_insights(tool)
    return {"status": "disabled"}
def register_provider(self, provider: Provider) -> None:
    """Add ``provider`` to this client's provider registry."""
    registry = self._registry
    registry.register(provider)
def get_provider(self, backend: BackendType) -> Provider:
    """Look up the provider registered for ``backend``."""
    provider = self._registry.get(backend)
    return provider
def list_providers(self) -> Dict[BackendType, Provider]:
    """Return the registry's listing of providers."""
    listing = self._registry.list()
    return listing
@property
def recording_manager(self):
    """The recording manager currently attached to this client."""
    current = self._recording_manager
    return current

@recording_manager.setter
def recording_manager(self, manager):
    """
    Attach or replace the recording manager.

    Lets a coordinator inject the manager after the client has been created.
    """
    self._recording_manager = manager
    self._logger.info("GroundingClient: RecordingManager updated")
async def initialize_all_providers(self) -> None:
    """Concurrently initialize every registered provider that is not yet initialized."""
    pending = [
        provider.initialize()
        for provider in self._registry.list().values()
        if not provider.is_initialized
    ]
    await asyncio.gather(*pending)
async def create_session(
    self,
    *,
    backend: BackendType,
    name: str | None = None,
    connection_params: Dict[str, Any] | None = None,
    server: str | None = None,
    **options,
) -> str:
    """
    Create and register a session, returning its externally visible name.

    Args:
        backend: Backend the session belongs to.
        name: Session name. When None it is derived: ``<backend>-<server>``
            if ``server`` is given, otherwise the backend value.
        connection_params: Extra connection parameters for the provider.
            The caller's dict is not modified.
        server: Server identifier; required for the MCP backend.
        **options: Forwarded to ``SessionConfig``.

    Returns:
        The session name. If a session with that name already exists it is
        reused and no new session is created.

    Raises:
        GroundingError: If the session limit is reached, or if an MCP session
            is requested without ``server``.
    """
    async with self._lock:
        # Session naming strategy
        if server:  # Only MCP will pass in server
            name = name or f"{backend.value}-{server}"
        else:
            name = name or backend.value  # Other backends have a fixed 1 session

        # Reuse is checked before the limit: handing back an existing session
        # does not add one, so it must not fail when the limit is reached.
        if name in self._sessions:
            self._logger.warning("Session '%s' exists, reusing.", name)
            return name

        # Check concurrent sessions limit
        max_sessions = get_config_value(self._config, "max_concurrent_sessions", 100)
        if len(self._sessions) >= max_sessions:
            raise GroundingError(f"Reached maximum session limit: {max_sessions}")

    # Get Provider (initialize if first time)
    provider = self._registry.get(backend)
    if not provider.is_initialized:
        await provider.initialize()

    if backend == BackendType.MCP:
        if server is None:
            raise GroundingError("Must specify 'server' when creating MCP session")

    # Work on a copy so the setdefault calls below never leak into the
    # dictionary the caller passed in.
    connection_params = dict(connection_params) if connection_params else {}
    if server:
        connection_params.setdefault("server", server)

    # Inject recording_manager for GUI backend (for intermediate step recording)
    if backend == BackendType.GUI and self._recording_manager is not None:
        connection_params.setdefault("recording_manager", self._recording_manager)

    # Construct SessionConfig, pass to Provider to create
    sess_cfg = SessionConfig(
        session_name=name,  # Use external visible name
        backend_type=backend,
        connection_params=connection_params,
        **options,
    )
    session_obj = await provider.create_session(sess_cfg)

    # Store session and monitoring info
    async with self._lock:
        self._sessions[name] = session_obj
        now = datetime.utcnow()
        self._session_info[name] = SessionInfo(
            session_name=name,
            backend_type=backend,
            status=SessionStatus.CONNECTED,
            created_at=now,
            last_activity=now,
        )
        if server:
            self._server_session_map[(backend, server)] = name

    self._logger.info("Session created: %s", name)
    return name
def list_sessions(self) -> List[str]:
    """Return the names of all sessions currently tracked by this client."""
    return [session_name for session_name in self._sessions.keys()]
async def close_session(self, name: str) -> None:
    """Forget the named session and close it.

    All bookkeeping for the session (session object, monitoring info, cached
    tools, server mapping) is removed under the session lock. The session is
    then closed through its provider, or disconnected directly when no
    monitoring info exists. An unknown name only produces a warning.
    """
    async with self._lock:
        session = self._sessions.pop(name, None)
        info = self._session_info.pop(name, None)
        self._tool_cache.pop(name, None)
        stale_keys = [
            key
            for key, mapped_name in self._server_session_map.items()
            if mapped_name == name
        ]
        for key in stale_keys:
            self._server_session_map.pop(key)

    if not session:
        self._logger.warning("Session '%s' not found", name)
        return

    try:
        provider = None
        if info:
            provider = self._registry.get(info.backend_type)
        if not provider:
            # Fallback: if no provider, disconnect directly
            await session.disconnect()
        else:
            await provider.close_session(name)
    finally:
        self._logger.info("Session closed: %s", name)
async def close_all_sessions(self) -> None:
    """Close every tracked session, one after another."""
    # Snapshot the names first: close_session removes entries as it goes.
    names = list(self._sessions.keys())
    for session_name in names:
        await self.close_session(session_name)
async def ensure_session(self, backend: BackendType, server: str | None = None) -> str:
    """Return the default session name for ``backend``/``server``, creating the session if absent."""
    if server is None:
        sid = backend.value
    else:
        sid = f"{backend.value}-{server}"

    if sid in self._sessions:
        return sid
    await self.create_session(backend=backend, name=sid, server=server)
    return sid
def get_session_info(self, name: str) -> SessionInfo:
    """Get session monitoring info.

    Raises:
        GroundingError: With code ``SESSION_NOT_FOUND`` when no info is
            tracked under ``name``.
    """
    if name not in self._session_info:
        # ErrorCode members are values, not exception classes; wrap the code
        # in the framework exception instead of calling the member.
        raise GroundingError(
            f"Session '{name}' not found",
            code=ErrorCode.SESSION_NOT_FOUND,
            session_name=name,
        )
    return self._session_info[name]
def get_session(self, name: str) -> BaseSession:
    """Get session.

    Raises:
        GroundingError: With code ``SESSION_NOT_FOUND`` when no session is
            tracked under ``name``.
    """
    if name not in self._sessions:
        # ErrorCode members are values, not exception classes; wrap the code
        # in the framework exception instead of calling the member.
        raise GroundingError(
            f"Session '{name}' not found",
            code=ErrorCode.SESSION_NOT_FOUND,
            session_name=name,
        )
    return self._sessions[name]
async def _fetch_tools(
    self,
    backend: BackendType,
    *,
    session_name: str | None = None,
    use_cache: bool = False,
    bind_runtime_info: bool = True,
) -> List[BaseTool]:
    """
    Fetch tools from a provider, optionally through the tool cache.

    Args:
        backend: Backend type
        session_name:
            - None: fetch all tools from all sessions of this backend
            - str: fetch tools from specific session
        use_cache: Whether to read from and write to the tool cache
        bind_runtime_info: Whether to bind runtime info to tool instances

    Returns:
        The tool list from the cache (when fresh) or from the provider.
    """
    now = time.time()
    # Cache scope: the session name, or "backend-<value>" for backend-wide fetches
    if session_name:
        cache_scope = session_name
    else:
        cache_scope = f"backend-{backend.value}"
    # Check cache: return the cached list while it is younger than the TTL
    if use_cache:
        async with self._cache_lock:
            if cache_scope in self._tool_cache:
                tools, ts = self._tool_cache[cache_scope]
                if now - ts < self._tool_cache_ttl:
                    # Mark as most recently used for eviction ordering
                    self._tool_cache.move_to_end(cache_scope)
                    return tools
    provider = self._registry.get(backend)
    if not provider.is_initialized:
        await provider.initialize()
    tools = await provider.list_tools(session_name=session_name)
    if bind_runtime_info:
        # If session_name is specified, bind all tools to that session
        if session_name:
            server_name = None
            if backend == BackendType.MCP:
                # Strip the first "<backend>-" occurrence to recover the server name
                server_name = session_name.replace(f"{backend.value}-", "", 1)
            for tool in tools:
                tool.bind_runtime_info(
                    backend=backend,
                    session_name=session_name,
                    server_name=server_name,
                    grounding_client=self,
                )
        else:
            # No session_name specified - get tools from all sessions
            # For each backend, find the default/primary session
            # For Shell/Web/GUI: use the default session (backend.value)
            # For MCP: tools should already be bound by the provider
            default_session_name = None
            # Try to find an existing session for this backend (first match wins)
            for sid, info in self._session_info.items():
                if info.backend_type == backend:
                    default_session_name = sid
                    break
            # Fallback: use backend default naming
            if not default_session_name:
                default_session_name = backend.value
            server_name = None
            if backend == BackendType.MCP and default_session_name:
                server_name = default_session_name.replace(f"{backend.value}-", "", 1)
            for tool in tools:
                # Only bind if tool doesn't have runtime info already
                # (some providers like MCP bind runtime info during list_tools)
                if not tool.is_bound:
                    tool.bind_runtime_info(
                        backend=backend,
                        session_name=default_session_name,
                        server_name=server_name,
                        grounding_client=self,
                    )
                elif not tool.runtime_info.grounding_client:
                    # Tool has runtime info but no grounding_client, add it
                    tool.bind_runtime_info(
                        backend=tool.runtime_info.backend,
                        session_name=tool.runtime_info.session_name,
                        server_name=tool.runtime_info.server_name,
                        grounding_client=self,
                    )
    # Save to cache, then evict oldest entries until within the size limit
    if use_cache:
        async with self._cache_lock:
            self._tool_cache[cache_scope] = (tools, now)
            self._tool_cache.move_to_end(cache_scope)
            while len(self._tool_cache) > self._tool_cache_maxsize:
                self._tool_cache.popitem(last=False)
    return tools
async def list_tools(
    self,
    backend: BackendType | list[BackendType] | None = None,
    session_name: str | None = None,
    *,
    use_cache: bool = False,
) -> List[BaseTool]:
    """
    List tools from backend(s) or session.

    1. session_name is provided → return tools from that session
    2. backend is list → return tools from multiple backends
    3. backend is single → return tools from that backend
    4. backend is None → return tools from all backends

    Args:
        backend: Single backend, list of backends, or None for all
        session_name: Specific session name (overrides backend parameter)
        use_cache: Whether to use cache

    Returns:
        List of tools

    Raises:
        GroundingError: With code ``SESSION_NOT_FOUND`` when ``session_name``
            is given but no such session is tracked.
    """
    # Session-level
    if session_name:
        if session_name not in self._sessions:
            # ErrorCode members are values, not exception classes; wrap the
            # code in the framework exception instead of calling the member.
            raise GroundingError(
                f"Session '{session_name}' not found",
                code=ErrorCode.SESSION_NOT_FOUND,
                session_name=session_name,
            )
        backend_type = self._session_info[session_name].backend_type
        return await self._fetch_tools(
            backend_type,
            session_name=session_name,
            use_cache=use_cache,
        )

    # Single backend
    if backend is not None and not isinstance(backend, list):
        return await self._fetch_tools(
            backend,
            session_name=None,
            use_cache=use_cache,
        )

    # Multiple backends, or all registered backends when none were given.
    # The registry keys are snapshotted so that a provider registered while
    # a fetch is awaited cannot invalidate the iteration.
    if isinstance(backend, list):
        backends = backend
    else:
        backends = list(self._registry.list().keys())

    tools: List[BaseTool] = []
    for be in backends:
        backend_tools = await self._fetch_tools(
            be,
            session_name=None,  # Provider aggregates all sessions
            use_cache=use_cache,
        )
        tools.extend(backend_tools)
    return tools
async def list_backend_tools(
    self,
    backend: BackendType | list[BackendType] | None = None,
    use_cache: bool = False
) -> list[BaseTool]:
    """Backend-scoped shortcut for ``list_tools`` (never session-scoped)."""
    tools = await self.list_tools(
        backend=backend,
        session_name=None,
        use_cache=use_cache,
    )
    return tools
async def list_session_tools(
    self,
    session_name: str,
    use_cache: bool = False
) -> list[BaseTool]:
    """List the tools of one session.

    Raises:
        GroundingError: With code ``SESSION_NOT_FOUND`` when no info is
            tracked for ``session_name``.
    """
    if session_name not in self._session_info:
        # ErrorCode members are values, not exception classes; wrap the code
        # in the framework exception instead of calling the member.
        raise GroundingError(
            f"Session '{session_name}' not found",
            code=ErrorCode.SESSION_NOT_FOUND,
            session_name=session_name,
        )
    backend = self._session_info[session_name].backend_type
    # use_cache is keyword-only on list_tools, so it must be passed by name.
    return await self.list_tools(backend, session_name, use_cache=use_cache)
async def list_all_backend_tools(
    self,
    use_cache: bool = False
) -> Dict[BackendType, list[BaseTool]]:
    """Map every registered backend to the tools ``list_backend_tools`` returns for it."""
    per_backend = {}
    for backend_type in self.list_providers().keys():
        per_backend[backend_type] = await self.list_backend_tools(
            backend=backend_type, use_cache=use_cache
        )
    return per_backend
async def search_tools(
    self,
    task_description: str,
    *,
    backend: BackendType | list[BackendType] | None = None,
    session_name: str | None = None,
    max_tools: int | None = None,
    search_mode: str | None = None,
    use_cache: bool = True,
    llm_callable = None,
    enable_llm_filter: bool | None = None,
    llm_filter_threshold: int | None = None,
    enable_cache_persistence: bool | None = None,
    cache_dir: str | None = None,
) -> list[BaseTool]:
    """
    Rank the tools of the selected backend(s) or session against a task.

    Candidates come from `list_tools`; ranking is delegated to the shared
    `SearchCoordinator`. If the coordinator raises, the error is logged and
    the first N candidates are returned instead (N = `max_tools`, or the
    configured `tool_search.max_tools` when `max_tools` is falsy).

    Args:
        task_description: Task description used to find relevant tools
        backend: Backend type(s) to search
        session_name: Specific session to search
        max_tools: Maximum number of tools to return
        search_mode: Search mode ("semantic", "keyword", "hybrid")
        use_cache: Whether to use the cached tool list
        llm_callable: LLM client handed to the coordinator
        enable_llm_filter: Whether to use LLM pre-filtering
        llm_filter_threshold: Threshold for applying the LLM filter
        enable_cache_persistence: Whether to persist embeddings to disk
        cache_dir: Directory for the persistent cache

    Returns:
        The tools selected by the coordinator, an empty list when there are
        no candidates, or the truncated candidate list on search failure.
    """
    candidate_tools = await self.list_tools(
        backend=backend,
        session_name=session_name,
        use_cache=use_cache,
    )
    if not candidate_tools:
        self._logger.warning("No candidate tools found for search")
        return []

    # The coordinator is built once, on first use. Later calls reuse it, so
    # the constructor-level options below only take effect on that first call.
    if self._search_coordinator is None:
        quality_config = getattr(self._config, 'tool_quality', None)
        if quality_config:
            enable_quality_ranking = getattr(quality_config, 'enable_quality_ranking', True)
        else:
            enable_quality_ranking = True

        self._search_coordinator = SearchCoordinator(
            max_tools=max_tools,
            llm=llm_callable,
            enable_llm_filter=enable_llm_filter,
            llm_filter_threshold=llm_filter_threshold,
            enable_cache_persistence=enable_cache_persistence,
            cache_dir=cache_dir,
            quality_manager=self._quality_manager,
            enable_quality_ranking=enable_quality_ranking,
        )

    try:
        return await self._search_coordinator._arun(
            task_prompt=task_description,
            candidate_tools=candidate_tools,
            max_tools=max_tools,
            mode=search_mode,
        )
    except Exception as exc:
        self._logger.error(f"Tool search failed: {exc}")
        # Best-effort fallback: hand back the head of the unranked list.
        fallback_max = max_tools or self._config.tool_search.max_tools
        return candidate_tools[:fallback_max]
def get_last_search_debug_info(self) -> Optional[Dict[str, Any]]:
    """Return the search coordinator's debug info for the last tool search.

    Returns:
        Whatever the coordinator's own `get_last_search_debug_info()` returns,
        or None when no coordinator has been created yet.
    """
    coordinator = self._search_coordinator
    return None if coordinator is None else coordinator.get_last_search_debug_info()
async def get_tools_with_auto_search(
    self,
    *,
    task_description: str | None = None,
    backend: BackendType | list[BackendType] | None = None,
    session_name: str | None = None,
    max_tools: int | None = None,
    search_mode: str | None = None,
    use_cache: bool = True,
    llm_callable = None,
    enable_llm_filter: bool | None = None,
    llm_filter_threshold: int | None = None,
    enable_cache_persistence: bool | None = None,
    cache_dir: str | None = None,
) -> list[BaseTool]:
    """
    Return tools for a task, searching only when the full list is too long.

    Decision rule:
        - tool_count > max_tools AND task_description is given:
          delegate to `search_tools` and return its result
        - otherwise: return every tool unfiltered (this includes the case
          where the count exceeds max_tools but no task description was given)

    Args:
        task_description: Task description; needed for a search to be triggered.
        backend: Backend type(s) to query
        session_name: Specific session name
        max_tools: Search threshold and result cap passed to `search_tools`.
            When None, `self._config.tool_search.max_tools` is used.
        search_mode: Search mode ("semantic", "keyword", "hybrid")
        use_cache: Whether to use cache
        llm_callable: LLM client (for intelligent filtering)
        enable_llm_filter: Whether to use LLM pre-filtering; None defers to
            the search layer's default
        llm_filter_threshold: Tool-count threshold for applying the LLM
            filter; None defers to the search layer's default
        enable_cache_persistence: Whether to persist embeddings to disk
        cache_dir: Directory for persistent cache

    Returns:
        An empty list when no tools exist, the full tool list when no search
        is needed, or the result of `search_tools` otherwise.

    Examples:
        # Search is triggered automatically if there are too many tools
        tools = await gc.get_tools_with_auto_search(
            task_description="Create a flowchart",
            backend=BackendType.MCP
        )
        # No task_description = no search, all tools are returned
        tools = await gc.get_tools_with_auto_search(
            backend=BackendType.MCP
        )
    """
    all_tools = await self.list_tools(
        backend=backend,
        session_name=session_name,
        use_cache=use_cache,
    )
    if not all_tools:
        self._logger.warning("No tools found")
        return []

    if max_tools is None:
        max_tools = self._config.tool_search.max_tools

    tools_count = len(all_tools)
    over_limit = tools_count > max_tools
    has_task = task_description is not None

    if not (over_limit and has_task):
        # Nothing to search: explain why and return the unfiltered list.
        if not has_task:
            self._logger.debug(
                f"No task description provided, returning all {tools_count} tools"
            )
        else:
            self._logger.debug(
                f"Tool count ({tools_count}) ≤ max_tools ({max_tools}), "
                f"returning all tools without search"
            )
        return all_tools

    self._logger.info(
        f"Tool count ({tools_count}) > max_tools ({max_tools}), "
        f"triggering search to filter relevant tools..."
    )
    return await self.search_tools(
        task_description=task_description,
        backend=backend,
        session_name=session_name,
        max_tools=max_tools,
        search_mode=search_mode,
        use_cache=use_cache,
        llm_callable=llm_callable,
        enable_llm_filter=enable_llm_filter,
        llm_filter_threshold=llm_filter_threshold,
        enable_cache_persistence=enable_cache_persistence,
        cache_dir=cache_dir,
    )
async def invoke_tool(
    self,
    tool: BaseTool | str,
    parameters: Dict[str, Any] | None = None,
    *,
    backend: BackendType | None = None,
    session_name: str | None = None,
    server: str | None = None,
    keep_session: bool = False,
    **kwargs
) -> ToolResult:
    """
    Universal tool invocation method.

    Supports multiple calling patterns:
        1. Using BaseTool instance with bound runtime info
        2. Using BaseTool instance with explicit backend/session
        3. Using tool name with automatic lookup
        4. Using tool name with explicit backend/session/server

    Args:
        tool: BaseTool instance or tool name string
        parameters: Tool parameters as dict
        backend: Backend type (optional for BaseTool with runtime_info)
        session_name: Session name (optional for BaseTool with runtime_info)
        server: Server name (for MCP, optional for BaseTool with runtime_info)
        keep_session: When False, the session used for the call is closed
            afterwards if a server was resolved or the session name starts
            with the backend's value
        **kwargs: Used as the tool parameters when `parameters` is empty or None

    Returns:
        ToolResult

    Raises:
        TypeError: `tool` is neither a BaseTool nor a str.
        GroundingError: no backend can be determined for a BaseTool, the tool
            name is not found, or the tool name is ambiguous.
        The error built by `ErrorCode.SESSION_NOT_FOUND(...)` when only an
            unknown `session_name` is given for a tool name.

    Examples:
        # Pattern 1: Tool instance with runtime info (from list_tools)
        tools = await gc.list_tools()
        tool = next(t for t in tools if t.name == "read_file")
        result = await gc.invoke_tool(tool, {"path": "/tmp/a.txt"})

        # Pattern 2: Tool instance with explicit backend/session
        my_tool = MyTool()
        result = await gc.invoke_tool(
            my_tool,
            {"arg": "value"},
            backend=BackendType.SHELL
        )

        # Pattern 3: Tool name with automatic lookup
        result = await gc.invoke_tool("read_file", {"path": "/tmp/a.txt"})

        # Pattern 4: Tool name with explicit backend/server
        result = await gc.invoke_tool(
            "read_file",
            {"path": "/tmp/a.txt"},
            backend=BackendType.MCP,
            server="filesystem"
        )
    """
    # A non-empty `parameters` dict wins; otherwise fall back to **kwargs.
    params = parameters or kwargs

    # BaseTool instance
    if isinstance(tool, BaseTool):
        tool_name = tool.schema.name

        # Bound runtime info is only used when the caller gave no override.
        if tool.is_bound and not (backend or session_name or server):
            runtime_backend = tool.runtime_info.backend
            runtime_session = tool.runtime_info.session_name
            runtime_server = tool.runtime_info.server_name
        else:
            # Use provided or tool's default backend
            runtime_backend = backend or tool.backend_type
            runtime_session = session_name
            runtime_server = server

        if runtime_backend == BackendType.NOT_SET:
            raise GroundingError(
                f"Cannot invoke tool '{tool_name}': no backend specified. "
                f"Either bind runtime info or provide backend parameter.",
                code=ErrorCode.TOOL_EXECUTION_FAIL
            )

    # Tool name string
    elif isinstance(tool, str):
        tool_name = tool

        # If explicit backend/session provided, use them
        if backend or session_name:
            runtime_session = session_name
            runtime_server = server

            # Infer backend: prefer explicit backend; otherwise get from session
            if backend is not None:
                runtime_backend = backend
            else:
                if runtime_session not in self._session_info:
                    raise ErrorCode.SESSION_NOT_FOUND(runtime_session)
                runtime_backend = self._session_info[
                    runtime_session
                ].backend_type
        else:
            # Auto-lookup: search for the tool
            all_tools = await self.list_tools(use_cache=True)
            matching = [t for t in all_tools if t.name == tool_name]

            if not matching:
                raise GroundingError(
                    f"Tool '{tool_name}' not found",
                    code=ErrorCode.TOOL_NOT_FOUND
                )

            if len(matching) > 1:
                sources = [
                    f"{t.runtime_info.backend.value}/{t.runtime_info.session_name}"
                    for t in matching if t.is_bound
                ]
                raise GroundingError(
                    f"Multiple tools named '{tool_name}' found in: {sources}. "
                    f"Please specify 'backend' or 'session_name' parameter.",
                    code=ErrorCode.AMBIGUOUS_TOOL
                )

            # Use the found tool's runtime info
            found_tool = matching[0]
            runtime_backend = found_tool.runtime_info.backend
            runtime_session = found_tool.runtime_info.session_name
            runtime_server = found_tool.runtime_info.server_name

    else:
        # Without this guard an unsupported argument falls through both
        # branches and fails later with an unbound-local error.
        raise TypeError(
            f"invoke_tool() expects a BaseTool instance or a tool name string, "
            f"got {type(tool).__name__}"
        )

    # Ensure session exists (except for SYSTEM backend which doesn't use sessions).
    # Cached tools carry a session_name even when that session is not running,
    # so membership in self._sessions is checked as well.
    if runtime_backend != BackendType.SYSTEM:
        if not runtime_session or runtime_session not in self._sessions:
            runtime_session = await self.ensure_session(runtime_backend, runtime_server)

    try:
        provider = self._registry.get(runtime_backend)
        # SystemProvider doesn't use sessions, pass a dummy value
        session_param = runtime_session if runtime_session else "system"
        result = await provider.call_tool(session_param, tool_name, params)

        # Update last_activity in session_info (skip for SYSTEM backend)
        if runtime_backend != BackendType.SYSTEM and runtime_session and runtime_session in self._session_info:
            async with self._lock:
                old_info = self._session_info[runtime_session]
                self._session_info[runtime_session] = old_info.model_copy(
                    update={"last_activity": datetime.utcnow()}
                )

        return result
    finally:
        # Auto-close session if requested (skip for SYSTEM backend)
        if runtime_backend != BackendType.SYSTEM and not keep_session and runtime_session:
            if runtime_server or runtime_session.startswith(runtime_backend.value):
                await self.close_session(runtime_session)
================================================
FILE: anytool/grounding/core/provider.py
================================================
"""
A provider manages the sessions of a backend. For the MCP backend, the provider manages its sessions through servers.
"""
from abc import ABC, abstractmethod
from typing import List, Dict, Any, Optional, Generic, TypeVar
from .tool import BaseTool
from .types import BackendType, SessionConfig, ToolResult, ToolStatus
from .session import BaseSession
from .security.policies import SecurityPolicyManager
from anytool.config import get_config
from anytool.utils.logging import Logger
logger = Logger.get_logger(__name__)
TSession = TypeVar('TSession', bound=BaseSession)
class Provider(ABC, Generic[TSession]):
    """Backend provider base class.

    A provider owns the sessions of one backend. Sessions are kept in
    `self._sessions`, keyed by session name, and tool listing / tool calls are
    routed to them. Subclasses implement `initialize`, `create_session` and
    `close_session`.
    """

    def __init__(self, backend_type: BackendType, config: Optional[Dict[str, Any]] = None):
        """Store the backend type and config and set up the security manager.

        Args:
            backend_type: Backend this provider serves.
            config: Provider configuration; None is stored as an empty dict.
        """
        self.backend_type = backend_type
        self.config = config or {}
        self.is_initialized = False
        self._sessions: Dict[str, TSession] = {}  # session name -> session
        self._session_counter: int = 0
        self.security_manager = SecurityPolicyManager()
        self._setup_security_policy(config)

    def _setup_security_policy(self, config: dict | None = None):
        """Load this backend's security policy from the global config.

        The `config` argument is currently not used.
        """
        security_policy = get_config().get_security_policy(self.backend_type.value)
        # NOTE(review): the policy is looked up for self.backend_type but is
        # registered under BackendType.SHELL for every provider. This looks
        # like it should be self.backend_type - confirm against the places
        # that read the policy before changing it.
        self.security_manager.set_backend_policy(BackendType.SHELL, security_policy)

    async def ensure_initialized(self) -> None:
        """
        Internal helper. Guarantee that `initialize()` has been executed
        """
        if not self.is_initialized:
            await self.initialize()

    @abstractmethod
    async def initialize(self) -> None:
        """Initialize provider, call `create_session` to create all sessions if not exist

        Subclasses should set `self.is_initialized = True` after successful initialization
        """
        pass

    @abstractmethod
    async def create_session(self, session_config: SessionConfig) -> TSession:
        """Create session, update _sessions"""
        pass

    @abstractmethod
    async def close_session(self, session_name: str) -> None:
        """Close session"""
        pass

    def list_sessions(self) -> List[str]:
        """Return the names of all sessions currently held by this provider."""
        return list(self._sessions.keys())

    def get_session(self, session_name: str) -> Optional[TSession]:
        """Return the session registered under `session_name`, or None."""
        return self._sessions.get(session_name)

    async def close_all_sessions(self) -> None:
        """Provider shutdown cleanup.

        Closes every session, logging (not raising) individual failures so the
        remaining sessions are still closed, then resets the provider to the
        uninitialized state.
        """
        # Iterate over a snapshot: close_session() may mutate self._sessions.
        for session_name in list(self._sessions.keys()):
            try:
                await self.close_session(session_name)
            except Exception as e:
                # Use the module logger, like call_tool(), instead of stdout.
                logger.error("Error closing session %s: %s", session_name, e)

        self._sessions.clear()
        self.is_initialized = False

    def __repr__(self) -> str:
        return (f"Provider(backend={self.backend_type.value}, "
                f"initialized={self.is_initialized}, "
                f"sessions={len(self._sessions)}, "
                f"config_items={len(self.config)})")

    async def list_tools(self, session_name: Optional[str] = None) -> List[BaseTool]:
        """
        Return BaseTool list.

        If session_name is specified, only return the tools of the specified
        session (an empty list when that session does not exist).
        If session_name is not specified, return all tools of all sessions.
        """
        await self.ensure_initialized()

        if session_name:
            session = self._sessions.get(session_name)
            return await session.list_tools() if session else []

        tools: list[BaseTool] = []
        for sess in self._sessions.values():
            tools.extend(await sess.list_tools())
        return tools

    async def call_tool(
        self,
        session_name: str,
        tool_name: str,
        parameters: Dict[str, Any] | None = None,
    ) -> ToolResult:
        """Run `tool_name` in the given session.

        Failures are reported through the result rather than raised: an
        unknown session or an exception from the session's `call_tool` yields
        a ToolResult with status ERROR and the message in `error`.
        """
        await self.ensure_initialized()
        parameters = parameters or {}

        session = self._sessions.get(session_name)
        if session is None:
            return ToolResult(
                status=ToolStatus.ERROR,
                content="",
                error=f"Session '{session_name}' not found",
                metadata={"session_name": session_name, "tool_name": tool_name},
            )

        try:
            return await session.call_tool(tool_name, parameters)
        except Exception as e:
            logger.error("Execute tool error: %s @%s - %s", tool_name, session_name, e)
            return ToolResult(
                status=ToolStatus.ERROR,
                content="",
                error=str(e),
                metadata={"session_name": session_name, "tool_name": tool_name},
            )
class ProviderRegistry:
    """
    Lookup table from BackendType to its Provider, with runtime registration.
    """

    def __init__(self) -> None:
        self._providers: dict[BackendType, Provider] = {}

    def register(self, provider: "Provider") -> None:
        """Store `provider` under its own backend type, replacing any previous entry."""
        backend_key = provider.backend_type
        self._providers[backend_key] = provider
        logger.debug("Provider for %s registered", backend_key)

    def get(self, backend: BackendType) -> "Provider":
        """Return the provider registered for `backend`; raise KeyError if there is none."""
        if backend in self._providers:
            return self._providers[backend]
        raise KeyError(f"Provider for '{backend.value}' not registered")

    def list(self) -> dict[BackendType, "Provider"]:
        """Return a shallow copy of the registry mapping."""
        return self._providers.copy()
================================================
FILE: anytool/grounding/core/quality/__init__.py
================================================
from .types import ToolQualityRecord, ExecutionRecord, DescriptionQuality
from .manager import ToolQualityManager
from .store import QualityStore
# Module-wide ToolQualityManager; stays None until set_quality_manager() is called.
_global_manager: "ToolQualityManager | None" = None


def get_quality_manager() -> "ToolQualityManager | None":
    """Return the manager stored by `set_quality_manager`, or None if none was stored."""
    return _global_manager


def set_quality_manager(manager: "ToolQualityManager") -> None:
    """Store `manager` as the module-wide quality manager, replacing any previous one."""
    global _global_manager
    _global_manager = manager
__all__ = [
"ToolQualityRecord",
"ExecutionRecord",
"DescriptionQuality",
"ToolQualityManager",
"QualityStore",
"get_quality_manager",
"set_quality_manager",
]
================================================
FILE: anytool/grounding/core/quality/manager.py
================================================
"""
Tool Quality Manager
Core API (called by main flow):
- record_execution(): Called by BaseTool after execution
- adjust_ranking(): Called by SearchCoordinator for quality-aware sorting
- evolve(): Called periodically by ToolLayer for self-evolution
Query API (for inspection/debugging):
- get_quality_report(), get_tool_insights()
"""
import hashlib
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Tuple, TYPE_CHECKING
from .types import ToolQualityRecord, ExecutionRecord, DescriptionQuality
from .store import QualityStore
from anytool.utils.logging import Logger
from anytool.config.constants import PROJECT_ROOT
if TYPE_CHECKING:
from anytool.grounding.core.tool import BaseTool
from anytool.grounding.core.types import ToolResult
from anytool.llm import LLMClient
logger = Logger.get_logger(__name__)
class ToolQualityManager:
"""
Manages tool quality tracking and quality-aware ranking.
Features:
- Track execution success rate and latency
- LLM-based description quality evaluation (optional, requires llm_client)
- Persistent memory across sessions
- Quality-integrated tool ranking
- Incremental update detection
"""
def __init__(
    self,
    *,
    cache_dir: Optional[Path] = None,
    llm_client: Optional["LLMClient"] = None,
    enable_persistence: bool = True,
    auto_save: bool = True,
    evolve_interval: int = 5,
):
    """Set up in-memory state and, when persistence is on, load saved records.

    Args:
        cache_dir: Directory for the quality store; defaults to
            PROJECT_ROOT/.anytool/tool_quality when falsy.
        llm_client: Optional LLM client used for description evaluation.
        enable_persistence: Create a QualityStore and load from it.
        auto_save: Persist after each recorded change when a store exists.
        evolve_interval: Step size used to align the last-evolve counter.
    """
    if cache_dir:
        self._cache_dir = cache_dir
    else:
        self._cache_dir = PROJECT_ROOT / ".anytool" / "tool_quality"
    self._llm_client = llm_client
    self._enable_persistence = enable_persistence
    self._auto_save = auto_save
    self._evolve_interval = evolve_interval

    # In-memory state; overwritten below if a store provides saved data.
    self._records: Dict[str, ToolQualityRecord] = {}
    self._global_execution_count: int = 0
    self._last_evolve_count: int = 0

    # Persistent store (None when persistence is disabled)
    self._store = QualityStore(self._cache_dir) if enable_persistence else None

    if self._store:
        loaded_records, loaded_count = self._store.load_all()
        self._records = loaded_records
        self._global_execution_count = loaded_count
        # Round the loaded count down to a multiple of the evolve interval.
        completed_cycles = self._global_execution_count // self._evolve_interval
        self._last_evolve_count = completed_cycles * self._evolve_interval

    logger.info(
        f"ToolQualityManager initialized "
        f"(persistence={enable_persistence}, records={len(self._records)}, "
        f"global_count={self._global_execution_count}, evolve_interval={self._evolve_interval})"
    )
def get_tool_key(self, tool: "BaseTool") -> str:
    """Build the `backend:server:name` key that identifies a tool's record.

    Bound tools use their runtime backend and server name ("default" when the
    server name is empty). Unbound tools use their declared backend type, or
    "unknown" when it is NOT_SET, with server "default".
    """
    from anytool.grounding.core.types import BackendType

    if not tool.is_bound:
        server = "default"
        backend = tool.backend_type.value if tool.backend_type != BackendType.NOT_SET else "unknown"
    else:
        backend = tool.runtime_info.backend.value
        server = tool.runtime_info.server_name or "default"

    return f"{backend}:{server}:{tool.name}"
def _compute_description_hash(self, tool: "BaseTool") -> str:
"""Compute hash of tool description for change detection."""
content = f"{tool.name}|{tool.description or ''}|{tool.schema.parameters}"
return hashlib.md5(content.encode()).hexdigest()[:16]
def get_record(self, tool: "BaseTool") -> ToolQualityRecord:
    """Return the quality record for `tool`, creating and caching it on first use."""
    tool_key = self.get_tool_key(tool)
    if tool_key in self._records:
        return self._records[tool_key]

    # Keys have the form backend:server:name; the name itself may contain ":".
    backend_part, server_part, name_part = tool_key.split(":", 2)
    self._records[tool_key] = ToolQualityRecord(
        tool_key=tool_key,
        backend=backend_part,
        server=server_part,
        tool_name=name_part,
        description_hash=self._compute_description_hash(tool),
    )
    return self._records[tool_key]
def get_quality_score(self, tool: "BaseTool") -> float:
    """Return the `quality_score` of the tool's record (created if missing)."""
    record = self.get_record(tool)
    return record.quality_score
# Execution Tracking
async def record_execution(
    self,
    tool: "BaseTool",
    result: "ToolResult",
    execution_time_ms: float,
) -> None:
    """Append one execution to the tool's record and bump the global counter.

    The error text is stored only for failed results that carry an error,
    truncated to 500 characters. When auto-save is on and a store exists the
    record is persisted immediately.
    """
    record = self.get_record(tool)

    if result.is_error and result.error:
        error_message = str(result.error)[:500]
    else:
        error_message = None

    entry = ExecutionRecord(
        timestamp=datetime.now(),
        success=result.is_success,
        execution_time_ms=execution_time_ms,
        error_message=error_message,
    )
    record.add_execution(entry)

    self._global_execution_count += 1

    if self._auto_save and self._store:
        await self._store.save_record(record, self._records, self._global_execution_count)

    logger.debug(
        f"Recorded execution: {record.tool_key} "
        f"success={result.is_success} time={execution_time_ms:.0f}ms "
        f"(global_count={self._global_execution_count})"
    )
async def evaluate_description(
    self,
    tool: "BaseTool",
    force: bool = False,
) -> Optional[DescriptionQuality]:
    """
    Evaluate tool description quality using LLM.

    Args:
        tool: Tool whose name, description and parameter schema are rated.
        force: Re-evaluate even if a stored evaluation exists for the
            current description hash.

    Returns:
        The stored evaluation when one exists, `force` is False and the
        description hash is unchanged; otherwise the new DescriptionQuality.
        None when no LLM client is configured, when the response contains no
        JSON object, or when the LLM call / parsing fails (the failure is
        logged, not raised).
    """
    import json
    import math

    if not self._llm_client:
        logger.debug("LLM client not available for description evaluation")
        return None

    record = self.get_record(tool)

    # Reuse the stored evaluation unless forced or the description changed.
    if record.description_quality and not force:
        current_hash = self._compute_description_hash(tool)
        if current_hash == record.description_hash:
            return record.description_quality

    # Build evaluation prompt
    desc = tool.description or "No description provided"
    if len(desc) > 4000:
        desc = desc[:4000] + "\n... (truncated for length)"

    params = tool.schema.parameters or {}
    if params:
        param_lines = []
        # Extract parameter names and types from the JSON schema. Schemas come
        # from external tools, so tolerate a non-mapping `properties` value
        # and non-mapping entries (e.g. boolean sub-schemas).
        properties = params.get("properties") if isinstance(params, dict) else None
        if isinstance(properties, dict):
            for param_name, param_info in properties.items():
                if not isinstance(param_info, dict):
                    param_info = {}
                param_type = param_info.get("type", "unknown")
                param_desc = param_info.get("description", "")
                if param_desc:
                    param_lines.append(f"- {param_name} ({param_type}): {param_desc}")
                else:
                    param_lines.append(f"- {param_name} ({param_type})")
        param_text = "\n".join(param_lines) if param_lines else "No parameter descriptions available"
    else:
        param_text = "No parameters"

    prompt = f"""# Task: Evaluate this tool's documentation quality
## Tool Information
Name: {tool.name}
Description:
{desc}
Parameters:
{param_text}
## Evaluation Task
Rate the documentation on two dimensions (0.0 to 1.0 scale):
### 1. Clarity
How clear is the tool's purpose and usage?
- 0.0-0.3: No description or completely unclear
- 0.4-0.6: Basic purpose but vague
- 0.7-0.8: Clear purpose and functionality
- 0.9-1.0: Very clear with usage examples or context
### 2. Completeness
Are inputs/outputs properly documented?
- 0.0-0.3: Missing critical information
- 0.4-0.6: Basic info but lacks details
- 0.7-0.8: Well documented with types
- 0.9-1.0: Comprehensive with constraints and examples
## Scoring Guidelines
- Short descriptions can score high if clear and accurate
- If parameters exist but aren't explained in description, reduce completeness score
- Missing description means clarity = 0.0
## Output
Respond with JSON only:
```json
{{
"reasoning": "Brief 1-2 sentence analysis",
"clarity": 0.8,
"completeness": 0.7
}}
```"""

    def extract_json_object(text: str) -> str | None:
        """Extract first complete JSON object from text by counting braces."""
        start = text.find('{')
        if start == -1:
            return None

        count = 0
        in_string = False
        escape_next = False

        for i, char in enumerate(text[start:], start):
            if escape_next:
                # Character after a backslash is taken literally.
                escape_next = False
                continue
            if char == '\\':
                escape_next = True
                continue
            if char == '"':
                in_string = not in_string
                continue
            if not in_string:
                if char == '{':
                    count += 1
                elif char == '}':
                    count -= 1
                    if count == 0:
                        return text[start:i+1]
        return None

    def safe_float(value, default=0.5, min_val=0.0, max_val=1.0):
        """Safely convert to float and clamp to valid range."""
        if value is None:
            return default
        try:
            f = float(value)
        except (ValueError, TypeError, OverflowError):
            logger.warning(f"Invalid score value: {value}, using default {default}")
            return default
        # json.loads accepts the literal NaN; clamping it with min/max would
        # silently turn it into max_val, so treat it as invalid instead.
        if math.isnan(f):
            logger.warning(f"Invalid score value: {value}, using default {default}")
            return default
        return max(min_val, min(max_val, f))

    try:
        response = await self._llm_client.complete(prompt)
        content = response["message"]["content"]

        json_str = extract_json_object(content)
        if not json_str:
            logger.warning(f"Could not find JSON in LLM response for {tool.name}")
            return None

        data = json.loads(json_str)

        clarity = safe_float(data.get("clarity"), default=0.5)
        completeness = safe_float(data.get("completeness"), default=0.5)
        reasoning = str(data.get("reasoning", ""))[:500]  # Limit reasoning length

        quality = DescriptionQuality(
            clarity=clarity,
            completeness=completeness,
            evaluated_at=datetime.now(),
            reasoning=reasoning,
        )

        # Update record
        record.description_quality = quality
        record.description_hash = self._compute_description_hash(tool)
        record.last_updated = datetime.now()

        # Save
        if self._auto_save and self._store:
            await self._store.save_record(record, self._records, self._global_execution_count)

        logger.info(f"Evaluated description: {tool.name} score={quality.overall_score:.2f}")
        return quality

    except Exception as e:
        logger.error(f"Description evaluation failed for {tool.name}: {e}")
        return None
# Quality-Aware Ranking
def adjust_ranking(
    self,
    tools_with_scores: List[Tuple["BaseTool", float]],
) -> List[Tuple["BaseTool", float]]:
    """
    Re-rank tools by scaling each semantic score with the tool's penalty.

    Args:
        tools_with_scores: List of (tool, semantic_score) tuples

    Returns:
        New list of (tool, semantic_score * penalty) tuples, highest first.
    """
    rescored = [
        (candidate, score * self.get_penalty(candidate))
        for candidate, score in tools_with_scores
    ]
    return sorted(rescored, key=lambda pair: pair[1], reverse=True)
def get_penalty(self, tool: "BaseTool") -> float:
    """Return the `penalty` factor stored on the tool's record (created if missing)."""
    record = self.get_record(tool)
    return record.penalty
# Change Detection
def check_changes(self, tools: List["BaseTool"]) -> Dict[str, str]:
    """
    Classify each tool as new, updated or unchanged.

    A tool is "new" when it has no record, "updated" when its current
    description hash differs from the stored one, and "unchanged" otherwise.
    For updated tools the stored evaluation is dropped and the stored hash is
    replaced by the current one.

    Returns dict: {tool_key: "new"|"updated"|"unchanged"}
    """
    changes = {}
    for tool in tools:
        key = self.get_tool_key(tool)
        current_hash = self._compute_description_hash(tool)

        if key not in self._records:
            status = "new"
        elif self._records[key].description_hash != current_hash:
            status = "updated"
            # The old evaluation no longer describes the current text.
            stale = self._records[key]
            stale.description_quality = None
            stale.description_hash = current_hash
        else:
            status = "unchanged"
        changes[key] = status

    statuses = list(changes.values())
    new_count = statuses.count("new")
    updated_count = statuses.count("updated")
    if new_count or updated_count:
        logger.info(f"Tool changes: {new_count} new, {updated_count} updated")

    return changes
async def save(self) -> None:
    """
    Write all in-memory records to the store, if there is one.

    Note: auto_save already persists from record_execution() and
    evaluate_description(); this is the explicit entry point for callers
    that want to force a save.
    """
    store = self._store
    if not store:
        return
    await store.save_all(self._records)
def clear_cache(self) -> None:
    """Drop every in-memory record and, when a store exists, clear it too."""
    self._records.clear()
    store = self._store
    if store:
        store.clear()
def get_stats(self) -> Dict:
    """
    Summarize the tracked records.

    Returns {"total_tools": 0} when nothing is tracked; otherwise the record
    count, the summed call count, the unweighted means of success rate and
    quality score, and how many records carry a description evaluation.

    Note: Query API for inspection, may not be called in main flow.
    """
    if not self._records:
        return {"total_tools": 0}

    records = list(self._records.values())
    tool_total = len(records)

    def mean_of(values):
        # Mirrors the guarded division: 0 when there is nothing to average.
        return sum(values) / tool_total if records else 0

    evaluated = [rec for rec in records if rec.description_quality]

    return {
        "total_tools": tool_total,
        "total_executions": sum(rec.total_calls for rec in records),
        "avg_success_rate": mean_of(rec.success_rate for rec in records),
        "avg_quality_score": mean_of(rec.quality_score for rec in records),
        "tools_with_description_eval": len(evaluated),
    }
def get_top_tools(
    self,
    n: int = 10,
    backend: Optional[str] = None,
    min_calls: int = 3,
) -> List[ToolQualityRecord]:
    """
    Return the N best records by quality score.

    Args:
        n: Number of tools to return
        backend: Only consider records of this backend (optional)
        min_calls: Records with fewer calls are ignored (filters untested tools)
    """
    eligible = []
    for record in self._records.values():
        if not record.total_calls >= min_calls:
            continue
        if backend is None or record.backend == backend:
            eligible.append(record)

    ranked = sorted(eligible, key=lambda rec: rec.quality_score, reverse=True)
    return ranked[:n]
def get_problematic_tools(
    self,
    success_rate_threshold: float = 0.5,
    min_calls: int = 5,
) -> List[ToolQualityRecord]:
    """
    Return records whose recent success rate is below the threshold.

    Args:
        success_rate_threshold: Records below this recent success rate are flagged
        min_calls: Records with fewer calls are ignored (avoids flagging new tools)
    """
    flagged = []
    for record in self._records.values():
        if record.total_calls >= min_calls and record.recent_success_rate < success_rate_threshold:
            flagged.append(record)
    return flagged
def get_quality_report(self) -> Dict:
    """
    Build a structured quality report.

    Returns a "no_data" status dict when nothing is tracked. Otherwise the
    report contains:
        - summary: overall counts, success rate and average quality score
          (the average covers records with at least 3 calls)
        - by_backend: per-backend tool, call, success and server counts
        - top_tools / problematic_tools: short per-record summaries
        - recommendations: output of `_generate_recommendations`
    """
    if not self._records:
        return {"status": "no_data", "message": "No quality data collected yet"}

    records = list(self._records.values())
    tested_records = [rec for rec in records if rec.total_calls >= 3]

    # Accumulate per-backend totals; servers are collected as a set first.
    backends = {}
    for rec in records:
        bucket = backends.setdefault(
            rec.backend,
            {"tools": 0, "total_calls": 0, "success_count": 0, "servers": set()},
        )
        bucket["tools"] += 1
        bucket["total_calls"] += rec.total_calls
        bucket["success_count"] += rec.success_count
        bucket["servers"].add(rec.server)

    # Replace each server set by its size and derive the success rate.
    for bucket in backends.values():
        bucket["servers"] = len(bucket["servers"])
        calls = bucket["total_calls"]
        bucket["success_rate"] = bucket["success_count"] / calls if calls > 0 else 0

    top_tools = self.get_top_tools(5)
    problematic = self.get_problematic_tools()

    summary = {
        "total_tools": len(records),
        "tested_tools": len(tested_records),
        "total_executions": sum(rec.total_calls for rec in records),
        "overall_success_rate": (
            sum(rec.success_count for rec in records) /
            max(1, sum(rec.total_calls for rec in records))
        ),
        "avg_quality_score": (
            sum(rec.quality_score for rec in tested_records) / len(tested_records)
            if tested_records else 0
        ),
    }
    top_summary = [
        {"key": rec.tool_key, "score": rec.quality_score, "success_rate": rec.success_rate}
        for rec in top_tools
    ]
    problem_summary = [
        {"key": rec.tool_key, "success_rate": rec.success_rate, "calls": rec.total_calls}
        for rec in problematic
    ]

    return {
        "summary": summary,
        "by_backend": backends,
        "top_tools": top_summary,
        "problematic_tools": problem_summary,
        "recommendations": self._generate_recommendations(records, problematic),
    }
def _generate_recommendations(
    self,
    records: List[ToolQualityRecord],
    problematic: List[ToolQualityRecord],
) -> List[str]:
    """Turn quality data into up to three human-readable recommendations.

    One line each, in this order and only when applicable: low-success tools
    to review (first three names), tools with at least 3 calls but no
    description evaluation, and tools whose evaluation scores below 0.5.
    """
    recommendations = []

    if problematic:
        tool_names = [rec.tool_name for rec in problematic[:3]]
        recommendations.append(
            f"Review low-success tools: {', '.join(tool_names)}"
        )

    unevaluated = []
    poor_docs = []
    for rec in records:
        evaluation = rec.description_quality
        if not evaluation:
            if rec.total_calls >= 3:
                unevaluated.append(rec)
        elif evaluation.overall_score < 0.5:
            poor_docs.append(rec)

    if unevaluated:
        recommendations.append(
            f"{len(unevaluated)} tools need description quality evaluation"
        )
    if poor_docs:
        recommendations.append(
            f"{len(poor_docs)} tools have poor documentation quality"
        )

    return recommendations
def compute_adaptive_quality_weight(self) -> float:
    """
    Derive a quality weight in [0.1, 0.5] from how much data has been collected.

    Returns 0.1 when there are no records or no record has at least 3 calls.
    Otherwise the weight grows with two equally weighted factors: the share
    of records with at least 3 calls, and the mean call count over all
    records (saturating at 20 calls).
    """
    if not self._records:
        return 0.1

    records = list(self._records.values())
    tested_count = len([rec for rec in records if rec.total_calls >= 3])
    if tested_count == 0:
        return 0.1

    record_count = len(records)
    coverage = tested_count / record_count

    # Mean calls across every record (tested or not), capped at 20.
    avg_calls = sum(rec.total_calls for rec in records) / record_count
    richness = min(1.0, avg_calls / 20)

    confidence = (coverage * 0.5 + richness * 0.5)
    return round(0.1 + confidence * 0.4, 2)
def should_reevaluate_description(self, tool: "BaseTool") -> bool:
    """
    Decide whether a tool's description needs a (new) quality evaluation.

    Returns True when any of the following holds:
    - the tool has no quality record yet
    - the description hash differs from the recorded one
    - there is no evaluation but the tool has been called at least 5 times
    - the description scored above 0.7 yet the recent success rate is
      below 0.5 after at least 10 calls (description may be misleading)
    """
    record = self._records.get(self.get_tool_key(tool))
    if not record:
        return True

    # Description text changed since it was last hashed
    if self._compute_description_hash(tool) != record.description_hash:
        return True

    quality = record.description_quality
    if not quality:
        # Never evaluated: wait until there is enough usage data
        return record.total_calls >= 5

    # Evaluated: look for a "good docs, bad outcomes" mismatch
    return (
        record.total_calls >= 10
        and record.recent_success_rate < 0.5
        and quality.overall_score > 0.7
    )
async def evolve(self, tools: List["BaseTool"]) -> Dict:
    """
    Run one self-evolution cycle over the given tools.

    Steps, in order:
    1. Detect tool changes via check_changes()
    2. Re-evaluate up to 5 descriptions that need it (only with an LLM client)
    3. Compute the adaptive quality weight
    4. Generate recommendations
    5. Record the execution count of this cycle and persist all records

    Returns:
        Report dict with keys "changes_detected", "descriptions_evaluated",
        "adaptive_weight" and "recommendations".
    """
    report = {
        "changes_detected": {},
        "descriptions_evaluated": 0,
        "adaptive_weight": 0.0,
        "recommendations": [],
    }

    # Step 1: change detection
    report["changes_detected"] = self.check_changes(tools)

    # Step 2: description evaluation, capped to keep LLM usage bounded
    pending = [candidate for candidate in tools if self.should_reevaluate_description(candidate)]
    if pending and self._llm_client:
        for candidate in pending[:5]:  # Max 5 per cycle
            if await self.evaluate_description(candidate, force=True):
                report["descriptions_evaluated"] += 1

    # Step 3: adaptive weight
    report["adaptive_weight"] = self.compute_adaptive_quality_weight()

    # Step 4: recommendations over every tracked record
    low_performers = self.get_problematic_tools()
    all_records = list(self._records.values())
    report["recommendations"] = self._generate_recommendations(all_records, low_performers)

    # Step 5: remember when this cycle ran, then persist
    self._last_evolve_count = self._global_execution_count
    if self._store:
        await self._store.save_all(self._records, self._global_execution_count)

    logger.info(
        f"Evolution cycle complete: "
        f"changes={len([v for v in report['changes_detected'].values() if v != 'unchanged'])}, "
        f"evaluated={report['descriptions_evaluated']}, "
        f"weight={report['adaptive_weight']}, "
        f"global_count={self._global_execution_count}"
    )
    return report
def should_evolve(self) -> bool:
    """Tell whether enough executions have happened since the last evolution cycle."""
    executions_since_last = self._global_execution_count - self._last_evolve_count
    return executions_since_last >= self._evolve_interval
def get_tool_insights(self, tool: "BaseTool") -> Dict:
    """
    Summarise everything tracked about one tool (for debugging/analysis).

    Returns:
        {"status": "not_tracked", "tool": <name>} when the tool has no
        record; otherwise a dict of call statistics, the description
        quality breakdown (or None) and ISO-formatted timestamps.
    """
    record = self._records.get(self.get_tool_key(tool))
    if not record:
        return {"status": "not_tracked", "tool": tool.name}

    # Failures among the 20 most recent executions
    window = record.recent_executions[-20:]
    recent_failures_count = len([entry for entry in window if not entry.success])

    # Description quality is optional; expose it only once evaluated
    evaluation = record.description_quality
    description_summary = None
    if evaluation:
        description_summary = {
            "overall_score": evaluation.overall_score,
            "clarity": evaluation.clarity,
            "completeness": evaluation.completeness,
            "reasoning": evaluation.reasoning,
        }

    insights = {
        "tool_key": record.tool_key,
        "total_calls": record.total_calls,
        "success_rate": record.success_rate,
        "recent_success_rate": record.recent_success_rate,
        "avg_execution_time_ms": record.avg_execution_time_ms,
        "quality_score": record.quality_score,
        "description_quality": description_summary,
        "recent_failures_count": recent_failures_count,
        "first_seen": record.first_seen.isoformat(),
        "last_updated": record.last_updated.isoformat(),
    }
    return insights
================================================
FILE: anytool/grounding/core/quality/store.py
================================================
"""
Persistent storage for tool quality data.
"""
import json
import asyncio
from pathlib import Path
from typing import Dict, List, Optional
from .types import ToolQualityRecord
from anytool.utils.logging import Logger
from anytool.config.constants import PROJECT_ROOT
logger = Logger.get_logger(__name__)
class QualityStore:
    """
    Persistent storage for tool quality records.

    Storage structure (default cache_dir is PROJECT_ROOT/.anytool/tool_quality):
    <cache_dir>/
    ├── records.json         # All quality records
    ├── records_backup.json  # Copy of the previous records.json, taken on save
    └── records.json.tmp     # Transient staging file, only present while saving

    Saves are staged in the temp file and then moved over records.json, so a
    failed or interrupted save never leaves a half-written records.json.
    If records.json still turns out to be unreadable, load_all() falls back
    to the backup copy.
    """

    VERSION = 1

    def __init__(self, cache_dir: Optional[Path] = None):
        """Create the store, making sure the cache directory exists.

        Args:
            cache_dir: Directory for the JSON files. Defaults to
                PROJECT_ROOT/.anytool/tool_quality.
        """
        if cache_dir is None:
            cache_dir = PROJECT_ROOT / ".anytool" / "tool_quality"
        self._cache_dir = Path(cache_dir)
        self._cache_dir.mkdir(parents=True, exist_ok=True)
        self._records_file = self._cache_dir / "records.json"
        self._backup_file = self._cache_dir / "records_backup.json"
        self._tmp_file = self._cache_dir / "records.json.tmp"
        # Serialises concurrent save_all() calls within one event loop
        self._write_lock = asyncio.Lock()
        logger.debug(f"QualityStore initialized at {self._cache_dir}")

    def _load_file(self, path: Path) -> tuple[Dict[str, ToolQualityRecord], int]:
        """Parse one records file.

        Returns ({}, 0) on a version mismatch. Individual records that fail
        to deserialize are skipped with a warning.

        Raises:
            Exception: if the file cannot be read or is not valid JSON of
                the expected shape.
        """
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
        # Version check
        if data.get("version") != self.VERSION:
            logger.warning("Cache version mismatch, clearing cache")
            return {}, 0
        records: Dict[str, ToolQualityRecord] = {}
        for key, record_data in data.get("records", {}).items():
            try:
                records[key] = ToolQualityRecord.from_dict(record_data)
            except Exception as e:
                logger.warning(f"Failed to load record {key}: {e}")
        global_count = data.get("global_execution_count", 0)
        logger.info(f"Loaded {len(records)} quality records from cache (global_count={global_count})")
        return records, global_count

    def load_all(self) -> tuple[Dict[str, ToolQualityRecord], int]:
        """Load all quality records and global execution count from disk.

        If records.json exists but cannot be read, the backup written by the
        previous save is tried before giving up.

        Returns:
            Tuple of (records_dict, global_execution_count); ({}, 0) when
            nothing usable is stored.
        """
        if not self._records_file.exists():
            return {}, 0
        try:
            return self._load_file(self._records_file)
        except Exception as e:
            logger.error(f"Failed to load quality cache: {e}")

        # Primary file is unreadable: recover from the previous generation
        if self._backup_file.exists():
            try:
                recovered = self._load_file(self._backup_file)
                logger.warning(f"Recovered quality records from backup: {self._backup_file}")
                return recovered
            except Exception as e:
                logger.error(f"Failed to load quality cache backup: {e}")
        return {}, 0

    async def save_all(self, records: Dict[str, ToolQualityRecord], global_execution_count: int = 0) -> None:
        """Save all quality records and global execution count to disk.

        Errors are logged, not raised. On failure the existing records.json
        is left untouched.
        """
        import shutil

        async with self._write_lock:
            try:
                # Serialise first so a bad record cannot damage existing files
                data = {
                    "version": self.VERSION,
                    "global_execution_count": global_execution_count,
                    "records": {
                        key: record.to_dict()
                        for key, record in records.items()
                    }
                }
                with open(self._tmp_file, "w", encoding="utf-8") as f:
                    json.dump(data, f, indent=2, ensure_ascii=False)
                # Backup existing file only once the new content is safely staged
                if self._records_file.exists():
                    shutil.copy(self._records_file, self._backup_file)
                # Rename within the same directory: readers see old or new, never partial
                self._tmp_file.replace(self._records_file)
                logger.debug(f"Saved {len(records)} quality records to cache (global_count={global_execution_count})")
            except Exception as e:
                logger.error(f"Failed to save quality cache: {e}")
                # Best-effort cleanup of a partially written staging file
                try:
                    self._tmp_file.unlink(missing_ok=True)
                except OSError:
                    pass

    async def save_record(self, record: ToolQualityRecord, all_records: Dict[str, ToolQualityRecord], global_execution_count: int = 0) -> None:
        """Save a single record (saves all for simplicity).

        Note: inserts `record` into `all_records` under record.tool_key
        before saving, so the caller's dict is modified.
        """
        all_records[record.tool_key] = record
        await self.save_all(all_records, global_execution_count)

    def clear(self) -> None:
        """Clear all cached data (records, backup and any leftover staging file)."""
        if self._records_file.exists():
            self._records_file.unlink()
        if self._backup_file.exists():
            self._backup_file.unlink()
        if self._tmp_file.exists():
            self._tmp_file.unlink()
        logger.info("Quality cache cleared")
================================================
FILE: anytool/grounding/core/quality/types.py
================================================
"""
Data types for tool quality tracking.
"""
from dataclasses import dataclass, field
from datetime import datetime
from typing import ClassVar, Dict, List, Optional, Any
@dataclass
class ExecutionRecord:
    """Outcome of one tool execution, as kept in a record's recent history."""

    # Time associated with the execution (serialized via isoformat elsewhere)
    timestamp: datetime
    # Whether the execution counted as a success
    success: bool
    # Duration of the execution in milliseconds
    execution_time_ms: float
    # Optional error text; defaults to None
    error_message: Optional[str] = None
@dataclass
class DescriptionQuality:
    """Scores an LLM assigned to a tool description."""

    # 0-1: how clear the purpose and usage are
    clarity: float
    # 0-1: how well inputs/outputs are documented
    completeness: float
    # When the evaluation was produced
    evaluated_at: datetime
    # The LLM's explanation for the scores (may be empty)
    reasoning: str = ""

    @property
    def overall_score(self) -> float:
        """Mean of the clarity and completeness scores."""
        combined = self.clarity + self.completeness
        return combined / 2
@dataclass
class ToolQualityRecord:
    """
    Everything tracked about the quality of a single tool.

    Key: "{backend}:{server}:{tool_name}"

    Holds lifetime counters, a rolling window of recent executions, the
    optional LLM-evaluated description quality, and bookkeeping metadata.
    """

    tool_key: str
    backend: str
    server: str
    tool_name: str

    # Lifetime execution counters
    total_calls: int = 0
    success_count: int = 0
    total_execution_time_ms: float = 0.0

    # Rolling window of the latest executions (oldest first)
    recent_executions: List[ExecutionRecord] = field(default_factory=list)

    # LLM-evaluated description quality; None until evaluated
    description_quality: Optional[DescriptionQuality] = None

    # Bookkeeping
    description_hash: Optional[str] = None
    first_seen: datetime = field(default_factory=datetime.now)
    last_updated: datetime = field(default_factory=datetime.now)

    # Upper bound on the length of recent_executions
    MAX_RECENT_EXECUTIONS: ClassVar[int] = 100

    # Tools whose recent success rate is at or above this value are not
    # penalized (penalty = 1.0); only rates below it reduce the penalty factor.
    PENALTY_THRESHOLD: ClassVar[float] = 0.4

    @property
    def success_rate(self) -> float:
        """Lifetime share of successful calls (0.0 when never called)."""
        return self.success_count / self.total_calls if self.total_calls != 0 else 0.0

    @property
    def avg_execution_time_ms(self) -> float:
        """Lifetime mean execution time in ms (0.0 when never called)."""
        return self.total_execution_time_ms / self.total_calls if self.total_calls != 0 else 0.0

    @property
    def recent_success_rate(self) -> float:
        """Share of successes in the rolling window; lifetime rate if the window is empty."""
        window = self.recent_executions
        if not window:
            return self.success_rate
        ok_count = len([entry for entry in window if entry.success])
        return ok_count / len(window)

    @property
    def consecutive_failures(self) -> int:
        """Length of the failure streak ending at the most recent execution."""
        streak = 0
        for entry in self.recent_executions[::-1]:
            if entry.success:
                return streak
            streak += 1
        return streak

    @property
    def penalty(self) -> float:
        """
        Multiplicative penalty factor derived from the recent failure rate.

        Rules:
        - Fewer than 3 total calls: 1.0 (not enough data to judge)
        - Recent success rate >= PENALTY_THRESHOLD: 1.0 (no penalty)
        - Otherwise a linear value 0.3 + (rate / threshold) * 0.7, reduced by
          0.1 per consecutive failure beyond two (at most 0.3 in total)

        The result is clamped to the range 0.2-1.0.
        """
        if self.total_calls < 3:
            return 1.0

        rate = self.recent_success_rate
        cutoff = self.PENALTY_THRESHOLD
        if rate >= cutoff:
            return 1.0

        # Linear interpolation: 0.3 at rate 0, approaching 1.0 at the cutoff
        factor = 0.3 + (rate / cutoff) * 0.7

        # A run of failures suggests a systematic problem: 3 in a row costs
        # an extra 0.1, 4 costs 0.2, 5 or more costs 0.3
        streak = self.consecutive_failures
        if streak >= 3:
            factor -= min(0.3, (streak - 2) * 0.1)

        return max(0.2, min(1.0, factor))

    @property
    def quality_score(self) -> float:
        """Backward-compatible alias that simply returns `penalty`."""
        return self.penalty

    def add_execution(self, record: ExecutionRecord) -> None:
        """Fold one execution into the counters and the rolling window."""
        self.total_calls += 1
        self.total_execution_time_ms += record.execution_time_ms
        self.success_count += 1 if record.success else 0

        self.recent_executions.append(record)
        # Drop the oldest entries once the window exceeds its maximum size
        overflow = len(self.recent_executions) - self.MAX_RECENT_EXECUTIONS
        if overflow > 0:
            self.recent_executions = self.recent_executions[overflow:]

        self.last_updated = datetime.now()

    def to_dict(self) -> Dict[str, Any]:
        """Convert the record into a JSON-friendly dict (datetimes as ISO strings)."""
        history = []
        for entry in self.recent_executions:
            history.append({
                "timestamp": entry.timestamp.isoformat(),
                "success": entry.success,
                "execution_time_ms": entry.execution_time_ms,
                "error_message": entry.error_message,
            })

        quality = self.description_quality
        quality_payload = None
        if quality:
            quality_payload = {
                "clarity": quality.clarity,
                "completeness": quality.completeness,
                "evaluated_at": quality.evaluated_at.isoformat(),
                "reasoning": quality.reasoning,
            }

        return {
            "tool_key": self.tool_key,
            "backend": self.backend,
            "server": self.server,
            "tool_name": self.tool_name,
            "total_calls": self.total_calls,
            "success_count": self.success_count,
            "total_execution_time_ms": self.total_execution_time_ms,
            "recent_executions": history,
            "description_quality": quality_payload,
            "description_hash": self.description_hash,
            "first_seen": self.first_seen.isoformat(),
            "last_updated": self.last_updated.isoformat(),
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "ToolQualityRecord":
        """Rebuild a record from the dict layout produced by `to_dict`.

        Identity fields and both timestamps are required; counters, the
        description hash, history and description quality are optional.
        """
        init_kwargs = {
            "tool_key": data["tool_key"],
            "backend": data["backend"],
            "server": data["server"],
            "tool_name": data["tool_name"],
            "total_calls": data.get("total_calls", 0),
            "success_count": data.get("success_count", 0),
            "total_execution_time_ms": data.get("total_execution_time_ms", 0.0),
            "description_hash": data.get("description_hash"),
            "first_seen": datetime.fromisoformat(data["first_seen"]),
            "last_updated": datetime.fromisoformat(data["last_updated"]),
        }
        record = cls(**init_kwargs)

        # Restore the rolling execution window
        record.recent_executions.extend(
            ExecutionRecord(
                timestamp=datetime.fromisoformat(raw["timestamp"]),
                success=raw["success"],
                execution_time_ms=raw["execution_time_ms"],
                error_message=raw.get("error_message"),
            )
            for raw in data.get("recent_executions", [])
        )

        # Restore the description evaluation, if one was stored
        stored_quality = data.get("description_quality")
        if stored_quality:
            record.description_quality = DescriptionQuality(
                clarity=stored_quality.get("clarity", 0.5),  # Fallback for old data
                completeness=stored_quality.get("completeness", 0.5),
                evaluated_at=datetime.fromisoformat(stored_quality["evaluated_at"]),
                reasoning=stored_quality.get("reasoning", ""),  # Optional field
            )
        return record
================================================
FILE: anytool/grounding/core/search_tools.py
================================================
from anytool.grounding.core.tool.base import BaseTool
import re
import os
import numpy as np
import httpx
from typing import Iterable, List, Tuple, Dict, Optional, Any, TYPE_CHECKING
from enum import Enum
import json
import pickle
from pathlib import Path
from datetime import datetime
from .tool import BaseTool
from .types import BackendType
from anytool.llm import LLMClient
from anytool.utils.logging import Logger
from anytool.config.constants import PROJECT_ROOT
if TYPE_CHECKING:
from .quality import ToolQualityManager
logger = Logger.get_logger(__name__)
class SearchMode(str, Enum):
    """Available tool-search strategies; members compare equal to their string values."""

    # Embedding-based similarity ranking
    SEMANTIC = "semantic"
    # Token-based ranking
    KEYWORD = "keyword"
    # Keyword pre-filter followed by semantic ranking
    HYBRID = "hybrid"
class ToolRanker:
    """
    ToolRanker: rank tools by keyword, semantic or hybrid search.

    Embeddings are cached in memory per (backend, server, tool_name) together
    with the description they were computed from, and can optionally be
    persisted to disk. A cached embedding is only reused while the tool's
    description still matches the cached one.
    """

    # Cache version for persistent storage - increment when cache format changes
    CACHE_VERSION = 1

    def __init__(
        self,
        model_name: Optional[str] = None,
        cache_dir: Optional[str | Path] = None,
        enable_cache_persistence: bool = False
    ):
        """Initialize ToolRanker.

        Args:
            model_name: Embedding model name. If None, will use env or config value.
            cache_dir: Directory to store persistent embedding cache.
            enable_cache_persistence: Whether to persist embeddings to disk.
        """
        # Check for remote API config from environment
        self._api_base_url = os.getenv("EMBEDDING_BASE_URL")
        self._api_key = os.getenv("EMBEDDING_API_KEY")
        self._use_remote_api = bool(self._api_key and self._api_base_url)

        # Get model name: param > env > config > default
        if model_name is None:
            model_name = os.getenv("EMBEDDING_MODEL")
        if model_name is None:
            try:
                from anytool.config import get_config
                config = get_config()
                model_name = config.tool_search.embedding_model
            except Exception as exc:
                logger.warning(f"Failed to load config, using default model: {exc}")
                model_name = "BAAI/bge-small-en-v1.5"

        self._model_name = model_name
        self._embed_model = None  # lazy load
        self._embedding_fn = None
        if self._use_remote_api:
            logger.info(f"Using remote embedding API: {self._api_base_url}, model: {model_name}")

        # Persistent cache settings
        self._enable_cache_persistence = enable_cache_persistence
        if cache_dir is None:
            cache_dir = PROJECT_ROOT / ".anytool" / "embedding_cache"
        self._cache_dir = Path(cache_dir)

        # Log cache settings
        logger.info(
            f"ToolRanker initialized: enable_cache_persistence={enable_cache_persistence}, "
            f"cache_dir={self._cache_dir}"
        )

        # Structured in-memory cache
        # Structure: {backend: {server: {tool_name: {"embedding": np.ndarray, "description": str, "cached_at": str}}}}
        self._structured_cache: Dict[str, Dict[str, Dict[str, Dict[str, Any]]]] = {}
        # For backward compatibility and quick lookup: {text -> (backend, server, tool_name)}
        self._text_to_key: Dict[str, Tuple[str, str, str]] = {}

        # Load persistent cache if enabled
        if self._enable_cache_persistence:
            logger.info(f"Loading persistent cache from {self._cache_dir}")
            self._load_persistent_cache()

    def _get_cache_key(self, tool: BaseTool) -> Tuple[str, str, str]:
        """Get structured cache key (backend, server, tool_name) from tool."""
        if tool.is_bound:
            backend = tool.runtime_info.backend.value
            server = tool.runtime_info.server_name or "default"
        else:
            if not tool.backend_type or tool.backend_type == BackendType.NOT_SET:
                backend = "UNKNOWN"
            else:
                backend = tool.backend_type.value
            server = "default"
        return (backend, server, tool.name)

    def _get_cache_file_path(self) -> Path:
        """Get the cache file path for the current model."""
        # Use model name in filename to support multiple models
        safe_model_name = self._model_name.replace("/", "_").replace("\\", "_")
        return self._cache_dir / f"embeddings_{safe_model_name}_v{self.CACHE_VERSION}.pkl"

    def _load_persistent_cache(self) -> None:
        """Load embeddings from disk cache (best effort; failures start with an empty cache)."""
        cache_file = self._get_cache_file_path()
        if not cache_file.exists():
            logger.debug(f"No persistent cache found at {cache_file}")
            return
        try:
            # SECURITY: pickle.load can execute arbitrary code embedded in the
            # file. This is only acceptable while cache_dir is writable solely
            # by trusted users; do not point cache_dir at shared or untrusted
            # locations.
            with open(cache_file, 'rb') as f:
                data = pickle.load(f)

            # Validate cache version
            if isinstance(data, dict) and data.get("version") == self.CACHE_VERSION:
                self._structured_cache = data.get("embeddings", {})
                self._rebuild_text_index()
                # Count total embeddings
                total = sum(
                    len(tools)
                    for backend in self._structured_cache.values()
                    for tools in backend.values()
                )
                logger.info(f"Loaded {total} embeddings from cache: {cache_file}")
            else:
                logger.warning("Cache version mismatch or invalid format, starting fresh")
                self._structured_cache = {}
        except Exception as exc:
            logger.warning(f"Failed to load persistent cache: {exc}")
            self._structured_cache = {}

    def _rebuild_text_index(self) -> None:
        """Rebuild text-to-key mapping for quick lookup."""
        self._text_to_key.clear()
        for backend, servers in self._structured_cache.items():
            for server, tools in servers.items():
                for tool_name, tool_data in tools.items():
                    desc = tool_data.get("description", "")
                    text = f"{tool_name}: {desc}"
                    self._text_to_key[text] = (backend, server, tool_name)

    def _save_persistent_cache(self) -> None:
        """Save embeddings to disk cache.

        Does nothing when persistence is disabled or the cache is empty.
        """
        if not self._enable_cache_persistence or not self._structured_cache:
            return
        cache_file = self._get_cache_file_path()
        try:
            # Create directory if it doesn't exist
            cache_file.parent.mkdir(parents=True, exist_ok=True)
            # Build cache data with metadata
            cache_data = {
                "version": self.CACHE_VERSION,
                "model_name": self._model_name,
                "last_updated": datetime.now().isoformat(),
                "embeddings": self._structured_cache
            }
            # Save cache
            with open(cache_file, 'wb') as f:
                pickle.dump(cache_data, f, protocol=pickle.HIGHEST_PROTOCOL)
            # Count total embeddings
            total = sum(
                len(tools)
                for backend in self._structured_cache.values()
                for tools in backend.values()
            )
            logger.debug(f"Saved {total} embeddings to cache: {cache_file}")
        except Exception as exc:
            logger.warning(f"Failed to save persistent cache: {exc}")

    def rank(
        self,
        query: str,
        tools: List[BaseTool],
        *,
        top_k: int = 50,
        mode: SearchMode = SearchMode.SEMANTIC,
    ) -> List[Tuple[BaseTool, float]]:
        """Rank `tools` against `query`.

        Args:
            query: Free-text query.
            tools: Candidate tools.
            top_k: Maximum number of results for the chosen strategy.
            mode: Search strategy; any value other than KEYWORD or SEMANTIC
                is treated as hybrid.

        Returns:
            (tool, score) pairs, best first.
        """
        if mode == SearchMode.KEYWORD:
            return self._keyword_search(query, tools, top_k)
        if mode == SearchMode.SEMANTIC:
            return self._semantic_search(query, tools, top_k)
        # hybrid
        return self._hybrid_search(query, tools, top_k)

    @staticmethod
    def _tokenize(text: str) -> list[str]:
        """Lowercase `text` and split it on runs of non-word characters."""
        tokens = re.split(r"[^\w]+", text.lower())
        tokens = [tok for tok in tokens if tok]
        return tokens

    def _keyword_search(
        self, query: str, tools: Iterable[BaseTool], top_k: int
    ) -> List[Tuple[BaseTool, float]]:
        """Score tools by keyword relevance.

        Uses BM25 when `rank_bm25` is installed, otherwise the share of query
        tokens present in the tool's name/description. When nothing matches
        (all returned scores are 0), ALL tools are returned with score 0.0 so
        that later stages still have candidates.
        """
        try:
            from rank_bm25 import BM25Okapi  # type: ignore
        except ImportError:
            BM25Okapi = None  # fallback below

        tool_list = list(tools)
        if not tool_list:
            return []

        corpus_tokens: list[list[str]] = [self._tokenize(f"{t.name} {t.description}") for t in tool_list]
        query_tokens = self._tokenize(query)

        if BM25Okapi and corpus_tokens:
            bm25 = BM25Okapi(corpus_tokens)
            scores = bm25.get_scores(query_tokens)
            scored = [(t, float(s)) for t, s in zip(tool_list, scores, strict=True)]
        else:
            # fallback: simple term overlap ratio
            q_set = set(query_tokens)
            scored = []
            for t, toks in zip(tool_list, corpus_tokens, strict=True):
                if not toks:
                    scored.append((t, 0.0))  # Include tool with 0 score
                    continue
                overlap = q_set.intersection(toks)
                score = len(overlap) / len(q_set) if len(q_set) > 0 else 0.0
                scored.append((t, score))

        scored.sort(key=lambda x: x[1], reverse=True)
        result = scored[:top_k]

        # If no matches found (all scores are 0), return all tools
        if not result or all(score == 0.0 for _, score in result):
            logger.debug(f"Keyword search found no matches, returning all {len(tool_list)} tools")
            return [(t, 0.0) for t in tool_list]
        return result

    def _ensure_model(self) -> bool:
        """Ensure embedding model is ready (local or remote)."""
        if self._embedding_fn is not None:
            return True
        if self._use_remote_api:
            return self._init_remote_embedding()
        return self._init_local_embedding()

    def _init_remote_embedding(self) -> bool:
        """Initialize remote embedding API (OpenRouter/OpenAI compatible)."""
        try:
            def embed_texts(texts: List[str]) -> List[np.ndarray]:
                # Tolerate a trailing slash in EMBEDDING_BASE_URL
                endpoint = f"{self._api_base_url.rstrip('/')}/embeddings"
                with httpx.Client(timeout=60.0) as client:
                    response = client.post(
                        endpoint,
                        headers={
                            "Authorization": f"Bearer {self._api_key}",
                            "Content-Type": "application/json"
                        },
                        json={"model": self._model_name, "input": texts}
                    )
                    response.raise_for_status()
                    data = response.json()
                # Each item carries the index of the input it belongs to;
                # order by it so embeddings always line up with `texts`.
                # Items without an index keep their original order.
                items = sorted(data["data"], key=lambda item: item.get("index", 0))
                return [np.array(item["embedding"]) for item in items]

            self._embedding_fn = embed_texts
            logger.info(f"Remote embedding API initialized: {self._model_name}")
            return True
        except Exception as exc:
            logger.error(f"Failed to initialize remote embedding API: {exc}")
            return False

    def _init_local_embedding(self) -> bool:
        """Initialize local fastembed model."""
        try:
            from fastembed import TextEmbedding
            logger.debug(f"fastembed imported successfully, loading model: {self._model_name}")
        except ImportError as e:
            logger.warning(
                f"fastembed not installed (ImportError: {e}), semantic search unavailable. "
                f"Install with: pip install fastembed"
            )
            return False
        try:
            logger.info(f"Loading embedding model: {self._model_name}...")
            self._embed_model = TextEmbedding(model_name=self._model_name)
            self._embedding_fn = lambda txts: list(self._embed_model.embed(txts))
            logger.info(f"Embedding model '{self._model_name}' loaded successfully")
            return True
        except Exception as exc:
            logger.error(f"Embedding model '{self._model_name}' loading failed: {exc}")
            return False

    def _get_embedding(self, tool: BaseTool) -> Optional[np.ndarray]:
        """Get embedding from structured cache.

        Returns None when nothing is cached for the tool, or when the cached
        entry was computed from a different description than the tool has now
        (the stale entry is then replaced on the next `_set_embedding`).
        """
        backend, server, tool_name = self._get_cache_key(tool)
        entry = self._structured_cache.get(backend, {}).get(server, {}).get(tool_name)
        if entry is None:
            return None
        # The cache key does not include the description, so compare it here
        if entry.get("description", "") != (tool.description or ""):
            return None
        return entry.get("embedding")

    def _set_embedding(self, tool: BaseTool, embedding: np.ndarray) -> None:
        """Store embedding in structured cache."""
        backend, server, tool_name = self._get_cache_key(tool)
        description = tool.description or ""

        # Initialize nested structure if needed
        server_cache = self._structured_cache.setdefault(backend, {}).setdefault(server, {})

        # Drop the index entry of a description that is being replaced
        previous = server_cache.get(tool_name)
        if previous is not None:
            self._text_to_key.pop(f"{tool_name}: {previous.get('description', '')}", None)

        # Store embedding with metadata
        server_cache[tool_name] = {
            "embedding": embedding,
            "description": description,
            "cached_at": datetime.now().isoformat()
        }
        # Update text index for backward compatibility (same key format as
        # _rebuild_text_index, i.e. a missing description is indexed as "")
        self._text_to_key[f"{tool_name}: {description}"] = (backend, server, tool_name)

    def _semantic_search(
        self, query: str, tools: Iterable[BaseTool], top_k: int
    ) -> List[Tuple[BaseTool, float]]:
        """Rank tools by cosine similarity between query and tool embeddings.

        Returns an empty list when no embedding model is available or when
        computing embeddings fails.
        """
        if not self._ensure_model():
            logger.debug("Semantic search unavailable, returning empty list")
            return []

        tools_list = list(tools)
        if not tools_list:
            # Nothing to rank; avoid a pointless (possibly remote) embedding call
            return []

        # Collect embeddings with cache reuse; keep them in a local list so
        # scoring does not depend on a second cache lookup
        embeddings: List[Optional[np.ndarray]] = [self._get_embedding(t) for t in tools_list]
        missing_idx = [i for i, emb in enumerate(embeddings) if emb is None]
        if missing_idx:
            try:
                # Generate embeddings for missing tools
                missing_texts = [
                    f"{tools_list[i].name}: {tools_list[i].description}" for i in missing_idx
                ]
                new_embs = self._embedding_fn(missing_texts)
                for i, emb in zip(missing_idx, new_embs, strict=True):
                    self._set_embedding(tools_list[i], emb)
                    embeddings[i] = emb
                logger.debug(f"Computed embeddings for {len(missing_idx)} new tools")
            except Exception as exc:
                logger.error("Failed to generate embeddings: %s", exc)
                return []
            # Save to persistent cache since it was updated
            self._save_persistent_cache()

        try:
            q_emb = self._embedding_fn([query])[0]
        except Exception as exc:
            logger.error("Failed to embed query: %s", exc)
            return []

        # The query norm is the same for every tool
        q_norm = np.linalg.norm(q_emb)

        scored: list[tuple[BaseTool, float]] = []
        for t, emb in zip(tools_list, embeddings, strict=True):
            if emb is None:
                # Should not happen, but handle gracefully
                logger.warning(f"No embedding found for tool: {t.name}")
                scored.append((t, 0.0))
                continue
            # Calculate cosine similarity with zero-division protection
            emb_norm = np.linalg.norm(emb)
            if q_norm == 0 or emb_norm == 0:
                sim = 0.0
            else:
                sim = float(np.dot(q_emb, emb) / (q_norm * emb_norm))
            scored.append((t, sim))

        scored.sort(key=lambda x: x[1], reverse=True)
        return scored[:top_k]

    def _hybrid_search(
        self, query: str, tools: Iterable[BaseTool], top_k: int
    ) -> List[Tuple[BaseTool, float]]:
        """Keyword pre-filter (top_k * 3 candidates) followed by semantic ranking.

        Falls back to the keyword results when semantic search is unavailable.
        """
        # `tools` is used several times below; materialize it once so that
        # one-shot iterators (generators) are not silently exhausted
        tools_list = list(tools)

        # keyword filter
        kw_top = self._keyword_search(query, tools_list, top_k * 3)
        if not kw_top:
            # No keyword matches, try semantic search
            semantic_results = self._semantic_search(query, tools_list, top_k)
            if semantic_results:
                return semantic_results
            # Both failed, return top N tools
            logger.warning("Both keyword and semantic search failed, returning top N tools")
            return [(t, 0.0) for t in tools_list[:top_k]]

        # semantic ranking on keyword results
        semantic_results = self._semantic_search(query, [t for t, _ in kw_top], top_k)
        if semantic_results:
            return semantic_results

        # Semantic unavailable, return keyword results
        logger.debug("Semantic search unavailable, using keyword results only")
        return kw_top[:top_k]

    def get_cache_stats(self) -> Dict[str, Any]:
        """Get statistics about the embedding cache.

        Returns:
            Dict with structure: {
                "total_embeddings": int,
                "backends": {
                    "backend_name": {
                        "total": int,
                        "servers": {
                            "server_name": int  # count of tools
                        }
                    }
                }
            }
        """
        stats = {
            "total_embeddings": 0,
            "backends": {}
        }
        for backend, servers in self._structured_cache.items():
            backend_total = 0
            server_stats = {}
            for server, tools in servers.items():
                tool_count = len(tools)
                backend_total += tool_count
                server_stats[server] = tool_count
            stats["backends"][backend] = {
                "total": backend_total,
                "servers": server_stats
            }
            stats["total_embeddings"] += backend_total
        return stats

    def clear_cache(self, backend: Optional[str] = None, server: Optional[str] = None) -> int:
        """Clear embeddings from cache.

        Args:
            backend: If provided, only clear this backend. If None, clear all.
            server: If provided (and backend is provided), only clear this server.

        Returns:
            Number of embeddings cleared.
        """
        cleared_count = 0
        if backend is None:
            # Clear everything
            for b in self._structured_cache.values():
                for s in b.values():
                    cleared_count += len(s)
            self._structured_cache.clear()
            self._text_to_key.clear()
        elif server is None:
            # Clear specific backend
            if backend in self._structured_cache:
                for s in self._structured_cache[backend].values():
                    cleared_count += len(s)
                del self._structured_cache[backend]
                # Rebuild text index
                self._rebuild_text_index()
        else:
            # Clear specific backend+server
            if backend in self._structured_cache and server in self._structured_cache[backend]:
                cleared_count = len(self._structured_cache[backend][server])
                del self._structured_cache[backend][server]
                # Clean up empty backend
                if not self._structured_cache[backend]:
                    del self._structured_cache[backend]
                # Rebuild text index
                self._rebuild_text_index()

        # Persist the new state after clearing
        if cleared_count > 0 and self._enable_cache_persistence:
            if self._structured_cache:
                self._save_persistent_cache()
            else:
                # _save_persistent_cache() skips empty caches, which would
                # leave the old file on disk to be reloaded on next start;
                # remove the file so the clear actually sticks.
                try:
                    self._get_cache_file_path().unlink(missing_ok=True)
                except OSError as exc:
                    logger.warning(f"Failed to remove persistent cache file: {exc}")

        logger.info(f"Cleared {cleared_count} embeddings from cache")
        return cleared_count
class SearchDebugInfo:
    """Mutable container for diagnostics collected during one tool search."""

    def __init__(self):
        # General search facts
        self.search_mode: str = ""
        self.total_candidates: int = 0
        self.mcp_count: int = 0
        self.non_mcp_count: int = 0

        # What the LLM pre-filter did (if it ran)
        self.llm_filter_used: bool = False
        self.llm_brief_plan: str = ""
        self.llm_utility_tools: Dict[str, List[str]] = {}  # server -> tool names
        self.llm_domain_servers: List[str] = []
        self.llm_utility_count: int = 0
        self.llm_domain_count: int = 0

        # Per-tool ranking scores: [{name, server, score, selected}]
        self.tool_scores: List[Dict[str, Any]] = []

        # Tools finally chosen: [{name, server, backend}]
        self.selected_tools: List[Dict[str, Any]] = []

    def to_dict(self) -> Dict[str, Any]:
        """Return the collected info as a plain dict, with LLM filter fields nested under "llm_filter"."""
        llm_section = {
            "used": self.llm_filter_used,
            "brief_plan": self.llm_brief_plan,
            "utility_tools": self.llm_utility_tools,
            "domain_servers": self.llm_domain_servers,
            "utility_count": self.llm_utility_count,
            "domain_count": self.llm_domain_count,
        }
        payload: Dict[str, Any] = {}
        payload["search_mode"] = self.search_mode
        payload["total_candidates"] = self.total_candidates
        payload["mcp_count"] = self.mcp_count
        payload["non_mcp_count"] = self.non_mcp_count
        payload["llm_filter"] = llm_section
        payload["tool_scores"] = self.tool_scores
        payload["selected_tools"] = self.selected_tools
        return payload
class SearchCoordinator(BaseTool):
_name = "_filter_tools"
_description = "Internal helper: filter & rank tools from a given list."
# Fallback defaults when config loading fails
DEFAULT_MAX_TOOLS: int = 20
DEFAULT_LLM_FILTER: bool = True
DEFAULT_LLM_THRESHOLD: int = 50
DEFAULT_CACHE_PERSISTENCE: bool = False
DEFAULT_SEARCH_MODE: str = "hybrid"
@classmethod
def get_parameters_schema(cls) -> Dict[str, Any]:
    """Return an empty parameter schema for this internal tool.

    `_arun` takes `candidate_tools: list[BaseTool]`; BaseTool is an ABC class
    rather than a Pydantic model, so that parameter cannot be converted to
    JSON Schema. This override sidesteps schema generation entirely.
    """
    empty_schema: Dict[str, Any] = {}
    return empty_schema
def __init__(
    self,
    *,
    max_tools: Optional[int] = None,
    llm: Optional[LLMClient] = None,
    enable_llm_filter: Optional[bool] = None,
    llm_filter_threshold: Optional[int] = None,
    enable_cache_persistence: Optional[bool] = None,
    cache_dir: Optional[str | Path] = None,
    quality_manager: Optional["ToolQualityManager"] = None,
    enable_quality_ranking: bool = True,
):
    """Create a SearchCoordinator.

    Settings left as None are resolved with the priority
    explicit argument → `tool_search` config → class-level DEFAULT_* value.

    Args:
        max_tools: max number of tools to return. If None, will use the value from config.
        llm: optional async LLM, used to filter backend/server first.
            If None, a new LLMClient() is created for this coordinator.
        enable_llm_filter: whether to use LLM to pre-filter by backend/server.
            If None, uses config value.
        llm_filter_threshold: only apply LLM filter when tool count > this threshold.
            If None, uses config value (fallback: DEFAULT_LLM_THRESHOLD).
        enable_cache_persistence: whether to persist embeddings to disk. If None, uses config value.
        cache_dir: directory to store persistent embedding cache. If None, uses config value or default.
        quality_manager: optional ToolQualityManager, stored for quality-aware ranking.
        enable_quality_ranking: flag stored alongside quality_manager
            (presumably toggles quality-aware ranking in the search path — confirm in _arun).
    """
    super().__init__()

    # Load config (may be None if loading fails)
    tool_search_config = None
    try:
        from anytool.config import get_config
        tool_search_config = getattr(get_config(), 'tool_search', None)
    except Exception as exc:
        logger.warning(f"Failed to load config: {exc}")

    def resolve(user_value, config_attr: str, default):
        """Priority: user_value → config → default"""
        if user_value is not None:
            return user_value
        if tool_search_config is not None:
            config_value = getattr(tool_search_config, config_attr, None)
            if config_value is not None:
                return config_value
        return default

    # Resolve each setting with priority: user → config → default
    self.max_tools = resolve(max_tools, 'max_tools', self.DEFAULT_MAX_TOOLS)
    enable_llm_filter = resolve(enable_llm_filter, 'enable_llm_filter', self.DEFAULT_LLM_FILTER)
    llm_filter_threshold = resolve(llm_filter_threshold, 'llm_filter_threshold', self.DEFAULT_LLM_THRESHOLD)
    enable_cache_persistence = resolve(enable_cache_persistence, 'enable_cache_persistence', self.DEFAULT_CACHE_PERSISTENCE)
    cache_dir = resolve(cache_dir, 'cache_dir', None)
    # The search mode has no constructor argument; it comes from config or the default
    self._default_mode = resolve(None, 'search_mode', self.DEFAULT_SEARCH_MODE)

    # Log cache settings for debugging
    logger.info(
        f"SearchCoordinator initialized with cache settings: "
        f"enable_cache_persistence={enable_cache_persistence}, cache_dir={cache_dir}"
    )

    self._ranker = ToolRanker(
        enable_cache_persistence=enable_cache_persistence,
        cache_dir=cache_dir
    )

    # Create the client lazily here rather than in the signature: a default
    # of `LLMClient()` would be built once at import time and shared by
    # every coordinator that omits `llm`.
    self._llm: LLMClient | None = llm if llm is not None else LLMClient()

    # LLM filter settings
    self._enable_llm_filter = enable_llm_filter
    self._llm_filter_threshold = llm_filter_threshold

    # Quality-aware ranking settings
    self._quality_manager = quality_manager
    self._enable_quality_ranking = enable_quality_ranking

    # Debug info from last search
    self._last_search_debug_info: Optional[SearchDebugInfo] = None
async def _arun(
    self,
    task_prompt: str,
    candidate_tools: list[BaseTool],
    *,
    max_tools: int | None = None,
    mode: str | None = None,  # "semantic" | "keyword" | "hybrid"
) -> list[BaseTool]:
    """Select the tools most relevant to `task_prompt` from `candidate_tools`.

    Non-MCP tools are always returned unfiltered. MCP tools are returned as-is when
    there are at most `max_tools` of them. Otherwise they are narrowed down by one of
    two paths:

    * Path 1 - LLM pre-filter, used when an LLM is available, the filter is enabled
      and the MCP count exceeds the configured threshold. Falls back to direct
      ranking if the filter raises.
    * Path 2 - ranked search with an LLM-expanded query. Falls back to the ranker's
      keyword search if ranking raises.

    The selection is then optionally re-ordered by the quality manager and truncated
    to `max_tools`.

    Args:
        task_prompt: natural-language description of the task.
        candidate_tools: tools to choose from.
        max_tools: cap on returned MCP tools; defaults to `self.max_tools`.
        mode: search mode; defaults to the coordinator's default mode.

    Returns:
        Selected MCP tools followed by all non-MCP tools. Results are memoised per
        (candidate list identity, prompt, mode, max_tools) and the cached list object
        itself is returned on a hit.
    """
    max_tools = self.max_tools if max_tools is None else max_tools
    mode = self._default_mode if mode is None else mode

    # Initialize debug info
    debug_info = SearchDebugInfo()
    debug_info.search_mode = mode
    debug_info.total_candidates = len(candidate_tools)
    self._last_search_debug_info = debug_info

    # Cache check
    # NOTE(review): the key uses id(candidate_tools); a different list allocated at the
    # same address after the first one is garbage-collected would hit a stale entry.
    cache_key = (id(candidate_tools), task_prompt, mode, max_tools)
    if not hasattr(self, "_query_cache"):
        self._query_cache: Dict[tuple, list[BaseTool]] = {}
    if cache_key in self._query_cache:
        cached = self._query_cache[cache_key]
        # Keep the freshly created debug record consistent with what is returned.
        self._populate_selected_tools(debug_info, cached)
        return cached

    # Split MCP tools and non-MCP tools
    # Non-MCP tools (shell, gui, web, etc.) are always included, skip all filtering
    mcp_tools = []
    non_mcp_tools = []
    for t in candidate_tools:
        # Same guard as the debug helpers: a bound tool without runtime info falls
        # back to its declared backend type instead of raising AttributeError.
        if t.is_bound and t.runtime_info:
            backend = t.runtime_info.backend.value
        else:
            backend = t.backend_type.value if t.backend_type else "UNKNOWN"

        if backend.lower() == "mcp":
            mcp_tools.append(t)
        else:
            non_mcp_tools.append(t)

    debug_info.mcp_count = len(mcp_tools)
    debug_info.non_mcp_count = len(non_mcp_tools)
    logger.info(f"Tool split: {len(mcp_tools)} MCP, {len(non_mcp_tools)} non-MCP (always included)")

    # If MCP tools within limit, return all
    if len(mcp_tools) <= max_tools:
        result = mcp_tools + non_mcp_tools
        self._query_cache[cache_key] = result
        self._populate_selected_tools(debug_info, result)
        return result

    mcp_count = len(mcp_tools)
    should_use_llm_filter = (
        self._llm and
        self._enable_llm_filter and
        mcp_count > self._llm_filter_threshold
    )

    # Path 1: LLM pre-filter (large MCP tool set)
    if should_use_llm_filter:
        logger.info(f"Path 1: MCP count ({mcp_count}) > threshold, using LLM filter...")
        debug_info.llm_filter_used = True

        try:
            utility_tools, domain_tools, llm_filter_info = await self._llm_filter_with_planning(
                task_prompt, mcp_tools
            )

            # Record LLM filter results
            debug_info.llm_brief_plan = llm_filter_info.get("brief_plan", "")
            debug_info.llm_utility_tools = llm_filter_info.get("utility_tools", {})
            debug_info.llm_domain_servers = llm_filter_info.get("domain_servers", [])

            utility_count = len(utility_tools)
            domain_count = len(domain_tools)
            debug_info.llm_utility_count = utility_count
            debug_info.llm_domain_count = domain_count

            total_count = utility_count + domain_count

            if total_count <= max_tools:
                mcp_result = utility_tools + domain_tools
            else:
                # Exceeds limit: keep utility, search domain.
                # At least 5 domain slots are searched; the final result is still
                # truncated to max_tools further down.
                domain_quota = max(max_tools - utility_count, 5)
                logger.info(
                    f"Total ({total_count}) > max_tools ({max_tools}), "
                    f"keeping {utility_count} utility, searching {domain_count} domain (quota: {domain_quota})"
                )

                # Compute scores for utility tools (marked as LLM-selected)
                if utility_tools:
                    utility_ranked = self._ranker.rank(
                        task_prompt, utility_tools,
                        top_k=len(utility_tools), mode=SearchMode(mode)
                    )
                    self._record_tool_scores(debug_info, utility_ranked, is_selected=True)

                if domain_tools:
                    # Rank all domain tools to see all scores for debugging
                    all_domain_ranked = self._ranker.rank(
                        task_prompt, domain_tools,
                        top_k=len(domain_tools), mode=SearchMode(mode)
                    )

                    # Save scores for all domain tools: the first `domain_quota`
                    # entries are the selected ones, the rest are recorded as rejected.
                    self._record_tool_scores(
                        debug_info, all_domain_ranked[:domain_quota], is_selected=True
                    )
                    self._record_tool_scores(
                        debug_info, all_domain_ranked[domain_quota:], is_selected=False
                    )

                    searched_domain = [t for t, _ in all_domain_ranked[:domain_quota]]
                else:
                    searched_domain = []

                mcp_result = utility_tools + searched_domain

        except Exception as exc:
            logger.warning(f"LLM filter failed ({exc}), fallback to direct ranking")
            ranked = self._ranker.rank(task_prompt, mcp_tools, top_k=max_tools, mode=SearchMode(mode))
            self._record_tool_scores(debug_info, ranked, is_selected=True)
            mcp_result = [t for t, _ in ranked]

    # Path 2: Plan-enhanced search (small MCP tool set)
    else:
        logger.info(f"Path 2: MCP count ({mcp_count}) <= threshold, using enhanced search...")
        debug_info.llm_filter_used = False

        if self._llm:
            try:
                enhanced_query = await self._generate_search_query(task_prompt)
            except Exception as exc:
                # Query expansion is optional; search with the raw prompt instead.
                logger.debug(f"Search query expansion failed ({exc}), using raw task prompt")
                enhanced_query = task_prompt
        else:
            enhanced_query = task_prompt

        try:
            ranked = self._ranker.rank(
                enhanced_query, mcp_tools,
                top_k=max_tools, mode=SearchMode(mode)
            )
            # Record all scores from semantic search
            self._record_tool_scores(debug_info, ranked, is_selected=True)
            mcp_result = [t for t, _ in ranked]
        except Exception as exc:
            logger.warning(f"Ranking failed ({exc}), fallback to keyword search")
            ranked = self._ranker._keyword_search(
                enhanced_query, mcp_tools, max_tools
            )
            self._record_tool_scores(debug_info, ranked, is_selected=True)
            mcp_result = [t for t, _ in ranked]

    # Apply quality ranking on MCP results
    if self._enable_quality_ranking and self._quality_manager and mcp_result:
        try:
            ranked_with_scores = [(t, 1.0) for t in mcp_result]
            ranked_with_scores = self._quality_manager.adjust_ranking(ranked_with_scores)
            mcp_result = [t for t, _ in ranked_with_scores]
        except Exception as exc:
            # Best-effort step: keep the relevance order, but leave a trace.
            logger.warning(f"Quality ranking failed ({exc}), keeping relevance order")

    # Limit MCP tools, then combine with non-MCP tools
    mcp_result = mcp_result[:max_tools]
    result = mcp_result + non_mcp_tools

    # Populate final selected tools in debug info
    self._populate_selected_tools(debug_info, result)

    self._log_search_results(candidate_tools, result, mode)
    self._query_cache[cache_key] = result
    return result
def _record_tool_scores(
self,
debug_info: SearchDebugInfo,
ranked: List[Tuple[BaseTool, float]],
is_selected: bool = False
) -> None:
"""Record tool scores from ranking results."""
for tool, score in ranked:
server_name = None
if tool.is_bound and tool.runtime_info:
server_name = tool.runtime_info.server_name
debug_info.tool_scores.append({
"name": tool.name,
"server": server_name,
"score": round(score, 4),
"selected": is_selected,
})
def _populate_selected_tools(
self,
debug_info: SearchDebugInfo,
tools: List[BaseTool]
) -> None:
"""Populate selected tools in debug info."""
for tool in tools:
backend = "UNKNOWN"
server_name = None
if tool.is_bound and tool.runtime_info:
backend = tool.runtime_info.backend.value
server_name = tool.runtime_info.server_name
elif tool.backend_type:
backend = tool.backend_type.value
debug_info.selected_tools.append({
"name": tool.name,
"server": server_name,
"backend": backend,
})
async def _llm_filter_with_planning(
self,
task_prompt: str,
tools: list[BaseTool]
) -> tuple[list[BaseTool], list[BaseTool], Dict[str, Any]]:
"""
LLM pre-filter for MCP servers.
Returns (utility_tools, domain_tools, llm_filter_info).
"""
from collections import defaultdict
# Group tools by server name
server_tools: Dict[str, list[BaseTool]] = defaultdict(list)
for t in tools:
if t.is_bound and t.runtime_info:
server = t.runtime_info.server_name or "default"
else:
server = "unknown"
server_tools[server].append(t)
# Build tool name -> tool object mapping
tool_name_map: Dict[str, BaseTool] = {t.name: t for t in tools}
# Build server description with tool names
lines: list[str] = ["Available MCP servers:"]
lines.append("")
for server, tool_list in server_tools.items():
lines.append(f"### Server: {server} ({len(tool_list)} tools)")
tool_names = [t.name for t in tool_list]
lines.append(f" All tools: {', '.join(tool_names)}")
if tool_list:
lines.append(f" Example capabilities:")
for tool in tool_list[:5]:
tool_desc = tool.description or "No description"
if len(tool_desc) > 100:
tool_desc = tool_desc[:97] + "..."
lines.append(f" - {tool.name}: {tool_desc}")
lines.append("")
servers_block = "\n".join(lines)
TOOL_FILTER_SYSTEM_PROMPT = f"""You are an expert tool selection assistant.
# Your task
Analyze the given task and determine which MCP servers and tools are needed.
Think about how you would accomplish this task step by step, then classify needed servers and tools.
# Important guidelines
- **Focus on tool names and capabilities**: Carefully examine the tool names to understand what each server can do
- **Be inclusive for domain servers**: If a server has tools that might be relevant to the core task, include it
- **Be precise for utility tools**: Only select the specific auxiliary tools needed (e.g., file save, time query)
- **When in doubt, include in domain_servers**: It's better to include a server than miss relevant tools
{servers_block}
# Output format
Return ONLY a JSON object (no markdown, no explanation):
{{
"brief_plan": "1-2 sentence execution plan",
"utility_tools": {{
"server1": ["tool1", "tool2"]
}},
"domain_servers": ["server2", "server3"]
}}
- **utility_tools**: Dict mapping server name to list of specific tool names.
These are auxiliary tools for supporting operations (e.g., filesystem: ["write_file"], time-server: ["get_time"]).
Only include the specific tools needed, NOT the entire server.
- **domain_servers**: Server names that directly provide the main capabilities for the task.
All tools from these servers will be considered. Be inclusive here."""
user_query = f"Task: {task_prompt}\n\nClassify the needed servers and tools."
messages_text = LLMClient.format_messages_to_text([
{"role": "system", "content": TOOL_FILTER_SYSTEM_PROMPT},
{"role": "user", "content": user_query}
])
resp = await self._llm.complete(messages_text)
content = resp["message"]["content"].strip()
# Extract JSON
code_block_pattern = r'```(?:json)?\s*\n?(.*?)\n?```'
match = re.search(code_block_pattern, content, re.DOTALL)
if match:
content = match.group(1).strip()
else:
json_match = re.search(r'\{.*\}', content, re.DOTALL)
if json_match:
content = json_match.group()
try:
result = json.loads(content)
except json.JSONDecodeError as e:
logger.warning(f"Failed to parse LLM response: {e}")
return [], tools
# Parse utility_tools: {server: [tool_names]}
utility_tools_config = result.get("utility_tools", {})
domain_servers = set(result.get("domain_servers", []))
brief_plan = result.get("brief_plan", "N/A")
logger.info(f"LLM Planning: {brief_plan}")
logger.info(f"Utility tools: {utility_tools_config}")
logger.info(f"Domain servers: {domain_servers}")
# Collect utility tools (specific tools only)
utility_tools = []
for server_name, tool_names in utility_tools_config.items():
if server_name in server_tools:
server_tool_names = {t.name for t in server_tools[server_name]}
for tool_name in tool_names:
if tool_name in server_tool_names and tool_name in tool_name_map:
utility_tools.append(tool_name_map[tool_name])
# Collect domain tools (entire servers)
domain_tools = []
for server, tool_list in server_tools.items():
if server in domain_servers:
domain_tools.extend(tool_list)
logger.info(f"LLM filter result: {len(utility_tools)} utility tools, {len(domain_tools)} domain tools")
# Build LLM filter info for debugging
llm_filter_info = {
"brief_plan": brief_plan,
"utility_tools": utility_tools_config,
"domain_servers": list(domain_servers),
}
# Fallback if no match
if not utility_tools and not domain_tools:
logger.warning(f"LLM filter matched 0 tools, returning all as domain")
return [], tools, llm_filter_info
return utility_tools, domain_tools, llm_filter_info
async def _generate_search_query(self, task_prompt: str) -> str:
    """Expand `task_prompt` with capability keywords suggested by the LLM.

    The LLM reply is flattened onto one line and appended to the original prompt,
    separated by a single space.
    """
    keyword_request = f"""Task: {task_prompt}
List keywords for the capabilities needed (comma-separated, brief):"""
    reply = await self._llm.complete(keyword_request)
    keywords = reply["message"]["content"].strip().replace("\n", " ")
    combined = f"{task_prompt} {keywords}"
    logger.debug(f"Enhanced search query: {combined[:150]}...")
    return combined
def _log_search_results(self, all_tools: list[BaseTool], filtered_tools: list[BaseTool], mode: str) -> None:
"""
Log search results in a concise, grouped format.
Shows backend/server breakdown and tool names (truncated if too many).
"""
from collections import defaultdict
# Group filtered tools by backend and server
grouped: Dict[str, Dict[str | None, list[str]]] = defaultdict(lambda: defaultdict(list))
for t in filtered_tools:
# Get backend and server info
if t.is_bound:
backend = t.runtime_info.backend.value
server = t.runtime_info.server_name if backend.lower() == "mcp" else None
else:
if not t.backend_type or t.backend_type == BackendType.NOT_SET:
backend = "UNKNOWN"
server = None
else:
backend = t.backend_type.value
server = None
grouped[backend][server].append(t.name)
# Build concise summary
lines = [f"\n{'='*60}"]
lines.append(f"🔍 Tool Search Results (mode: {mode})")
lines.append(f" {len(all_tools)} candidates → {len(filtered_tools)} selected tools")
lines.append(f"{'='*60}")
for backend, srv_map in sorted(grouped.items()):
backend_total = sum(len(tools) for tools in srv_map.values())
lines.append(f"\n📦 {backend} ({backend_total} tools)")
for server, tool_names in sorted(srv_map.items()):
if backend.lower() == "mcp" and server:
prefix = f" └─ {server}: "
else:
prefix = f" └─ "
# Limit display to avoid overwhelming output
if len(tool_names) <= 8:
tools_display = ", ".join(tool_names)
else:
tools_display = ", ".join(tool_names[:8]) + f" ... (+{len(tool_names)-8} more)"
lines.append(f"{prefix}{tools_display}")
lines.append(f"{'='*60}\n")
# Use info level so users can see it
logger.info("\n".join(lines))
@staticmethod
def _format_tool_list(tools: list[BaseTool]) -> str:
rows = [f"{i}. **{t.name}**: {t.description}" for i, t in enumerate(tools, 1)]
return f"Total {len(tools)} tools, list out directly:\n\n" + "\n".join(rows)
@staticmethod
def _format_ranked(results: list[tuple[BaseTool, float]], mode: SearchMode) -> str:
lines = [f"Search results (mode={mode}) total {len(results)}:\n"]
for i, (tool, score) in enumerate(results, 1):
lines.append(f"{i}. {tool.name} (score: {score:.3f})\n {tool.description}")
return "\n".join(lines)
def _run(self, *args, **kwargs):
raise NotImplementedError("SearchCoordinator only supports asynchronous calls. Use _arun instead.")
def get_embedding_cache_stats(self) -> Dict[str, Any]:
    """Return the embedding-cache statistics reported by the underlying ranker.

    Returns:
        The dict produced by the ranker's `get_cache_stats()`, passed through unchanged.
    """
    stats = self._ranker.get_cache_stats()
    return stats
def clear_embedding_cache(self, backend: Optional[str] = None, server: Optional[str] = None) -> int:
    """Clear embeddings from the ranker's cache.

    Both selectors are forwarded as keyword arguments to the ranker's `clear_cache`.

    Args:
        backend: If provided, only clear this backend. If None, clear all.
        server: If provided (and backend is provided), only clear this server.

    Returns:
        Number of embeddings cleared, as reported by the ranker.
    """
    removed = self._ranker.clear_cache(backend=backend, server=server)
    return removed
def get_last_search_debug_info(self) -> Optional[Dict[str, Any]]:
    """Return the debug record of the most recent search as a plain dict.

    Returns:
        The `to_dict()` form of the last debug record, or None if no search has
        been performed. The record is filled in by `_arun` with the search mode,
        candidate counts, LLM filter details, tool scores and selected tools.
    """
    snapshot = self._last_search_debug_info
    return None if snapshot is None else snapshot.to_dict()
================================================
FILE: anytool/grounding/core/security/__init__.py
================================================
from .sandbox import BaseSandbox, SandboxManager
from .policies import SecurityPolicyManager, SecurityPolicy
# Try to import E2BSandbox (optional dependency).
# The e2b_sandbox module catches a missing SDK itself and records the outcome in
# its own E2B_AVAILABLE flag, so importing the module succeeds even without the
# SDK. Re-export that flag instead of inferring availability from the import.
try:
    from .e2b_sandbox import E2BSandbox, E2B_AVAILABLE
except ImportError:
    E2BSandbox = None
    E2B_AVAILABLE = False

__all__ = [
    "BaseSandbox",
    "SandboxManager",
    "SecurityPolicyManager",
    "SecurityPolicy"
]

# Only advertise E2BSandbox when the SDK it needs is actually importable.
if E2B_AVAILABLE:
    __all__.append("E2BSandbox")
================================================
FILE: anytool/grounding/core/security/e2b_sandbox.py
================================================
"""
E2B Sandbox implementation.
This module provides a concrete implementation of BaseSandbox using E2B.
"""
import os
from typing import Any, Dict, Optional, TYPE_CHECKING
from anytool.utils.logging import Logger
from .sandbox import BaseSandbox
from ..types import SandboxOptions
logger = Logger.get_logger(__name__)
# Import E2B SDK components (optional dependency)
if TYPE_CHECKING:
    # Visible to type checkers only; never executed at runtime.
    try:
        from e2b_code_interpreter import CommandHandle, Sandbox
    except ImportError:
        CommandHandle = None  # type: ignore
        Sandbox = None  # type: ignore

# Runtime import: record whether the SDK is usable instead of failing at import time.
try:
    logger.debug("Attempting to import e2b_code_interpreter...")
    from e2b_code_interpreter import (  # type: ignore
        CommandHandle,
        Sandbox,
    )
except ImportError as e:
    logger.debug(f"Failed to import e2b_code_interpreter: {e}")
    CommandHandle = None  # type: ignore
    Sandbox = None  # type: ignore
    E2B_AVAILABLE = False
else:
    logger.debug("Successfully imported e2b_code_interpreter")
    E2B_AVAILABLE = True
class E2BSandbox(BaseSandbox):
    """BaseSandbox implementation that runs commands inside an E2B sandbox."""

    def __init__(self, options: SandboxOptions):
        """Validate the environment and read the sandbox settings.

        Args:
            options: Sandbox configuration options including:
                - api_key: E2B API key (or use E2B_API_KEY env var)
                - sandbox_template_id: Template ID for the sandbox (default: "base")
                - timeout: Command execution timeout in seconds (default: 600)

        Raises:
            ImportError: the E2B SDK could not be imported.
            ValueError: no API key in the options or the environment.
        """
        super().__init__(options)

        if not E2B_AVAILABLE:
            raise ImportError(
                "E2B SDK (e2b-code-interpreter) not found. Please install it with "
                "'pip install e2b-code-interpreter'."
            )

        # An explicit option wins; the environment variable is the fallback.
        self.api_key = options.get("api_key") or os.environ.get("E2B_API_KEY")
        if not self.api_key:
            raise ValueError(
                "E2B API key is required. Provide it via 'options.api_key'"
                " or the E2B_API_KEY environment variable."
            )

        # Sandbox configuration
        self.sandbox_template_id = options.get("sandbox_template_id", "base")
        self.timeout = options.get("timeout", 600)  # Default 10 minutes

        # Typed as Any so the optional SDK types are not needed at import time.
        self._sandbox: Any = None
        self._process: Any = None

    async def start(self) -> bool:
        """Create the E2B sandbox if it is not already running.

        Returns:
            True if the sandbox is (now) active, False if creating it failed.
        """
        if self._active:
            logger.debug("E2B sandbox already active")
            return True

        try:
            logger.debug(f"Creating E2B sandbox with template: {self.sandbox_template_id}")
            self._sandbox = Sandbox(
                template=self.sandbox_template_id,
                api_key=self.api_key,
            )
            self._active = True
            logger.info(f"E2B sandbox started successfully (template: {self.sandbox_template_id})")
        except Exception as e:
            logger.error(f"Failed to start E2B sandbox: {e}")
            self._active = False
            return False
        return True

    async def stop(self) -> None:
        """Kill the tracked process (if any) and the sandbox, then mark it inactive.

        Errors from either kill are logged as warnings and do not stop the cleanup.
        """
        if not self._active:
            logger.debug("E2B sandbox not active")
            return

        try:
            # Terminate any running process first
            if self._process:
                try:
                    logger.debug("Terminating sandbox process")
                    self._process.kill()
                except Exception as e:
                    logger.warning(f"Error terminating sandbox process: {e}")
                finally:
                    self._process = None

            # Then close the sandbox itself
            if self._sandbox:
                try:
                    logger.debug("Closing E2B sandbox instance")
                    self._sandbox.kill()
                    logger.info("E2B sandbox stopped successfully")
                except Exception as e:
                    logger.warning(f"Error closing E2B sandbox: {e}")
                finally:
                    self._sandbox = None

            self._active = False
        except Exception as e:
            logger.error(f"Error stopping E2B sandbox: {e}")
            raise

    async def execute_safe(self, command: str, **kwargs) -> Any:
        """Run `command` inside the active E2B sandbox.

        Args:
            command: The command to execute
            **kwargs: Additional options:
                - envs: Environment variables (dict)
                - timeout: Command timeout in milliseconds
                - background: Run in background (bool)
                - on_stdout: Stdout callback function
                - on_stderr: Stderr callback function

        Returns:
            The handle returned by the sandbox's `commands.run`; it is also kept
            as the current process.

        Raises:
            RuntimeError: the sandbox has not been started.
        """
        if not self._active or not self._sandbox:
            raise RuntimeError("E2B sandbox is not active. Call start() first.")

        try:
            # NOTE(review): the default is self.timeout (seconds) * 1000; confirm that
            # the installed SDK's commands.run really expects milliseconds here.
            run_options = {
                "envs": kwargs.get("envs", {}),
                "timeout": kwargs.get("timeout", self.timeout * 1000),  # Convert to ms
                "background": kwargs.get("background", False),
                "on_stdout": kwargs.get("on_stdout"),
                "on_stderr": kwargs.get("on_stderr"),
            }

            logger.debug(f"Executing command in E2B sandbox: {command}")

            self._process = self._sandbox.commands.run(command, **run_options)
            return self._process
        except Exception as e:
            logger.error(f"Failed to execute command in E2B sandbox: {e}")
            raise

    def get_connector(self) -> Any:
        """Return the underlying E2B Sandbox instance (None before start / after stop)."""
        return self._sandbox

    def get_host(self, port: int) -> str:
        """Return the sandbox's host string for `port`.

        Args:
            port: The port number to get the host for

        Returns:
            Whatever the sandbox's `get_host(port)` returns.

        Raises:
            RuntimeError: If sandbox is not active
        """
        if not self._active or not self._sandbox:
            raise RuntimeError("E2B sandbox is not active. Call start() first.")

        return self._sandbox.get_host(port)

    @property
    def sandbox(self) -> Any:
        """The underlying E2B Sandbox instance."""
        return self._sandbox

    @property
    def process(self) -> Any:
        """Handle of the most recently started process, if any."""
        return self._process
================================================
FILE: anytool/grounding/core/security/policies.py
================================================
import asyncio
import sys
from typing import Callable, Awaitable, Dict, Optional
from ..types import SecurityPolicy, BackendType
PromptFunc = Callable[[str], Awaitable[bool]]
# ANSI color codes
class Colors:
    """ANSI escape sequences used when rendering security prompts."""

    # Attribute reset / emphasis
    RESET = "\033[0m"
    BOLD = "\033[1m"

    # Bright foreground colors, in ascending code order
    GRAY = "\033[90m"
    RED = "\033[91m"
    GREEN = "\033[92m"
    YELLOW = "\033[93m"
    CYAN = "\033[96m"
    WHITE = "\033[97m"
class SecurityPolicyManager:
    """Resolve security policies per backend and ask the user before risky actions.

    A policy registered for a backend wins over the global policy; when neither is
    set a default `SecurityPolicy()` is used. Commands and domains rejected by the
    policy are shown to the user through the prompt callback, whose answer decides.
    """

    def __init__(self, prompt: PromptFunc | None = None):
        """
        Args:
            prompt: async callable receiving the warning message and returning True
                to allow. Defaults to the interactive CLI prompt.
        """
        self._policies: Dict[BackendType, SecurityPolicy] = {}
        self._global_policy: Optional[SecurityPolicy] = None
        self._prompt: PromptFunc | None = prompt or self._default_cli_prompt

    async def _default_cli_prompt(self, message: str) -> bool:
        """Print `message` in a warning frame and read a y/yes answer from stdin."""
        # Clean and professional prompt using unified display
        from anytool.utils.display import colorize, print_separator

        print()
        print_separator(70, 'y', 2)
        print(f" {colorize('⚠️ Security Policy Warning', color=Colors.RED, bold=True)}")
        print_separator(70, 'y', 2)
        print(f" {message}")
        print_separator(70, 'gr', 2)
        print(f" {colorize('[y/yes]', color=Colors.GREEN)} Allow | {colorize('[n/no]', color=Colors.RED)} Deny")
        print_separator(70, 'gr', 2)
        print(f" {colorize('Your choice:', bold=True)} ", end="", flush=True)

        # readline blocks, so run it in the default executor to keep the loop responsive
        answer = await asyncio.get_running_loop().run_in_executor(None, sys.stdin.readline)
        response = answer.strip().lower() in {"y", "yes"}

        if response:
            print(f" {colorize('✓ Allowed', color=Colors.GREEN)}\n")
        else:
            print(f" {colorize('✗ Denied', color=Colors.RED)}\n")

        return response

    def set_global_policy(self, policy: SecurityPolicy) -> None:
        """Set the policy used for backends without a policy of their own."""
        self._global_policy = policy

    def set_backend_policy(self, backend_type: BackendType, policy: SecurityPolicy) -> None:
        """Register (or replace) the policy for one backend."""
        self._policies[backend_type] = policy

    def get_policy(self, backend_type: BackendType) -> SecurityPolicy:
        """Return the backend policy, else the global policy, else a default policy."""
        policy = self._policies.get(backend_type)
        if policy:
            return policy
        if self._global_policy:
            return self._global_policy
        return SecurityPolicy()

    async def _ask_user(self, message: str) -> bool:
        """If prompt is provided, ask user for confirmation, otherwise default to deny"""
        if self._prompt:
            try:
                return await self._prompt(message)
            except Exception:
                # Fail closed: a broken prompt must never grant permission.
                return False
        return False

    async def check_command_allowed(self, backend_type: BackendType, command: str) -> bool:
        """Return True if `command` passes the backend's policy or the user allows it.

        When the policy rejects the command, the lines containing the detected
        dangerous tokens (or the first five lines if none can be pinpointed) are
        shown to the user, and the user's answer is returned.
        """
        policy = self.get_policy(backend_type)
        if policy.check(command=command):
            return True

        # Find dangerous tokens
        dangerous_tokens = list(policy.find_dangerous_tokens(command) or [])
        # Lines are lowercased for matching, so the tokens must be lowercased too;
        # empty tokens are skipped because they would match every line.
        needles = [tok.lower() for tok in dangerous_tokens if tok]

        # Extract only lines containing dangerous commands
        lines = command.split('\n')
        dangerous_lines = []

        for i, line in enumerate(lines):
            line_lower = line.lower()
            if any(needle in line_lower for needle in needles):
                # Add line number and the line itself
                dangerous_lines.append((i + 1, line.strip()))

        # If no specific dangerous lines found but policy failed, show first few lines
        if not dangerous_lines:
            dangerous_lines = [(i + 1, line.strip()) for i, line in enumerate(lines[:5])]

        # Format dangerous lines for display (limit to 10 lines)
        max_display_lines = 10
        truncated = len(dangerous_lines) > max_display_lines
        display_lines = dangerous_lines[:max_display_lines]

        # Build formatted command display
        formatted_cmd_lines = []
        for line_num, line in display_lines:
            # Truncate very long lines
            if len(line) > 80:
                line = line[:77] + "..."
            formatted_cmd_lines.append(f" L{line_num}: {line}")

        if truncated:
            formatted_cmd_lines.append(" ... (more lines)")

        # Show which dangerous commands were detected
        dangerous_list = ', '.join([f"{Colors.RED}{tok}{Colors.RESET}" for tok in dangerous_tokens[:5]])

        from anytool.utils.display import Box, BoxStyle, colorize

        # Build command box
        box = Box(width=66, style=BoxStyle.SQUARE, color='gr')
        cmd_box = [
            box.top_line(2),
            box.empty_line(2),
        ]
        for line in formatted_cmd_lines:
            cmd_box.append(box.text_line(line, indent=2))
        cmd_box.extend([
            box.empty_line(2),
            box.bottom_line(2)
        ])

        message = (
            f"\n{colorize('Potentially dangerous command detected', color=Colors.WHITE)}\n\n"
            f"Backend: {colorize(backend_type.value, color=Colors.CYAN)}\n"
            f"Dangerous commands: {dangerous_list}\n\n"
            f"Affected lines:\n"
            + "\n".join(cmd_box) + "\n\n"
            f"{colorize('This command may contain risky operations. Continue?', color=Colors.YELLOW)}"
        )
        return await self._ask_user(message)

    async def check_domain_allowed(self, backend_type: BackendType, domain: str) -> bool:
        """Return True if `domain` passes the backend's policy or the user allows it."""
        policy = self.get_policy(backend_type)
        if policy.check(domain=domain):
            return True

        message = (
            f"\n{Colors.WHITE}Unauthorized domain access detected{Colors.RESET}\n\n"
            f"Backend: {Colors.CYAN}{backend_type.value}{Colors.RESET}\n"
            f"Domain: {Colors.YELLOW}{domain}{Colors.RESET}\n\n"
            f"{Colors.YELLOW}This domain is not in the allowed list. Continue?{Colors.RESET}"
        )
        return await self._ask_user(message)
================================================
FILE: anytool/grounding/core/security/sandbox.py
================================================
from typing import Any, Dict, Optional
from abc import ABC, abstractmethod
from ..types import SandboxOptions, BackendType
class BaseSandbox(ABC):
    """Abstract sandbox: lifecycle hooks plus command execution.

    Subclasses keep `_active` in sync inside `start()` / `stop()`; it is exposed
    read-only through `is_active`.
    """

    def __init__(self, options: SandboxOptions):
        self._active = False
        self.options = options

    @abstractmethod
    async def start(self) -> bool:
        """Set self._active to True"""

    @abstractmethod
    async def stop(self) -> None:
        """Set self._active to False"""

    @abstractmethod
    async def execute_safe(self, command: str, **kwargs) -> Any:
        """Run `command` inside the sandbox."""

    @abstractmethod
    def get_connector(self) -> Any:
        """Return the backend-specific object behind this sandbox."""

    @property
    def is_active(self) -> bool:
        """Current value of the `_active` flag."""
        return self._active
class SandboxManager:
    """Registry holding at most one sandbox per backend type."""

    def __init__(self):
        self._sandboxes: Dict[BackendType, BaseSandbox] = {}

    def register_sandbox(self, backend_type: BackendType, sandbox: BaseSandbox) -> None:
        """Register `sandbox` for `backend_type`, replacing any previous one."""
        self._sandboxes[backend_type] = sandbox

    def get_sandbox(self, backend_type: BackendType) -> Optional[BaseSandbox]:
        """Return the sandbox registered for `backend_type`, or None."""
        return self._sandboxes.get(backend_type)

    async def start_all(self) -> None:
        """Start every registered sandbox, one after another.

        The boolean returned by each `start()` is not inspected.
        """
        for sandbox in self._sandboxes.values():
            await sandbox.start()

    async def stop_all(self) -> None:
        """Stop every registered sandbox, even if some of them fail to stop.

        Every sandbox gets its `stop()` call; the first error encountered is
        re-raised afterwards so a single failure cannot leave the remaining
        sandboxes running.
        """
        first_error: Optional[Exception] = None
        # Iterate over a snapshot so a stop() that touches the registry cannot
        # invalidate the iteration.
        for sandbox in list(self._sandboxes.values()):
            try:
                await sandbox.stop()
            except Exception as exc:
                if first_error is None:
                    first_error = exc
        if first_error is not None:
            raise first_error
================================================
FILE: anytool/grounding/core/session.py
================================================
from abc import ABC, abstractmethod
from typing import Any, Dict, List
from datetime import datetime
from .tool import BaseTool
from .transport.connectors import BaseConnector
from .types import SessionInfo, SessionStatus, BackendType, ToolResult
from anytool.utils.logging import Logger
logger = Logger.get_logger(__name__)
class BaseSession(ABC):
    """
    Session manager for all backends.

    Wraps a connector, tracks the connection status and activity timestamps, and
    exposes the tools discovered by the subclass' `initialize()`. Usable as an
    async context manager: entering connects / initializes (unless disabled),
    leaving disconnects.
    """

    def __init__(
        self,
        connector: BaseConnector,
        *,
        session_id: str,
        backend_type: BackendType | None = None,
        auto_connect: bool = True,
        auto_initialize: bool = True,
    ) -> None:
        """
        Args:
            connector: transport used to reach the backend.
            session_id: identifier reported in `info` and in log messages.
            backend_type: backend this session belongs to; NOT_SET when omitted.
            auto_connect: connect when the context manager is entered.
            auto_initialize: call `initialize()` when the context manager is entered.
        """
        self.connector = connector
        self.session_id = session_id
        self.backend_type = backend_type or BackendType.NOT_SET
        self.auto_connect = auto_connect
        self.auto_initialize = auto_initialize

        self.status: SessionStatus = SessionStatus.DISCONNECTED
        self.session_info: Dict[str, Any] | None = None
        # NOTE(review): utcnow() yields naive datetimes and is deprecated since
        # Python 3.12; kept because switching to aware datetimes would change the
        # values exposed through `info`.
        self._created_at = datetime.utcnow()
        self._last_activity = self._created_at
        self.tools: List[BaseTool] = []

    async def __aenter__(self) -> "BaseSession":
        if self.auto_connect:
            await self.connect()
        if self.auto_initialize:
            try:
                self.session_info = await self.initialize()
            except Exception:
                # __aexit__ is not invoked when __aenter__ raises, so release the
                # connection here instead of leaking it.
                try:
                    await self.disconnect()
                except Exception as cleanup_error:
                    logger.warning(
                        f"Failed to disconnect session {self.session_id} "
                        f"after initialize() error: {cleanup_error}"
                    )
                raise
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
        """Exit the async context manager.

        Args:
            exc_type: The exception type, if an exception was raised.
            exc_val: The exception value, if an exception was raised.
            exc_tb: The exception traceback, if an exception was raised.
        """
        await self.disconnect()

    async def connect(self) -> None:
        """Connect the connector unless it is already connected."""
        if self.connector.is_connected:
            return
        self.status = SessionStatus.CONNECTING
        try:
            await self.connector.connect()
        except BaseException:
            # Do not leave the session stuck in CONNECTING after a failed attempt.
            self.status = SessionStatus.DISCONNECTED
            raise
        self.status = SessionStatus.CONNECTED

    async def disconnect(self) -> None:
        """Disconnect the connector if it is connected."""
        if not self.connector.is_connected:
            return
        await self.connector.disconnect()
        self.status = SessionStatus.DISCONNECTED

    @property
    def is_connected(self) -> bool:
        """Whether the underlying connector reports an open connection."""
        return self.connector.is_connected

    @abstractmethod
    async def initialize(self) -> Dict[str, Any]:
        """
        Negotiate with the backend, discover tools, etc.
        Return session information (can be an empty dict).
        `self.tools` need to be set in this method.
        """
        raise NotImplementedError("Sub-class must implement this method")

    async def list_tools(self) -> List[BaseTool]:
        """
        Return tools discovered during `initialize()`.

        `initialize()` is (re)run whenever the tool list is empty.
        """
        if not self.tools:
            self.session_info = await self.initialize()
        return self.tools

    async def call_tool(self, tool_name: str, parameters=None) -> ToolResult:
        """Run the tool named `tool_name` with `parameters` as keyword arguments.

        Raises:
            ValueError: no tool with that schema name exists in this session.
        """
        parameters = parameters or {}

        # Ensure tools are initialized before calling
        if not self.tools:
            logger.debug(f"Tools not initialized for session {self.session_id}, initializing now...")
            self.session_info = await self.initialize()

        tool_map = {t.schema.name: t for t in self.tools}
        if tool_name not in tool_map:
            raise ValueError(f"Unknown tool: {tool_name}")
        result = await tool_map[tool_name].arun(**parameters)
        self._touch()
        return result

    # Update when a successful call is made
    def _touch(self):
        self._last_activity = datetime.utcnow()

    @property
    def info(self) -> SessionInfo:
        """Snapshot of the session's identity, status, timestamps and metadata."""
        return SessionInfo(
            session_id=self.session_id,
            backend_type=getattr(self, "backend_type", BackendType.NOT_SET),
            status=self.status,
            created_at=self._created_at,
            last_activity=self._last_activity,
            metadata=self.session_info or {},
        )
================================================
FILE: anytool/grounding/core/system/__init__.py
================================================
from .provider import SystemProvider
from .tool import SYSTEM_TOOLS
__all__ = [
"SystemProvider",
"SYSTEM_TOOLS",
]
================================================
FILE: anytool/grounding/core/system/provider.py
================================================
from typing import List, Dict, Any
from ..provider import Provider
from ..types import BackendType, SessionConfig
from ..grounding_client import GroundingClient
from .tool import SYSTEM_TOOLS, _BaseSystemTool
from ..exceptions import GroundingError, ErrorCode
class SystemProvider(Provider):
    """
    Provider exposing the built-in system query tools.

    Every tool class in `SYSTEM_TOOLS` is instantiated once at construction
    time; the provider has no sessions of its own.
    """

    def __init__(self, client: GroundingClient):
        super().__init__(BackendType.SYSTEM, {})
        # One instance per registered system tool, all bound to `client`
        instances: List[_BaseSystemTool] = []
        for tool_cls in SYSTEM_TOOLS:
            instances.append(tool_cls(client))
        self._tools: List[_BaseSystemTool] = instances

    async def initialize(self):
        """Nothing to set up; only flags the provider as initialized."""
        self.is_initialized = True

    async def create_session(self, session_config: SessionConfig):
        """Always raises: system tools are not session-based."""
        raise GroundingError(
            "SystemProvider does not support sessions",
            code=ErrorCode.CONFIG_INVALID,
        )

    async def list_tools(self, session_name: str | None = None):
        """Return the system tool instances; `session_name` is ignored."""
        return self._tools

    async def call_tool(
        self,
        session_name: str,
        tool_name: str,
        parameters: Dict[str, Any] | None = None,
    ):
        """
        Run a system tool by schema name.

        Args:
            session_name: Unused by this provider.
            tool_name: Schema name of the system tool.
            parameters: Keyword arguments for the tool, or None for no arguments.

        Raises:
            GroundingError: With `ErrorCode.TOOL_NOT_FOUND` when the name is unknown.
        """
        registry = {tool.schema.name: tool for tool in self._tools}
        if tool_name not in registry:
            raise GroundingError(
                f"System tool '{tool_name}' not found",
                code=ErrorCode.TOOL_NOT_FOUND,
            )
        selected = registry[tool_name]
        arguments = parameters or {}
        return await selected.arun(**arguments)

    async def close_session(self, session_name: str) -> None:
        """No-op: there is no session to close."""
        return None
================================================
FILE: anytool/grounding/core/system/tool.py
================================================
from ..tool.local_tool import LocalTool
from ..types import BackendType, ToolResult, ToolStatus
from ..grounding_client import GroundingClient
class _BaseSystemTool(LocalTool):
    """
    Common base of the system tools: a local tool that keeps a reference
    to the `GroundingClient` it queries.
    """

    backend_type = BackendType.SYSTEM

    def __init__(self, client: GroundingClient):
        # Keyword order is irrelevant; both are keyword-only on the base class
        super().__init__(handle_errors=True, verbose=False)
        self._client = client

    @property
    def client(self) -> GroundingClient:
        """The grounding client handed to the constructor."""
        return self._client
class ListProvidersTool(_BaseSystemTool):
    """System tool that reports the keys of the client's provider registry."""

    _name = "list_providers"
    _description = "List all registered backend providers"

    async def _arun(self) -> ToolResult:
        """Return the provider keys joined by ", "."""
        registered = self.client.list_providers()
        names = list(registered.keys())
        return ToolResult(
            status=ToolStatus.SUCCESS,
            content=", ".join(names),
        )
class ListBackendToolsTool(_BaseSystemTool):
    """System tool that lists the tool names of a single backend."""

    _name = "list_backend_tools"
    _description = "List static tools for a backend"

    async def _arun(self, backend: str) -> ToolResult:
        """
        Args:
            backend: Backend name; it is lower-cased and looked up as a
                `BackendType` value.

        Returns:
            The tool names joined by ", ", or an error result when the
            name is not a known `BackendType` value.
        """
        wanted = backend.lower()
        try:
            backend_type = BackendType(wanted)
        except ValueError:
            return ToolResult(ToolStatus.ERROR, error=f"Unknown backend '{backend}'")

        discovered = await self.client.list_backend_tools(backend_type)
        joined = ", ".join([tool.schema.name for tool in discovered])
        return ToolResult(
            status=ToolStatus.SUCCESS,
            content=joined,
        )
class ListSessionToolsTool(_BaseSystemTool):
    """System tool that lists the tool names available in one session."""

    _name = "list_session_tools"
    _description = "List tools (incl. dynamic) for a session"

    async def _arun(self, session_id: str) -> ToolResult:
        """Return the schema names of the session's tools joined by ", "."""
        session_tools = await self.client.list_session_tools(session_id)
        collected = []
        for tool in session_tools:
            collected.append(tool.schema.name)
        return ToolResult(
            status=ToolStatus.SUCCESS,
            content=", ".join(collected),
        )
class ListAllBackendToolsTool(_BaseSystemTool):
    """System tool that lists the tool names of every backend, one line per backend."""

    _name = "list_all_backend_tools"
    _description = "List static tools for every registered backend"

    async def _arun(self, use_cache: bool = False) -> ToolResult:
        """
        Args:
            use_cache: Forwarded unchanged to the client's
                `list_all_backend_tools()`.

        Returns:
            One "backend: tool, tool, ..." line per backend, newline-joined.
        """
        per_backend = await self.client.list_all_backend_tools(use_cache=use_cache)
        report = []
        for backend, tools in per_backend.items():
            tool_names = ", ".join(t.schema.name for t in tools)
            report.append(f"{backend.value}: {tool_names}")
        return ToolResult(
            status=ToolStatus.SUCCESS,
            content="\n".join(report),
        )
# Registry of system tool classes. SystemProvider instantiates one tool per
# entry, in this order, passing the GroundingClient to each constructor.
SYSTEM_TOOLS: list[type[_BaseSystemTool]] = [
    ListProvidersTool,
    ListBackendToolsTool,
    ListSessionToolsTool,
    ListAllBackendToolsTool,
]
================================================
FILE: anytool/grounding/core/tool/__init__.py
================================================
from .base import BaseTool
from .local_tool import LocalTool
from .remote_tool import RemoteTool
__all__ = ["BaseTool", "LocalTool", "RemoteTool"]
================================================
FILE: anytool/grounding/core/tool/base.py
================================================
"""
BaseTool.
All pre-defined grounding atomic operations will inherit this tool class.
RemoteTool needs to pass in connector.
"""
import asyncio, time, inspect
from abc import ABC, abstractmethod
from functools import lru_cache
from typing import Any, ClassVar, Dict, Optional, TYPE_CHECKING
from pydantic import BaseModel, ConfigDict, Field, create_model
from ..types import BackendType, ToolResult, ToolSchema, ToolStatus
from ..exceptions import GroundingError, ErrorCode
from anytool.utils.logging import Logger
import jsonschema
if TYPE_CHECKING:
from ..grounding_client import GroundingClient
logger = Logger.get_logger(__name__)
class ToolRuntimeInfo:
    """
    Runtime information for a tool instance.

    Plain holder for where a tool is bound: backend, session, optional
    server name and an optional grounding client used for invocation.
    """

    def __init__(
        self,
        backend: BackendType,
        session_name: str,
        server_name: Optional[str] = None,
        grounding_client: Optional['GroundingClient'] = None,
    ):
        self.backend = backend
        self.session_name = session_name
        self.server_name = server_name
        self.grounding_client = grounding_client

    def __repr__(self):
        # Previously returned an empty string, which made logs and debugger
        # output useless. Enum members are shown by value; anything else as-is.
        backend = getattr(self.backend, "value", self.backend)
        return (
            f"ToolRuntimeInfo(backend={backend!r}, "
            f"session_name={self.session_name!r}, "
            f"server_name={self.server_name!r})"
        )
class BaseTool(ABC):
    """
    Abstract base class of every grounding tool.

    A subclass implements `_arun()` (async) and/or `_run()` (sync). Callers use
    `arun()`, which validates parameters, times the call, wraps the raw return
    value in a `ToolResult` and records the execution. `invoke()` routes the
    call through a bound `GroundingClient` when runtime info is attached.
    """

    _name: ClassVar[str] = ""
    _description: ClassVar[str] = ""
    backend_type: ClassVar[BackendType] = BackendType.NOT_SET

    def __init__(self,
                 schema: Optional[ToolSchema] = None,
                 *,
                 verbose: bool = False,
                 handle_errors: bool = True) -> None:
        """
        Args:
            schema: Explicit tool schema. When falsy, one is built from the
                class attributes and the `_arun`/`_run` signature.
            verbose: Emit a debug log line with the elapsed time of each call.
            handle_errors: When True, `arun()` converts exceptions into an
                ERROR `ToolResult` instead of raising.
        """
        self.verbose = verbose
        self.handle_errors = handle_errors
        self.schema: ToolSchema = schema or ToolSchema(
            name=self._name or self.__class__.__name__.lower(),
            description=self._description,
            parameters=self.get_parameters_schema(),
            backend_type=self.backend_type,
        )
        self._runtime_info: Optional[ToolRuntimeInfo] = None
        # While True, _auto_record_execution skips the recording-manager entry
        self._disable_outer_recording = True

    @property
    def name(self) -> str:
        """Get tool name from schema (supports both class-defined and runtime-injected names)"""
        return self.schema.name if hasattr(self, 'schema') and self.schema else self._name

    @property
    def description(self) -> str:
        """Get tool description from schema (supports both class-defined and runtime-injected descriptions)"""
        return self.schema.description if hasattr(self, 'schema') and self.schema else self._description

    @classmethod
    @lru_cache
    def get_parameters_schema(cls) -> Dict[str, Any]:
        """Auto-generate JSON-schema from _run() or _arun() signature.

        Returns empty dict for tools with no parameters.
        Priority: prefer _arun if overridden, otherwise use _run.

        Raises:
            ValueError: If the class overrides neither `_arun` nor `_run`.
        """
        # NOTE(review): an intermediate base that overrides `_arun(**kwargs)`
        # (LocalTool does) makes every subclass take this first branch, so a
        # subclass implementing only `_run(x, y)` gets an empty schema. Confirm
        # whether that is intended before changing the selection rule.
        if cls._arun is not BaseTool._arun:
            sig_src = cls._arun
        elif cls._run is not BaseTool._run:
            sig_src = cls._run
        else:
            raise ValueError(
                f"{cls.__name__} must implement _run() or _arun() to define its parameters schema"
            )

        sig = inspect.signature(sig_src)
        fields: dict[str, Any] = {}
        for name, p in sig.parameters.items():
            # Skip 'self' and **kwargs / *args
            if name == "self" or p.kind in (inspect.Parameter.VAR_KEYWORD, inspect.Parameter.VAR_POSITIONAL):
                continue
            # Unannotated parameters are treated as strings; no default means required
            typ = p.annotation if p.annotation is not inspect._empty else str
            default = p.default if p.default is not inspect._empty else ...
            fields[name] = (typ, Field(default))

        if not fields:
            return {}

        PModel: type[BaseModel] = create_model(
            f"{cls.__name__}Params",
            __config__=ConfigDict(arbitrary_types_allowed=True),
            **fields
        )
        return PModel.model_json_schema()

    def validate_parameters(self, params: Dict[str, Any]) -> None:
        """
        Validate `params` against the tool schema.

        Raises:
            GroundingError: With `ErrorCode.TOOL_EXECUTION_FAIL` when the
                schema reports a `jsonschema.ValidationError`.
        """
        try:
            self.schema.validate_parameters(params, raise_exc=True)
        except jsonschema.ValidationError as ve:
            raise GroundingError(
                f"Invalid parameters: {ve.message}",
                code=ErrorCode.TOOL_EXECUTION_FAIL,
                tool_name=self.schema.name,
            ) from ve

    def run(self, **kwargs):
        """
        Synchronous entry point.

        Returns:
            The `ToolResult` when no event loop is running in this thread, or
            an `asyncio.Task` scheduled on the running loop otherwise.
        """
        # Probe for a loop first. Wrapping asyncio.run() in `except RuntimeError`
        # also swallowed RuntimeErrors raised by the tool itself and left an
        # un-awaited coroutine behind when a loop was already running.
        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            return asyncio.run(self.invoke(**kwargs))
        return loop.create_task(self.invoke(**kwargs))

    def __call__(self, **kwargs):
        return self.run(**kwargs)

    async def __acall__(self, **kwargs):
        return await self.arun(**kwargs)

    async def arun(self, **kwargs) -> ToolResult:
        """
        Validate, execute, wrap and record a single tool call.

        Validation failures are raised inside the same `try` as execution, so
        with `handle_errors=True` they are also returned as ERROR results.
        """
        start = time.time()
        try:
            self.validate_parameters(kwargs)
            raw = await self._arun(**kwargs)
            result = self._wrap_result(raw, time.time() - start)

            # Auto-record (if enabled)
            await self._auto_record_execution(kwargs, result, time.time() - start)

            return result
        except Exception as e:
            if self.handle_errors:
                return ToolResult(
                    status=ToolStatus.ERROR,
                    error=str(e),
                    metadata={"tool": self.schema.name},
                )
            raise

    # to be implemented by subclasses
    @abstractmethod
    async def _arun(self, **kwargs): ...

    def bind_runtime_info(
        self,
        backend: BackendType,
        session_name: str,
        server_name: Optional[str] = None,
        grounding_client: Optional['GroundingClient'] = None,
    ) -> 'BaseTool':
        """
        Bind runtime information to the tool instance.
        Allow the tool to be invoked directly without specifying backend/session/server.

        Args:
            backend: Backend type
            session_name: Session name
            server_name: Server name (for MCP)
            grounding_client: Optional reference to GroundingClient for direct invocation

        Returns:
            This tool instance, to allow chaining.
        """
        self._runtime_info = ToolRuntimeInfo(
            backend=backend,
            session_name=session_name,
            server_name=server_name,
            grounding_client=grounding_client,
        )
        return self

    @property
    def runtime_info(self) -> Optional['ToolRuntimeInfo']:
        """Get runtime information if bound"""
        return self._runtime_info

    @property
    def is_bound(self) -> bool:
        """Check if tool has runtime information bound"""
        return self._runtime_info is not None

    async def invoke(
        self,
        parameters: Dict[str, Any] | None = None,
        keep_session: bool = True,
        **kwargs
    ) -> ToolResult:
        """
        Invoke this tool using bound runtime information.

        When runtime info with a grounding client is bound, the call goes
        through `grounding_client.invoke_tool()`. Otherwise the tool is
        executed locally via `arun()`.

        Args:
            parameters: Tool arguments; when falsy, `kwargs` are used instead.
            keep_session: Forwarded to `invoke_tool()`; unused for local runs.
        """
        params = parameters or kwargs

        if self.is_bound and self._runtime_info.grounding_client:
            return await self._runtime_info.grounding_client.invoke_tool(
                tool=self,
                parameters=params,
                keep_session=keep_session,
            )

        return await self.arun(**params)

    def _wrap_result(self, obj: Any, elapsed: float) -> ToolResult:
        """Normalize a raw `_arun` return value into a `ToolResult` carrying `elapsed`."""
        if isinstance(obj, ToolResult):
            # Already a result: only stamp the timing
            obj.execution_time = elapsed
            return obj

        if self.verbose:
            logger.debug("[%s] done in %.2f s", self.schema.name, elapsed)

        if isinstance(obj, (bytes, bytearray)):
            obj = obj.decode("utf-8", errors="replace")

        return ToolResult(
            status=ToolStatus.SUCCESS,
            content=str(obj),
            execution_time=elapsed,
            metadata={"tool": self.schema.name},
        )

    async def _auto_record_execution(
        self,
        parameters: Dict[str, Any],
        result: ToolResult,
        execution_time: float,
    ):
        """Auto-record tool execution to recording manager and quality manager."""
        # Record to quality manager (for quality tracking); it expects milliseconds
        await self._record_to_quality_manager(result, execution_time * 1000)

        # Record to recording manager (for trajectory recording)
        try:
            from anytool.recording import RecordingManager

            if not RecordingManager.is_recording():
                return

            # Check if tool has disabled outer recording (e.g., GUI agent with intermediate steps)
            if hasattr(self, '_disable_outer_recording') and self._disable_outer_recording:
                logger.debug(f"Skipping outer recording for {self.schema.name} (intermediate steps recorded)")
                return

            # Get backend and server_name from runtime_info (if bound)
            backend = self.backend_type.value
            server_name = None

            if self.is_bound and self._runtime_info:
                # Prefer runtime_info information (more accurate)
                backend = self._runtime_info.backend.value
                server_name = self._runtime_info.server_name

            # Get screenshot (if GUI backend)
            # NOTE(review): the screenshot is captured but never passed on below;
            # confirm whether record_tool_execution should receive it.
            screenshot = None
            if self.backend_type == BackendType.GUI and hasattr(self, 'connector'):
                try:
                    screenshot = await self.connector.get_screenshot()
                except Exception as e:
                    logger.debug(f"Failed to capture screenshot: {e}")

            # Record tool execution with complete runtime information
            await RecordingManager.record_tool_execution(
                tool_name=self.schema.name,
                backend=backend,
                parameters=parameters,
                result=result.content,
                server_name=server_name,
                is_success=result.is_success,  # Pass actual success status from ToolResult
            )

        except Exception as e:
            # Recording failure should not affect tool execution
            logger.debug(f"Failed to auto-record tool execution: {e}")

    async def _record_to_quality_manager(
        self,
        result: ToolResult,
        execution_time_ms: float,
    ):
        """Record execution result to quality manager for quality tracking."""
        try:
            from anytool.grounding.core.quality import get_quality_manager

            manager = get_quality_manager()
            if manager:
                await manager.record_execution(self, result, execution_time_ms)
        except Exception as e:
            # Quality recording failure should not affect tool execution
            logger.debug(f"Failed to record to quality manager: {e}")

    # keep _run for backward-compatibility / thread-pool fallback
    def _run(self, **kwargs):
        raise NotImplementedError

    def __repr__(self):
        # Previously built an empty string and returned None, which makes
        # repr()/logging of a tool raise TypeError. `self.name` already copes
        # with a schema that is not set yet.
        backend = getattr(self.backend_type, "value", self.backend_type)
        return f"{self.__class__.__name__}(name={self.name!r}, backend={backend!r})"

    def __init_subclass__(cls, **kwargs):
        """
        - at least implement _run or _arun
        - backend_type is NOT_SET, only give a warning, allow RemoteTool to inject at runtime
        """
        super().__init_subclass__(**kwargs)
        if cls._arun is BaseTool._arun and cls._run is BaseTool._run:
            raise ValueError(f"{cls.__name__} must implement _run() or _arun()")
        if cls.backend_type is BackendType.NOT_SET:
            logger.debug(
                "%s.backend_type is NOT_SET; remember to override or set at runtime.",
                cls.__name__,
            )
================================================
FILE: anytool/grounding/core/tool/local_tool.py
================================================
"""
LocalTool.
Executes entirely inside this Python process.
"""
import asyncio
from typing import Any
from .base import BaseTool
class LocalTool(BaseTool):
    """
    Tool that runs inside this Python process.

    Its `_arun` routes through `_dispatch_run`, which can fall back to running
    a synchronous `_run` implementation in the event loop's default executor.
    """

    def _run(self, **kwargs):
        raise NotImplementedError

    async def _dispatch_run(self, **kwargs) -> Any:
        """
        Pick the execution path for this tool.

        Raises:
            NotImplementedError: If the concrete class overrides neither
                `_arun` nor `_run`.
        """
        concrete = self.__class__

        # The class overrides _arun: delegate to the parent class's _arun
        if concrete._arun is not LocalTool._arun:
            return await super()._arun(**kwargs)

        # Nothing overridden at all: there is no implementation to run
        if concrete._run is LocalTool._run:
            raise NotImplementedError(
                f"{self.__class__.__name__} must implement _run() or _arun()"
            )

        # Only _run is overridden: run it in the loop's default executor
        running_loop = asyncio.get_running_loop()
        return await running_loop.run_in_executor(None, lambda: self._run(**kwargs))

    async def _arun(self, **kwargs):
        return await self._dispatch_run(**kwargs)
================================================
FILE: anytool/grounding/core/tool/remote_tool.py
================================================
"""
RemoteTool.
Wrapper around a connector that calls a remote tool.
"""
from typing import Optional
from anytool.utils.logging import Logger
from ..types import BackendType, ToolResult, ToolSchema, ToolStatus
from .base import BaseTool
from anytool.grounding.core.transport.connectors import BaseConnector
logger = Logger.get_logger(__name__)
class RemoteTool(BaseTool):
    """
    Tool whose execution is delegated to a connector.

    The connector's `invoke()` result is converted into a `ToolResult`:
    objects exposing `content` and `isError` are flattened item by item, and
    every other value is rendered as text.
    """

    backend_type = BackendType.NOT_SET

    def __init__(
        self,
        schema: ToolSchema | None = None,
        connector: Optional[BaseConnector] = None,
        remote_name: str = "",
        *,
        verbose: bool = False,
        backend: BackendType = BackendType.NOT_SET,
    ):
        """
        Args:
            schema: Tool schema; also the fallback source of the remote name.
            connector: Connector used by `_arun`; may be None.
            remote_name: Name sent to the connector; defaults to `schema.name`.
            verbose: Forwarded to `BaseTool`.
            backend: Backend type stored on the instance.
        """
        if remote_name:
            resolved_name = remote_name
        else:
            resolved_name = schema.name if schema else ""
        self._conn = connector
        self._remote_name = resolved_name
        # Must be set before BaseTool.__init__, which reads backend_type
        self.backend_type = backend
        super().__init__(schema=schema, verbose=verbose)

    @staticmethod
    def _describe_content_item(item):
        """Render one content item as text; returns None for unrecognized items."""
        # Text item with non-empty text
        if hasattr(item, 'text') and item.text:
            return item.text
        # Binary/image item: only its size is reported
        if hasattr(item, 'data'):
            return f"[Image data: {len(item.data) if item.data else 0} bytes]"
        # Embedded resource: only its URI is reported
        if hasattr(item, 'resource'):
            return f"[Embedded resource: {getattr(item.resource, 'uri', 'unknown')}]"
        return None

    async def _arun(self, **kwargs):
        """
        Invoke the remote tool and normalize its response.

        Raises:
            RuntimeError: If the tool was created without a connector.
        """
        # If no connector, tool must be invoked via grounding_client (on-demand startup)
        if self._conn is None:
            raise RuntimeError(
                f"Tool '{self.name}' has no connector. "
                "Use grounding_client.invoke_tool() to execute it with on-demand server startup."
            )

        raw = await self._conn.invoke(self._remote_name, kwargs)

        # Structured response exposing a content list plus an error flag
        if hasattr(raw, 'content') and hasattr(raw, 'isError'):
            rendered = []
            for item in (raw.content or []):
                piece = self._describe_content_item(item)
                if piece is not None:
                    rendered.append(piece)
            text = "\n".join(rendered)
            failed = getattr(raw, 'isError', False)
            return ToolResult(
                status=ToolStatus.ERROR if failed else ToolStatus.SUCCESS,
                content=text,
                error=text if failed else None,
            )

        if isinstance(raw, (dict, list, tuple)):
            # Containers are pretty-printed as JSON, falling back to str()
            import json
            try:
                text = json.dumps(raw, ensure_ascii=False, indent=2)
            except (TypeError, ValueError):
                text = str(raw)
        elif isinstance(raw, str):
            text = raw
        else:
            # Numbers, booleans and any unknown type
            text = str(raw)

        return ToolResult(
            status=ToolStatus.SUCCESS,
            content=text,
        )
================================================
FILE: anytool/grounding/core/transport/connectors/__init__.py
================================================
from .base import BaseConnector
from .aiohttp_connector import AioHttpConnector
__all__ = [
"BaseConnector",
"AioHttpConnector",
]
================================================
FILE: anytool/grounding/core/transport/connectors/aiohttp_connector.py
================================================
from typing import Any
from yarl import URL
import aiohttp
from ..task_managers import AioHttpConnectionManager
from .base import BaseConnector
from anytool.utils.logging import Logger
from pydantic import BaseModel
logger = Logger.get_logger(__name__)
class AioHttpConnector(BaseConnector[aiohttp.ClientSession]):
    """Generic HTTP-based connector with auto-reconnect & helper methods."""

    def __init__(self, base_url: str, **session_kw):
        """
        Args:
            base_url: Root URL of the backend; a trailing "/" is dropped.
            **session_kw: Forwarded to `AioHttpConnectionManager`.
        """
        connection_manager = AioHttpConnectionManager(base_url, **session_kw)
        super().__init__(connection_manager)
        self.base_url = base_url.rstrip("/")

    async def connect(self) -> None:
        """
        Open the session and ping `base_url` once.

        Raises:
            ConnectionError: If the ping fails or answers with a 5xx status;
                the connector is disconnected again before raising.
        """
        await super().connect()
        try:
            # ClientTimeout is the documented type; a bare number is deprecated
            ping_timeout = aiohttp.ClientTimeout(total=5)
            async with self._connection.get(self.base_url, timeout=ping_timeout) as resp:
                if resp.status >= 500:
                    raise ConnectionError(f"HTTP {resp.status}")
        except Exception as e:
            await self.disconnect()
            # Chain the cause so the original traceback is preserved
            raise ConnectionError(f"Ping {self.base_url} failed: {e}") from e

    async def _request(
        self,
        method: str,
        path: str,
        *,
        json: Any | BaseModel | None = None,
        data: Any | None = None,
        params: dict[str, Any] | None = None,
        **kw,
    ) -> aiohttp.ClientResponse:
        """
        Send one HTTP request relative to `base_url`, connecting first if needed.

        A `BaseModel` passed as `json` is dumped to a dict before sending.
        """
        if not self.is_connected:
            await self.connect()

        assert self._connection is not None  # for mypy
        url = URL(self.base_url) / path.lstrip("/")
        logger.debug("%s %s", method.upper(), url)
        return await self._connection.request(
            method.upper(),
            url,
            json=self._to_json_compatible(json),
            data=data,
            params=params,
            **kw,
        )

    async def get_json(self, path: str, **kw) -> Any:
        """GET `path` and return the decoded JSON, optionally parsed into `response_model`."""
        response_model: type[BaseModel] | None = kw.pop("response_model", None)
        resp = await self._request("GET", path, **kw)
        resp.raise_for_status()
        data = await resp.json()
        return self._parse_as(data, response_model)

    async def get_bytes(self, path: str, **kw) -> bytes:
        """GET `path` and return the raw response body."""
        resp = await self._request("GET", path, **kw)
        resp.raise_for_status()
        return await resp.read()

    async def post_json(
        self,
        path: str,
        payload: Any | BaseModel,
        *,
        response_model: type[BaseModel] | None = None,
        **kw,
    ) -> Any | BaseModel:
        """
        POST `payload` as JSON and return the decoded JSON response.

        Raises:
            aiohttp.ClientResponseError: For status >= 400. The message carries
                the `output`/`message`/`error` field of a JSON object body, or
                the body itself for any other non-empty JSON value.
        """
        resp = await self._request("POST", path, json=payload, **kw)
        try:
            data = await resp.json()
        except Exception:
            # Body is not JSON (or has a non-JSON content type)
            data = None

        if resp.status >= 400:
            # Extract detailed error from response body. The body may be any
            # JSON value, so only call .get() on dicts.
            detail = ""
            if isinstance(data, dict):
                detail = data.get("output") or data.get("message") or data.get("error") or ""
            elif data:
                detail = data

            error_msg = f"{resp.status}, message='{resp.reason}'"
            if detail:
                error_msg += f", detail='{detail}'"

            raise aiohttp.ClientResponseError(
                resp.request_info,
                resp.history,
                status=resp.status,
                message=error_msg,
            )

        return self._parse_as(data, response_model)

    async def request(self, method: str, path: str, **kw) -> aiohttp.ClientResponse:
        """Public pass-through to `_request`."""
        return await self._request(method, path, **kw)

    async def invoke(self, name: str, params: dict[str, Any]) -> Any:
        """
        Generic tool-invocation mapping for HTTP back-ends.

        name rule (the verb is case-insensitive):
        - "GET /path"              -> GET, return JSON
        - "GET_TEXT /path"         -> GET, return str
        - "GET_BYTES /path"        -> GET, return bytes
        - "POST /path"             -> POST, payload = params (JSON)
        - "PUT|PATCH|DELETE /path" -> handled by `_handle_other_json`
        - other                    -> default POST /{name}, payload = params

        Raises:
            ValueError: If `name` is empty or only whitespace.
        """
        verb_path = name.strip().split(maxsplit=1)
        if not verb_path:
            # Without this guard the indexing below fails with a bare IndexError
            raise ValueError("Tool name must not be empty")

        has_path = len(verb_path) == 2
        verb = verb_path[0].upper()
        path = verb_path[1] if has_path else verb_path[0]

        if verb == "GET_BYTES":
            return await self.get_bytes(path, params=params)

        if verb == "GET_TEXT":
            resp = await self._request("GET", path, params=params)
            resp.raise_for_status()
            return await resp.text()

        if verb in {"GET", "POST"} and has_path:
            if verb == "GET":
                return await self.get_json(path, params=params)
            return await self.post_json(path, payload=params)

        if verb in {"PUT", "PATCH", "DELETE"} and has_path:
            return await self._handle_other_json(verb, path, params)

        # Anything else: treat the whole name as a path and POST to it
        return await self.post_json(name, payload=params)

    async def _handle_other_json(self, method: str, path: str, params: dict[str, Any]):
        """Fallback implementation for PUT/PATCH/DELETE returning JSON/text, can be overridden by subclasses."""
        resp = await self._request(method, path, json=params)
        resp.raise_for_status()
        try:
            return await resp.json()
        except Exception:
            return await resp.text()
================================================
FILE: anytool/grounding/core/transport/connectors/base.py
================================================
"""
Base connector abstraction.
A connector is a very thin wrapper-class that owns a *connection manager*
(e.g. AioHttpConnectionManager, AsyncContextConnectionManager, …).
It exposes a unified `connect / disconnect / is_connected` lifecycle and
defines an abstract `request()` method which concrete back-ends must
implement.
"""
import asyncio
from abc import ABC, abstractmethod
from typing import Any, Generic, TypeVar, Type
from pydantic import BaseModel
from ..task_managers import BaseConnectionManager
T = TypeVar("T") # The object returned by manager.start(): session / connection
class BaseConnector(ABC, Generic[T]):
    """
    Thin lifecycle wrapper around a *connection manager*.

    The manager creates and tears down the raw connection; this class tracks
    the connected state, offers overridable hooks around each transition and
    leaves `invoke()` / `request()` to concrete subclasses.
    """

    def __init__(self, connection_manager: BaseConnectionManager[T]):
        self._connected = False
        # Raw object returned by the manager's start(); kept for reuse across requests
        self._connection: T | None = None
        self._connection_manager = connection_manager  # e.g. AioHttpConnectionManager instance

    async def connect(self) -> None:
        """
        Create the underlying session/connection via the manager.

        No-op when already connected. If a hook or the manager fails,
        `_cleanup_on_connect_failure()` runs and the error is re-raised.
        """
        if self._connected:
            return

        try:
            await self._before_connect()
            self._connection = await self._connection_manager.start()
            await self._after_connect()
        except Exception:
            await self._cleanup_on_connect_failure()
            raise
        else:
            self._connected = True

    async def disconnect(self) -> None:
        """
        Stop the manager, drop the connection reference and reset the state.

        No-op when not connected.
        """
        if not self._connected:
            return

        await self._before_disconnect()

        manager = self._connection_manager
        if manager:
            await manager.stop()
            self._connection = None

        await self._after_disconnect()

        self._connected = False

    async def _before_connect(self) -> None:
        """Hook run before the manager is started; default does nothing."""

    async def _after_connect(self) -> None:
        """Hook run after the manager has started; default does nothing."""

    async def _cleanup_on_connect_failure(self) -> None:
        """Hook run when `connect()` fails: best-effort manager stop, errors ignored."""
        manager = self._connection_manager
        if not manager:
            return
        try:
            await manager.stop()
        except Exception:
            pass
        self._connection = None

    async def _before_disconnect(self) -> None:
        """Hook run before the manager is stopped; default does nothing."""

    async def _after_disconnect(self) -> None:
        """Hook run after the manager has stopped; default does nothing."""

    @property
    def is_connected(self) -> bool:
        """True once `connect()` has completed and until `disconnect()` completes."""
        return self._connected

    @staticmethod
    def _to_json_compatible(obj: Any) -> Any:
        """
        Dump a Pydantic model to a dict (using field aliases); pass every
        other value through untouched.
        """
        return obj.model_dump(by_alias=True) if isinstance(obj, BaseModel) else obj

    @staticmethod
    def _parse_as(data: Any, model_cls: "Type[BaseModel] | None" = None) -> Any:
        """
        Validate *data* into *model_cls* when that is a `BaseModel` subclass;
        otherwise hand *data* back unchanged.
        """
        is_model = (
            model_cls is not None
            and isinstance(model_cls, type)
            and issubclass(model_cls, BaseModel)
        )
        if is_model:
            return model_cls.model_validate(data)
        return data

    @abstractmethod
    async def invoke(self, name: str, params: dict[str, Any]) -> Any:
        """
        Unified RPC entry for all tools.
        Sub-class maps this to its actual RPC like call_tool / run_cmd.
        """
        raise NotImplementedError

    @abstractmethod
    async def request(self, *args: Any, **kwargs: Any) -> Any:
        """Abstract RPC / HTTP / WS request method to be implemented by child classes."""
        raise NotImplementedError("This connector has not implemented 'request'")
================================================
FILE: anytool/grounding/core/transport/task_managers/__init__.py
================================================
from .base import BaseConnectionManager
from .aiohttp_connection_manager import AioHttpConnectionManager
from .async_ctx import AsyncContextConnectionManager
from .placeholder import PlaceholderConnectionManager
from .noop import NoOpConnectionManager
__all__ = [
"BaseConnectionManager",
"AioHttpConnectionManager",
"AsyncContextConnectionManager",
"PlaceholderConnectionManager",
"NoOpConnectionManager",
]
================================================
FILE: anytool/grounding/core/transport/task_managers/aiohttp_connection_manager.py
================================================
"""
Long-lived aiohttp ClientSession manager based on AsyncContextConnectionManager.
It keeps a single ClientSession open during the lifetime of a backend
session, saving the overhead of creating and closing a TCP connection
for every request.
"""
from typing import Optional
import aiohttp
from .async_ctx import AsyncContextConnectionManager
class AioHttpConnectionManager(
    AsyncContextConnectionManager[aiohttp.ClientSession, ...]
):
    """Connection manager whose context factory is `aiohttp.ClientSession`."""

    def __init__(
        self,
        base_url: str,
        headers: Optional[dict[str, str]] = None,
        timeout: float = 30,
    ):
        """
        Args:
            base_url: Backend root URL; stored without a trailing "/".
            headers: Default headers for the session (empty when omitted).
            timeout: Total timeout in seconds, wrapped in a `ClientTimeout`.
        """
        self.base_url = base_url.rstrip("/")
        super().__init__(
            aiohttp.ClientSession,
            timeout=aiohttp.ClientTimeout(total=timeout),
            headers=headers or {},
        )
        self._logger.debug(
            "Init AioHttpConnectionManager base_url=%s timeout=%s", self.base_url, timeout
        )

    async def _establish_connection(self) -> aiohttp.ClientSession:
        """Let the parent create and enter the session context, then log it."""
        created = await super()._establish_connection()
        self._logger.debug("aiohttp ClientSession created")
        return created

    async def _close_connection(self) -> None:
        """
        Close the session explicitly, then run the parent cleanup.

        A failing `close()` is only logged, so the parent cleanup always runs.
        """
        import asyncio

        session = self._ctx
        if session:
            try:
                self._logger.debug("Closing aiohttp ClientSession")
                await session.close()
                # Give aiohttp time to finish its internal cleanup callbacks
                await asyncio.sleep(0.1)
            except Exception as e:
                self._logger.warning(f"Error closing aiohttp ClientSession: {e}")

        await super()._close_connection()
================================================
FILE: anytool/grounding/core/transport/task_managers/async_ctx.py
================================================
"""
Generic connection manager based on an *async context manager*.
Give it any factory that returns an async–context-manager.
"""
import sys
from typing import Any, Callable, Generic, Optional, ParamSpec, TypeVar
from .base import BaseConnectionManager
# BaseExceptionGroup only exists in Python 3.11+
if sys.version_info >= (3, 11):
_BaseExceptionGroup = BaseExceptionGroup
else:
# Dummy class for older Python versions
class _BaseExceptionGroup(Exception):
pass
T = TypeVar("T") # Return type of the async context
P = ParamSpec("P") # Parameter specification of the factory
class AsyncContextConnectionManager(Generic[T, P], BaseConnectionManager[T]):
    """
    Connection manager driven by an async context manager.

    The factory and its arguments are stored at construction time. The
    context is created and entered in `_establish_connection()` and exited
    in `_close_connection()`.
    """

    # Error types that are logged at debug level only when raised while exiting the context
    _BENIGN_CLEANUP_ERRORS = (
        BrokenPipeError, ConnectionResetError, ValueError,
        OSError, IOError, ProcessLookupError, RuntimeError,
        GeneratorExit
    )

    def __init__(self,
                 ctx_factory: Callable[P, Any],
                 *args: P.args,
                 **kwargs: P.kwargs):
        super().__init__()
        self._ctx: Optional[Any] = None
        self._factory = ctx_factory
        self._factory_args = args
        self._factory_kwargs = kwargs

    async def _establish_connection(self) -> T:
        """
        Create the context manager and enter it.

        Errors are always re-raised. Errors that look like a TaskGroup /
        exception-group race are logged at debug level and the half-created
        context is exited first; anything else is logged at error level.
        """
        factory_name = self._factory.__name__
        self._logger.debug("Creating context via %s", factory_name)

        try:
            self._ctx = self._factory(*self._factory_args, **self._factory_kwargs)
            entered: T = await self._ctx.__aenter__()
            self._logger.debug("Context %s entered successfully", factory_name)
            return entered
        except Exception as e:
            message = str(e).lower()
            error_kind = type(e).__name__
            race_markers = (
                "unhandled errors in a taskgroup",
                "cancel scope in a different task",
            )
            looks_like_race = (
                any(marker in message for marker in race_markers)
                or "exceptiongroup" in error_kind.lower()
            )

            if not looks_like_race:
                # Real error - log at error level
                self._logger.error(f"Error establishing connection via {factory_name}: {e}")
                raise

            # Race during concurrent setup: keep the log quiet and let the
            # caller's retry logic see the exception
            self._logger.debug(
                f"Benign TaskGroup race condition during {factory_name} connection: {error_kind}"
            )
            # Clean up the partially created context
            if self._ctx is not None:
                try:
                    await self._ctx.__aexit__(None, None, None)
                except Exception:
                    pass  # Ignore cleanup errors
                self._ctx = None
            raise

    async def _close_connection(self) -> None:
        """
        Exit the stored context, if any.

        Whatever `__aexit__` raises is logged and not re-raised, and `_ctx` is
        cleared in a `finally` so a failing exit cannot leave it behind.
        """
        if self._ctx is None:
            return

        try:
            self._logger.debug("Exiting context %s", self._factory.__name__)

            # Give subprocesses a moment to flush buffers before closing
            import asyncio
            await asyncio.sleep(0.05)

            try:
                await self._ctx.__aexit__(None, None, None)
            except BaseException as e:
                # Deliberately broad: includes SystemExit, KeyboardInterrupt, etc.
                benign = self._BENIGN_CLEANUP_ERRORS
                # Either the error itself is benign, or it is an exception
                # group whose members are all benign
                is_benign = isinstance(e, benign) or (
                    hasattr(e, 'exceptions')
                    and all(isinstance(sub_e, benign) for sub_e in e.exceptions)
                )

                if is_benign:
                    self._logger.debug(f"Benign cleanup error for {self._factory.__name__}: {type(e).__name__}")
                else:
                    self._logger.warning(f"Error during context exit for {self._factory.__name__}: {type(e).__name__}: {e}")
                # Don't re-raise - we want cleanup to complete
        except Exception as e:
            # Anything unexpected outside the __aexit__ call itself
            self._logger.warning(f"Unexpected error during cleanup for {self._factory.__name__}: {e}")
        finally:
            self._ctx = None
================================================
FILE: anytool/grounding/core/transport/task_managers/base.py
================================================
"""
Base connection manager for all backend connectors.
This module provides an abstract base class for different types of connection
managers used in all backend connectors.
Flow: start() → launch_connection_task() → call subclass _establish_connection() → notify ready → maintain connection until stop() → call subclass _close_connection() → cleanup
"""
import asyncio
from abc import ABC, abstractmethod
from typing import Generic, TypeVar
from anytool.utils.logging import Logger
T = TypeVar("T")
class BaseConnectionManager(Generic[T], ABC):
"""Abstract base class for connection managers.
This class defines the interface for different types of connection managers
used with all backend connectors.
"""
def __init__(self):
"""Initialize a new connection manager."""
self._ready_event = asyncio.Event()
self._done_event = asyncio.Event()
self._exception: Exception | None = None
self._connection: T | None = None
self._task: asyncio.Task | None = None
self._logger = Logger.get_logger(f"{__name__}.{self.__class__.__name__}")
@abstractmethod
async def _establish_connection(self) -> T:
"""Establish the connection.
This method should be implemented by subclasses to establish
the specific type of connection needed.
Returns:
The established connection.
Raises:
Exception: If connection cannot be established.
"""
pass
@abstractmethod
async def _close_connection(self) -> None:
"""Close the connection.
This method should be implemented by subclasses to close
the specific type of connection.
"""
pass
async def start(self, timeout: float | None = None) -> T:
"""Start the connection manager and establish a connection.
Args:
timeout: Optional timeout in seconds. If None, waits indefinitely.
If specified, will cancel the background task on timeout.
Returns:
The established connection.
Raises:
TimeoutError: If connection establishment times out.
Exception: If connection cannot be established.
"""
# Reset state
self._ready_event.clear()
self._done_event.clear()
self._exception = None
# Create a task to establish and maintain the connection
self._task = asyncio.create_task(self._connection_task(), name=f"{self.__class__.__name__}_task")
# Wait for the connection to be ready or fail (with optional timeout)
try:
if timeout is not None:
await asyncio.wait_for(self._ready_event.wait(), timeout=timeout)
else:
await self._ready_event.wait()
except asyncio.TimeoutError:
# Timeout! Cancel the background task
self._logger.warning(f"Connection establishment timed out after {timeout}s, cancelling...")
if self._task and not self._task.done():
self._task.cancel()
try:
await asyncio.wait_for(self._task, timeout=2.0) # Give it 2s to cleanup
except (asyncio.CancelledError, asyncio.TimeoutError):
pass
except Exception as e:
self._logger.debug(f"Error during task cancellation: {e}")
raise TimeoutError(f"Connection establishment timed out after {timeout}s")
# If there was an exception, raise it
if self._exception:
# Check if this is a benign TaskGroup race condition
error_msg = str(self._exception).lower()
is_benign_taskgroup_error = (
"unhandled errors in a taskgroup" in error_msg or
"cancel scope in a different task" in error_msg or
"exceptiongroup" in type(self._exception).__name__.lower()
)
if is_benign_taskgroup_error:
# Log as debug - this is expected and will be retried
self._logger.debug(f"Benign TaskGroup race condition, will retry: {type(self._exception).__name__}")
else:
# Real error - log at error level
self._logger.error(f"Failed to start connection: {self._exception}")
raise self._exception
# Return the connection
if self._connection is None:
error_msg = "Connection was not established"
self._logger.error(error_msg)
raise RuntimeError(error_msg)
self._logger.info("Connection manager started successfully")
return self._connection
async def stop(self, timeout: float = 5.0) -> None:
"""Stop the connection manager and close the connection.
Args:
timeout: Maximum time to wait for cleanup (default 5s).
Ensures all async resources (including aiohttp sessions) are properly closed.
"""
if self._task and not self._task.done():
self._task.cancel()
try:
await asyncio.wait_for(self._task, timeout=timeout)
except asyncio.TimeoutError:
self._logger.warning(f"Task cleanup timed out after {timeout}s")
except asyncio.CancelledError:
pass # Expected
except Exception as e:
self._logger.warning(f"Error stopping task: {e}")
# Wait for the connection to be done (with timeout)
try:
await asyncio.wait_for(self._done_event.wait(), timeout=timeout)
except asyncio.TimeoutError:
self._logger.warning(f"Done event wait timed out after {timeout}s")
self._logger.info("Connection manager stopped")
def get_streams(self) -> T | None:
"""Get the current connection streams.
Returns:
The current connection (typically a tuple of read_stream, write_stream) or None if not connected.
"""
return self._connection
async def _connection_task(self) -> None:
"""Run the connection task.
This task establishes and maintains the connection until cancelled.
"""
try:
# Establish the connection
self._connection = await self._establish_connection()
self._logger.debug("Connection established")
# Signal that the connection is ready
self._ready_event.set()
# Wait indefinitely until cancelled
try:
await asyncio.Event().wait()
except asyncio.CancelledError:
raise
except asyncio.CancelledError:
raise
except Exception as e:
# Store the exception
self._exception = e
# Check if this is a benign TaskGroup race condition
error_msg = str(e).lower()
is_benign_taskgroup_error = (
"unhandled errors in a taskgroup" in error_msg or
"cancel scope in a different task" in error_msg or
"exceptiongroup" in type(e).__name__.lower()
)
if is_benign_taskgroup_error:
# Log as debug - this is expected during concurrent connection setup
self._logger.debug(f"Benign TaskGroup race condition in connection task: {type(e).__name__}")
else:
# Real error - log at error level
self._logger.error(f"Connection task failed: {e}")
# Signal that the connection is ready (with error)
self._ready_event.set()
finally:
# Close the connection if it was established
if self._connection is not None:
try:
await self._close_connection()
except Exception as e:
self._logger.warning(f"Error closing connection: {e}")
self._connection = None
# Signal that the connection is done
self._done_event.set()
================================================
FILE: anytool/grounding/core/transport/task_managers/noop.py
================================================
"""No-op connection manager for local (in-process) connectors.
Local connectors execute commands directly via subprocess, so they don't
need a real network connection. This manager satisfies the
BaseConnectionManager interface that BaseConnector requires.
"""
import asyncio
from typing import Any
from .base import BaseConnectionManager
class NoOpConnectionManager(BaseConnectionManager[Any]):
    """Connection manager that has nothing to connect to.

    It reports 'ready' as soon as it is started because establishing the
    "connection" just returns a sentinel.

    Used by LocalShellConnector and LocalGUIConnector.
    """

    async def _establish_connection(self) -> Any:
        """Return a truthy sentinel in place of a real connection object."""
        sentinel = True
        return sentinel

    async def _close_connection(self) -> None:
        """Nothing was opened, so there is nothing to release."""
        return None
================================================
FILE: anytool/grounding/core/transport/task_managers/placeholder.py
================================================
from typing import Any
from .base import BaseConnectionManager
class PlaceholderConnectionManager(BaseConnectionManager[Any]):
    """Stand-in connection manager that must be swapped out before use.

    Intended for connectors that only build their real connection manager
    during the connect() phase.
    """

    async def _establish_connection(self) -> Any:
        """Always fail: a placeholder cannot open a connection."""
        message = "PlaceholderConnectionManager should be replaced before use"
        raise NotImplementedError(message)

    async def _close_connection(self) -> None:
        """Nothing to close for a placeholder."""
        return None
================================================
FILE: anytool/grounding/core/types.py
================================================
from enum import Enum
from datetime import datetime
from typing import Any, Dict, Generic, List, TypeVar, Optional
import jsonschema
from pydantic import BaseModel, Field, ConfigDict
# Pydantic v2 compatibility
try:
from pydantic import RootModel
PYDANTIC_V2 = True
except ImportError:
PYDANTIC_V2 = False
class BackendType(str, Enum):
    # String-valued identifiers for the available grounding backends.
    # Members are also `str` instances, so they compare equal to their values.
    MCP = "mcp"
    SHELL = "shell"
    WEB = "web"
    GUI = "gui"
    SYSTEM = "system"
    # Sentinel for "no backend chosen".
    NOT_SET = "not_set"
class ToolStatus(str, Enum):
    # Outcome of a single tool execution.
    SUCCESS = "success"
    ERROR = "error"
class SessionStatus(str, Enum):
    # Connection state of a backend session.
    CONNECTED = "connected"
    DISCONNECTED = "disconnected"
    CONNECTING = "connecting"
# Identifier aliases: each may be either a string or an integer.
ProgressToken = str | int
RequestId = str | int
# Type variables that parameterize the generic RPC message models below.
# Params may be a pydantic model, a plain dict, or None; methods are strings.
RequestParamsT = TypeVar("RequestParamsT", bound=BaseModel | Dict[str, Any] | None)
NotificationParamsT = TypeVar("NotificationParamsT", bound=BaseModel | Dict[str, Any] | None)
MethodT = TypeVar("MethodT", bound=str)
class BaseEntity(BaseModel):
    # Common base for the models in this module.
    # Free-form metadata attached to any entity; defaults to a fresh empty dict.
    metadata: Dict[str, Any] = Field(default_factory=dict)

    # extra="allow": fields not declared on the model are accepted, not rejected.
    model_config = ConfigDict(extra="allow")
class JsonRpcBase(BaseEntity):
    # Protocol version tag carried by every RPC-style model.
    jsonrpc: str = "2.0"
class RpcMessage(JsonRpcBase, Generic[MethodT, RequestParamsT]):
    # Generic envelope: `method` names the call, `params` carries its payload.
    method: MethodT
    params: RequestParamsT
class Request(RpcMessage[MethodT, RequestParamsT]):
    # A request without an id (None) is treated as a Notification.
    id: RequestId | None = None
class Notification(RpcMessage[MethodT, NotificationParamsT]):
    # Adds no fields of its own; exists as a distinct message type.
    pass
class Result(JsonRpcBase):
    # Marker base for result payloads; concrete results subclass it.
    pass
class ErrorData(BaseEntity):
    # Structured error: numeric code, human-readable message, optional payload.
    code: int
    message: str
    data: Any | None = None
class ToolResult(Result):
    """Tool execution result"""
    status: ToolStatus
    # Tool output; any type is accepted, empty string when nothing was produced.
    content: Any = ""
    # Either a structured ErrorData or a plain message; None when there is no error.
    error: ErrorData | str | None = None
    execution_time: float | None = None

    @property
    def is_success(self) -> bool:
        # True exactly when the status is SUCCESS.
        return self.status == ToolStatus.SUCCESS

    @property
    def is_error(self) -> bool:
        # True exactly when the status is ERROR.
        return self.status == ToolStatus.ERROR
class SecurityPolicy(BaseEntity):
    # Coarse allow/deny switches plus a command blocklist and a domain allowlist.
    allow_shell_commands: bool = True
    allow_network_access: bool = True
    allow_file_access: bool = True
    # Empty list means "no domain restriction" (see check()).
    allowed_domains: List[str] = Field(default_factory=list)
    blocked_commands: List[str] = Field(default_factory=list)
    sandbox_enabled: bool = False

    @classmethod
    def from_dict(cls, data: Dict) -> "SecurityPolicy":
        """
        Create SecurityPolicy from configuration dict.

        Keys that are not model fields are ignored.

        Supports two formats for blocked_commands:
        1. List format (applies to all OS): ["cmd1", "cmd2"]
        2. Dict format (OS-specific):
        {
            "common": ["cmd1", "cmd2"],
            "linux": ["cmd3"],
            "darwin": ["cmd4"],
            "windows": ["cmd5"]
        }
        When using dict format, merges 'common' commands with current OS-specific commands.
        Any other type for blocked_commands is replaced by an empty list.
        """
        import sys

        processed_data = {}
        for k, v in data.items():
            if k not in cls.model_fields:
                continue

            # Special handling for blocked_commands
            if k == "blocked_commands":
                if isinstance(v, dict):
                    # Dict format: merge common + OS-specific
                    blocked_list = list(v.get("common", []))

                    # Determine current OS
                    system = sys.platform
                    if system.startswith("linux"):
                        os_key = "linux"
                    elif system == "darwin":
                        os_key = "darwin"
                    elif system.startswith("win"):
                        os_key = "windows"
                    else:
                        os_key = None

                    # Merge OS-specific commands
                    if os_key and os_key in v:
                        blocked_list.extend(v[os_key])
                    processed_data[k] = blocked_list
                elif isinstance(v, list):
                    # List format: use as-is
                    processed_data[k] = v
                else:
                    # Invalid format, use empty list
                    processed_data[k] = []
            else:
                processed_data[k] = v
        return cls(**processed_data)

    def check(self, *, command: str | None = None, domain: str | None = None) -> bool:
        """
        return True if allowed, False if denied.

        Command check uses token-level matching to prevent simple space/escape bypasses.
        A command that shlex cannot parse (e.g. unbalanced quotes) is still
        checked, using plain whitespace splitting, instead of raising ValueError.
        """
        import shlex

        # Shell / Python command check
        if command:
            if not self.allow_shell_commands:
                return False
            try:
                tokens = [t.lower() for t in shlex.split(command, posix=True)]
            except ValueError:
                # Same fallback as find_dangerous_tokens(): malformed quoting
                # must not crash the policy check or skip the blocklist.
                tokens = [t.lower() for t in command.split()]
            blocked_set = {b.lower() for b in self.blocked_commands}
            if any(tok in blocked_set for tok in tokens):
                return False

        # Network access check
        if domain:
            if not self.allow_network_access:
                return False
            if self.allowed_domains and domain not in self.allowed_domains:
                return False
        return True

    def find_dangerous_tokens(self, command: str) -> List[str]:
        """
        Find and return all dangerous tokens in the command.

        Tokens are lower-cased before being compared with blocked_commands.
        Returns empty list if no dangerous tokens found.
        """
        import shlex

        if not command:
            return []
        try:
            tokens = [t.lower() for t in shlex.split(command, posix=True)]
        except ValueError:
            # If shlex.split fails, fall back to simple split
            tokens = [t.lower() for t in command.split()]
        blocked_set = {b.lower() for b in self.blocked_commands}
        dangerous = [tok for tok in tokens if tok in blocked_set]
        return dangerous
class ToolSchema(BaseEntity):
    # Declarative description of a tool: identity, JSON-Schema parameters,
    # optional hints, owning backend, and an optional security policy.
    name: str
    description: str | None = None
    parameters: Dict[str, Any] = Field(default_factory=dict) # JSON Schema, optional
    return_schema: Dict[str, Any] = Field(default_factory=dict)
    examples: List[dict] = Field(default_factory=list)
    usage_hint: str | None = None
    latency_hint: str | None = None
    backend_type: BackendType
    security_policy: SecurityPolicy | None = None

    def validate_parameters(self, params: Dict[str, Any], *, raise_exc: bool = False) -> bool:
        """Validate ``params`` against this tool's JSON Schema.

        Returns True when the parameters are valid, or when the tool declares
        no parameters and none were supplied. Otherwise returns False, or
        raises (ValueError / jsonschema.ValidationError) when ``raise_exc``
        is set.
        """
        if not self.parameters:
            # Tool declares no parameters: only an empty call is acceptable.
            if not params:
                return True
            if raise_exc:
                raise ValueError(f"Tool '{self.name}' does not accept any parameters, but got: {list(params.keys())}")
            return False

        try:
            jsonschema.validate(params, self.parameters)
        except jsonschema.ValidationError:
            if raise_exc:
                raise
            return False
        return True

    def is_allowed(self, *, command: str | None = None, domain: str | None = None) -> bool:
        """Consult the attached security policy; tools without one allow everything."""
        policy = self.security_policy
        if not policy:
            return True
        return policy.check(command=command, domain=domain)
class SessionConfig(BaseEntity):
    # Settings describing how a backend session is created and maintained.
    session_name: str
    backend_type: BackendType
    connection_params: Dict[str, Any] = Field(default_factory=dict)
    # NOTE(review): units of `timeout` / `health_check_interval` are not
    # stated here; presumably seconds, confirm against the session code.
    timeout: int = 30
    max_retries: int = 3
    auto_reconnect: bool = True
    auto_connect: bool = True
    health_check_interval: int = 5
    custom_settings: Dict[str, Any] = Field(default_factory=dict)
class SessionInfo(SessionConfig):
    # SessionConfig plus runtime state: current status and two timestamps.
    status: SessionStatus
    created_at: datetime
    last_activity: datetime
class SandboxOptions(BaseEntity):
    # Direct API key for sandbox provider (e.g., E2B API key).
    # If not provided, will use E2B_API_KEY environment variable.
    # NOTE(review): the field is declared required (no default), so the
    # environment-variable fallback must happen before this model is built.
    api_key: str

    # Template ID for the sandbox environment.
    # Default: 'base'
    sandbox_template_id: Optional[str] = None

    # Command to run supergateway.
    # Default: 'npx -y supergateway'
    supergateway_command: Optional[str] = None
# ClientMessage needs RootModel, which only exists in Pydantic v2.
if not PYDANTIC_V2:
    # Pydantic v1 fallback: not used in current codebase
    ClientMessage = None  # type: ignore
else:
    class ClientMessage(RootModel[Request[Any, str] | Notification[Any, str]]):
        """
        Unified deserialization entry: `ClientMessage.model_validate_json(raw_bytes)`
        """
================================================
FILE: anytool/llm/__init__.py
================================================
from .client import LLMClient
================================================
FILE: anytool/llm/client.py
================================================
import litellm
import json
import asyncio
import time
from typing import List, Sequence, Union, Dict, Optional
from dotenv import load_dotenv
from openai.types.chat import ChatCompletionToolParam
from anytool.grounding.core.types import ToolSchema, ToolResult, ToolStatus
from anytool.grounding.core.tool import BaseTool
from anytool.utils.logging import Logger
# Load variables from a .env file (python-dotenv) at import time.
load_dotenv()
# Disable LiteLLM verbose logging to prevent stdout blocking with large tool schemas
litellm.set_verbose = False
litellm.suppress_debug_info = True
# Module-level logger used by the helper functions in this file.
logger = Logger.get_logger(__name__)
def _sanitize_schema(params: Dict) -> Dict:
"""Sanitize tool parameter schema to comply with Claude API requirements.
Fixes common issues:
- Empty object schemas (no properties, no required)
- Missing required fields for Claude compatibility
"""
if not params:
return {"type": "object", "properties": {}, "required": []}
# Deep copy to avoid modifying the original
import copy
sanitized = copy.deepcopy(params)
# Anthropic API requires top-level type to be 'object'
# If it's not an object, wrap the schema as a property of an object
top_level_type = sanitized.get("type")
if top_level_type and top_level_type != "object":
# Wrap non-object schema as a single property called "value"
logger.debug(f"[SCHEMA_SANITIZE] Wrapping non-object schema (type={top_level_type}) into object")
wrapped = {
"type": "object",
"properties": {
"value": sanitized # The original schema becomes a property
},
"required": ["value"] # Make it required
}
sanitized = wrapped
# If type is object but missing properties/required, add them
if sanitized.get("type") == "object":
if "properties" not in sanitized:
sanitized["properties"] = {}
if "required" not in sanitized:
sanitized["required"] = []
# Remove non-standard fields that may cause issues (like 'title')
sanitized.pop("title", None)
# Recursively sanitize nested properties
if "properties" in sanitized and isinstance(sanitized["properties"], dict):
for prop_name, prop_schema in list(sanitized["properties"].items()):
if isinstance(prop_schema, dict):
# Remove title from nested properties
prop_schema.pop("title", None)
return sanitized
def _schema_to_openai(schema: ToolSchema) -> ChatCompletionToolParam:
    """Build the OpenAI ChatCompletion tool entry for a ToolSchema.

    The entry always carries a 'parameters' object: the sanitized schema when
    the tool declares parameters, otherwise an empty object schema.
    """
    raw_params = schema.parameters
    if raw_params:
        parameters = _sanitize_schema(raw_params)
        # Debug: verify sanitization worked
        if "title" in raw_params and "title" not in parameters:
            logger.debug(f"Sanitized tool '{schema.name}': removed title")
    else:
        # Claude requires parameters field even if empty
        parameters = {"type": "object", "properties": {}, "required": []}

    return {
        "type": "function",
        "function": {
            "name": schema.name,
            "description": schema.description or "",
            "parameters": parameters,
        },
    }
def _prepare_tools_for_llmclient(
    tools: List[BaseTool] | None,
    fmt: str = "openai",
) -> tuple[Sequence[Union[ToolSchema, ChatCompletionToolParam]], Dict[str, BaseTool]]:
    """Turn a BaseTool list into what LLMClient sends to the model, deduplicated.

    Args:
        tools: BaseTool instance list (should be obtained from GroundingClient and bound to runtime_info)
            if None or empty list, return empty list
        fmt: output format, "openai" for OpenAI format

    Returns:
        (tool definitions, mapping from the name shown to the LLM to its BaseTool).
        For "openai", tools whose schema name occurs more than once are exposed
        as "<server_name>__<name>"; an exposed name that repeats is skipped.
        For any other format the raw schemas are returned, keyed by schema name.
    """
    if not tools:
        return [], {}

    if fmt != "openai":
        by_name = {tool.schema.name: tool for tool in tools}
        return [tool.schema for tool in tools], by_name

    # First pass: how many tools share each schema name?
    occurrences = {}
    for tool in tools:
        schema_name = tool.schema.name
        occurrences[schema_name] = occurrences.get(schema_name, 0) + 1

    # Second pass: build the tool list, prefixing ambiguous names.
    prepared = []
    tool_map = {}  # llm_name -> BaseTool
    emitted = set()
    for tool in tools:
        original_name = tool.schema.name
        llm_name = original_name
        if occurrences[original_name] > 1:
            server_name = "unknown"
            if tool.is_bound and tool.runtime_info and tool.runtime_info.server_name:
                server_name = tool.runtime_info.server_name
            llm_name = f"{server_name}__{original_name}"

        if llm_name in emitted:
            logger.warning(f"[TOOL_DEDUP] Skipping duplicate tool: {llm_name}")
            continue
        emitted.add(llm_name)

        entry = _schema_to_openai(tool.schema)
        entry["function"]["name"] = llm_name
        prepared.append(entry)
        tool_map[llm_name] = tool
        if llm_name != original_name:
            logger.info(f"[TOOL_RENAME] {original_name} -> {llm_name}")

    logger.info(f"[SCHEMA_SANITIZE] Prepared {len(prepared)} tools for LLM (from {len(tools)} total)")
    return prepared, tool_map
# Character budgets for tool output handed back to the LLM
# (token figures assume roughly 4 characters per token).
DEFAULT_SUMMARIZE_THRESHOLD_CHARS = 200000 # ~50K tokens, lowered from 400K to prevent context overflow
MAX_TOOL_RESULT_CHARS = 200000 # Fallback truncation limit when summarization fails (~50K tokens)
async def _summarize_tool_result(
    content: str,
    tool_name: str,
    task: str = "",
    model: str = "openrouter/anthropic/claude-sonnet-4.5",
    timeout: float = 60.0
) -> Optional[str]:
    """Use LLM to summarize large tool results.

    Args:
        content: Raw tool output to condense.
        tool_name: Name of the tool, used in the prompt and in log messages.
        task: Optional user task; when given, the summary is focused on it.
        model: Model identifier passed to litellm.
        timeout: Per-request timeout in seconds (an outer guard adds 5s).

    Returns:
        The summary prefixed with a "[SUMMARY of N chars]" header, where N is
        the size of the original content, or None if summarization failed for
        any reason (the caller is expected to fall back to truncation).
    """
    try:
        # Remember the true size: `content` may be shortened below, and the
        # sizes reported to the model and the caller should describe the
        # original result, not the truncated copy.
        original_len = len(content)
        logger.info(f"Summarizing tool result from '{tool_name}': {original_len:,} chars")

        # Pre-truncate if content is too large for the model (leave room for prompt + output)
        # Assuming ~4 chars per token, 200K tokens limit, 8K output, ~500 tokens for prompt
        # Safe input limit: (200K - 8K - 0.5K) * 4 = ~766K chars, but be conservative at 400K
        max_input_chars = 400000
        if original_len > max_input_chars:
            logger.warning(f"Pre-truncating content for summarization: {original_len:,} -> {max_input_chars:,} chars")
            content = content[:max_input_chars] + f"\n\n[TRUNCATED for summarization: original was {original_len:,} chars]"

        task_hint = f"\n\nUser's task: {task}\nSummarize with focus on information relevant to this task." if task else ""
        prompt = f"""Tool '{tool_name}' returned a large result ({original_len:,} chars). Summarize it concisely.{task_hint}
**Guidelines:**
- Structured data (coordinates, steps, etc.): Keep key summary (totals, start/end), omit repetitive details.
- Markup content (HTML, XML): Extract text and key data only, ignore tags/scripts.
- Long documents: Keep structure outline and essential sections.
- Lists/arrays: Summarize count and most relevant items.
- Always preserve: numbers, URLs, file paths, IDs, key identifiers.
Content:
{content}
Concise summary:"""

        response = await asyncio.wait_for(
            litellm.acompletion(
                model=model,
                messages=[{"role": "user", "content": prompt}],
                timeout=timeout
            ),
            timeout=timeout + 5
        )
        summary = response.choices[0].message.content.strip()
        result = f"[SUMMARY of {original_len:,} chars]\n{summary}"
        logger.info(f"Tool result summarized: {original_len:,} -> {len(result):,} chars")
        return result
    except Exception as e:
        # Best effort: any failure (timeout, API error, empty reply) yields None.
        logger.warning(f"Summarization failed for '{tool_name}': {e}")
        return None
async def _tool_result_to_message_async(
    result: ToolResult,
    *,
    tool_call_id: str,
    tool_name: str,
    task: str = "",
    summarize_threshold: int = DEFAULT_SUMMARIZE_THRESHOLD_CHARS,
    summarize_model: str = "openrouter/anthropic/claude-sonnet-4.5",
    enable_summarization: bool = True
) -> Dict:
    """Render a ToolResult as an OpenAI 'tool' message, summarizing oversized output.

    Args:
        result: Tool execution result
        tool_call_id: OpenAI tool_call ID
        tool_name: Tool name
        task: User's original task for context-aware summarization
        summarize_threshold: If content exceeds this, use LLM summarization
        summarize_model: Model to use for summarization
        enable_summarization: Whether to enable LLM summarization

    Returns:
        OpenAI ChatCompletion tool message (text only)
    """
    # Flatten the result into plain text
    if result.is_error:
        text_content = f"[ERROR] {result.error or 'unknown error'}"
    elif isinstance(result.content, str):
        text_content = result.content
    else:
        text_content = json.dumps(result.content, ensure_ascii=False, default=str)

    original_len = len(text_content)
    needs_summary = original_len > summarize_threshold and enable_summarization
    if needs_summary:
        summary = await _summarize_tool_result(text_content, tool_name, task, summarize_model)
        if summary:
            text_content = summary
        elif original_len > MAX_TOOL_RESULT_CHARS:
            # Summarization failed: hard-truncate, reserving room for the notice
            truncate_msg = f"\n\n[TRUNCATED: Original content was {original_len:,} chars, showing first {MAX_TOOL_RESULT_CHARS:,}]"
            keep = MAX_TOOL_RESULT_CHARS - len(truncate_msg)
            text_content = text_content[:keep] + truncate_msg
            logger.warning(f"Tool result truncated for '{tool_name}': {original_len:,} -> {len(text_content):,} chars (summarization failed)")

    message = {
        "role": "tool",
        "name": tool_name,
        "content": text_content,
        "tool_call_id": tool_call_id,
    }
    return message
async def _execute_tool_call(
    tool: BaseTool,
    openai_tool_call: Dict,
) -> ToolResult:
    """Run the tool named by an LLM-produced tool_call.

    Args:
        tool: BaseTool instance (must be obtained from GroundingClient and bound to runtime_info)
        openai_tool_call: LLMClient usable tool_call object, contains id, type, function etc. fields

    Raises:
        ValueError: If the tool is not bound to runtime_info.

    Arguments arriving as a JSON string are decoded first. When the tool
    declares parameters, "skip_visual_analysis" and any argument missing
    from the schema's properties are dropped before the tool is invoked.
    """
    if not tool.is_bound:
        raise ValueError(
            f"Tool '{tool.schema.name}' is not bound to runtime_info. "
            f"Please ensure tools are obtained from GroundingClient.list_tools() "
            f"with bind_runtime_info=True"
        )

    arguments = openai_tool_call["function"].get("arguments", "{}")
    if isinstance(arguments, str):
        arguments = json.loads(arguments or "{}")

    # Filter out parameters that are not in the tool's schema
    if isinstance(arguments, dict) and tool.schema.parameters:
        # Valid parameter names come from the JSON Schema 'properties' section
        schema_params = tool.schema.parameters
        valid_params = set()
        if isinstance(schema_params, dict) and "properties" in schema_params:
            valid_params = set(schema_params["properties"].keys())

        # "skip_visual_analysis" is always rejected; other names are rejected
        # only when the schema lists properties and the name is absent.
        rejected = [
            param_name
            for param_name in list(arguments.keys())
            if param_name == "skip_visual_analysis"
            or (valid_params and param_name not in valid_params)
        ]
        for param in rejected:
            arguments.pop(param)
            logger.debug(
                f"Removed parameter '{param}' from {tool.schema.name} "
                f"(not in tool schema)"
            )

    return await tool.invoke(
        parameters=arguments,
        keep_session=True
    )
class LLMClient:
"""LLMClient class for single round call"""
def __init__(
    self,
    model: str = "openrouter/anthropic/claude-sonnet-4.5",
    enable_thinking: bool = False,
    rate_limit_delay: float = 0.0,
    max_retries: int = 3,
    retry_delay: float = 1.0,
    timeout: float = 120.0,
    summarize_threshold_chars: int = DEFAULT_SUMMARIZE_THRESHOLD_CHARS,
    enable_tool_result_summarization: bool = True,
    **litellm_kwargs
):
    """
    Args:
        model: LLM model identifier
        enable_thinking: Whether to enable extended thinking mode
        rate_limit_delay: Minimum delay between API calls in seconds (0 = no delay)
        max_retries: Maximum number of retries on rate limit errors
        retry_delay: Initial delay between retries in seconds (exponential backoff)
        timeout: Request timeout in seconds (default: 120s)
        summarize_threshold_chars: If tool result exceeds this threshold, use LLM to
            summarize the result (default: DEFAULT_SUMMARIZE_THRESHOLD_CHARS,
            i.e. 200000 chars ≈ 50K tokens)
        enable_tool_result_summarization: Whether to enable LLM-based summarization for
            large tool results (default: True)
        **litellm_kwargs: Additional litellm parameters
    """
    # Model selection and behaviour switches
    self.model = model
    self.enable_thinking = enable_thinking

    # Pacing, retry and timeout policy
    self.rate_limit_delay = rate_limit_delay
    self.max_retries = max_retries
    self.retry_delay = retry_delay
    self.timeout = timeout

    # Handling of oversized tool results
    self.summarize_threshold_chars = summarize_threshold_chars
    self.enable_tool_result_summarization = enable_tool_result_summarization

    # Extra keyword arguments forwarded to litellm on each completion
    self.litellm_kwargs = litellm_kwargs
    self._logger = Logger.get_logger(__name__)
    # Timestamp of the previous API call, read by _rate_limit(); 0.0 = never.
    self._last_call_time = 0.0
async def _rate_limit(self):
"""Apply rate limiting by adding delay between API calls"""
if self.rate_limit_delay > 0:
current_time = time.time()
time_since_last_call = current_time - self._last_call_time
if time_since_last_call < self.rate_limit_delay:
sleep_time = self.rate_limit_delay - time_since_last_call
self._logger.debug(f"Rate limiting: waiting {sleep_time:.2f}s before next API call")
await asyncio.sleep(sleep_time)
self._last_call_time = time.time()
async def _call_with_retry(self, **completion_kwargs):
    """Call LLM with backoff retry on timeouts, rate limits and server errors.

    Timeout and retry strategy:
    - Single call timeout: self.timeout (default 120s)
    - Timeout: retried after self.retry_delay
    - Rate limit retry delays: 60s, 90s, 120s, ...
    - Server overload retry delays: 5s, 10s, 20s, ... capped at 60s
    - Any other error is raised immediately
    - At least one attempt is always made, even if max_retries <= 0

    Returns:
        The litellm completion response.

    Raises:
        TimeoutError: If the final attempt timed out.
        Exception: The last error raised by litellm once retries are
            exhausted, or the first non-retryable error.
    """
    # Guard against max_retries <= 0: without it the loop body never runs,
    # no request is sent, and `raise last_exception` would raise None.
    attempts = max(1, self.max_retries)
    last_exception = None

    for attempt in range(attempts):
        is_last_attempt = attempt >= attempts - 1
        try:
            # Add timeout to the completion call
            response = await asyncio.wait_for(
                litellm.acompletion(**completion_kwargs),
                timeout=self.timeout
            )
            return response
        except asyncio.TimeoutError:
            self._logger.error(
                f"LLM call timed out after {self.timeout}s (attempt {attempt + 1}/{attempts})"
            )
            last_exception = TimeoutError(f"LLM call timed out after {self.timeout}s")
            if is_last_attempt:
                raise last_exception
            # Retry on timeout with shorter delay
            self._logger.info(f"Retrying after {self.retry_delay}s delay...")
            await asyncio.sleep(self.retry_delay)
            continue
        except Exception as e:
            last_exception = e
            error_str = str(e).lower()

            # Check if it's a retryable error (detected from the message text)
            is_rate_limit = any(
                keyword in error_str
                for keyword in ['rate limit', 'rate_limit', 'too many requests', '429']
            )
            is_overloaded = any(
                keyword in error_str
                for keyword in ['overloaded', '500', '502', '503', '504', 'internal server error', 'service unavailable']
            )

            if not is_last_attempt and (is_rate_limit or is_overloaded):
                # Determine backoff delay based on error type
                if is_rate_limit:
                    # Use longer backoff for rate limits to cross rate limit windows
                    backoff_delay = 60 + (attempt * 30)  # 60s, 90s, 120s
                    error_type = "Rate limit"
                else:  # is_overloaded
                    # Use exponential backoff for server errors
                    backoff_delay = min(5 * (2 ** attempt), 60)  # 5s, 10s, 20s, max 60s
                    error_type = "Server overload"
                self._logger.warning(
                    f"{error_type} error (attempt {attempt + 1}/{attempts}), "
                    f"waiting {backoff_delay}s before retry..."
                )
                await asyncio.sleep(backoff_delay)
                continue

            # Not a retryable error, or max retries reached
            if is_last_attempt:
                self._logger.error(f"Max retries ({attempts}) reached, giving up")
            raise

    # Not reached in practice: the final attempt always returns or raises.
    raise last_exception
async def complete(
    self,
    messages: List[Dict] | str,
    tools: List[BaseTool] | None = None,
    execute_tools: bool = True,
    summary_prompt: Optional[str] = None,
    tool_result_callback: Optional[callable] = None,
    **kwargs
) -> Dict:
    """
    Single-round LLM call with optional tool execution.

    Args:
        messages: conversation history (List[Dict] for standard OpenAI format, or str for text format).
            A list is shallow-copied; the caller's list is not appended to.
        tools: BaseTool instance list (must be obtained from GroundingClient and bound to runtime_info).
            If None or empty list, only perform conversation, no tools.
        execute_tools: if the LLM returns tool_calls, whether to automatically execute them.
        summary_prompt: Optional custom prompt for requesting an iteration summary.
            If provided, a second LLM call is made after tool execution (only when at
            least one tool was executed). If None, no summary is requested.
        tool_result_callback: Optional async callback applied to each successful tool result.
            Called with keyword arguments result, tool_name, tool_call, backend and
            expected to return the (possibly modified) ToolResult. A failing callback is
            logged and the unmodified result is kept.
        **kwargs: per-call overrides. Only these keys are read here:
            "model", "enable_thinking", "tool_choice", "reasoning_effort".

    Returns:
        Dict with keys:
            "message": the assistant message built from the LLM response,
            "tool_results": list of dicts (tool_call, result, message, backend, server_name),
            "messages": the updated conversation including assistant/tool/summary messages,
            "has_tool_calls": whether the LLM response contained tool calls,
            "iteration_summary": summary text, or None when no summary was requested/produced.

    Raises:
        ValueError: if messages is neither a list nor a str, or the LLM response has no choices.
    """
    # 1. Process messages
    if isinstance(messages, str):
        current_messages = [{"role": "user", "content": messages}]
        user_task = messages
    elif isinstance(messages, list):
        current_messages = messages.copy()
        # Extract first user message as task for context-aware summarization
        user_task = next(
            (m.get("content", "") for m in messages if m.get("role") == "user"),
            ""
        )
    else:
        raise ValueError("messages must be List[Dict] or str")

    # 2. prepare base litellm completion kwargs
    # NOTE(review): self.litellm_kwargs is spread last, so a "model" key in it would
    # override the per-call model here -- confirm that is intended.
    completion_kwargs = {
        "model": kwargs.get("model", self.model),
        **self.litellm_kwargs,
    }

    # Add thinking/reasoning_effort only if explicitly enabled and not using tools
    enable_thinking = kwargs.get("enable_thinking", self.enable_thinking)

    # 3. if tools are provided, add them to the request
    llm_tools = None
    tool_map = {}  # llm_name -> BaseTool
    if tools:
        llm_tools, tool_map = _prepare_tools_for_llmclient(tools, fmt="openai")
        if llm_tools:
            completion_kwargs["tools"] = llm_tools
            completion_kwargs["tool_choice"] = kwargs.get("tool_choice", "auto")
            # Disable thinking when using tools to avoid format conflicts
            enable_thinking = False
            self._logger.debug(f"Prepared {len(llm_tools)} tools for LLM")
        else:
            self._logger.warning("Tools provided but none could be prepared for LLM")

    # Add thinking parameters if enabled
    if enable_thinking:
        completion_kwargs["reasoning_effort"] = kwargs.get("reasoning_effort", "medium")

    # 4. Apply rate limiting
    await self._rate_limit()

    # 5. Call LLM with retry (single round)
    completion_kwargs["messages"] = current_messages
    response = await self._call_with_retry(**completion_kwargs)
    if not response.choices:
        raise ValueError("LLM response has no choices")
    response_message = response.choices[0].message

    # 6. Build assistant message
    assistant_message = {
        "role": "assistant",
        "content": response_message.content or "",
    }
    tool_calls = getattr(response_message, 'tool_calls', None)
    if tool_calls:
        assistant_message["tool_calls"] = [
            {
                "id": tc.id,
                "type": "function",
                "function": {
                    "name": tc.function.name,
                    "arguments": tc.function.arguments
                }
            }
            for tc in tool_calls
        ]

    # Add assistant message to conversation
    current_messages.append(assistant_message)

    # 7. Execute tools if requested
    tool_results = []
    if execute_tools and tool_calls and tools:
        self._logger.info(f"Executing {len(tool_calls)} tool calls...")
        for tool_call in tool_calls:
            tool_name = tool_call.function.name

            # Extract tool metadata (backend / server name) for the result record
            tool_obj = tool_map.get(tool_name)
            backend = None
            server_name = None
            if tool_obj:
                try:
                    # Prefer runtime_info if bound
                    if getattr(tool_obj, 'is_bound', False) and getattr(tool_obj, 'runtime_info', None):
                        backend = tool_obj.runtime_info.backend.value
                        server_name = tool_obj.runtime_info.server_name
                    else:
                        backend = tool_obj.backend_type.value if hasattr(tool_obj, 'backend_type') else None
                except Exception as e:
                    # Metadata is best-effort; execution proceeds without it.
                    self._logger.debug(f"Could not read metadata for tool {tool_name}: {e}")

            # Log tool execution (best-effort: malformed arguments must not abort the call)
            try:
                if isinstance(tool_call.function.arguments, str):
                    safe_args_str = tool_call.function.arguments.strip() or "{}"
                    args = json.loads(safe_args_str)
                else:
                    args = tool_call.function.arguments
                args_str = json.dumps(args, ensure_ascii=False)[:200]
                self._logger.info(f"Calling {tool_name} with args: {args_str}")
            except Exception as e:
                # Narrowed from a bare except so KeyboardInterrupt/SystemExit propagate.
                self._logger.debug(f"Could not log arguments for {tool_name}: {e}")

            if tool_name not in tool_map:
                result = ToolResult(
                    status=ToolStatus.ERROR,
                    error=f"Tool '{tool_name}' not found"
                )
            else:
                try:
                    result = await _execute_tool_call(
                        tool=tool_map[tool_name],
                        openai_tool_call={
                            "id": tool_call.id,
                            "type": "function",
                            "function": {
                                "name": tool_call.function.name,
                                "arguments": tool_call.function.arguments
                            }
                        }
                    )
                    # Apply tool result callback if provided
                    if tool_result_callback and not result.is_error:
                        try:
                            result = await tool_result_callback(
                                result=result,
                                tool_name=tool_name,
                                tool_call=tool_call,
                                backend=backend
                            )
                        except Exception as e:
                            self._logger.warning(f"Tool result callback failed for {tool_name}: {e}")
                except Exception as e:
                    # Any execution failure is converted into an error result for the LLM
                    result = ToolResult(
                        status=ToolStatus.ERROR,
                        error=str(e)
                    )

            # Use async version with LLM summarization for large results
            tool_message = await _tool_result_to_message_async(
                result,
                tool_call_id=tool_call.id,
                tool_name=tool_name,
                task=user_task,
                summarize_threshold=self.summarize_threshold_chars,
                summarize_model=self.model,
                enable_summarization=self.enable_tool_result_summarization
            )
            current_messages.append(tool_message)

            # Store result
            tool_results.append({
                "tool_call": tool_call,
                "result": result,
                "message": tool_message,
                "backend": backend,
                "server_name": server_name,
            })
        self._logger.info(f"Tool execution completed, {len(tool_results)} tools executed")

    # 8. Request summary if provided and tools were executed
    iteration_summary = None
    if summary_prompt and tool_results:
        self._logger.debug("Requesting iteration summary from LLM")
        summary_message = {
            "role": "system",
            "content": summary_prompt
        }
        current_messages.append(summary_message)

        # Apply rate limiting before summary call
        await self._rate_limit()

        # Call LLM to generate summary (without tools); always uses self.model
        summary_kwargs = {
            **self.litellm_kwargs,
            "model": self.model,
            "messages": current_messages,
            "tools": [],
            "tool_choice": "none",
        }
        summary_response = await self._call_with_retry(**summary_kwargs)
        if summary_response.choices:
            summary_message = summary_response.choices[0].message
            iteration_summary = summary_message.content or ""
            # Add summary response to messages
            current_messages.append({
                "role": "assistant",
                "content": iteration_summary
            })
            self._logger.debug(f"Generated iteration summary: {iteration_summary[:100]}...")

    # 9. Return single-round result
    return {
        "message": assistant_message,
        "tool_results": tool_results,
        "messages": current_messages,
        "has_tool_calls": bool(tool_calls),
        "iteration_summary": iteration_summary
    }
@staticmethod
def format_messages_to_text(messages: List[Dict]) -> str:
"""Format conversation history to readable text (for logging/debugging)"""
formatted = ""
for msg in messages:
role = msg.get("role", "unknown").upper()
content = msg.get("content", "")
formatted += f"[{role}]\n{content}\n\n"
return formatted
================================================
FILE: anytool/local_server/README.md
================================================
# AnyTool Local Server (Desktop Version)
## 1. Introduction
The AnyTool Local Server is a **lightweight, cross-platform** Flask service that launches on the host workstation and exposes a uniform HTTP interface for controlling the native desktop environment. By translating REST calls into deterministic GUI actions—mouse and keyboard synthesis, window management, screenshot capture, file I/O—it enables higher-level AnyTool agents to interact with real software instead of simulated environments.
**Supported platforms:** Windows 10/11, macOS 11+ (Intel & Apple Silicon) and mainstream Linux distributions (X11/Wayland).
## 2. System Architecture
* **PlatformAdapter** abstracts OS-specific primitives (Windows, macOS, Linux).
* **Accessibility Helper** queries the UI accessibility tree for semantic information.
* **Screenshot Helper** captures full or partial screenshots (PNG).
* **Recorder** streams screen recordings for offline analysis.
* **Health / Feature Checker** validates runtime capabilities and permissions.
## 3. REST Endpoints
| Path | Method | Semantics |
|------|--------|-----------|
| `/` | GET | Liveness probe |
| `/platform` | GET | Return host OS metadata |
| `/execute` | POST | Execute a PyAutoGUI script fragment |
| `/execute_with_verification` | POST | Execute fragment and verify via template matching |
| `/run_python` | POST | Run arbitrary Python within a sandbox |
| `/run_bash_script` | POST | Run shell script (optional conda activation) |
| `/screenshot` | GET | Return PNG screenshot (full or ROI) |
| `/cursor_position` | GET | Current mouse coordinates |
| `/screen_size` | GET/POST | Query or set virtual screen resolution |
| `/list_directory` | POST | List directory contents |
*See* `main.py` *for the ~20 additional endpoints not listed here.*
## 4. Setup & Launch
> [!NOTE]
> Requires Python 3.12.
> Grant accessibility and screen-recording permissions before launching (macOS: *System Settings ▸ Privacy & Security*).
### Dependency Installation
```bash
cd anytool/local_server
pip install -r requirements.txt
```
### Launching the Server
*Python entry point*
```bash
python -m anytool.local_server.main \
--host 127.0.0.1 --port 5000 # flags optional; defaults read from config.json
```
*Bash helper script*
```bash
./run.sh # reads config.json then starts the service
```
Press `Ctrl+C` at any time to gracefully stop the server.
---
## 5. Configuration
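Runtime options live in `config.json`. The `//` annotations in the example below are explanatory only; strict JSON does not allow comments, so omit them from the real file: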
```json
{
"server": {
"host": "127.0.0.1", // listening address (0.0.0.0 for all interfaces)
"port": 5000, // default port
"debug": false // verbose Flask logs
}
}
```
================================================
FILE: anytool/local_server/__init__.py
================================================
from .main import app, run_server
__all__ = ["app", "run_server"]
================================================
FILE: anytool/local_server/config.json
================================================
{
"server": {
"host": "127.0.0.1",
"port": 5000,
"debug": false,
"threaded": true
}
}
================================================
FILE: anytool/local_server/feature_checker.py
================================================
import platform
import subprocess
import tempfile
from typing import Dict, Any
from anytool.utils.logging import Logger
logger = Logger.get_logger(__name__)
platform_name = platform.system()
class FeatureChecker:
    """Probes which local-server capabilities are usable on the current host.

    Every ``check_*`` method returns a bool and stores it in ``self._cache``;
    with ``use_cache=True`` (the default) a previously stored answer is
    returned without re-running the probe.
    """

    def __init__(self, platform_adapter=None, accessibility_helper=None):
        self.platform_adapter = platform_adapter
        self.accessibility_helper = accessibility_helper
        self.platform = platform_name
        self._cache = {}

    def _remember(self, key, value):
        """Store ``value`` in the cache under ``key`` and return it unchanged."""
        self._cache[key] = value
        return value

    def check_screenshot_available(self, use_cache: bool = True) -> bool:
        """True when pyautogui and PIL import and the reported screen size is positive."""
        if use_cache and 'screenshot' in self._cache:
            return self._cache['screenshot']
        try:
            import pyautogui
            # Unused below; presumably imported only to verify Pillow is installed.
            from PIL import Image
            dims = pyautogui.size()
            result = self._remember('screenshot', dims.width > 0 and dims.height > 0)
            logger.info(f"Screenshot check: {'available' if result else 'unavailable'}")
            return result
        except ImportError as e:
            logger.warning(f"Screenshot unavailable - missing dependency: {e}")
        except Exception as e:
            logger.error(f"Screenshot check failed: {e}")
        return self._remember('screenshot', False)

    def check_shell_available(self, use_cache: bool = True) -> bool:
        """True when a trivial ``echo test`` command exits with status 0."""
        if use_cache and 'shell' in self._cache:
            return self._cache['shell']
        cmd = ['cmd', '/c', 'echo', 'test'] if self.platform == "Windows" else ['echo', 'test']
        try:
            result = subprocess.run(cmd, capture_output=True, timeout=2, text=True)
            available = self._remember('shell', result.returncode == 0)
            logger.info(f"Shell check: {'available' if available else 'unavailable'}")
            return available
        except FileNotFoundError as e:
            logger.warning(f"Shell check failed - command not found: {e}")
        except Exception as e:
            logger.error(f"Shell check failed: {e}")
        return self._remember('shell', False)

    def check_python_available(self, use_cache: bool = True) -> bool:
        """True when any candidate interpreter answers ``--version`` with status 0."""
        if use_cache and 'python' in self._cache:
            return self._cache['python']
        if self.platform == "Windows":
            python_commands = ['py', 'python', 'python3']
        else:
            python_commands = ['python3', 'python']
        for python_cmd in python_commands:
            try:
                result = subprocess.run(
                    [python_cmd, '--version'],
                    capture_output=True,
                    timeout=2,
                    text=True,
                )
                if result.returncode == 0:
                    # Fall back to stderr when stdout carries no version text.
                    version = result.stdout.strip() or result.stderr.strip()
                    self._remember('python', True)
                    logger.info(f"Python check: available ({python_cmd} - {version})")
                    return True
            except FileNotFoundError:
                # This candidate is not on PATH; try the next one.
                pass
            except Exception as e:
                logger.debug(f"Error testing {python_cmd}: {e}")
        logger.warning("Python check failed - no valid Python interpreter found")
        return self._remember('python', False)

    def check_file_ops_available(self, use_cache: bool = True) -> bool:
        """True when a temp file can be written and read back unchanged."""
        if use_cache and 'file_ops' in self._cache:
            return self._cache['file_ops']
        try:
            payload = b'test data'
            with tempfile.NamedTemporaryFile(mode='w+b', delete=True) as scratch:
                scratch.write(payload)
                scratch.flush()
                scratch.seek(0)
                echoed = scratch.read()
            available = self._remember('file_ops', echoed == payload)
            logger.info(f"File operations check: {'available' if available else 'unavailable'}")
            return available
        except PermissionError as e:
            logger.warning(f"File operations check failed - permission denied: {e}")
        except Exception as e:
            logger.error(f"File operations check failed: {e}")
        return self._remember('file_ops', False)

    def check_window_mgmt_available(self, use_cache: bool = True) -> bool:
        """True when the platform adapter exposes at least one window-management method."""
        if use_cache and 'window_mgmt' in self._cache:
            return self._cache['window_mgmt']
        try:
            if not self.platform_adapter:
                logger.warning("Window management check failed - no platform adapter loaded")
                return self._remember('window_mgmt', False)
            available_methods = []
            for method in ('activate_window', 'close_window', 'list_windows'):
                if hasattr(self.platform_adapter, method):
                    available_methods.append(method)
            # Any one of the methods is enough for the feature to count as available.
            available = self._remember('window_mgmt', bool(available_methods))
            if not available:
                logger.warning(f"Window management check failed - platform adapter missing required methods")
            else:
                logger.info(f"Window management check: {'available' if available else 'unavailable'} - supported methods: {', '.join(available_methods)}")
            return available
        except Exception as e:
            logger.error(f"Window management check failed: {e}")
            return self._remember('window_mgmt', False)

    def check_recording_available(self, use_cache: bool = True) -> bool:
        """True when the platform adapter has both start_recording and stop_recording."""
        if use_cache and 'recording' in self._cache:
            return self._cache['recording']
        try:
            if not self.platform_adapter:
                logger.warning("Recording check failed - no platform adapter loaded")
                return self._remember('recording', False)
            available = self._remember(
                'recording',
                all(hasattr(self.platform_adapter, attr)
                    for attr in ('start_recording', 'stop_recording')),
            )
            logger.info(f"Recording check: {'available' if available else 'unavailable'}")
            return available
        except Exception as e:
            logger.error(f"Recording check failed: {e}")
            return self._remember('recording', False)

    def check_accessibility_available(self, use_cache: bool = True) -> bool:
        """Returns what the accessibility helper's ``is_available()`` reports."""
        if use_cache and 'accessibility' in self._cache:
            return self._cache['accessibility']
        try:
            if not self.accessibility_helper:
                logger.warning("Accessibility check failed - no accessibility helper loaded")
                return self._remember('accessibility', False)
            available = self._remember('accessibility', self.accessibility_helper.is_available())
            logger.info(f"Accessibility check: {'available' if available else 'unavailable'}")
            return available
        except Exception as e:
            logger.error(f"Accessibility check failed: {e}")
            return self._remember('accessibility', False)

    def check_platform_adapter_available(self, use_cache: bool = True) -> bool:
        """True when a platform adapter object was supplied."""
        if use_cache and 'platform_adapter' in self._cache:
            return self._cache['platform_adapter']
        available = self._remember('platform_adapter', self.platform_adapter is not None)
        logger.info(f"Platform adapter check: {'available' if available else 'unavailable'}")
        return available

    def check_all_features(self, use_cache: bool = True) -> Dict[str, bool]:
        """Run every probe and return ``{feature name: available}``."""
        logger.info(f"Checking all features (platform: {self.platform})")
        probes = (
            ('accessibility', self.check_accessibility_available),
            ('screenshot', self.check_screenshot_available),
            ('recording', self.check_recording_available),
            ('shell', self.check_shell_available),
            ('python', self.check_python_available),
            ('file_ops', self.check_file_ops_available),
            ('window_mgmt', self.check_window_mgmt_available),
            ('platform_adapter', self.check_platform_adapter_available),
        )
        results = {feature: probe(use_cache) for feature, probe in probes}
        available_count = sum(bool(flag) for flag in results.values())
        total_count = len(results)
        logger.info(f"Feature check completed: {available_count}/{total_count} features available")
        return results

    def clear_cache(self):
        """Forget all stored probe results."""
        self._cache.clear()
        logger.debug("Feature check cache cleared")

    def get_feature_report(self) -> Dict[str, Any]:
        """Return host platform details, per-feature results and summary counts."""
        results = self.check_all_features()
        host_info = {
            'system': self.platform,
            'release': platform.release(),
            'version': platform.version(),
            'machine': platform.machine(),
            'processor': platform.processor(),
        }
        total = len(results)
        usable = sum(bool(flag) for flag in results.values())
        return {
            'platform': host_info,
            'features': results,
            'summary': {
                'total': total,
                'available': usable,
                'unavailable': total - usable,
            },
        }
================================================
FILE: anytool/local_server/health_checker.py
================================================
import requests
import os
from pathlib import Path
from typing import Dict, Tuple, Optional
from anytool.utils.logging import Logger
from anytool.local_server.feature_checker import FeatureChecker
logger = Logger.get_logger(__name__)
from anytool.utils.display import colorize as _c
class HealthStatus:
    """Outcome of one health check: feature presence plus endpoint test result.

    ``endpoint_available`` is None when the endpoint was not tested.
    """

    def __init__(self, feature_available: bool, endpoint_available: Optional[bool],
                 endpoint_detail: str = ""):
        self.feature_available = feature_available
        self.endpoint_available = endpoint_available
        self.endpoint_detail = endpoint_detail

    @property
    def fully_available(self) -> bool:
        """Feature is present and its endpoint test succeeded."""
        if not self.feature_available:
            return self.feature_available
        # An untested endpoint (None) does not count as available.
        return self.endpoint_available == True

    def __str__(self):
        if not self.feature_available:
            return "Feature N/A"
        if self.endpoint_available is None:
            return "Feature OK (endpoint not tested)"
        if not self.endpoint_available:
            return f"Endpoint failed: {self.endpoint_detail}"
        return f"OK ({self.endpoint_detail})"
class HealthChecker:
"""Health checker with functional testing"""
def __init__(self, feature_checker: FeatureChecker,
base_url: str = "http://127.0.0.1:5000",
auto_cleanup: bool = True,
test_output_dir: str = None):
self.feature_checker = feature_checker
self.base_url = base_url
self.results = {}
self.auto_cleanup = auto_cleanup
# set the test output directory
if test_output_dir:
self.test_output_dir = Path(test_output_dir)
else:
current_dir = Path(__file__).parent
self.test_output_dir = current_dir / "temp"
# create the directory
self.test_output_dir.mkdir(exist_ok=True)
self.temp_files = [] # Track temporary files for cleanup
logger.info(f"Health checker initialized. Test output: {self.test_output_dir}, Auto-cleanup: {auto_cleanup}")
def _get_test_file_path(self, filename: str) -> str:
"""Get path for a test file"""
filepath = str(self.test_output_dir / filename)
self._register_temp_file(filepath)
return filepath
def _register_temp_file(self, filepath: str):
"""Register a temporary file for later cleanup"""
if filepath and filepath not in self.temp_files:
self.temp_files.append(filepath)
def cleanup_temp_files(self):
"""Clean up all temporary test files"""
if not self.auto_cleanup:
logger.info(f"Auto-cleanup disabled. Test files kept in: {self.test_output_dir}")
return
cleaned = 0
for filepath in self.temp_files:
try:
if os.path.exists(filepath):
os.remove(filepath)
cleaned += 1
logger.debug(f"Cleaned up: {filepath}")
except Exception as e:
logger.warning(f"Failed to clean up {filepath}: {e}")
self.temp_files.clear()
# if the directory is empty, delete it
try:
if self.test_output_dir.exists() and not any(self.test_output_dir.iterdir()):
self.test_output_dir.rmdir()
logger.debug(f"Removed empty directory: {self.test_output_dir}")
except:
pass
if cleaned > 0:
logger.info(f"Cleaned up {cleaned} test files")
def check_screenshot(self) -> Tuple[bool, str]:
"""Functionally test screenshot - actually take a screenshot and verify"""
# 1. Check feature first
if not self.feature_checker.check_screenshot_available():
return False, "Feature N/A"
# 2. Save screenshot to test directory
screenshot_path = self._get_test_file_path("test_screenshot.png")
try:
response = requests.get(f"{self.base_url}/screenshot", timeout=10)
if response.status_code != 200:
return False, f"HTTP {response.status_code}"
# 3. Save to file
with open(screenshot_path, 'wb') as f:
f.write(response.content)
# 4. Verify it's actually an image
content_type = response.headers.get('Content-Type', '')
if 'image' not in content_type:
return False, f"Invalid content type: {content_type}"
# 5. Check file size (should be > 1KB)
size_kb = len(response.content) / 1024
if size_kb < 1:
return False, "Image too small"
logger.info(f"Screenshot saved: {screenshot_path} ({size_kb:.1f}KB)")
return True, f"OK ({size_kb:.1f}KB)"
except requests.exceptions.Timeout:
return False, "Timeout"
except Exception as e:
return False, f"Error: {str(e)[:30]}"
def check_cursor_position(self) -> Tuple[bool, str]:
"""Test cursor position"""
if not self.feature_checker.check_screenshot_available():
return False, "Feature N/A"
try:
response = requests.get(f"{self.base_url}/cursor_position", timeout=5)
if response.status_code != 200:
return False, f"HTTP {response.status_code}"
data = response.json()
if 'x' in data and 'y' in data:
return True, f"({data['x']}, {data['y']})"
return False, "Invalid response"
except Exception as e:
return False, str(e)[:30]
def check_screen_size(self) -> Tuple[bool, str]:
"""Test screen size"""
if not self.feature_checker.check_screenshot_available():
return False, "Feature N/A"
try:
response = requests.get(f"{self.base_url}/screen_size", timeout=5)
if response.status_code != 200:
return False, f"HTTP {response.status_code}"
data = response.json()
if 'width' in data and 'height' in data:
return True, f"{data['width']}x{data['height']}"
return False, "Invalid response"
except Exception as e:
return False, str(e)[:30]
def check_shell_command(self) -> Tuple[bool, str]:
"""Functionally test shell command execution"""
if not self.feature_checker.check_shell_available():
return False, "Feature N/A"
try:
response = requests.post(
f"{self.base_url}/execute",
json={"command": "echo hello_test", "shell": True},
timeout=5
)
if response.status_code != 200:
return False, f"HTTP {response.status_code}"
data = response.json()
output = data.get('output', '').strip()
# Verify the command actually executed
if 'hello_test' in output:
return True, "Command executed"
return False, "Command failed"
except Exception as e:
return False, str(e)[:30]
def check_python_execution(self) -> Tuple[bool, str]:
"""Functionally test Python code execution"""
if not self.feature_checker.check_python_available():
return False, "Feature N/A"
try:
test_code = 'print("test_output_123")'
response = requests.post(
f"{self.base_url}/run_python",
json={"code": test_code},
timeout=5
)
if response.status_code != 200:
return False, f"HTTP {response.status_code}"
data = response.json()
content = data.get('content', '')
# Verify Python executed correctly
if 'test_output_123' in content:
return True, "Python executed"
return False, "Execution failed"
except Exception as e:
return False, str(e)[:30]
def check_bash_script(self) -> Tuple[bool, str]:
"""Functionally test Bash script execution"""
if not self.feature_checker.check_shell_available():
return False, "Feature N/A"
try:
response = requests.post(
f"{self.base_url}/run_bash_script",
json={"script": "echo bash_test_456"},
timeout=5
)
if response.status_code != 200:
return False, f"HTTP {response.status_code}"
data = response.json()
output = data.get('output', '')
if 'bash_test_456' in output:
return True, "Bash executed"
return False, "Execution failed"
except Exception as e:
return False, str(e)[:30]
def check_file_operations(self) -> Tuple[bool, str]:
"""Test file operations"""
if not self.feature_checker.check_file_ops_available():
return False, "Feature N/A"
try:
# Test list directory
response = requests.post(
f"{self.base_url}/list_directory",
json={"path": "."},
timeout=5
)
if response.status_code != 200:
return False, f"HTTP {response.status_code}"
data = response.json()
if 'items' in data and isinstance(data['items'], list):
return True, f"{len(data['items'])} items"
return False, "Invalid response"
except Exception as e:
return False, str(e)[:30]
def check_desktop_path(self) -> Tuple[bool, str]:
"""Test desktop path"""
if not self.feature_checker.check_file_ops_available():
return False, "Feature N/A"
try:
response = requests.get(f"{self.base_url}/desktop_path", timeout=5)
if response.status_code != 200:
return False, f"HTTP {response.status_code}"
data = response.json()
path = data.get('path', '')
if path and os.path.exists(path):
return True, "Path valid"
return False, "Path not found"
except Exception as e:
return False, str(e)[:30]
def check_window_management(self) -> Tuple[bool, str]:
"""Test window management"""
if not self.feature_checker.check_window_mgmt_available():
return False, "Feature N/A"
try:
# Just test if endpoint responds (window may not exist)
response = requests.post(
f"{self.base_url}/setup/activate_window",
json={"window_name": "NonExistentWindow"},
timeout=5
)
# 200 (success), 404 (not found), 501 (not supported) are all acceptable
if response.status_code in [200, 404, 501]:
return True, f"API available"
return False, f"HTTP {response.status_code}"
except Exception as e:
return False, str(e)[:30]
def check_recording(self) -> Tuple[bool, str]:
"""Functionally test recording - actually start and stop recording"""
if not self.feature_checker.check_recording_available():
return False, "Feature N/A"
recording_path = self._get_test_file_path("test_recording.mp4")
try:
# 1. Start recording
response = requests.post(f"{self.base_url}/start_recording", json={}, timeout=10)
if response.status_code == 501:
return False, "Not supported"
if response.status_code != 200:
return False, f"Start failed: {response.status_code}"
# 2. Wait a bit
import time
time.sleep(3.0) # Record for 3 seconds
# 3. Stop recording
response = requests.post(f"{self.base_url}/end_recording", json={}, timeout=15)
if response.status_code == 200:
# Save the recording file
with open(recording_path, 'wb') as f:
f.write(response.content)
size_kb = len(response.content) / 1024
logger.info(f"Recording saved: {recording_path} ({size_kb:.1f}KB)")
return True, f"OK ({size_kb:.1f}KB)"
else:
return False, f"Stop failed: {response.status_code}"
except Exception as e:
# Try to stop recording in case of error
try:
requests.post(f"{self.base_url}/end_recording", json={}, timeout=5)
except:
pass
return False, str(e)[:30]
def check_accessibility(self) -> Tuple[bool, str]:
"""Test accessibility tree"""
if not self.feature_checker.check_accessibility_available():
return False, "Feature N/A"
try:
response = requests.get(f"{self.base_url}/accessibility?max_depth=1", timeout=10)
if response.status_code != 200:
return False, f"HTTP {response.status_code}"
data = response.json()
if 'error' in data:
return False, "Permission denied"
# Should have some tree structure
if 'platform' in data or 'children' in data:
return True, "Tree available"
return False, "Invalid response"
except Exception as e:
return False, str(e)[:30]
def check_health_endpoint(self) -> Tuple[bool, str]:
"""Test health check endpoint"""
try:
response = requests.get(f"{self.base_url}/", timeout=5)
if response.status_code == 200:
data = response.json()
if data.get('status') == 'ok':
return True, "OK"
return False, f"HTTP {response.status_code}"
except Exception as e:
return False, str(e)[:30]
def check_platform_info(self) -> Tuple[bool, str]:
"""Test platform info endpoint"""
try:
response = requests.get(f"{self.base_url}/platform", timeout=5)
if response.status_code == 200:
data = response.json()
if 'system' in data:
return True, data['system']
return False, f"HTTP {response.status_code}"
except Exception as e:
return False, str(e)[:30]
def check_all(self, test_endpoints: bool = True) -> Dict[str, HealthStatus]:
"""
Check all features with functional testing
Args:
test_endpoints: Whether to test endpoints (False only checks features)
Returns:
{Feature name: HealthStatus}
"""
results = {}
if not test_endpoints:
# Only check features, not endpoints
feature_results = self.feature_checker.check_all_features()
for name, available in feature_results.items():
results[name] = HealthStatus(available, None, "")
self.results = results
return results
# Functional tests
test_functions = {
'Health Check': self.check_health_endpoint,
'Platform Info': self.check_platform_info,
'Screenshot': self.check_screenshot,
'Cursor Position': self.check_cursor_position,
'Screen Size': self.check_screen_size,
'Shell Command': self.check_shell_command,
'Python Execution': self.check_python_execution,
'Bash Script': self.check_bash_script,
'File Operations': self.check_file_operations,
'Desktop Path': self.check_desktop_path,
'Window Management': self.check_window_management,
'Recording': self.check_recording,
'Accessibility': self.check_accessibility,
}
for name, test_func in test_functions.items():
success, detail = test_func()
# Determine feature availability
if detail == "Feature N/A":
feature_available = False
endpoint_available = None
else:
feature_available = True
endpoint_available = success
results[name] = HealthStatus(feature_available, endpoint_available, detail)
# Clean up temporary files
self.cleanup_temp_files()
self.results = results
return results
def print_results(self, results: Dict[str, HealthStatus] = None,
show_endpoint_details: bool = False):
"""Print check results"""
if results is None:
results = self.results
if not results:
return
total = len(results)
feature_available = sum(1 for s in results.values() if s.feature_available)
fully_available = sum(1 for s in results.values() if s.fully_available)
# Categorize
basic = ['Health Check', 'Platform Info']
# Basic Features
print()
print(_c(" - Basic", 'c', bold=True))
basic_items = []
for name in basic:
if name in results:
status = results[name]
# Use colored dot instead of emoji
if status.fully_available:
icon = _c("●", 'g')
elif not status.feature_available:
icon = _c("●", 'rd')
elif status.endpoint_available is None:
icon = _c("●", 'y')
else:
icon = _c("●", 'y')
text = _c(name, 'gr' if not status.feature_available else '')
basic_items.append((icon, text, status))
# Display in rows of 4
for i in range(0, len(basic_items), 4):
line_items = []
for j in range(4):
if i + j < len(basic_items):
icon, text, status = basic_items[i + j]
line_items.append(f"{icon} {text:<15}")
print(" " + " ".join(line_items))
# Show details if requested
if show_endpoint_details:
for name in basic:
if name in results:
status = results[name]
print(f" {_c('·', 'gr')} {name}: {_c(str(status), 'gr')}")
# Advanced Features
print()
print(_c(" - Advanced", 'c', bold=True))
advanced_items = []
for name, status in results.items():
if name not in basic:
# Use colored dot instead of emoji
if status.fully_available:
icon = _c("●", 'g')
elif not status.feature_available:
icon = _c("●", 'rd')
elif status.endpoint_available is None:
icon = _c("●", 'y')
else:
icon = _c("●", 'y')
text = _c(name, 'gr' if not status.feature_available else '')
advanced_items.append((icon, text, status))
# Display in rows of 4
for i in range(0, len(advanced_items), 4):
line_items = []
for j in range(4):
if i + j < len(advanced_items):
icon, text, _ = advanced_items[i + j]
line_items.append(f"{icon} {text:<15}")
print(" " + " ".join(line_items))
# Show details if requested
if show_endpoint_details:
for name, status in results.items():
if name not in basic:
print(f" {_c('·', 'gr')} {name}: {_c(str(status), 'gr')}")
# Summary
from anytool.utils.display import print_separator
print()
print_separator()
print(f" {_c('Summary:', 'c', bold=True)} {_c(str(feature_available) + '/' + str(total), 'g' if feature_available == total else 'y')} features available", end='')
if any(s.endpoint_available is not None for s in results.values()):
print(f", {_c(str(fully_available) + '/' + str(total), 'g' if fully_available == total else 'y')} fully functional")
else:
print()
print_separator()
# Legend
print(f" {_c('Legend:', 'gr')} {_c('●', 'g')} Available {_c('●', 'y')} Partial/Untested {_c('●', 'rd')} Unavailable")
# Test files info
if self.temp_files and not self.auto_cleanup:
print()
print(f" {_c('Test files saved:', 'y')} {self.test_output_dir}")
print(f" {_c(str(len(self.temp_files)) + ' file(s) available for inspection', 'gr')}")
print()
def get_summary(self) -> dict:
    """Summarize the stored check results.

    Returns:
        An empty dict when ``self.results`` is empty. Otherwise a dict with
        'total' (number of entries), 'feature_available' and
        'fully_available' (counts of entries whose attribute of the same
        name is truthy) and 'details' (name -> ``str(status)``).
    """
    statuses = self.results
    if not statuses:
        return {}

    feature_count = len([s for s in statuses.values() if s.feature_available])
    full_count = len([s for s in statuses.values() if s.fully_available])
    details = {}
    for name, status in statuses.items():
        details[name] = str(status)

    return {
        'total': len(statuses),
        'feature_available': feature_count,
        'fully_available': full_count,
        'details': details
    }
def get_simple_features_dict(self) -> Dict[str, bool]:
    """Return the feature checker's result dict (used for banner display)."""
    features = self.feature_checker.check_all_features()
    return features
================================================
FILE: anytool/local_server/main.py
================================================
import os
import platform
import shlex
import subprocess
import signal
import time
import json
import uuid
from datetime import datetime
from flask import Flask, request, jsonify, send_file, abort
import pyautogui
import threading
from io import BytesIO
import tempfile
from anytool.utils.logging import Logger
from anytool.local_server.utils import AccessibilityHelper, ScreenshotHelper
from anytool.local_server.platform_adapters import get_platform_adapter
from anytool.local_server.health_checker import HealthChecker
from anytool.local_server.feature_checker import FeatureChecker
# Host operating system name as reported by platform.system().
platform_name = platform.system()

app = Flask(__name__)
app.config['MAX_CONTENT_LENGTH'] = 500 * 1024 * 1024  # 500MB

# Remove pyautogui's built-in delay between actions.
pyautogui.PAUSE = 0
if platform_name == "Darwin":
    pyautogui.DARWIN_CATCH_UP_TIME = 0

logger = Logger.get_logger(__name__)

TIMEOUT = 1800

# Handle of the running screen-recording process, or None when idle.
recording_process = None
# Fixed output location shared by the start/end recording endpoints.
recording_path = (
    os.path.join(os.environ.get('TEMP', 'C:\\Temp'), 'recording.mp4')
    if platform_name == "Windows"
    else "/tmp/recording.mp4"
)

accessibility_helper = AccessibilityHelper()
screenshot_helper = ScreenshotHelper()
platform_adapter = get_platform_adapter()
feature_checker = FeatureChecker(
    platform_adapter=platform_adapter,
    accessibility_helper=accessibility_helper,
)
def get_conda_activation_prefix(conda_env: str = None) -> str:
    """
    Generate platform-specific conda activation command prefix.

    The prefix ends with ``' && '`` so a command can be appended directly.

    Args:
        conda_env: Conda environment name (e.g., 'myenv')
    Returns:
        Activation command prefix string, empty if no conda_env. For a
        non-empty ``conda_env`` the result is never empty: when no known
        conda installation is found, the prefix falls back to a plain
        ``conda activate`` and relies on conda being available in the shell.
    """
    if not conda_env:
        return ""

    def _first_existing(candidates):
        # First candidate path that exists on disk, or None.
        return next((p for p in candidates if os.path.exists(p)), None)

    if platform_name == "Windows":
        # Windows: use the activate.bat of a common conda installation.
        # NOTE(review): conda_env is not escaped for cmd.exe here; callers
        # should only pass trusted environment names on Windows.
        activate_script = _first_existing([
            os.path.expandvars("%USERPROFILE%\\miniconda3\\Scripts\\activate.bat"),
            os.path.expandvars("%USERPROFILE%\\anaconda3\\Scripts\\activate.bat"),
            "C:\\ProgramData\\Miniconda3\\Scripts\\activate.bat",
            "C:\\ProgramData\\Anaconda3\\Scripts\\activate.bat",
        ])
        if activate_script:
            return f'call "{activate_script}" {conda_env} && '
        # Fallback: assume conda is in PATH
        return f'conda activate {conda_env} && '

    # Linux/macOS: source conda.sh then activate. The name is shell-quoted so
    # it cannot break out of the command line; names made only of safe
    # characters are left unchanged by shlex.quote.
    env_arg = shlex.quote(conda_env)
    conda_sh = _first_existing([
        os.path.expanduser("~/miniconda3/etc/profile.d/conda.sh"),
        os.path.expanduser("~/anaconda3/etc/profile.d/conda.sh"),
        "/opt/conda/etc/profile.d/conda.sh",
        "/usr/local/miniconda3/etc/profile.d/conda.sh",
        "/usr/local/anaconda3/etc/profile.d/conda.sh",
    ])
    if conda_sh:
        return f'source "{conda_sh}" && conda activate {env_arg} && '
    # Fallback: assume conda is already initialized in shell
    return f'conda activate {env_arg} && '
def wrap_script_with_conda(script: str, conda_env: str = None) -> str:
    """
    Wrap script with conda activation command.

    Args:
        script: Script text to run.
        conda_env: Conda environment name; falsy means "no wrapping".
    Returns:
        ``script`` unchanged when ``conda_env`` is falsy, or (on non-Windows)
        when no conda.sh is found. On Windows, the activation prefix followed
        by the script. Otherwise a bash script that sources conda.sh,
        attempts ``conda activate`` (failures are ignored) and then runs the
        user script.
    """
    if not conda_env:
        return script

    if platform_name == "Windows":
        return f"{get_conda_activation_prefix(conda_env)}{script}"

    candidates = [
        os.path.expanduser("~/miniconda3/etc/profile.d/conda.sh"),
        os.path.expanduser("~/anaconda3/etc/profile.d/conda.sh"),
        os.path.expanduser("~/opt/anaconda3/etc/profile.d/conda.sh"),
        "/opt/conda/etc/profile.d/conda.sh",
    ]
    conda_sh = next((p for p in candidates if os.path.exists(p)), None)

    if not conda_sh:
        # Conda not found - log warning and execute script directly without conda
        logger.warning(f"Conda environment '{conda_env}' requested but conda not found. Executing with system Python.")
        return script

    # Quote the environment name so it is always a single shell word in the
    # generated script; plain names are left unchanged by shlex.quote.
    env_arg = shlex.quote(conda_env)
    return (
        "#!/bin/bash\n"
        "# Initialize conda\n"
        f'if [ -f "{conda_sh}" ]; then\n'
        f'. "{conda_sh}"\n'
        f"conda activate {env_arg} 2>/dev/null || true\n"
        "fi\n"
        "# Run user script\n"
        f"{script}\n"
    )
# Assigned by run_server(); stays None until then.
health_checker = None


@app.route('/', methods=['GET'])
def health_check():
    """Health check endpoint: report service metadata and the feature map."""
    # Before run_server() has assigned health_checker, fall back to the
    # feature_checker's cached results.
    features = (
        health_checker.get_simple_features_dict()
        if health_checker
        else feature_checker.check_all_features(use_cache=True)
    )
    payload = {
        'status': 'ok',
        'service': 'AnyTool Desktop Server',
        'version': '1.0.0',
        'platform': platform_name,
        'features': features,
        'timestamp': datetime.now().isoformat()
    }
    return jsonify(payload)
@app.route('/platform', methods=['GET'])
def get_platform():
    """Return basic platform facts, extended by the adapter when it offers them."""
    info = dict(
        system=platform_name,
        release=platform.release(),
        version=platform.version(),
        machine=platform.machine(),
        processor=platform.processor(),
    )
    adapter_has_info = bool(platform_adapter) and hasattr(platform_adapter, 'get_system_info')
    if adapter_has_info:
        # Adapter-provided keys override the generic ones on collision.
        info.update(platform_adapter.get_system_info())
    return jsonify(info)
@app.route('/execute', methods=['POST'])
@app.route('/setup/execute', methods=['POST'])
def execute_command():
    """Run a command and return its captured output.

    JSON body:
        command: argv list, or a string (split with shlex unless ``shell``).
        shell: run through the shell (default False).
        timeout: seconds before the command is abandoned (default 120).

    Returns:
        200 with 'output', 'error' and 'returncode'; 400 for a malformed
        request; 408 on timeout; 500 on any other failure.
    """
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        # A missing or non-object body would otherwise crash on data.get().
        return jsonify({
            'status': 'error',
            'message': 'Request body must be a JSON object'
        }), 400

    # The 'command' key in the JSON request should contain the command to be executed.
    shell = data.get('shell', False)
    command = data.get('command', "" if shell else [])
    timeout = data.get('timeout', 120)

    if isinstance(command, str) and not shell:
        try:
            command = shlex.split(command)
        except ValueError as e:
            # e.g. unbalanced quotes
            return jsonify({
                'status': 'error',
                'message': f'Invalid command string: {e}'
            }), 400

    # Expand user directory (only string arguments can carry a "~/" prefix)
    if isinstance(command, list):
        command = [
            os.path.expanduser(arg) if isinstance(arg, str) and arg.startswith("~/") else arg
            for arg in command
        ]

    run_kwargs = dict(
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        shell=shell,
        text=True,
        timeout=timeout,
    )
    if platform_name == "Windows":
        # Avoid flashing a console window for each command.
        run_kwargs['creationflags'] = subprocess.CREATE_NO_WINDOW

    try:
        result = subprocess.run(command, **run_kwargs)
        return jsonify({
            'status': 'success',
            'output': result.stdout,
            'error': result.stderr,
            'returncode': result.returncode
        })
    except subprocess.TimeoutExpired:
        return jsonify({
            'status': 'error',
            'message': f'Command timeout after {timeout} seconds'
        }), 408
    except Exception as e:
        return jsonify({
            'status': 'error',
            'message': str(e)
        }), 500
def _verification_criteria_met(verification) -> bool:
    """Evaluate each supplied verification criterion once; True if all hold."""
    passed = True

    # Check window existence if specified
    if 'window_exists' in verification:
        window_name = verification['window_exists']
        try:
            if platform_name == 'Linux':
                wmctrl_result = subprocess.run(
                    ['wmctrl', '-l'],
                    capture_output=True,
                    text=True,
                    check=True
                )
                if window_name.lower() not in wmctrl_result.stdout.lower():
                    passed = False
            elif platform_adapter:
                # Use platform adapter to check window existence
                windows = platform_adapter.list_windows() if hasattr(platform_adapter, 'list_windows') else []
                if not any(window_name.lower() in str(w).lower() for w in windows):
                    passed = False
        except Exception:
            # Any failure to inspect windows counts as "not verified".
            passed = False

    # Check command execution if specified. This runs even when the window
    # check already failed, so the command is executed on every poll.
    if 'command_success' in verification:
        try:
            verify_result = subprocess.run(
                verification['command_success'],
                shell=True,
                capture_output=True,
                text=True,
                timeout=5
            )
            if verify_result.returncode != 0:
                passed = False
        except Exception:
            passed = False

    return passed


@app.route('/execute_with_verification', methods=['POST'])
@app.route('/setup/execute_with_verification', methods=['POST'])
def execute_command_with_verification():
    """Execute command and verify the result based on provided verification criteria.

    JSON body:
        command / shell: as for the plain execute endpoint.
        verification: optional dict with 'window_exists' (substring of a
            window title) and/or 'command_success' (shell command that must
            exit 0).
        max_wait_time: seconds to keep polling the criteria (default 10).
        check_interval: seconds between polls (default 1).

    Returns:
        200 when no verification is requested or it passes; 400 for a
        malformed request; 500 with status 'verification_failed' when the
        criteria are not met in time, or with status 'error' on failure.
    """
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({
            'status': 'error',
            'message': 'Request body must be a JSON object'
        }), 400

    shell = data.get('shell', False)
    command = data.get('command', "" if shell else [])
    verification = data.get('verification', {})
    max_wait_time = data.get('max_wait_time', 10)  # Maximum wait time in seconds
    check_interval = data.get('check_interval', 1)  # Check interval in seconds

    if isinstance(command, str) and not shell:
        try:
            command = shlex.split(command)
        except ValueError as e:
            return jsonify({
                'status': 'error',
                'message': f'Invalid command string: {e}'
            }), 400

    # Expand user directory
    if isinstance(command, list):
        command = [
            os.path.expanduser(arg) if isinstance(arg, str) and arg.startswith("~/") else arg
            for arg in command
        ]

    run_kwargs = dict(
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        shell=shell,
        text=True,
        timeout=120,
    )
    if platform_name == "Windows":
        run_kwargs['creationflags'] = subprocess.CREATE_NO_WINDOW

    # Execute the main command
    try:
        result = subprocess.run(command, **run_kwargs)

        # If no verification is needed, return immediately
        if not verification:
            return jsonify({
                'status': 'success',
                'output': result.stdout,
                'error': result.stderr,
                'returncode': result.returncode
            })

        # Wait and verify the result
        start_time = time.time()
        while time.time() - start_time < max_wait_time:
            if _verification_criteria_met(verification):
                return jsonify({
                    'status': 'success',
                    'output': result.stdout,
                    'error': result.stderr,
                    'returncode': result.returncode,
                    'verification': 'passed',
                    'wait_time': time.time() - start_time
                })
            time.sleep(check_interval)

        # Verification failed
        return jsonify({
            'status': 'verification_failed',
            'output': result.stdout,
            'error': result.stderr,
            'returncode': result.returncode,
            'verification': 'failed',
            'wait_time': max_wait_time
        }), 500
    except Exception as e:
        return jsonify({
            'status': 'error',
            'message': str(e)
        }), 500
def _get_machine_architecture() -> str:
"""Get the machine architecture, e.g., x86_64, arm64, aarch64, i386, etc.
Returns 'amd' for x86/AMD architectures, 'arm' for ARM architectures, or 'unknown'.
"""
architecture = platform.machine().lower()
if architecture in ['amd32', 'amd64', 'x86', 'x86_64', 'x86-64', 'x64', 'i386', 'i686']:
return 'amd'
elif architecture in ['arm64', 'aarch64', 'aarch32']:
return 'arm'
else:
return 'unknown'
@app.route('/setup/launch', methods=["POST"])
def launch_app():
    """Start an application in the background without waiting for it to exit."""
    payload = request.json
    shell = payload.get("shell", False)
    command = payload.get("command", "" if shell else [])

    if not shell and isinstance(command, str):
        command = shlex.split(command)

    # Expand user directory
    if isinstance(command, list):
        for position in range(len(command)):
            if command[position].startswith("~/"):
                command[position] = os.path.expanduser(command[position])

    try:
        # ARM architecture compatibility: replace google-chrome with chromium
        # ARM64 Chrome is not available yet, can only use Chromium
        needs_chromium = (
            isinstance(command, list)
            and 'google-chrome' in command
            and _get_machine_architecture() == 'arm'
        )
        if needs_chromium:
            # Only the first occurrence is swapped.
            command[command.index('google-chrome')] = 'chromium'
            logger.info("ARM architecture detected: replacing 'google-chrome' with 'chromium'")

        subprocess.Popen(command, shell=shell)

        if shell:
            cmd_str = command
        else:
            cmd_str = " ".join(command)
        logger.info(f"Application launched successfully: {cmd_str}")
        return jsonify({
            'status': 'success',
            'message': f'{cmd_str} launched successfully'
        })
    except Exception as exc:
        logger.error(f"Application launch failed: {str(exc)}")
        return jsonify({
            'status': 'error',
            'message': str(exc)
        }), 500
@app.route("/run_python", methods=['POST'])
def run_python():
    """Write the supplied code to a temp file and run it with Python.

    JSON body:
        code: Python source to execute (required).
        timeout: seconds before the run is abandoned (default 30).
        working_dir: working directory (default: server's cwd).
        env: extra environment variables merged over the server's.
        conda_env: optional conda environment to activate first.

    Returns:
        200 with 'content' (stdout followed by stderr) and 'returncode';
        400 when no code is supplied; 408 on timeout; 500 on other errors.
    """
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'status': 'error', 'message': 'Code not supplied!'}), 400

    code = data.get('code', None)
    timeout = data.get('timeout', 30)
    working_dir = data.get('working_dir', None)
    env = data.get('env', None)
    conda_env = data.get('conda_env', None)

    if not code:
        return jsonify({'status': 'error', 'message': 'Code not supplied!'}), 400

    # Generate unique filename
    if platform_name == "Windows":
        temp_filename = os.path.join(tempfile.gettempdir(), f"python_exec_{uuid.uuid4().hex}.py")
    else:
        temp_filename = f"/tmp/python_exec_{uuid.uuid4().hex}.py"

    try:
        # Python reads source files as UTF-8 by default, so write UTF-8
        # explicitly instead of relying on the locale encoding.
        with open(temp_filename, 'w', encoding='utf-8') as f:
            f.write(code)

        # Prepare environment variables
        exec_env = os.environ.copy()
        if env:
            exec_env.update(env)

        # The prefix is empty only when no conda_env was requested.
        activation_cmd = get_conda_activation_prefix(conda_env) if conda_env else ""

        if activation_cmd and platform_name == "Windows":
            # Windows: use cmd with activation
            argv = ['cmd', '/c', f'{activation_cmd}python "{temp_filename}"']
        elif activation_cmd:
            # Linux/macOS: use bash with activation
            argv = ['/bin/bash', '-c', f'{activation_cmd}python3 "{temp_filename}"']
        else:
            # No conda activation needed
            python_cmd = 'python' if platform_name == "Windows" else 'python3'
            argv = [python_cmd, temp_filename]

        result = subprocess.run(
            argv,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            timeout=timeout,
            cwd=working_dir or os.getcwd(),
            env=exec_env
        )

        output = result.stdout + result.stderr
        return jsonify({
            'status': 'success' if result.returncode == 0 else 'error',
            'content': output or "Code executed successfully (no output)",
            'returncode': result.returncode
        })
    except subprocess.TimeoutExpired:
        return jsonify({
            'status': 'error',
            'message': f'Execution timeout after {timeout} seconds'
        }), 408
    except Exception as e:
        return jsonify({
            'status': 'error',
            'message': str(e)
        }), 500
    finally:
        # Single cleanup point; a failed delete must not mask the response.
        if os.path.exists(temp_filename):
            try:
                os.remove(temp_filename)
            except OSError as cleanup_error:
                logger.warning(f"Could not remove temp file {temp_filename}: {cleanup_error}")
@app.route("/run_bash_script", methods=['POST'])
def run_bash_script():
    """Write the supplied script to a temp file and run it with bash.

    JSON body:
        script: bash script text (required).
        timeout: seconds before the run is abandoned (default 30).
        working_dir: working directory (default: server's cwd).
        env: extra environment variables merged over the server's.
        conda_env: optional conda environment to activate first.

    Returns:
        200 with 'output' (stdout and stderr interleaved), an empty 'error'
        and 'returncode'; 400 when no script is supplied; 500 on timeout or
        any other failure (with 'returncode' -1).
    """
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'status': 'error', 'message': 'Script not supplied!'}), 400

    script = data.get('script', None)
    timeout = data.get('timeout', 30)
    working_dir = data.get('working_dir', None)
    env = data.get('env', None)
    conda_env = data.get('conda_env', None)

    if not script:
        return jsonify({'status': 'error', 'message': 'Script not supplied!'}), 400

    # Generate unique filename
    if platform_name == "Windows":
        temp_filename = os.path.join(tempfile.gettempdir(), f"bash_exec_{uuid.uuid4().hex}.sh")
    else:
        temp_filename = f"/tmp/bash_exec_{uuid.uuid4().hex}.sh"

    try:
        # Wrap script with conda activation if needed
        final_script = wrap_script_with_conda(script, conda_env)

        # newline='\n' stops text mode from turning "\n" into "\r\n" on
        # Windows, which bash would read as part of each command.
        with open(temp_filename, 'w', encoding='utf-8', newline='\n') as f:
            f.write(final_script)
        os.chmod(temp_filename, 0o755)

        bash_executable = 'bash' if platform_name == "Windows" else '/bin/bash'
        shell_cmd = [bash_executable, temp_filename]

        # Prepare environment variables
        exec_env = os.environ.copy()
        if env:
            exec_env.update(env)

        result = subprocess.run(
            shell_cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            timeout=timeout,
            cwd=working_dir or os.getcwd(),
            env=exec_env
        )

        return jsonify({
            'status': 'success' if result.returncode == 0 else 'error',
            'output': result.stdout,
            'error': "",
            'returncode': result.returncode
        })
    except subprocess.TimeoutExpired:
        return jsonify({
            'status': 'error',
            'output': f'Script execution timed out after {timeout} seconds',
            'error': "",
            'returncode': -1
        }), 500
    except Exception as e:
        return jsonify({
            'status': 'error',
            'output': f'Failed to execute script: {str(e)}',
            'error': "",
            'returncode': -1
        }), 500
    finally:
        # Single cleanup point; a failed delete must not mask the response.
        if os.path.exists(temp_filename):
            try:
                os.unlink(temp_filename)
            except OSError as cleanup_error:
                logger.warning(f"Could not remove temp file {temp_filename}: {cleanup_error}")
@app.route('/screenshot', methods=['GET'])
def capture_screen_with_cursor():
    """Capture screenshot (including mouse cursor) and return it as a PNG.

    The image is captured to a uniquely named temp file, loaded into memory
    and the temp file is removed on every path, including failures.
    """
    tmp_path = os.path.join(tempfile.gettempdir(), f"screenshot_{uuid.uuid4().hex}.png")
    try:
        if not screenshot_helper.capture(tmp_path, with_cursor=True):
            return jsonify({'status':'error','message':'Screenshot failed'}), 500

        with open(tmp_path, 'rb') as f:
            buf = BytesIO(f.read())
        return send_file(buf, mimetype='image/png')
    except Exception as e:
        logger.error(f"Screenshot failed: {str(e)}")
        return jsonify({
            'status': 'error',
            'message': str(e)
        }), 500
    finally:
        # Previously the file was only removed on success.
        if os.path.exists(tmp_path):
            try:
                os.remove(tmp_path)
            except OSError as cleanup_error:
                logger.warning(f"Could not remove temp file {tmp_path}: {cleanup_error}")
@app.route('/cursor_position', methods=['GET'])
def get_cursor_position():
    """Report the cursor coordinates from the screenshot helper."""
    try:
        position = screenshot_helper.get_cursor_position()
        cursor_x, cursor_y = position
    except Exception as exc:
        return jsonify({'status': 'error', 'message': str(exc)}), 500
    return jsonify({'x': cursor_x, 'y': cursor_y, 'status': 'success'})
@app.route('/screen_size', methods=['POST', 'GET'])
def get_screen_size():
    """Report the screen dimensions from the screenshot helper."""
    try:
        dimensions = screenshot_helper.get_screen_size()
        screen_w, screen_h = dimensions
    except Exception as exc:
        return jsonify({'status': 'error', 'message': str(exc)}), 500
    return jsonify({'width': screen_w, 'height': screen_h, 'status': 'success'})
# Accessibility Tree
@app.route("/accessibility", methods=["GET"])
def get_accessibility_tree():
    """Return the accessibility tree, limited by the ``max_depth`` query arg (default 10)."""
    try:
        depth_limit = request.args.get('max_depth', 10, type=int)
        return jsonify(accessibility_helper.get_tree(max_depth=depth_limit))
    except Exception as exc:
        logger.error(f"Failed to get accessibility tree: {str(exc)}")
        error_body = {
            'status': 'error',
            'message': str(exc)
        }
        return jsonify(error_body), 500
# File Operations
@app.route('/list_directory', methods=['POST'])
def list_directory():
    """List the entries of a directory ('path' in the JSON body, default '.')."""
    payload = request.json
    path = payload.get('path', '.')

    try:
        path = os.path.expanduser(path)
        items = []
        for entry_name in os.listdir(path):
            full_path = os.path.join(path, entry_name)
            entry = {
                'name': entry_name,
                'is_dir': os.path.isdir(full_path),
                'is_file': os.path.isfile(full_path),
            }
            # Size is only reported for regular files.
            if os.path.isfile(full_path):
                entry['size'] = os.path.getsize(full_path)
            else:
                entry['size'] = None
            items.append(entry)

        return jsonify({
            'status': 'success',
            'path': path,
            'items': items
        })
    except Exception as exc:
        return jsonify({
            'status': 'error',
            'message': str(exc)
        }), 500
@app.route('/file', methods=['POST'])
def file_operation():
    """Perform a file operation: 'read' (default) or 'exists'.

    Responds 400 when 'path' is missing or the operation is unknown, and
    500 when the operation itself raises.
    """
    payload = request.json
    operation = payload.get('operation', 'read')
    path = payload.get('path')

    if not path:
        return jsonify({'status': 'error', 'message': 'Path required'}), 400

    path = os.path.expanduser(path)

    try:
        if operation == 'exists':
            return jsonify({
                'status': 'success',
                'exists': os.path.exists(path)
            })

        if operation != 'read':
            return jsonify({
                'status': 'error',
                'message': f'Unknown operation: {operation}'
            }), 400

        with open(path, 'r') as handle:
            text = handle.read()
        return jsonify({
            'status': 'success',
            'content': text
        })
    except Exception as exc:
        return jsonify({
            'status': 'error',
            'message': str(exc)
        }), 500
@app.route('/desktop_path', methods=['POST', 'GET'])
def get_desktop_path():
    """Return the expanded ``~/Desktop`` path of the user running the server."""
    try:
        response_body = {
            'status': 'success',
            'path': os.path.expanduser("~/Desktop")
        }
        return jsonify(response_body)
    except Exception as exc:
        failure_body = {
            'status': 'error',
            'message': str(exc)
        }
        return jsonify(failure_body), 500
@app.route("/setup/activate_window", methods=['POST'])
def activate_window():
    """Activate (focus) a window through the platform adapter.

    JSON body:
        window_name: window to activate (required).
        strict: forwarded to the adapter (default False).

    Returns:
        The adapter's result dict (400 when its status is not 'success');
        400 when window_name is missing; 501 when the adapter lacks
        ``activate_window``; 500 on an exception.
    """
    data = request.json
    window_name = data.get("window_name")
    strict = data.get("strict", False)
    # NOTE(review): a "by_class" field used to be read here but was never
    # passed to the adapter, so the unused local has been dropped.

    if not window_name:
        return jsonify({'status': 'error', 'message': 'window_name required'}), 400

    try:
        if platform_adapter and hasattr(platform_adapter, 'activate_window'):
            result = platform_adapter.activate_window(window_name, strict=strict)
            if result['status'] == 'success':
                return jsonify(result)
            return jsonify(result), 400
        return jsonify({
            'status': 'error',
            'message': f'Window activation not supported on {platform_name}'
        }), 501
    except Exception as e:
        logger.error(f"Window activation failed: {str(e)}")
        return jsonify({'status': 'error', 'message': str(e)}), 500
@app.route("/setup/close_window", methods=["POST"])
def close_window():
    """Close a window through the platform adapter.

    JSON body:
        window_name: window to close (required).
        strict: forwarded to the adapter (default False).

    Returns:
        The adapter's result dict (404 when its status is not 'success');
        400 when window_name is missing; 501 when the adapter lacks
        ``close_window``; 500 on an exception.
    """
    data = request.json
    window_name = data.get("window_name")
    strict = data.get("strict", False)
    # NOTE(review): a "by_class" field used to be read here but was never
    # passed to the adapter, so the unused local has been dropped.

    if not window_name:
        return jsonify({'status': 'error', 'message': 'window_name required'}), 400

    try:
        if platform_adapter and hasattr(platform_adapter, 'close_window'):
            result = platform_adapter.close_window(window_name, strict=strict)
            if result['status'] == 'success':
                return jsonify(result)
            return jsonify(result), 404
        return jsonify({
            'status': 'error',
            'message': f'Window closing not supported on {platform_name}'
        }), 501
    except Exception as e:
        logger.error(f"Window closing failed: {str(e)}")
        return jsonify({'status': 'error', 'message': str(e)}), 500
@app.route('/window_size', methods=['POST'])
def get_window_size():
    """Return a width/height pair.

    NOTE(review): despite the name, the values come from
    ``screenshot_helper.get_screen_size()`` — confirm whether a per-window
    size was intended.
    """
    try:
        size = screenshot_helper.get_screen_size()
        reported_w, reported_h = size
    except Exception as exc:
        return jsonify({'status': 'error', 'message': str(exc)}), 500
    return jsonify({
        'status': 'success',
        'width': reported_w,
        'height': reported_h
    })
@app.route('/wallpaper', methods=['POST'])
@app.route('/setup/change_wallpaper', methods=['POST'])
def set_wallpaper():
    """Set the desktop wallpaper to the image at 'path' via the platform adapter."""
    payload = request.json
    image_path = payload.get('path')

    if not image_path:
        return jsonify({'status': 'error', 'message': 'path required'}), 400

    try:
        adapter_supported = platform_adapter and hasattr(platform_adapter, 'set_wallpaper')
        if not adapter_supported:
            return jsonify({
                'status': 'error',
                'message': f'Wallpaper setting not supported on {platform_name}'
            }), 501

        outcome = platform_adapter.set_wallpaper(image_path)
        if outcome['status'] != 'success':
            # Adapter-reported failures are surfaced as client errors.
            return jsonify(outcome), 400
        return jsonify(outcome)
    except Exception as exc:
        logger.error(f"Failed to set wallpaper: {str(exc)}")
        return jsonify({'status': 'error', 'message': str(exc)}), 500
# Screen Recording
@app.route('/start_recording', methods=['POST'])
def start_recording():
    """Start screen recording (supports Linux, macOS, Windows).

    Responds 501 when the adapter cannot record, 400 when a recording is
    already running, and 500 when starting fails.
    """
    global recording_process

    # Check if platform adapter supports recording
    can_record = platform_adapter and hasattr(platform_adapter, 'start_recording')
    if not can_record:
        return jsonify({
            'status': 'error',
            'message': f'Recording not supported on {platform_name}'
        }), 501

    # Check if recording is already in progress (poll() is None while running)
    already_running = recording_process and recording_process.poll() is None
    if already_running:
        return jsonify({
            'status': 'error',
            'message': 'Recording is already in progress.'
        }), 400

    # Clean up old recording file; a failed delete is logged, not fatal
    if os.path.exists(recording_path):
        try:
            os.remove(recording_path)
        except OSError as remove_error:
            logger.error(f"Cannot delete old recording file: {remove_error}")

    try:
        # Use platform adapter to start recording
        outcome = platform_adapter.start_recording(recording_path)

        if outcome['status'] != 'success':
            logger.error(f"Failed to start recording: {outcome.get('message', 'Unknown error')}")
            return jsonify({
                'status': 'error',
                'message': outcome.get('message', 'Failed to start recording')
            }), 500

        recording_process = outcome.get('process')
        logger.info("Recording started successfully")
        return jsonify({
            'status': 'success',
            'message': 'Recording started'
        })
    except Exception as exc:
        logger.error(f"Failed to start recording: {str(exc)}")
        return jsonify({
            'status': 'error',
            'message': str(exc)
        }), 500
@app.route('/end_recording', methods=['POST'])
def end_recording():
    """End screen recording (supports Linux, macOS, Windows).

    Stops the recorder through the platform adapter (or by signalling the
    process directly when the adapter lacks ``stop_recording``), waits up to
    about five seconds for a non-empty output file, and returns it as an
    attachment.

    Responds 400 when nothing is being recorded and 500 on failure.
    """
    global recording_process

    # Check if recording is in progress
    if not recording_process or recording_process.poll() is not None:
        recording_process = None
        return jsonify({
            'status': 'error',
            'message': 'No recording in progress'
        }), 400

    try:
        # Use platform adapter to stop recording
        if platform_adapter and hasattr(platform_adapter, 'stop_recording'):
            result = platform_adapter.stop_recording(recording_process)
            recording_process = None

            if result['status'] != 'success':
                logger.error(f"Failed to stop recording: {result.get('message', 'Unknown error')}")
                return jsonify(result), 500
        else:
            # Fallback: terminate process directly
            recording_process.send_signal(signal.SIGINT)
            try:
                recording_process.wait(timeout=15)
            except subprocess.TimeoutExpired:
                logger.warning("ffmpeg not responding, force terminating")
                recording_process.kill()
                recording_process.wait()
            recording_process = None

        def _recording_ready() -> bool:
            # The file must exist and contain data to be worth sending.
            return os.path.exists(recording_path) and os.path.getsize(recording_path) > 0

        # Check if recording file exists
        # wait for ffmpeg to write the file header
        for _ in range(10):
            if _recording_ready():
                break
            time.sleep(0.5)

        if _recording_ready():
            logger.info("Recording ended, file saved")
            return send_file(recording_path, as_attachment=True)

        logger.error("Recording file is missing or empty")
        # NOTE(review): abort() raises inside this try block, so presumably
        # the handler below turns it into the JSON 500 response — confirm.
        return abort(500, description="Recording file is missing or empty")

    except Exception as e:
        logger.error(f"Failed to end recording: {str(e)}")
        if recording_process:
            try:
                recording_process.kill()
                recording_process.wait()
            except Exception:
                # Best-effort cleanup; the process may already be gone.
                pass
            recording_process = None
        return jsonify({
            'status': 'error',
            'message': str(e)
        }), 500
@app.route('/terminal', methods=['GET'])
def get_terminal_output():
    """Get terminal output (supports Linux, macOS, Windows).

    Responds 501 when the adapter has no ``get_terminal_output``, 404 when
    it returns nothing, and 500 when it raises.
    """
    try:
        supported = platform_adapter and hasattr(platform_adapter, 'get_terminal_output')
        if not supported:
            return jsonify({
                'status': 'error',
                'message': f'Terminal output not supported on {platform_name}'
            }), 501

        terminal_text = platform_adapter.get_terminal_output()
        if not terminal_text:
            return jsonify({
                'status': 'error',
                'message': f'No terminal output available on {platform_name}',
                'platform_note': 'Make sure a terminal window is open and active'
            }), 404
        return jsonify({'output': terminal_text, 'status': 'success'})
    except Exception as exc:
        logger.error(f"Failed to get terminal output: {str(exc)}")
        return jsonify({'status': 'error', 'message': str(exc)}), 500
@app.route("/setup/upload", methods=["POST"])
def upload_file():
    """Save an uploaded file into the directory named by the 'path' form field.

    The target directory defaults to ``~/Desktop`` and is created if missing.
    """
    if 'file' not in request.files:
        return jsonify({'status': 'error', 'message': 'No file provided'}), 400

    uploaded = request.files['file']
    if uploaded.filename == '':
        return jsonify({'status': 'error', 'message': 'No file selected'}), 400

    try:
        # Get target path
        destination_dir = os.path.expanduser(
            request.form.get('path', os.path.expanduser('~/Desktop'))
        )

        # Ensure directory exists
        os.makedirs(destination_dir, exist_ok=True)

        # Save file
        # NOTE(review): the client-supplied filename is joined as-is, so it
        # can contain path separators — confirm this is acceptable.
        file_path = os.path.join(destination_dir, uploaded.filename)
        uploaded.save(file_path)

        logger.info(f"File uploaded successfully: {file_path}")
        return jsonify({
            'status': 'success',
            'path': file_path,
            'message': 'File uploaded successfully'
        })
    except Exception as exc:
        logger.error(f"File upload failed: {str(exc)}")
        return jsonify({'status': 'error', 'message': str(exc)}), 500
@app.route("/setup/download_file", methods=["POST"])
def download_file():
    """Send the file at 'path' (from the JSON body) as an attachment."""
    payload = request.json
    path = payload.get('path')

    if not path:
        return jsonify({'status': 'error', 'message': 'path required'}), 400

    try:
        path = os.path.expanduser(path)
        if os.path.exists(path):
            return send_file(path, as_attachment=True)
        return jsonify({'status': 'error', 'message': f'File not found: {path}'}), 404
    except Exception as exc:
        logger.error(f"File download failed: {str(exc)}")
        return jsonify({'status': 'error', 'message': str(exc)}), 500
@app.route("/setup/open_file", methods=['POST'])
def open_file():
    """Open file (using system default application).

    JSON body:
        path: file to open (required; ``~`` is expanded).

    Returns:
        200 once the opener has been launched; 400 when path is missing;
        404 when the file does not exist; 501 on a platform with no known
        opener; 500 on any other failure.
    """
    data = request.json
    path = data.get('path')

    if not path:
        return jsonify({'status': 'error', 'message': 'path required'}), 400

    try:
        path = os.path.expanduser(path)

        if not os.path.exists(path):
            return jsonify({'status': 'error', 'message': f'File not found: {path}'}), 404

        if platform_name == "Darwin":
            subprocess.Popen(['open', path])
        elif platform_name == "Linux":
            subprocess.Popen(['xdg-open', path])
        elif platform_name == "Windows":
            os.startfile(path)
        else:
            # Previously this fell through and reported success even though
            # nothing had been opened.
            return jsonify({
                'status': 'error',
                'message': f'File opening not supported on {platform_name}'
            }), 501

        logger.info(f"File opened successfully: {path}")
        return jsonify({
            'status': 'success',
            'message': f'File opened: {path}'
        })
    except Exception as e:
        logger.error(f"File opening failed: {str(e)}")
        return jsonify({'status': 'error', 'message': str(e)}), 500
def print_banner(host: str = "127.0.0.1", port: int = 5000, debug: bool = False):
    """Print the startup banner: server URL, platform and run mode."""
    from anytool.utils.display import print_banner as display_banner, print_section, print_separator, colorize

    # STARTUP INFORMATION
    display_banner("AnyTool · Local Server")

    # Server section
    info_lines = [colorize(f"http://{host}:{port}", 'g', bold=True)]

    if host == '0.0.0.0':
        listening_note = colorize('Listening on all interfaces', 'gr')
        bind_address = colorize('(0.0.0.0:' + str(port) + ')', 'y')
        info_lines.append(f"{listening_note} {bind_address}")

    if debug:
        mode_label, mode_color = 'Debug', 'y'
    else:
        mode_label, mode_color = 'Production', 'g'
    info_lines.append(f"{colorize(platform_name, 'gr')} · {colorize(mode_label, mode_color)}")

    print_section("Server", info_lines)

    print()
    print_separator()
    print(f" {colorize('Press Ctrl+C to stop', 'gr')}")
    print()
def run_health_check_async():
    """Run the full health check in a daemon thread.

    The worker waits two seconds before checking, prints the results and
    logs how many features are fully available.
    """
    def _run():
        from anytool.utils.display import colorize
        time.sleep(2)
        print(colorize("\n - Starting health check...\n", 'c', bold=True))
        results = health_checker.check_all(test_endpoints=True)
        health_checker.print_results(results, show_endpoint_details=False)
        summary = health_checker.get_summary()
        # get_summary() returns {} when no results are stored, so read the
        # counts defensively instead of raising KeyError inside the thread.
        logger.info(f"Health check completed: {summary.get('fully_available', 0)}/{summary.get('total', 0)} fully available")

    thread = threading.Thread(target=_run, daemon=True)
    thread.start()
def run_server(host: str = "127.0.0.1", port: int = 5000, debug: bool = False):
    """
    Start desktop control server.

    Creates the module-level health checker, prints the banner, starts the
    background health check (non-debug mode only) and runs the Flask app.

    Args:
        host: Listening address (127.0.0.1 for local, 0.0.0.0 for all interfaces)
        port: Listening port
        debug: Debug mode (display detailed logs)
    """
    global health_checker

    # Initialize health_checker; the wildcard bind address is replaced by
    # loopback in the URL handed to the checker.
    checker_host = '127.0.0.1' if host == '0.0.0.0' else host
    base_url = f"http://{checker_host}:{port}"
    health_checker = HealthChecker(feature_checker, base_url, auto_cleanup=False)

    print_banner(host, port, debug)

    if debug is False or not debug:
        run_health_check_async()

    app.run(host=host, port=port, debug=debug, threaded=True)
def main():
    """Command-line entry point: parse arguments, merge config, run the server.

    Options left at their defaults (host 127.0.0.1, port 5000, no --debug)
    are taken from the 'server' section of the config file when it exists.
    If the config cannot be loaded, the error is logged and the command-line
    values are used.
    """
    import argparse
    from anytool.config.utils import get_config_value

    parser = argparse.ArgumentParser(
        description='AnyTool Local Server - Desktop Control Server'
    )
    parser.add_argument('--host', type=str, default='127.0.0.1',
                        help='Server host (default: 127.0.0.1)')
    parser.add_argument('--port', type=int, default=5000,
                        help='Server port (default: 5000)')
    parser.add_argument('--debug', action='store_true',
                        help='Enable debug mode')
    parser.add_argument('--config', type=str,
                        help='Path to config.json file')
    args = parser.parse_args()

    config_path = args.config
    if not config_path:
        config_path = os.path.join(os.path.dirname(__file__), 'config.json')

    host, port, debug = args.host, args.port, args.debug

    if os.path.exists(config_path):
        # Only config loading is guarded. Previously run_server() was inside
        # this try block, so a server failure was reported as a config
        # failure and the server was then started a second time.
        try:
            with open(config_path, 'r') as f:
                config = json.load(f)
            server_config = get_config_value(config, 'server', {})
            cfg_host = args.host if args.host != '127.0.0.1' else get_config_value(server_config, 'host', '127.0.0.1')
            cfg_port = args.port if args.port != 5000 else get_config_value(server_config, 'port', 5000)
            cfg_debug = args.debug or get_config_value(server_config, 'debug', False)
        except Exception as e:
            logger.error(f"Failed to load config: {e}")
        else:
            # Commit the merged values only when the whole config was read.
            host, port, debug = cfg_host, cfg_port, cfg_debug

    run_server(host=host, port=port, debug=debug)


if __name__ == "__main__":
    main()
================================================
FILE: anytool/local_server/platform_adapters/__init__.py
================================================
import platform
from typing import Optional, Any
# Select the adapter class for the host operating system.
# PlatformAdapter stays None and ADAPTER_AVAILABLE stays False when the
# platform is not one of Darwin/Linux/Windows or when importing the
# matching adapter module raises ImportError.
PlatformAdapter = None
ADAPTER_AVAILABLE = False
platform_name = platform.system()

try:
    if platform_name == "Darwin":
        from .macos_adapter import MacOSAdapter as PlatformAdapter
        ADAPTER_AVAILABLE = True
    elif platform_name == "Linux":
        from .linux_adapter import LinuxAdapter as PlatformAdapter
        ADAPTER_AVAILABLE = True
    elif platform_name == "Windows":
        from .windows_adapter import WindowsAdapter as PlatformAdapter
        ADAPTER_AVAILABLE = True
except ImportError:
    PlatformAdapter = None
    ADAPTER_AVAILABLE = False
def get_platform_adapter() -> Optional[Any]:
    """
    Create an adapter instance for the current platform.

    Returns:
        A new PlatformAdapter instance, or None when no adapter class was
        imported for this platform.
    """
    if not (ADAPTER_AVAILABLE and PlatformAdapter):
        return None
    return PlatformAdapter()


__all__ = ["PlatformAdapter", "get_platform_adapter", "ADAPTER_AVAILABLE"]
================================================
FILE: anytool/local_server/platform_adapters/linux_adapter.py
================================================
import subprocess
import os
from typing import Dict, Any, Optional, List
from anytool.utils.logging import Logger
from PIL import Image
import pyautogui
try:
import pyatspi
from pyatspi import Accessible, StateType, STATE_SHOWING
import Xlib
from Xlib import display, X
LINUX_LIBS_AVAILABLE = True
except ImportError:
LINUX_LIBS_AVAILABLE = False
logger = Logger.get_logger(__name__)
class LinuxAdapter:
    """Linux implementation of the desktop-control platform adapter."""

    def __init__(self):
        # Mirror the module-level import result so callers can inspect it.
        self.available = LINUX_LIBS_AVAILABLE
        if not self.available:
            logger.warning("Linux libraries are not fully installed, some features may not be available")
def capture_screenshot_with_cursor(self, output_path: str) -> bool:
    """
    Capture a screenshot with pyautogui and overlay the cursor via pyxcursor.

    Drawing the cursor is best-effort: if it fails for any reason the
    screenshot is still saved, just without the cursor.

    Args:
        output_path: Output file path
    Returns:
        True when the screenshot was saved, False when capturing or saving failed
    """
    try:
        screenshot = pyautogui.screenshot()
        try:
            import sys
            # pyxcursor is expected in the same directory as this module.
            # Register that directory only once; inserting it on every call
            # would make sys.path grow with each screenshot.
            adapter_dir = os.path.dirname(__file__)
            if adapter_dir not in sys.path:
                sys.path.insert(0, adapter_dir)
            from pyxcursor import Xcursor
            cursor_obj = Xcursor()
            imgarray = cursor_obj.getCursorImageArrayFast()
            cursor_img = Image.fromarray(imgarray)
            cursor_x, cursor_y = pyautogui.position()
            # The cursor image is passed a second time as the paste mask.
            # NOTE(review): the image's top-left corner is placed at the
            # pointer position; the cursor hotspot offset is not applied.
            screenshot.paste(cursor_img, (cursor_x, cursor_y), cursor_img)
            logger.info("Linux screenshot successfully (with cursor)")
        except Exception as e:
            logger.warning(f"Failed to add cursor to screenshot: {e}")
            logger.info("Linux screenshot successfully (without cursor)")
        screenshot.save(output_path)
        return True
    except Exception as e:
        logger.error(f"Linux screenshot failed: {e}")
        return False
def activate_window(self, window_name: str, strict: bool = False, by_class: bool = False) -> Dict[str, Any]:
    """
    Bring a window to the foreground by running wmctrl.

    Args:
        window_name: Window name passed to wmctrl
        strict: Adds the F flag to the wmctrl option string
        by_class: Adds the x flag to the wmctrl option string
    Returns:
        Dictionary with 'status' ('success' or 'error') and 'message'
    """
    # Option string is "-" + optional "x" + optional "F" + "a".
    modifiers = ('x' if by_class else '') + ('F' if strict else '')
    command = ["wmctrl", f"-{modifiers}a", window_name]
    try:
        subprocess.run(command, check=True, timeout=5)
        logger.info(f"Linux window activated successfully: {window_name}")
        return {'status': 'success', 'message': 'Window activated'}
    except subprocess.CalledProcessError as e:
        logger.warning(f"wmctrl command execution failed: {e}")
        return {'status': 'error', 'message': f'Window {window_name} not found or wmctrl failed'}
    except FileNotFoundError:
        logger.error("wmctrl not installed, please install: sudo apt install wmctrl")
        return {'status': 'error', 'message': 'wmctrl not installed'}
    except Exception as e:
        logger.error(f"Linux window activation failed: {e}")
        return {'status': 'error', 'message': str(e)}
def close_window(self, window_name: str, strict: bool = False, by_class: bool = False) -> Dict[str, Any]:
    """
    Close a window by running wmctrl.

    Args:
        window_name: Window name passed to wmctrl
        strict: Adds the F flag to the wmctrl option string
        by_class: Adds the x flag to the wmctrl option string
    Returns:
        Dictionary with 'status' ('success' or 'error') and 'message'
    """
    # Option string is "-" + optional "x" + optional "F" + "c".
    modifiers = ('x' if by_class else '') + ('F' if strict else '')
    command = ["wmctrl", f"-{modifiers}c", window_name]
    try:
        subprocess.run(command, check=True, timeout=5)
        logger.info(f"Linux window closed successfully: {window_name}")
        return {'status': 'success', 'message': 'Window closed'}
    except subprocess.CalledProcessError as e:
        logger.warning(f"wmctrl command execution failed: {e}")
        return {'status': 'error', 'message': f'Window {window_name} not found or wmctrl failed'}
    except FileNotFoundError:
        logger.error("wmctrl not installed")
        return {'status': 'error', 'message': 'wmctrl not installed'}
    except Exception as e:
        logger.error(f"Linux window close failed: {e}")
        return {'status': 'error', 'message': str(e)}
def get_accessibility_tree(self, max_depth: int = 10, max_width: int = 50) -> Dict[str, Any]:
    """
    Serialize the AT-SPI accessibility tree starting at desktop 0.

    Args:
        max_depth: Deepest level that is serialized
        max_width: Maximum number of children serialized per element
    Returns:
        {'tree': ..., 'platform': 'Linux'} on success, {'error': ...} otherwise
    """
    if not LINUX_LIBS_AVAILABLE:
        return {'error': 'Linux accessibility libraries not available'}
    try:
        root = pyatspi.Registry.getDesktop(0)
        serialized = self._serialize_atspi_element(
            root, depth=0, max_depth=max_depth, max_width=max_width
        )
        return {'tree': serialized, 'platform': 'Linux'}
    except Exception as e:
        logger.error(f"Linux get accessibility tree failed: {e}")
        return {'error': str(e)}
def _serialize_atspi_element(
    self,
    element: "Accessible",
    depth: int = 0,
    max_depth: int = 10,
    max_width: int = 50
) -> Optional[Dict[str, Any]]:
    """
    Recursively serialize an AT-SPI element into a plain dictionary.

    The ``Accessible`` annotation is written as a string so that defining
    this method does not fail with NameError when the pyatspi import at the
    top of the module did not succeed.

    Args:
        element: AT-SPI accessible element
        depth: Depth of ``element`` in the tree (root is 0)
        max_depth: Elements deeper than this are not serialized
        max_width: Maximum number of children serialized per element
    Returns:
        Dictionary with 'depth', 'role', 'name', 'states', 'children' and,
        when available, 'attributes', 'position', 'size' and 'text';
        None when ``depth`` exceeds ``max_depth`` or the element cannot be read
    """
    if depth > max_depth:
        return None
    try:
        result = {
            'depth': depth,
            'role': element.getRoleName(),
            'name': element.name,
        }

        # Read the state set once; it feeds both the 'states' list and the
        # visibility check below.
        try:
            states = element.getState().get_states()
        except Exception:
            states = []
        try:
            result['states'] = [
                StateType._enum_lookup[st].split('_', 1)[1].lower()
                for st in states if st in StateType._enum_lookup
            ]
        except Exception:
            result['states'] = []

        # Attributes are only included when the element reports any.
        try:
            attributes = element.get_attributes()
            if attributes:
                result['attributes'] = dict(attributes)
        except Exception:
            result['attributes'] = {}

        # Position and size are only queried for elements that are showing.
        if STATE_SHOWING in states:
            try:
                component = element.queryComponent()
                bbox = component.getExtents(pyatspi.XY_SCREEN)
                result['position'] = {'x': bbox[0], 'y': bbox[1]}
                result['size'] = {'width': bbox[2], 'height': bbox[3]}
            except Exception:
                pass

        # Text content, with U+FFFC and U+FFFD characters removed.
        try:
            text_obj = element.queryText()
            text = text_obj.getText(0, text_obj.characterCount)
            if text:
                result['text'] = text.replace("\ufffc", "").replace("\ufffd", "")
        except Exception:
            pass

        # Children: at most max_width per element; unreadable ones are skipped.
        result['children'] = []
        try:
            child_count = min(element.childCount, max_width)
            for i in range(child_count):
                try:
                    child = element.getChildAtIndex(i)
                    child_data = self._serialize_atspi_element(
                        child,
                        depth + 1,
                        max_depth,
                        max_width
                    )
                    if child_data:
                        result['children'].append(child_data)
                except Exception as e:
                    logger.debug(f"Cannot serialize child element {i}: {e}")
                    continue
        except Exception as e:
            logger.debug(f"Cannot get child elements: {e}")
        return result
    except Exception as e:
        logger.debug(f"Failed to serialize element (depth={depth}): {e}")
        return None
def get_screen_size(self) -> Dict[str, int]:
    """
    Get the screen size in pixels.

    Uses Xlib when the Linux libraries were imported, otherwise pyautogui.
    The Xlib display connection is closed again after the query so that
    repeated calls do not accumulate open connections.

    Returns:
        {'width': ..., 'height': ...}; 1920x1080 when the size cannot be read
    """
    try:
        if LINUX_LIBS_AVAILABLE:
            d = display.Display()
            try:
                screen = d.screen()
                return {
                    'width': screen.width_in_pixels,
                    'height': screen.height_in_pixels
                }
            finally:
                d.close()
        # Use pyautogui as fallback
        size = pyautogui.size()
        return {'width': size.width, 'height': size.height}
    except Exception as e:
        logger.error(f"Failed to get screen size: {e}")
        return {'width': 1920, 'height': 1080}  # Default value
def list_windows(self) -> List[Dict[str, Any]]:
    """
    List windows by parsing the output of ``wmctrl -l``.

    Each output line is split into at most four whitespace-separated
    fields; lines with fewer than four fields are ignored.

    Returns:
        List of dictionaries with 'id', 'desktop', 'hostname' and 'title';
        an empty list when wmctrl is missing, fails, or times out
    """
    try:
        result = subprocess.run(
            ['wmctrl', '-l'],
            capture_output=True,
            text=True,
            check=True,
            timeout=5  # same limit as the other wmctrl calls in this adapter
        )
        windows = []
        for line in result.stdout.strip().split('\n'):
            if not line:
                continue
            # maxsplit=3 keeps whitespace inside the title intact
            parts = line.split(None, 3)
            if len(parts) >= 4:
                windows.append({
                    'id': parts[0],
                    'desktop': parts[1],
                    'hostname': parts[2],
                    'title': parts[3]
                })
        return windows
    except FileNotFoundError:
        logger.error("wmctrl not installed")
        return []
    except Exception as e:
        logger.error(f"List windows failed: {e}")
        return []
def get_terminal_output(self) -> Optional[str]:
    """
    Read the text of the active GNOME Terminal window through AT-SPI.

    Looks for the application named "gnome-terminal-server", then for a
    frame in the active state, then for the first terminal element whose
    text can be read.

    Returns:
        The terminal text with trailing whitespace removed, or None when it
        is empty, not found, or the libraries are unavailable
    """
    if not LINUX_LIBS_AVAILABLE:
        return None
    try:
        for app in pyatspi.Registry.getDesktop(0):
            if not (app.getRoleName() == "application" and app.name == "gnome-terminal-server"):
                continue
            for frame in app:
                if frame.getRoleName() != "frame" or not frame.getState().contains(pyatspi.STATE_ACTIVE):
                    continue
                for component in self._find_terminals(frame):
                    try:
                        text_obj = component.queryText()
                        output = text_obj.getText(0, text_obj.characterCount)
                        return output.rstrip() if output else None
                    except:
                        # This terminal element is unreadable; try the next one.
                        continue
        return None
    except Exception as e:
        logger.error(f"Failed to get terminal output: {e}")
        return None
def _find_terminals(self, element) -> "List[Accessible]":
    """
    Recursively collect elements whose role name is "terminal".

    The return annotation is written as a string so that defining this
    method does not fail with NameError when the pyatspi import at the top
    of the module did not succeed.

    Args:
        element: Element whose subtree is searched (the element itself included)
    Returns:
        Terminal elements found before any error occurred; errors while
        walking the subtree end the search for that subtree
    """
    terminals = []
    try:
        if element.getRoleName() == "terminal":
            terminals.append(element)
        for i in range(element.childCount):
            child = element.getChildAtIndex(i)
            terminals.extend(self._find_terminals(child))
    except Exception:
        # Best-effort search: keep whatever was collected so far.
        pass
    return terminals
def set_wallpaper(self, image_path: str) -> Dict[str, Any]:
    """
    Set the desktop wallpaper through gsettings (GNOME).

    The path is expanded, made absolute and converted to a percent-encoded
    ``file://`` URI, so paths containing spaces or characters such as ``#``
    or ``%`` produce a valid URI.

    Args:
        image_path: Image path (``~`` is expanded)
    Returns:
        Dictionary with 'status' ('success' or 'error') and 'message'
    """
    try:
        from pathlib import Path

        image_path = os.path.abspath(os.path.expanduser(image_path))
        if not os.path.exists(image_path):
            return {'status': 'error', 'message': f'Image not found: {image_path}'}

        image_uri = Path(image_path).as_uri()
        subprocess.run([
            'gsettings', 'set',
            'org.gnome.desktop.background',
            'picture-uri',
            image_uri
        ], check=True, timeout=5)

        # Newer GNOME releases use a separate key for the dark style. The key
        # does not exist on older releases, so this call is best-effort and
        # its failure does not change the result.
        try:
            subprocess.run([
                'gsettings', 'set',
                'org.gnome.desktop.background',
                'picture-uri-dark',
                image_uri
            ], check=False, timeout=5,
                stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        except Exception as e:
            logger.debug(f"Could not set picture-uri-dark: {e}")

        logger.info(f"Linux wallpaper set successfully: {image_path}")
        return {'status': 'success', 'message': 'Wallpaper set successfully'}
    except Exception as e:
        logger.error(f"Linux set wallpaper failed: {e}")
        return {'status': 'error', 'message': str(e)}
def get_system_info(self) -> Dict[str, Any]:
    """
    Collect basic Linux system information.

    The distribution name is PRETTY_NAME from /etc/os-release (falling back
    to 'Unknown Linux'); the kernel version is the output of ``uname -r``.

    Returns:
        Dictionary with 'platform', 'distro', 'kernel' and 'available', or
        'platform' and 'error' when collection fails
    """
    try:
        try:
            with open('/etc/os-release', 'r') as release_file:
                # KEY=value lines; double quotes around the value are stripped.
                os_info = dict(
                    (key, value.strip('"'))
                    for key, value in (
                        entry.strip().split('=', 1)
                        for entry in release_file
                        if '=' in entry
                    )
                )
            distro = os_info.get('PRETTY_NAME', 'Unknown Linux')
        except:
            distro = 'Unknown Linux'

        uname_result = subprocess.run(
            ['uname', '-r'],
            capture_output=True,
            text=True
        )
        return {
            'platform': 'Linux',
            'distro': distro,
            'kernel': uname_result.stdout.strip(),
            'available': self.available
        }
    except Exception as e:
        logger.error(f"Failed to get system information: {e}")
        return {
            'platform': 'Linux',
            'error': str(e)
        }
def start_recording(self, output_path: str) -> Dict[str, Any]:
    """
    Start recording the screen with ffmpeg (x11grab) in a background process.

    ffmpeg's stderr is piped so that a start-up failure can be reported, but
    nothing reads that pipe while the recording runs. ffmpeg is therefore
    started with ``-nostats -loglevel error`` so it does not keep writing
    progress output into a pipe that is never drained.

    Args:
        output_path: Path of the video file to write (overwritten if present)
    Returns:
        On success: 'status', 'message' and 'process' (the Popen object to
        pass to stop_recording). On failure: 'status' and 'message'.
    """
    try:
        try:
            subprocess.run(['ffmpeg', '-version'],
                           capture_output=True,
                           check=True,
                           timeout=5)
        except (subprocess.CalledProcessError, FileNotFoundError):
            return {
                'status': 'error',
                'message': 'ffmpeg not installed. Install with: sudo apt install ffmpeg'
            }

        # Determine the capture size; fall back to 1920x1080 on any error.
        try:
            if LINUX_LIBS_AVAILABLE:
                from Xlib import display as xdisplay
                d = xdisplay.Display()
                try:
                    screen = d.screen()
                    screen_width = screen.width_in_pixels
                    screen_height = screen.height_in_pixels
                finally:
                    d.close()
            else:
                # use pyautogui as fallback
                size = pyautogui.size()
                screen_width = size.width
                screen_height = size.height
        except Exception:
            screen_width, screen_height = 1920, 1080

        # Record the display this process is attached to; ':0.0' is used
        # when DISPLAY is not set.
        x_display = os.environ.get('DISPLAY') or ':0.0'

        command = [
            'ffmpeg',
            '-y',
            '-nostats',
            '-loglevel', 'error',
            '-f', 'x11grab',
            '-draw_mouse', '1',
            '-s', f'{screen_width}x{screen_height}',
            '-i', x_display,
            '-c:v', 'libx264',
            '-preset', 'ultrafast',
            '-r', '30',
            output_path
        ]
        process = subprocess.Popen(
            command,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,
            text=True
        )

        # Give ffmpeg a moment, then check whether it exited immediately.
        import time
        time.sleep(1)
        if process.poll() is not None:
            error_output = process.stderr.read() if process.stderr else "Unknown error"
            return {
                'status': 'error',
                'message': f'Failed to start recording: {error_output}'
            }

        logger.info(f"Linux recording started: {output_path}")
        return {
            'status': 'success',
            'message': 'Recording started',
            'process': process
        }
    except Exception as e:
        logger.error(f"Linux start recording failed: {e}")
        return {
            'status': 'error',
            'message': str(e)
        }
def stop_recording(self, process) -> Dict[str, Any]:
    """
    Stop a recording started by start_recording.

    Sends SIGINT to the process and waits up to 15 seconds for it to exit;
    if it does not, the process is killed.

    Args:
        process: The Popen object returned by start_recording
    Returns:
        Dictionary with 'status' ('success' or 'error') and 'message'
    """
    try:
        import signal

        still_running = bool(process) and process.poll() is None
        if not still_running:
            return {
                'status': 'error',
                'message': 'No recording in progress'
            }

        process.send_signal(signal.SIGINT)
        try:
            process.wait(timeout=15)
        except subprocess.TimeoutExpired:
            logger.warning("ffmpeg did not respond to SIGINT, killing process")
            process.kill()
            process.wait()

        logger.info("Linux recording stopped successfully")
        return {
            'status': 'success',
            'message': 'Recording stopped'
        }
    except Exception as e:
        logger.error(f"Linux stop recording failed: {e}")
        return {
            'status': 'error',
            'message': str(e)
        }
def get_running_applications(self) -> List[Dict[str, str]]:
    """
    Get a list of running applications using psutil.

    Processes without an executable path and processes whose name starts
    with '[' are skipped, and only the first process of each name is listed.
    Missing 'name' or 'cmdline' values are treated as empty, so one
    unreadable process does not abort the scan and empty the whole result.

    Returns:
        List of dictionaries with 'name', 'pid' (an int), 'path' and
        'cmdline'; an empty list when psutil is missing or the scan fails
    """
    try:
        import psutil
        apps = []
        seen_names = set()
        for proc in psutil.process_iter(['pid', 'name', 'exe', 'cmdline']):
            try:
                pinfo = proc.info
                name = pinfo['name'] or ''
                exe = pinfo['exe']
                # Skip kernel processes and system daemons
                if not exe or name.startswith('['):
                    continue
                # Skip duplicates
                if name in seen_names:
                    continue
                seen_names.add(name)
                apps.append({
                    'name': name,
                    'pid': pinfo['pid'],
                    'path': exe,
                    # 'cmdline' can be present but None
                    'cmdline': ' '.join(pinfo.get('cmdline') or [])
                })
            except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
                pass
        return apps
    except ImportError:
        logger.warning("psutil not installed, cannot get running applications")
        return []
    except Exception as e:
        logger.error(f"Failed to get running applications list: {e}")
        return []
================================================
FILE: anytool/local_server/platform_adapters/macos_adapter.py
================================================
import subprocess
import os
from typing import Dict, Any, Optional, List
from anytool.utils.logging import Logger
try:
import AppKit
import atomacos
MACOS_LIBS_AVAILABLE = True
except ImportError:
MACOS_LIBS_AVAILABLE = False
logger = Logger.get_logger(__name__)
_warning_shown = False
class MacOSAdapter:
    """macOS implementation of the desktop-control platform adapter."""

    def __init__(self):
        global _warning_shown
        self.available = MACOS_LIBS_AVAILABLE
        # The missing-library hint is logged once per process, not per instance.
        if not (MACOS_LIBS_AVAILABLE or _warning_shown):
            logger.warning("macOS libraries are not fully installed, some features may not be available")
            logger.info("To install missing libraries, run: pip install pyobjc-framework-Cocoa atomacos")
            _warning_shown = True
def capture_screenshot_with_cursor(self, output_path: str) -> bool:
    """
    Take a screenshot with the macOS ``screencapture`` command.

    Args:
        output_path: Output file path
    Returns:
        True when screencapture succeeded, False otherwise
    """
    # -C includes the cursor, -x disables sound, -m captures the main display
    command = ["screencapture", "-C", "-x", "-m", output_path]
    try:
        subprocess.run(command, check=True)
    except Exception as e:
        logger.error(f"macOS screenshot failed: {e}")
        return False
    else:
        try:
            logger.info(f"macOS screenshot successfully: {output_path}")
            return True
        except Exception as e:
            logger.error(f"macOS screenshot failed: {e}")
            return False
def activate_window(self, window_name: str, strict: bool = False) -> Dict[str, Any]:
    """
    Bring an application or window to the front using AppleScript.

    The script first tries a process whose name equals ``window_name``; if
    that fails it searches the windows of all visible processes by title.

    ``window_name`` is escaped before it is placed inside the AppleScript
    string literal, so a name containing ``"`` or ``\\`` cannot break out of
    the literal and inject script code.

    Args:
        window_name: Window name or application name
        strict: When True the window title must equal ``window_name``;
            otherwise it only has to contain it
    Returns:
        Dictionary with 'status' ('success' or 'error') and 'message'
    """
    try:
        # Escape backslash first, then the double quote.
        safe_name = window_name.replace('\\', '\\\\').replace('"', '\\"')
        title_test = 'is' if strict else 'contains'
        script = f'''
        tell application "System Events"
            set appName to "{safe_name}"
            try
                -- Try to activate application by name
                set frontmost of first process whose name is appName to true
                return "success"
            on error
                -- Try to find window by title
                set foundWindow to false
                repeat with theProcess in (every process whose visible is true)
                    try
                        tell theProcess
                            repeat with theWindow in windows
                                if name of theWindow {title_test} appName then
                                    set frontmost of theProcess to true
                                    set foundWindow to true
                                    exit repeat
                                end if
                            end repeat
                        end tell
                    end try
                    if foundWindow then exit repeat
                end repeat
                if foundWindow then
                    return "success"
                else
                    return "not found"
                end if
            end try
        end tell
        '''
        result = subprocess.run(
            ['osascript', '-e', script],
            capture_output=True,
            text=True,
            timeout=10
        )
        if "success" in result.stdout:
            logger.info(f"macOS window activated successfully: {window_name}")
            return {'status': 'success', 'message': 'Window activated'}
        else:
            logger.warning(f"macOS window not found: {window_name}")
            return {'status': 'error', 'message': f'Window {window_name} not found'}
    except Exception as e:
        logger.error(f"macOS window activation failed: {e}")
        return {'status': 'error', 'message': str(e)}
def close_window(self, window_name: str, strict: bool = False) -> Dict[str, Any]:
    """
    Quit an application using AppleScript, force-terminating it on timeout.

    ``window_name`` is escaped before it is placed inside the AppleScript
    string literal. The forced termination runs ``killall`` directly with
    an argument list instead of through a shell string, so the name is
    never interpreted by a shell, and it no longer repeats the quit request
    that has just timed out.

    Args:
        window_name: Window name or application name
        strict: Accepted for interface compatibility; not used here
    Returns:
        Dictionary with 'status' ('success' or 'error') and 'message'
    """
    try:
        # Escape backslash first, then the double quote.
        safe_name = window_name.replace('\\', '\\\\').replace('"', '\\"')
        script = f'''
        tell application "{safe_name}"
            quit
        end tell
        '''
        subprocess.run(['osascript', '-e', script], check=True, timeout=5)
        logger.info(f"macOS window/application closed successfully: {window_name}")
        return {'status': 'success', 'message': 'Window/Application closed'}
    except subprocess.TimeoutExpired:
        # The application did not quit in time: terminate it by name.
        try:
            subprocess.run(
                ['killall', window_name],
                check=True,
                capture_output=True,
                timeout=5
            )
            logger.info(f"macOS application force closed: {window_name}")
            return {'status': 'success', 'message': 'Application force closed'}
        except Exception as e2:
            logger.error(f"macOS force close failed: {e2}")
            return {'status': 'error', 'message': str(e2)}
    except Exception as e:
        logger.error(f"macOS close window failed: {e}")
        return {'status': 'error', 'message': str(e)}
def get_accessibility_tree(self, max_depth: int = 10) -> Dict[str, Any]:
    """
    Serialize the accessibility tree of the active application.

    Args:
        max_depth: Deepest level that is serialized
    Returns:
        On success a dictionary with 'app_name', 'bundle_id', 'tree' and
        'platform'; otherwise a dictionary containing 'error'
    """
    if not MACOS_LIBS_AVAILABLE:
        return {'error': 'macOS accessibility libraries not available'}
    try:
        active_app = AppKit.NSWorkspace.sharedWorkspace().activeApplication()
        if not active_app:
            return {'error': 'No active application'}

        app_name = active_app.get('NSApplicationName', 'Unknown')
        bundle_id = active_app.get('NSApplicationBundleIdentifier', '')
        logger.info(f"Getting accessibility tree: {app_name} ({bundle_id})")

        try:
            # The application reference is looked up by bundle id only.
            if not bundle_id:
                return {'error': 'Cannot find application without bundle ID'}
            app_ref = atomacos.getAppRefByBundleId(bundle_id)
            serialized = self._serialize_ax_element(app_ref, depth=0, max_depth=max_depth)
            return {
                'app_name': app_name,
                'bundle_id': bundle_id,
                'tree': serialized,
                'platform': 'macOS'
            }
        except Exception as e:
            logger.error(f"Cannot get app reference: {e}")
            return {
                'error': f'Cannot get app reference: {e}',
                'app_name': app_name,
                'bundle_id': bundle_id
            }
    except Exception as e:
        logger.error(f"macOS get accessibility tree failed: {e}")
        return {'error': str(e)}
def _serialize_ax_element(self, element, depth: int = 0, max_depth: int = 10) -> Optional[Dict[str, Any]]:
    """
    Recursively serialize a macOS accessibility element into a dictionary.

    Args:
        element: AX element
        depth: Depth of ``element`` in the tree (root is 0)
        max_depth: Elements deeper than this are not serialized
    Returns:
        Dictionary with 'depth', 'role', 'title', 'description', 'value',
        'enabled', 'focused', 'children' and, when readable, 'position' and
        'size'; None when ``depth`` exceeds ``max_depth`` or serialization fails
    """
    if depth > max_depth:
        return None
    try:
        result = {
            'depth': depth
        }

        # (result key, AX attribute, fallback value, optional converter)
        scalar_fields = (
            ('role', 'AXRole', 'unknown', None),
            ('title', 'AXTitle', '', None),
            ('description', 'AXDescription', '', None),
            ('value', 'AXValue', '', str),
            ('enabled', 'AXEnabled', False, None),
            ('focused', 'AXFocused', False, None),
        )
        for key, ax_attr, fallback, convert in scalar_fields:
            try:
                if hasattr(element, ax_attr):
                    raw = getattr(element, ax_attr)
                    result[key] = convert(raw) if convert else raw
                else:
                    result[key] = fallback
            except:
                result[key] = fallback

        # Position and size are only added when they can be read.
        try:
            if hasattr(element, 'AXPosition'):
                origin = element.AXPosition
                result['position'] = {'x': origin.x, 'y': origin.y}
        except:
            pass
        try:
            if hasattr(element, 'AXSize'):
                extent = element.AXSize
                result['size'] = {'width': extent.width, 'height': extent.height}
        except:
            pass

        # Children: only the first 30 are visited; unreadable ones are skipped.
        result['children'] = []
        try:
            if hasattr(element, 'AXChildren') and element.AXChildren:
                for index, child in enumerate(element.AXChildren[:30]):
                    try:
                        serialized_child = self._serialize_ax_element(child, depth + 1, max_depth)
                        if serialized_child:
                            result['children'].append(serialized_child)
                    except Exception as e:
                        logger.debug(f"Cannot serialize child element {index}: {e}")
                        continue
        except Exception as e:
            logger.debug(f"Cannot get child elements: {e}")
        return result
    except Exception as e:
        logger.debug(f"Failed to serialize element (depth={depth}): {e}")
        return None
def get_running_applications(self) -> List[Dict[str, str]]:
    """
    Get the running applications reported by NSWorkspace.

    Only applications whose activation policy is
    NSApplicationActivationPolicyRegular are listed.

    Returns:
        List of dictionaries with 'name', 'bundle_id', 'pid' and 'active';
        an empty list on any failure
    """
    try:
        running = AppKit.NSWorkspace.sharedWorkspace().runningApplications()
        return [
            {
                'name': app.localizedName() or 'Unknown',
                'bundle_id': app.bundleIdentifier() or '',
                'pid': app.processIdentifier(),
                'active': app.isActive()
            }
            for app in running
            if app.activationPolicy() == AppKit.NSApplicationActivationPolicyRegular
        ]
    except Exception as e:
        logger.error(f"Failed to get running applications list: {e}")
        return []
def set_wallpaper(self, image_path: str) -> Dict[str, Any]:
    """
    Set the desktop wallpaper on every desktop using AppleScript.

    The path is expanded and made absolute, then escaped before it is
    placed inside the AppleScript string literal, so a file name containing
    ``"`` or ``\\`` cannot break out of the literal.

    Args:
        image_path: Image path (``~`` is expanded)
    Returns:
        Dictionary with 'status' ('success' or 'error') and 'message'
    """
    try:
        image_path = os.path.abspath(os.path.expanduser(image_path))
        if not os.path.exists(image_path):
            return {'status': 'error', 'message': f'Image not found: {image_path}'}

        # Escape backslash first, then the double quote.
        safe_path = image_path.replace('\\', '\\\\').replace('"', '\\"')
        script = f'''
        tell application "System Events"
            tell every desktop
                set picture to "{safe_path}"
            end tell
        end tell
        '''
        subprocess.run(['osascript', '-e', script], check=True, timeout=10)
        logger.info(f"macOS wallpaper set successfully: {image_path}")
        return {'status': 'success', 'message': 'Wallpaper set successfully'}
    except Exception as e:
        logger.error(f"macOS set wallpaper failed: {e}")
        return {'status': 'error', 'message': str(e)}
def get_system_info(self) -> Dict[str, Any]:
    """
    Collect basic macOS system information.

    The version is the output of ``sw_vers -productVersion`` and the model
    is the output of ``sysctl -n hw.model``.

    Returns:
        Dictionary with 'platform', 'version', 'model' and 'available', or
        'platform' and 'error' when collection fails
    """
    def command_output(argv):
        # Run a command and return its stdout without surrounding whitespace.
        return subprocess.run(argv, capture_output=True, text=True).stdout.strip()

    try:
        version = command_output(['sw_vers', '-productVersion'])
        model = command_output(['sysctl', '-n', 'hw.model'])
        return {
            'platform': 'macOS',
            'version': version,
            'model': model,
            'available': self.available
        }
    except Exception as e:
        logger.error(f"Failed to get system information: {e}")
        return {
            'platform': 'macOS',
            'error': str(e)
        }
def _detect_screen_device(self) -> str:
    """
    Find the avfoundation screen-capture device and return it as '<id>:none'.

    Runs ``ffmpeg -f avfoundation -list_devices true -i ""`` and scans its
    stderr for lines such as
    "[AVFoundation indev @ 0x...] [1] Capture screen 0". The id of the
    first such device is used.

    Returns:
        '<id>:none' for the first screen device found, or '1:none' when no
        device is found or the probe fails
    """
    try:
        import re

        probe = subprocess.run(
            ['ffmpeg', '-f', 'avfoundation', '-list_devices', 'true', '-i', ''],
            stderr=subprocess.PIPE, text=True, timeout=5
        )
        # Matches "] [number] Capture screen" and captures the number.
        device_pattern = re.compile(r'\]\s*\[(\d+)\]\s*Capture screen')
        screen_devices = []
        for line in probe.stderr.splitlines():
            if 'Capture screen' not in line or '[AVFoundation' not in line:
                continue
            match = device_pattern.search(line)
            if not match:
                continue
            device_id = match.group(1)
            screen_devices.append(device_id)
            logger.info(f"Found screen capture device: {device_id} - {line.strip()}")

        if not screen_devices:
            logger.warning("No screen capture device found, using default '1:none'")
            return '1:none'  # Usually screen capture is device 1

        device = f'{screen_devices[0]}:none'
        logger.info(f"Using screen capture device: {device}")
        return device
    except Exception as e:
        logger.warning(f"Failed to detect screen device: {e}, using default '1:none'")
        return '1:none'
def start_recording(self, output_path: str) -> Dict[str, Any]:
    """
    Start recording the screen with ffmpeg (avfoundation) in the background.

    ffmpeg's stderr is piped so that a start-up failure can be reported, but
    nothing reads that pipe while the recording runs. ffmpeg is therefore
    started with ``-nostats -loglevel error`` so it does not keep writing
    progress output into a pipe that is never drained. stdin is piped
    because stop_recording writes 'q' to it.

    Args:
        output_path: Path of the video file to write (overwritten if present)
    Returns:
        On success: 'status', 'message' and 'process' (the Popen object to
        pass to stop_recording). On failure: 'status' and 'message'.
    """
    try:
        # Check that ffmpeg exists and whether the libx264 encoder is available
        try:
            result = subprocess.run(
                ['ffmpeg', '-encoders'],
                capture_output=True,
                text=True,
                timeout=5
            )
        except FileNotFoundError:
            return {
                'status': 'error',
                'message': 'ffmpeg not installed. Install with: brew install ffmpeg'
            }
        has_libx264 = 'libx264' in result.stdout

        # The resolution is only logged; it is not passed to ffmpeg.
        try:
            if MACOS_LIBS_AVAILABLE:
                from AppKit import NSScreen
                frame = NSScreen.mainScreen().frame()
                width = int(frame.size.width)
                height = int(frame.size.height)
                logger.info(f"Screen resolution: {width}x{height}")
            else:
                width, height = 1920, 1080
                logger.info(f"Using default resolution: {width}x{height}")
        except Exception:
            width, height = 1920, 1080
            logger.info(f"Using default resolution: {width}x{height}")

        # Detect screen capture device
        screen_dev = self._detect_screen_device()
        logger.info(f"Screen capture device: {screen_dev}")

        # Build ffmpeg command
        command = [
            'ffmpeg', '-y',
            '-nostats',
            '-loglevel', 'error',
            '-f', 'avfoundation',
            '-capture_cursor', '1',
            '-capture_mouse_clicks', '1',
            '-framerate', '30',
            '-i', screen_dev,  # Use detected screen device
        ]
        if has_libx264:
            command.extend(['-c:v', 'libx264', '-pix_fmt', 'yuv420p'])
            logger.info("Using libx264 encoder")
        else:
            command.extend(['-c:v', 'mpeg4'])
            logger.info("Using mpeg4 encoder")
        command.extend(['-r', '30', output_path])
        logger.info(f"Starting recording with command: {' '.join(command)}")

        process = subprocess.Popen(
            command,
            stdin=subprocess.PIPE,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,
            text=True
        )

        import time
        time.sleep(1.5)  # Give ffmpeg time to start (or fail)

        # Check if process exited early
        if process.poll() is not None:
            err = process.stderr.read() if process.stderr else ""
            logger.error(f"FFmpeg exited early with stderr: {err}")
            if "Operation not permitted" in err or "Screen Recording" in err:
                return {
                    "status": "error",
                    "message": "Screen-recording permission denied. Please grant permission in System Settings → Privacy & Security → Screen Recording."
                }
            # Check if it's a device error
            if "Input/output error" in err or "Invalid argument" in err or "does not exist" in err:
                return {
                    "status": "error",
                    "message": f"Invalid screen capture device. Please ensure screen recording is enabled. Error: {err[:200]}"
                }
            error_output = err or "Unknown error"
            return {
                'status': 'error',
                'message': f'Failed to start recording: {error_output[:300]}'
            }

        logger.info(f"macOS recording started successfully: {output_path}")
        return {
            'status': 'success',
            'message': 'Recording started',
            'process': process
        }
    except Exception as e:
        logger.error(f"macOS start recording failed: {e}")
        return {
            'status': 'error',
            'message': str(e)
        }
def stop_recording(self, process) -> Dict[str, Any]:
    """
    Stop a recording started by start_recording.

    First writes 'q' to ffmpeg's stdin and waits up to 5 seconds. If that
    times out or fails, SIGINT is sent and the process gets up to 20 more
    seconds before it is killed.

    Args:
        process: The Popen object returned by start_recording
    Returns:
        Dictionary with 'status' ('success' or 'error') and 'message'
    """
    try:
        import signal
        import time

        if not process or process.poll() is not None:
            return {
                'status': 'error',
                'message': 'No recording in progress'
            }

        def interrupt_then_kill(responded_message, kill_message):
            # SIGINT first; kill the process if it is still alive after 20 s.
            process.send_signal(signal.SIGINT)
            try:
                process.wait(timeout=20)
                if responded_message is not None:
                    logger.info(responded_message)
            except subprocess.TimeoutExpired:
                logger.warning(kill_message)
                process.kill()
                process.wait()

        try:
            process.stdin.write('q')
            process.stdin.flush()
            logger.info("Sent 'q' command to ffmpeg")
            process.wait(timeout=5)
            logger.info("ffmpeg exited gracefully")
            time.sleep(0.2)  # give ffmpeg time to flush the file
        except subprocess.TimeoutExpired:
            logger.warning("ffmpeg did not respond to 'q', trying SIGINT")
            interrupt_then_kill(
                "ffmpeg responded to SIGINT",
                "ffmpeg did not respond to SIGINT, killing process"
            )
        except Exception as e:
            logger.warning(f"Failed to send 'q': {e}, trying SIGINT")
            interrupt_then_kill(None, "Killing ffmpeg")

        time.sleep(0.5)
        logger.info("macOS recording stopped successfully")
        return {
            'status': 'success',
            'message': 'Recording stopped'
        }
    except Exception as e:
        logger.error(f"macOS stop recording failed: {e}")
        return {
            'status': 'error',
            'message': str(e)
        }
def list_windows(self) -> List[Dict[str, Any]]:
    """
    List the windows of all visible processes using AppleScript.

    The script emits one line per window in the form
    ``<process name><TAB><window title>``. Returning an AppleScript list
    and splitting the printed result on commas would misalign every
    following entry as soon as a title contains a comma, so an explicit
    tab/linefeed format is used instead.

    Returns:
        List of dictionaries with 'app_name' and 'window_title'; an empty
        list when the script fails
    """
    try:
        script = '''
        tell application "System Events"
            set outputText to ""
            repeat with theProcess in (every process whose visible is true)
                try
                    set processName to name of theProcess
                    tell theProcess
                        repeat with theWindow in windows
                            try
                                set windowTitle to (name of theWindow) as text
                                set outputText to outputText & processName & tab & windowTitle & linefeed
                            end try
                        end repeat
                    end tell
                end try
            end repeat
            return outputText
        end tell
        '''
        result = subprocess.run(
            ['osascript', '-e', script],
            capture_output=True,
            text=True,
            timeout=10
        )
        windows = []
        if result.returncode == 0 and result.stdout:
            for line in result.stdout.splitlines():
                # Split on the first tab only; the title may contain tabs.
                app_name, separator, window_title = line.partition('\t')
                if not separator:
                    continue
                windows.append({
                    'app_name': app_name.strip(),
                    'window_title': window_title.strip()
                })
        return windows
    except Exception as e:
        logger.error(f"List windows failed: {e}")
        return []
def get_terminal_output(self) -> Optional[str]:
    """
    Read the terminal contents from Terminal.app, then from iTerm.

    Each application is queried with an AppleScript run through osascript;
    the first non-empty result is returned.

    Returns:
        The terminal text with surrounding whitespace removed, or None when
        neither application returns any text or an error occurs
    """
    terminal_script = '''
tell application "Terminal"
if (count of windows) > 0 then
try
set currentTab to selected tab of front window
set terminalOutput to contents of currentTab
return terminalOutput
on error
return ""
end try
else
return ""
end if
end tell
'''
    iterm_script = '''
tell application "iTerm"
if (count of windows) > 0 then
try
tell current session of current window
set terminalOutput to contents
return terminalOutput
end tell
on error
return ""
end try
else
return ""
end if
end tell
'''
    try:
        # Terminal.app is asked first; iTerm only when that yields nothing.
        for script in (terminal_script, iterm_script):
            result = subprocess.run(
                ['osascript', '-e', script],
                capture_output=True,
                text=True,
                timeout=5
            )
            if result.returncode == 0 and result.stdout:
                output = result.stdout.strip()
                if output:
                    return output
        return None
    except Exception as e:
        logger.error(f"Failed to get terminal output: {e}")
        return None
================================================
FILE: anytool/local_server/platform_adapters/pyxcursor.py
================================================
import os
import ctypes
import ctypes.util
import numpy as np
# A helper function to convert data from Xlib to byte array.
import struct, array
# ctypes types used by the XFixesCursorImage structure below.
PIXEL_DATA_PTR = ctypes.POINTER(ctypes.c_ulong)
Atom = ctypes.c_ulong


class XFixesCursorImage(ctypes.Structure):
    """
    ctypes layout of the XFixesCursorImage C structure.

    See /usr/include/X11/extensions/Xfixes.h:

        typedef struct {
            short x, y;
            unsigned short width, height;
            unsigned short xhot, yhot;
            unsigned long cursor_serial;
            unsigned long *pixels;
        #if XFIXES_MAJOR >= 2
            Atom atom;          /* Version >= 2 only */
            const char *name;   /* Version >= 2 only */
        #endif
        } XFixesCursorImage;
    """
    _fields_ = [
        ('x', ctypes.c_short),
        ('y', ctypes.c_short),
        ('width', ctypes.c_ushort),
        ('height', ctypes.c_ushort),
        ('xhot', ctypes.c_ushort),
        ('yhot', ctypes.c_ushort),
        ('cursor_serial', ctypes.c_ulong),
        ('pixels', PIXEL_DATA_PTR),
        ('atom', Atom),
        ('name', ctypes.c_char_p),
    ]
class Display(ctypes.Structure):
    # Declared without fields; only used through ctypes.POINTER(Display).
    pass
class Xcursor:
    """Reads the current X cursor image through libXfixes using ctypes."""

    display = None

    def __init__(self, display=None):
        """
        Load libXfixes and libX11 and open the X display.

        Args:
            display: Display name as bytes or str; when empty or None the
                DISPLAY environment variable is used
        Raises:
            Exception: when DISPLAY is needed but not set, when libXfixes or
                libX11 cannot be found, or when the display cannot be opened
        """
        if not display:
            try:
                display = os.environ["DISPLAY"].encode("utf-8")
            except KeyError:
                raise Exception("$DISPLAY not set.")
        elif isinstance(display, str):
            # XOpenDisplay is declared with a c_char_p argument, which
            # requires bytes.
            display = display.encode("utf-8")

        # XFixeslib = ctypes.CDLL('libXfixes.so')
        XFixes = ctypes.util.find_library("Xfixes")
        if not XFixes:
            raise Exception("No XFixes library found.")
        self.XFixeslib = ctypes.cdll.LoadLibrary(XFixes)

        # xlib = ctypes.CDLL('libX11.so.6')
        x11 = ctypes.util.find_library("X11")
        if not x11:
            raise Exception("No X11 library found.")
        self.xlib = ctypes.cdll.LoadLibrary(x11)

        # Define ctypes' version of XFixesGetCursorImage function
        XFixesGetCursorImage = self.XFixeslib.XFixesGetCursorImage
        XFixesGetCursorImage.restype = ctypes.POINTER(XFixesCursorImage)
        XFixesGetCursorImage.argtypes = [ctypes.POINTER(Display)]
        self.XFixesGetCursorImage = XFixesGetCursorImage

        XOpenDisplay = self.xlib.XOpenDisplay
        XOpenDisplay.restype = ctypes.POINTER(Display)
        XOpenDisplay.argtypes = [ctypes.c_char_p]

        if not self.display:
            self.display = self.xlib.XOpenDisplay(display)  # (display) or (None)
            # A NULL pointer means the display could not be opened; fail here
            # instead of handing NULL to XFixesGetCursorImage later.
            if not self.display:
                raise Exception(f"Cannot open X display {display!r}.")
def argbdata_to_pixdata(self, data, len):
if data == None or len < 1: return None
# Create byte array
b = array.array('b', b'\x00' * 4 * len)
offset, i = 0, 0
while i < len:
argb = data[i] & 0xffffffff
rgba = (argb << 8) | (argb >> 24)
b1 = (rgba >> 24) & 0xff
b2 = (rgba >> 16) & 0xff
b3 = (rgba >> 8) & 0xff
b4 = rgba & 0xff
struct.pack_into("=BBBB", b, offset, b1, b2, b3, b4)
offset = offset + 4
i = i + 1
return b
def getCursorImageData(self):
# Call the function. Read data of cursor/mouse-pointer.
cursor_data = self.XFixesGetCursorImage(self.display)
if not (cursor_data and cursor_data[0]):
raise Exception("Cannot read XFixesGetCursorImage()")
# Note: cursor_data is a pointer, take cursor_data[0]
return cursor_data[0]
def getCursorImageArray(self):
data = self.getCursorImageData()
# x, y = data.x, data.y
height, width = data.height, data.width
bytearr = self.argbdata_to_pixdata(data.pixels, height * width)
imgarray = np.array(bytearr, dtype=np.uint8)
imgarray = imgarray.reshape(height, width, 4)
del bytearr
return imgarray
def getCursorImageArrayFast(self):
data = self.getCursorImageData()
# x, y = data.x, data.y
height, width = data.height, data.width
bytearr = ctypes.cast(data.pixels, ctypes.POINTER(ctypes.c_ulong * height * width))[0]
imgarray = np.array(bytearray(bytearr))
imgarray = imgarray.reshape(height, width, 8)[:, :, (0, 1, 2, 3)]
del bytearr
return imgarray
def saveImage(self, imgarray, text):
from PIL import Image
img = Image.fromarray(imgarray)
img.save(text)
if __name__ == "__main__":
    # Manual smoke test: dump the current pointer image to a PNG file.
    xcursor = Xcursor()
    xcursor.saveImage(xcursor.getCursorImageArrayFast(), 'cursor_image.png')
================================================
FILE: anytool/local_server/platform_adapters/windows_adapter.py
================================================
import os
import ctypes
import subprocess
from typing import Dict, Any, Optional, List
from anytool.utils.logging import Logger
from PIL import Image, ImageGrab
try:
from pywinauto import Desktop
import win32ui
import win32gui
import win32con
import pygetwindow as gw
WINDOWS_LIBS_AVAILABLE = True
except ImportError:
WINDOWS_LIBS_AVAILABLE = False
logger = Logger.get_logger(__name__)
class WindowsAdapter:
    """
    Windows platform-specific functionality adapter.

    Covers screenshots, window management, UI Automation tree export,
    wallpaper, ffmpeg screen recording and a few system queries. Window,
    cursor and accessibility features need the optional pywinauto / pywin32 /
    pygetwindow imports; ``self.available`` records whether they succeeded.
    """

    def __init__(self):
        if not WINDOWS_LIBS_AVAILABLE:
            logger.warning("Windows libraries are not fully installed, some features may not be available")
        self.available = WINDOWS_LIBS_AVAILABLE

    def capture_screenshot_with_cursor(self, output_path: str) -> bool:
        """
        Capture screenshot using ImageGrab (including cursor)

        The cursor overlay is best effort: if it cannot be drawn the plain
        screenshot is still saved.

        Args:
            output_path: Output file path
        Returns:
            Whether successful
        """
        try:
            img = ImageGrab.grab(bbox=None, include_layered_windows=True)
            cursor_drawn = False
            try:
                if WINDOWS_LIBS_AVAILABLE:
                    cursor, hotspot = self._get_cursor()
                    if cursor:
                        # Scale the reported cursor position by the display
                        # scale factor before pasting into the screenshot.
                        ratio = ctypes.windll.shcore.GetScaleFactorForDevice(0) / 100
                        pos_win = win32gui.GetCursorPos()
                        pos = (
                            round(pos_win[0] * ratio - hotspot[0]),
                            round(pos_win[1] * ratio - hotspot[1])
                        )
                        img.paste(cursor, pos, cursor)
                        cursor_drawn = True
            except Exception as e:
                logger.warning(f"Cannot add cursor to screenshot: {e}")

            img.save(output_path)
            if cursor_drawn:
                logger.info("Windows screenshot successfully (with cursor)")
            else:
                logger.info("Windows screenshot successfully (without cursor)")
            return True
        except Exception as e:
            logger.error(f"Windows screenshot failed: {e}")
            return False

    def _get_cursor(self) -> tuple:
        """
        Get current cursor image and hotspot

        Returns:
            (cursor_image, (hotspot_x, hotspot_y)), or (None, (0, 0)) on failure
        """
        screen_dc = None
        try:
            hcursor = win32gui.GetCursorInfo()[1]
            # Read the hotspot while the handle is known to be valid. The
            # handle comes from GetCursorInfo and is not owned by this code,
            # so it is deliberately not passed to DestroyIcon.
            hotspot = win32gui.GetIconInfo(hcursor)[1:3]

            screen_dc = win32gui.GetDC(0)
            hdc = win32ui.CreateDCFromHandle(screen_dc)
            hbmp = win32ui.CreateBitmap()
            hbmp.CreateCompatibleBitmap(hdc, 36, 36)
            hdc_compatible = hdc.CreateCompatibleDC()
            hdc_compatible.SelectObject(hbmp)
            hdc_compatible.DrawIcon((0, 0), hcursor)
            bmpinfo = hbmp.GetInfo()
            bmpstr = hbmp.GetBitmapBits(True)
            cursor = Image.frombuffer(
                'RGB',
                (bmpinfo['bmWidth'], bmpinfo['bmHeight']),
                bmpstr, 'raw', 'BGRX', 0, 1
            ).convert("RGBA")
            # Drop the memory DC before deleting the bitmap selected into it.
            hdc_compatible.DeleteDC()
            win32gui.DeleteObject(hbmp.GetHandle())

            # Make black pixels transparent
            pixdata = cursor.load()
            width, height = cursor.size
            for y in range(height):
                for x in range(width):
                    if pixdata[x, y] == (0, 0, 0, 255):
                        pixdata[x, y] = (0, 0, 0, 0)
            return (cursor, hotspot)
        except Exception as e:
            logger.debug(f"Failed to get cursor image: {e}")
            return (None, (0, 0))
        finally:
            # The screen DC obtained with GetDC must be handed back.
            if screen_dc:
                try:
                    win32gui.ReleaseDC(0, screen_dc)
                except Exception as e:
                    logger.debug(f"Failed to release screen DC: {e}")

    def _find_window(self, window_name: str, strict: bool):
        """
        Look up a window through pygetwindow.

        Returns:
            (window, None) on success, or (None, error_dict) when no window
            matches; in strict mode the title must equal ``window_name``.
        """
        windows = gw.getWindowsWithTitle(window_name)
        if not windows:
            logger.warning(f"Window not found: {window_name}")
            return None, {'status': 'error', 'message': f'Window {window_name} not found'}
        if not strict:
            return windows[0], None
        for wnd in windows:
            if wnd.title == window_name:
                return wnd, None
        return None, {'status': 'error', 'message': f'Window {window_name} not found (strict mode)'}

    def activate_window(self, window_name: str, strict: bool = False) -> Dict[str, Any]:
        """
        Activate window (Windows uses pygetwindow)

        Args:
            window_name: Window title
            strict: Whether to strictly match
        Returns:
            Result dictionary
        """
        if not WINDOWS_LIBS_AVAILABLE:
            return {'status': 'error', 'message': 'Windows libraries not available'}
        try:
            window, error = self._find_window(window_name, strict)
            if error:
                return error
            window.activate()
            logger.info(f"Windows window activated successfully: {window_name}")
            return {'status': 'success', 'message': 'Window activated'}
        except Exception as e:
            logger.error(f"Windows window activation failed: {e}")
            return {'status': 'error', 'message': str(e)}

    def close_window(self, window_name: str, strict: bool = False) -> Dict[str, Any]:
        """
        Close window (Windows uses pygetwindow)

        Args:
            window_name: Window title
            strict: Whether to strictly match
        Returns:
            Result dictionary
        """
        if not WINDOWS_LIBS_AVAILABLE:
            return {'status': 'error', 'message': 'Windows libraries not available'}
        try:
            window, error = self._find_window(window_name, strict)
            if error:
                return error
            window.close()
            logger.info(f"Windows window closed successfully: {window_name}")
            return {'status': 'success', 'message': 'Window closed'}
        except Exception as e:
            logger.error(f"Windows window close failed: {e}")
            return {'status': 'error', 'message': str(e)}

    def get_accessibility_tree(self, max_depth: int = 10, max_width: int = 50) -> Dict[str, Any]:
        """
        Get Windows accessibility tree (using pywinauto)

        The root node stands for the desktop; its children are the top-level
        windows returned by ``Desktop.windows()``.

        Args:
            max_depth: Maximum depth
            max_width: Maximum number of child elements per level
        Returns:
            Accessibility tree data
        """
        if not WINDOWS_LIBS_AVAILABLE:
            return {'error': 'Windows accessibility libraries not available'}
        try:
            desktop = Desktop(backend="uia")
            # Desktop is a lookup helper rather than a UI element, so the
            # serializer is run on its top-level windows instead of on it.
            visited = set()
            children = []
            for index, window in enumerate(desktop.windows()[:max_width]):
                try:
                    node = self._serialize_uia_element(
                        window,
                        depth=1,
                        max_depth=max_depth,
                        max_width=max_width,
                        visited=visited
                    )
                except Exception as e:
                    logger.debug(f"Cannot serialize top-level window {index}: {e}")
                    continue
                if node:
                    children.append(node)
            tree = {
                'depth': 0,
                'class_name': 'Desktop',
                'name': 'Desktop',
                'children': children
            }
            return {
                'tree': tree,
                'platform': 'Windows'
            }
        except Exception as e:
            logger.error(f"Windows get accessibility tree failed: {e}")
            return {'error': str(e)}

    def _serialize_uia_element(
        self,
        element,
        depth: int = 0,
        max_depth: int = 10,
        max_width: int = 50,
        visited: Optional[set] = None
    ) -> Optional[Dict[str, Any]]:
        """
        Serialize Windows UIA element to dictionary

        Args:
            element: UIA element
            depth: Current depth
            max_depth: Maximum depth
            max_width: Maximum width
            visited: Set of visited elements
        Returns:
            Serialized dictionary, or None when the element is too deep,
            was already visited, or could not be serialized
        """
        if visited is None:
            visited = set()
        if depth > max_depth or element in visited:
            return None
        visited.add(element)
        try:
            result = {
                'depth': depth
            }
            # Basic attributes fall back to placeholders when unavailable.
            try:
                result['class_name'] = element.class_name()
            except Exception:
                result['class_name'] = 'unknown'
            try:
                result['name'] = element.window_text()
            except Exception:
                result['name'] = ''

            # States: record only the queries the element supports.
            states = {}
            state_methods = [
                'is_enabled', 'is_visible', 'is_minimized', 'is_maximized',
                'is_focused', 'is_checked', 'is_selected'
            ]
            for method_name in state_methods:
                if hasattr(element, method_name):
                    try:
                        states[method_name] = getattr(element, method_name)()
                    except Exception:
                        pass
            if states:
                result['states'] = states

            # Position and size are optional.
            try:
                rectangle = element.rectangle()
                result['position'] = {
                    'left': rectangle.left,
                    'top': rectangle.top
                }
                result['size'] = {
                    'width': rectangle.width(),
                    'height': rectangle.height()
                }
            except Exception:
                pass

            # Recursively get child elements
            result['children'] = []
            try:
                children = element.children()
                for i, child in enumerate(children[:max_width]):
                    try:
                        child_data = self._serialize_uia_element(
                            child,
                            depth + 1,
                            max_depth,
                            max_width,
                            visited
                        )
                        if child_data:
                            result['children'].append(child_data)
                    except Exception as e:
                        logger.debug(f"Cannot serialize child element {i}: {e}")
                        continue
            except Exception as e:
                logger.debug(f"Cannot get child elements: {e}")
            return result
        except Exception as e:
            logger.debug(f"Failed to serialize element (depth={depth}): {e}")
            return None

    def list_windows(self) -> List[Dict[str, Any]]:
        """
        List all windows

        Returns:
            Window list (windows without a title are omitted)
        """
        if not WINDOWS_LIBS_AVAILABLE:
            return []
        try:
            windows = gw.getAllWindows()
            return [
                {
                    'title': win.title,
                    'left': win.left,
                    'top': win.top,
                    'width': win.width,
                    'height': win.height,
                    'visible': win.visible,
                    'active': win.isActive
                }
                for win in windows
                if win.title  # Only return windows with titles
            ]
        except Exception as e:
            logger.error(f"List windows failed: {e}")
            return []

    def set_wallpaper(self, image_path: str) -> Dict[str, Any]:
        """
        Set desktop wallpaper

        Args:
            image_path: Image path
        Returns:
            Result dictionary
        """
        try:
            image_path = os.path.expanduser(image_path)
            image_path = os.path.abspath(image_path)
            if not os.path.exists(image_path):
                return {'status': 'error', 'message': f'Image not found: {image_path}'}
            # Use Windows API to set wallpaper
            SPI_SETDESKWALLPAPER = 20
            succeeded = ctypes.windll.user32.SystemParametersInfoW(
                SPI_SETDESKWALLPAPER,
                0,
                image_path,
                3  # SPIF_UPDATEINIFILE | SPIF_SENDCHANGE
            )
            # SystemParametersInfoW returns zero on failure; do not report
            # success in that case.
            if not succeeded:
                logger.error(f"Windows set wallpaper failed: SystemParametersInfoW returned 0 for {image_path}")
                return {'status': 'error', 'message': 'SystemParametersInfoW failed to set wallpaper'}
            logger.info(f"Windows wallpaper set successfully: {image_path}")
            return {'status': 'success', 'message': 'Wallpaper set successfully'}
        except Exception as e:
            logger.error(f"Windows set wallpaper failed: {e}")
            return {'status': 'error', 'message': str(e)}

    def get_system_info(self) -> Dict[str, Any]:
        """
        Get Windows system information

        Returns:
            System information dictionary
        """
        try:
            import platform as plat
            return {
                'platform': 'Windows',
                'version': plat.version(),
                'release': plat.release(),
                'edition': plat.win32_edition() if hasattr(plat, 'win32_edition') else 'Unknown',
                'available': self.available
            }
        except Exception as e:
            logger.error(f"Failed to get system information: {e}")
            return {
                'platform': 'Windows',
                'error': str(e)
            }

    def start_recording(self, output_path: str) -> Dict[str, Any]:
        """
        Start an ffmpeg (gdigrab) recording of the desktop.

        Args:
            output_path: Video file that ffmpeg writes to
        Returns:
            Result dictionary; on success it also carries the running
            ``subprocess.Popen`` object under 'process', to be passed to
            stop_recording().
        """
        try:
            try:
                subprocess.run(['ffmpeg', '-version'],
                               capture_output=True,
                               check=True,
                               timeout=5,
                               creationflags=subprocess.CREATE_NO_WINDOW)
            except (subprocess.CalledProcessError, FileNotFoundError):
                return {
                    'status': 'error',
                    'message': 'ffmpeg not installed. Download from: https://ffmpeg.org/download.html'
                }
            try:
                user32 = ctypes.windll.user32
                width = user32.GetSystemMetrics(0)  # SM_CXSCREEN
                height = user32.GetSystemMetrics(1)  # SM_CYSCREEN
            except Exception:
                width, height = 1920, 1080
            command = [
                'ffmpeg',
                '-y',
                # stderr is a pipe that is only read if startup fails, so
                # ffmpeg must not stream progress/statistics into it or the
                # pipe can fill up and block the recording.
                '-nostats',
                '-loglevel', 'error',
                '-f', 'gdigrab',
                '-draw_mouse', '1',
                '-framerate', '30',
                '-video_size', f'{width}x{height}',
                '-i', 'desktop',
                '-c:v', 'libx264',
                '-preset', 'ultrafast',
                '-pix_fmt', 'yuv420p',
                '-r', '30',
                output_path
            ]
            process = subprocess.Popen(
                command,
                # stdin pipe lets stop_recording() send ffmpeg's "q" command.
                stdin=subprocess.PIPE,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.PIPE,
                text=True,
                creationflags=subprocess.CREATE_NO_WINDOW
            )
            import time
            # Give ffmpeg a moment to fail fast (bad device, bad path, ...).
            time.sleep(1)
            if process.poll() is not None:
                error_output = process.stderr.read() if process.stderr else "Unknown error"
                return {
                    'status': 'error',
                    'message': f'Failed to start recording: {error_output}'
                }
            logger.info(f"Windows recording started: {output_path}")
            return {
                'status': 'success',
                'message': 'Recording started',
                'process': process
            }
        except Exception as e:
            logger.error(f"Windows start recording failed: {e}")
            return {
                'status': 'error',
                'message': str(e)
            }

    def stop_recording(self, process) -> Dict[str, Any]:
        """
        Stop a recording started by start_recording().

        Order of attempts: ask ffmpeg to quit by writing "q" to its stdin
        (so it can finalise the file), otherwise send CTRL_C_EVENT, otherwise
        terminate; the process is killed if it has not exited after 15 s.

        Args:
            process: The process object returned by start_recording()
        Returns:
            Result dictionary
        """
        try:
            if not process or process.poll() is not None:
                return {
                    'status': 'error',
                    'message': 'No recording in progress'
                }

            quit_requested = False
            stdin = getattr(process, 'stdin', None)
            if stdin is not None and not stdin.closed:
                try:
                    import io
                    stdin.write('q' if isinstance(stdin, io.TextIOBase) else b'q')
                    stdin.flush()
                    quit_requested = True
                except Exception as e:
                    logger.debug(f"Cannot send quit command to ffmpeg: {e}")

            if not quit_requested:
                import signal
                try:
                    process.send_signal(signal.CTRL_C_EVENT)
                except Exception:
                    process.terminate()

            try:
                process.wait(timeout=15)
            except subprocess.TimeoutExpired:
                logger.warning("ffmpeg did not respond, killing process")
                process.kill()
                process.wait()
            logger.info("Windows recording stopped successfully")
            return {
                'status': 'success',
                'message': 'Recording stopped'
            }
        except Exception as e:
            logger.error(f"Windows stop recording failed: {e}")
            return {
                'status': 'error',
                'message': str(e)
            }

    def get_running_applications(self) -> List[Dict[str, str]]:
        """
        Get list of all running applications

        One entry is returned per distinct process name; processes without
        an executable path and a few system processes are skipped.

        Returns:
            Application list
        """
        if not WINDOWS_LIBS_AVAILABLE:
            return []
        try:
            import psutil
            apps = []
            seen_names = set()
            for proc in psutil.process_iter(['pid', 'name', 'exe']):
                try:
                    pinfo = proc.info
                    name = pinfo['name']
                    exe = pinfo['exe']
                    # Skip system processes
                    if not exe or name in ['System', 'Registry', 'svchost.exe', 'csrss.exe']:
                        continue
                    # Skip duplicates
                    if name in seen_names:
                        continue
                    seen_names.add(name)
                    apps.append({
                        'name': name,
                        'pid': pinfo['pid'],
                        'path': exe or ''
                    })
                except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
                    pass
            return apps
        except ImportError:
            logger.warning("psutil not installed, cannot get running applications")
            return []
        except Exception as e:
            logger.error(f"Failed to get running applications list: {e}")
            return []

    def get_screen_size(self) -> Dict[str, int]:
        """
        Get screen size

        Returns:
            Screen size dictionary (1920x1080 if the query fails)
        """
        try:
            user32 = ctypes.windll.user32
            width = user32.GetSystemMetrics(0)  # SM_CXSCREEN
            height = user32.GetSystemMetrics(1)  # SM_CYSCREEN
            return {'width': width, 'height': height}
        except Exception as e:
            logger.error(f"Failed to get screen size: {e}")
            return {'width': 1920, 'height': 1080}  # Default value

    def get_terminal_output(self) -> Optional[str]:
        """
        Get terminal output (Windows Command Prompt, PowerShell, or Windows Terminal)

        Only command history is available here: first the PSReadLine history
        file, then ``doskey /history``.

        Returns:
            Terminal output content (limited functionality on Windows)
        """
        try:
            # Try to get PowerShell history
            try:
                history_path = os.path.expanduser(
                    '~\\AppData\\Roaming\\Microsoft\\Windows\\PowerShell\\PSReadLine\\ConsoleHost_history.txt'
                )
                if os.path.exists(history_path):
                    with open(history_path, 'r', encoding='utf-8', errors='ignore') as f:
                        # Get last 50 lines
                        lines = f.readlines()
                    recent_history = ''.join(lines[-50:])
                    if recent_history:
                        return f"PowerShell History (last 50 commands):\n{recent_history}"
            except Exception as e:
                logger.debug(f"Cannot read PowerShell history: {e}")

            # Try to get Command Prompt history using doskey
            try:
                result = subprocess.run(
                    ['doskey', '/history'],
                    capture_output=True,
                    text=True,
                    timeout=2,
                    creationflags=subprocess.CREATE_NO_WINDOW
                )
                if result.returncode == 0 and result.stdout:
                    return f"Command Prompt History:\n{result.stdout}"
            except Exception as e:
                logger.debug(f"Cannot get Command Prompt history: {e}")

            logger.warning("Windows terminal output is limited - only command history available")
            return None
        except Exception as e:
            logger.error(f"Failed to get terminal output: {e}")
            return None
================================================
FILE: anytool/local_server/requirements.txt
================================================
# Local server dependencies (cross-platform)
flask>=3.1.0
pyautogui>=0.9.54
pydantic>=2.12.0
requests>=2.32.0
# # macOS-specific dependencies (local server)
# pyobjc-core>=12.0; sys_platform == 'darwin'
# pyobjc-framework-cocoa>=12.0; sys_platform == 'darwin'
# pyobjc-framework-quartz>=12.0; sys_platform == 'darwin'
# atomacos>=3.2.0; sys_platform == 'darwin'
# # Linux-specific dependencies (local server)
# python-xlib>=0.33; sys_platform == 'linux'
# pyatspi>=2.38.0; sys_platform == 'linux'
# numpy>=1.24.0; sys_platform == 'linux'
# # Windows-specific dependencies (local server)
# pywinauto>=0.6.8; sys_platform == 'win32'
# pywin32>=306; sys_platform == 'win32'
# PyGetWindow>=0.0.9; sys_platform == 'win32'
================================================
FILE: anytool/local_server/run.sh
================================================
#!/bin/bash
# Start the AnyTool local server from the repository root. Dependencies are
# installed first when Flask cannot be imported. Any arguments given to this
# script are forwarded to the server module.

SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
PROJECT_ROOT="$( cd "$SCRIPT_DIR/../.." && pwd )"

# Check Python
if ! command -v python3 &> /dev/null; then
    echo "Error: python3 not installed" >&2
    exit 1
fi

# Check if dependencies are installed (Flask is used as the probe)
if ! python3 -c "import flask" &> /dev/null; then
    echo "Installing dependencies..."
    # Install with the interpreter that will run the server; a bare pip3 may
    # belong to a different Python installation.
    python3 -m pip install -q -r "$SCRIPT_DIR/requirements.txt" || {
        echo "Failed to install dependencies" >&2
        exit 1
    }
fi

# Set PYTHONPATH and start server. The previous value is appended only when
# it is non-empty, so no empty path entry is introduced.
export PYTHONPATH="$PROJECT_ROOT${PYTHONPATH:+:$PYTHONPATH}"
cd "$PROJECT_ROOT" || {
    echo "Error: cannot enter $PROJECT_ROOT" >&2
    exit 1
}
# exec replaces the shell so signals reach the server process directly.
exec python3 -m anytool.local_server.main "$@"
================================================
FILE: anytool/local_server/utils/__init__.py
================================================
from .accessibility import AccessibilityHelper
from .screenshot import ScreenshotHelper
__all__ = ["AccessibilityHelper", "ScreenshotHelper"]
================================================
FILE: anytool/local_server/utils/accessibility.py
================================================
import platform
from anytool.utils.logging import Logger
from typing import Dict, Any, Optional
logger = Logger.get_logger(__name__)
platform_name = platform.system()
class AccessibilityHelper:
    """
    Platform-neutral front end for the accessibility tree produced by the
    per-OS adapters, with helpers to search, flatten and summarise it.

    Trees handled here are dictionaries of the form ``{'tree': root_node}``
    where each node may carry a 'children' list of further nodes.
    """

    # Lower-case fragments; a role is clickable when it contains one of them.
    # Roles are lower-cased before matching, so the macOS AX* names are
    # listed in lower case as well.
    _CLICKABLE_ROLE_MARKERS = (
        'button', 'push-button', 'toggle-button', 'radio-button',
        'link', 'menu-item', 'axbutton', 'axlink', 'axmenuitem'
    )

    def __init__(self):
        """Pick the adapter for the current OS; leave it None if the import fails."""
        self.platform = platform_name
        self.adapter = None
        try:
            if platform_name == "Darwin":
                from ..platform_adapters.macos_adapter import MacOSAdapter
                self.adapter = MacOSAdapter()
            elif platform_name == "Linux":
                from ..platform_adapters.linux_adapter import LinuxAdapter
                self.adapter = LinuxAdapter()
            elif platform_name == "Windows":
                from ..platform_adapters.windows_adapter import WindowsAdapter
                self.adapter = WindowsAdapter()
        except ImportError as e:
            logger.warning(f"Failed to import platform adapter: {e}")

    def get_tree(self, max_depth: int = 10) -> Dict[str, Any]:
        """
        Ask the adapter for the accessibility tree.

        Returns:
            The adapter's result, or a dictionary with 'error' and 'platform'
            when there is no adapter or the adapter raises.
        """
        if not self.adapter:
            return {
                'error': f'No adapter available for {self.platform}',
                'platform': self.platform
            }
        try:
            return self.adapter.get_accessibility_tree(max_depth=max_depth)
        except Exception as e:
            logger.error(f"Failed to get accessibility tree: {e}")
            return {
                'error': str(e),
                'platform': self.platform
            }

    def is_available(self) -> bool:
        """True when an adapter exists and its ``available`` attribute is truthy."""
        return bool(self.adapter is not None and getattr(self.adapter, 'available', False))

    def find_element_by_name(self, tree: Dict[str, Any], name: str) -> Optional[Dict[str, Any]]:
        """Return the first node (depth-first) whose 'name' equals ``name``."""
        if not tree or 'tree' not in tree:
            return None
        return self._search_tree(tree['tree'], 'name', name)

    def find_element_by_role(self, tree: Dict[str, Any], role: str) -> Optional[Dict[str, Any]]:
        """Return the first node (depth-first) whose 'role' equals ``role``."""
        if not tree or 'tree' not in tree:
            return None
        return self._search_tree(tree['tree'], 'role', role)

    def _search_tree(self, node: Dict[str, Any], key: str, value: str) -> Optional[Dict[str, Any]]:
        """Depth-first search for a node where ``node[key] == value``."""
        if not node:
            return None
        if key in node and node[key] == value:
            return node
        # A missing or None 'children' entry is treated as "no children".
        for child in node.get('children') or []:
            found = self._search_tree(child, key, value)
            if found:
                return found
        return None

    def flatten_tree(self, tree: Dict[str, Any]) -> list:
        """Return every node in depth-first order, each without its 'children' key."""
        if not tree or 'tree' not in tree:
            return []
        result = []
        self._flatten_node(tree['tree'], result)
        return result

    def _flatten_node(self, node: Dict[str, Any], result: list):
        """Recursively flatten nodes"""
        if not node:
            return
        # Append a shallow copy of the node without its children.
        result.append({k: v for k, v in node.items() if k != 'children'})
        for child in node.get('children') or []:
            self._flatten_node(child, result)

    def get_visible_elements(self, tree: Dict[str, Any]) -> list:
        """
        Return the flattened nodes considered visible on this platform.

        Linux: 'showing' is in the node's 'states'. macOS: 'enabled' is
        truthy. Windows: ``states['is_visible']`` is truthy.
        """
        visible = []
        for element in self.flatten_tree(tree):
            if self.platform == "Linux":
                if 'states' in element and 'showing' in element.get('states', []):
                    visible.append(element)
            elif self.platform == "Darwin":
                if element.get('enabled', False):
                    visible.append(element)
            elif self.platform == "Windows":
                if element.get('states', {}).get('is_visible', False):
                    visible.append(element)
        return visible

    def get_clickable_elements(self, tree: Dict[str, Any]) -> list:
        """Return the flattened nodes whose role contains a clickable marker (case-insensitive)."""
        clickable = []
        for element in self.flatten_tree(tree):
            # A missing or None role is treated as an empty string.
            role = str(element.get('role') or '').lower()
            if any(marker in role for marker in self._CLICKABLE_ROLE_MARKERS):
                clickable.append(element)
        return clickable

    def get_statistics(self, tree: Dict[str, Any]) -> Dict[str, Any]:
        """Summarise the tree: element counts, per-role counts and the platform."""
        all_elements = self.flatten_tree(tree)
        # Count roles
        roles = {}
        for element in all_elements:
            role = element.get('role', 'unknown')
            roles[role] = roles.get(role, 0) + 1
        return {
            'total_elements': len(all_elements),
            'visible_elements': len(self.get_visible_elements(tree)),
            'clickable_elements': len(self.get_clickable_elements(tree)),
            'roles': roles,
            'platform': self.platform
        }
================================================
FILE: anytool/local_server/utils/screenshot.py
================================================
import platform
import os
import logging
from typing import Optional, Tuple
from PIL import Image
import pyautogui
logger = logging.getLogger(__name__)
platform_name = platform.system()
class ScreenshotHelper:
    """
    Screenshot utilities: full-screen and region capture, base64 export,
    image comparison and annotation.

    Captures that include the mouse cursor are delegated to the platform
    adapter; everything else goes through pyautogui and Pillow.
    """

    def __init__(self):
        """Pick the adapter for the current OS; leave it None if the import fails."""
        self.platform = platform_name
        self.adapter = None
        try:
            if platform_name == "Darwin":
                from ..platform_adapters.macos_adapter import MacOSAdapter
                self.adapter = MacOSAdapter()
            elif platform_name == "Linux":
                from ..platform_adapters.linux_adapter import LinuxAdapter
                self.adapter = LinuxAdapter()
            elif platform_name == "Windows":
                from ..platform_adapters.windows_adapter import WindowsAdapter
                self.adapter = WindowsAdapter()
        except ImportError as e:
            logger.warning(f"Failed to import platform adapter: {e}")

    @staticmethod
    def _ensure_parent_dir(path: str) -> None:
        """Create the parent directory of ``path``; a bare filename has none to create."""
        parent = os.path.dirname(path)
        if parent:
            os.makedirs(parent, exist_ok=True)

    def capture(self, output_path: str, with_cursor: bool = True) -> bool:
        """
        Capture the whole screen to ``output_path``.

        Args:
            output_path: Output file path
            with_cursor: Use the platform adapter so the cursor is included;
                falls back to pyautogui when there is no adapter
        Returns:
            Whether successful
        """
        try:
            self._ensure_parent_dir(output_path)
            if with_cursor and self.adapter:
                # Use platform-specific method to capture screenshot (with cursor)
                return self.adapter.capture_screenshot_with_cursor(output_path)
            # Use pyautogui to capture screenshot (without cursor)
            screenshot = pyautogui.screenshot()
            screenshot.save(output_path)
            logger.info(f"Screenshot successfully (without cursor): {output_path}")
            return True
        except Exception as e:
            logger.error(f"Screenshot failed: {e}")
            return False

    def capture_region(
        self,
        output_path: str,
        x: int,
        y: int,
        width: int,
        height: int
    ) -> bool:
        """
        Capture specified screen region

        Args:
            output_path: Output path
            x: Starting x coordinate
            y: Starting y coordinate
            width: Width
            height: Height
        Returns:
            Whether successful
        """
        try:
            self._ensure_parent_dir(output_path)
            screenshot = pyautogui.screenshot(region=(x, y, width, height))
            screenshot.save(output_path)
            logger.info(f"Region screenshot successfully: {output_path}")
            return True
        except Exception as e:
            logger.error(f"Region screenshot failed: {e}")
            return False

    def get_screen_size(self) -> Tuple[int, int]:
        """
        Get screen size

        Returns:
            (width, height); (1920, 1080) if the query fails
        """
        try:
            size = pyautogui.size()
            return (size.width, size.height)
        except Exception as e:
            logger.error(f"Failed to get screen size: {e}")
            return (1920, 1080)  # Default value

    def get_cursor_position(self) -> Tuple[int, int]:
        """
        Get cursor position

        Returns:
            (x, y); (0, 0) if the query fails
        """
        try:
            pos = pyautogui.position()
            return (pos.x, pos.y)
        except Exception as e:
            logger.error(f"Failed to get cursor position: {e}")
            return (0, 0)

    def capture_to_base64(self, with_cursor: bool = True) -> Optional[str]:
        """
        Capture screenshot and convert to base64

        The screenshot is written to a temporary PNG which is always removed
        again, whether or not the capture succeeds.

        Args:
            with_cursor: Whether to include cursor
        Returns:
            Base64 encoded image string, or None on failure
        """
        import tempfile
        import base64
        tmp_path = None
        try:
            # Reserve a file name; the file is closed so capture() can write it.
            with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
                tmp_path = tmp.name
            if not self.capture(tmp_path, with_cursor):
                return None
            with open(tmp_path, 'rb') as f:
                img_data = f.read()
            return base64.b64encode(img_data).decode('utf-8')
        except Exception as e:
            logger.error(f"Failed to convert screenshot to base64: {e}")
            return None
        finally:
            if tmp_path and os.path.exists(tmp_path):
                try:
                    os.remove(tmp_path)
                except OSError as e:
                    logger.debug(f"Cannot remove temporary screenshot {tmp_path}: {e}")

    def compare_screenshots(self, path1: str, path2: str) -> float:
        """
        Compare similarity between two screenshots

        Both images are converted to RGB and the second is resized to the
        first's size if needed. The score is 1 minus the RMS per-pixel
        difference, normalised by the largest possible RGB difference.

        Args:
            path1: First image path
            path2: Second image path
        Returns:
            Similarity (0-1), 1 means identical; 0.0 if comparison fails
        """
        try:
            from PIL import ImageChops
            import math
            # Convert to a common mode so the difference is defined and the
            # histogram always has three 256-bin bands.
            with Image.open(path1) as first, Image.open(path2) as second:
                img1 = first.convert('RGB')
                img2 = second.convert('RGB')
            # Ensure same size
            if img1.size != img2.size:
                img2 = img2.resize(img1.size)
            diff = ImageChops.difference(img1, img2)
            # histogram() concatenates the bands; the pixel value of a bin is
            # its index within its own band, hence the modulo.
            stat = diff.histogram()
            sum_of_squares = sum(
                count * ((index % 256) ** 2) for index, count in enumerate(stat)
            )
            # Calculate RMS
            rms = math.sqrt(sum_of_squares / float(img1.size[0] * img1.size[1]))
            # Largest possible RMS: every channel of every pixel differs by 255.
            max_rms = 255.0 * math.sqrt(3)
            similarity = 1 - (rms / max_rms)
            return max(0.0, min(1.0, similarity))
        except Exception as e:
            logger.error(f"Failed to compare screenshots: {e}")
            return 0.0

    def annotate_screenshot(
        self,
        input_path: str,
        output_path: str,
        annotations: list
    ) -> bool:
        """
        Add annotations to screenshot

        Args:
            input_path: Input image path
            output_path: Output image path
            annotations: List of annotations, each annotation is a dict:
                {'type': 'rectangle'/'text', 'x': int, 'y': int,
                 'width': int, 'height': int, 'text': str, 'color': tuple}
        Returns:
            Whether successful
        """
        try:
            from PIL import ImageDraw, ImageFont
            img = Image.open(input_path)
            draw = ImageDraw.Draw(img)
            font = None  # loaded once, on the first text annotation
            for annotation in annotations:
                ann_type = annotation.get('type', 'rectangle')
                color = annotation.get('color', (255, 0, 0))
                x = annotation.get('x', 0)
                y = annotation.get('y', 0)
                if ann_type == 'rectangle':
                    width = annotation.get('width', 100)
                    height = annotation.get('height', 100)
                    draw.rectangle(
                        [(x, y), (x + width, y + height)],
                        outline=color,
                        width=2
                    )
                elif ann_type == 'text':
                    text = annotation.get('text', '')
                    if font is None:
                        try:
                            font = ImageFont.truetype("Arial.ttf", 20)
                        except Exception:
                            # Arial may not be installed; use Pillow's built-in font.
                            font = ImageFont.load_default()
                    draw.text((x, y), text, fill=color, font=font)
            img.save(output_path)
            logger.info(f"Annotated screenshot successfully: {output_path}")
            return True
        except Exception as e:
            logger.error(f"Failed to annotate screenshot: {e}")
            return False
================================================
FILE: anytool/platform/__init__.py
================================================
from .system_info import SystemInfoClient, get_system_info, get_screen_size
from .recording import RecordingClient, RecordingContextManager
from .screenshot import ScreenshotClient, AutoScreenshotWrapper
from .config import get_local_server_config, get_client_base_url
__all__ = [
# System Info
"SystemInfoClient",
"get_system_info",
"get_screen_size",
# Recording
"RecordingClient",
"RecordingContextManager",
# Screenshot
"ScreenshotClient",
"AutoScreenshotWrapper",
# Config
"get_local_server_config",
"get_client_base_url",
]
================================================
FILE: anytool/platform/config.py
================================================
import os
import json
from typing import Dict, Any
from anytool.utils.logging import Logger
logger = Logger.get_logger(__name__)
def get_local_server_config() -> Dict[str, Any]:
    """
    Read local server configuration.

    Priority:
    1. Environment variable LOCAL_SERVER_URL (parsed into host/port; a value
       without a scheme such as "10.0.0.5:8000" is read as http://...)
    2. Config file local_server/config.json
    3. Defaults (127.0.0.1:5000)

    Returns:
        Dict with 'host', 'port' and 'debug'
    """
    # Check environment variable first (for OSWorld/remote VM integration)
    env_url = os.getenv("LOCAL_SERVER_URL")
    if env_url:
        try:
            from urllib.parse import urlparse
            candidate = env_url.strip()
            # Without a scheme urlparse does not recognise the host part, and
            # the defaults would silently replace the configured address.
            if '://' not in candidate:
                candidate = f'http://{candidate}'
            parsed = urlparse(candidate)
            host = parsed.hostname or '127.0.0.1'
            port = parsed.port or 5000
            logger.debug(f"Using LOCAL_SERVER_URL: {host}:{port}")
            return {
                'host': host,
                'port': port,
                'debug': False,
            }
        except Exception as e:
            # e.g. a non-numeric port; fall through to the config file.
            logger.warning(f"Failed to parse LOCAL_SERVER_URL: {e}")

    # Find local_server config file relative to this module
    try:
        current_dir = os.path.dirname(__file__)
        config_path = os.path.abspath(
            os.path.join(current_dir, '../local_server/config.json')
        )
        if os.path.exists(config_path):
            with open(config_path, 'r', encoding='utf-8') as f:
                config = json.load(f)
            server_config = config.get('server', {})
            return {
                'host': server_config.get('host', '127.0.0.1'),
                'port': server_config.get('port', 5000),
                'debug': server_config.get('debug', False),
            }
    except Exception as e:
        logger.debug(f"Failed to read local server config: {e}")

    # Return defaults
    return {
        'host': '127.0.0.1',
        'port': 5000,
        'debug': False,
    }
def get_client_base_url() -> str:
    """
    Get base URL for connecting to local server.

    Priority:
    1. Environment variable LOCAL_SERVER_URL (surrounding whitespace is
       stripped and "http://" is prepended when no scheme is given)
    2. Host/port from get_local_server_config()

    Returns:
        Base URL string
    """
    # Check environment variable first
    env_url = os.getenv("LOCAL_SERVER_URL")
    if env_url:
        env_url = env_url.strip()
        # A value such as "10.0.0.5:5000" is not a usable base URL for an
        # HTTP client; give it an explicit scheme.
        if '://' not in env_url:
            env_url = f"http://{env_url}"
        return env_url

    # Read from config file / defaults
    config = get_local_server_config()
    host = config['host']
    port = config['port']
    # 0.0.0.0 is a bind address, not one a client can connect to.
    if host == '0.0.0.0':
        host = 'localhost'
    return f"http://{host}:{port}"
================================================
FILE: anytool/platform/recording.py
================================================
import aiohttp
from typing import Optional
from anytool.utils.logging import Logger
from .config import get_client_base_url
logger = Logger.get_logger(__name__)
class RecordingClient:
"""
Client for screen recording via HTTP API.
This client directly calls the local server's recording endpoints:
- POST /start_recording
- POST /end_recording
"""
def __init__(
    self,
    base_url: Optional[str] = None,
    timeout: int = 30
):
    """
    Set up the client; no network connection is made here.

    Args:
        base_url: Base URL of the local server. When None, the value from
            get_client_base_url() is used. Trailing slashes are removed.
        timeout: Request timeout in seconds
    """
    resolved_url = get_client_base_url() if base_url is None else base_url
    self.base_url = resolved_url.rstrip("/")
    self.timeout = timeout
    # The HTTP session is created on first use by _get_session().
    self._session: Optional[aiohttp.ClientSession] = None
async def _get_session(self) -> aiohttp.ClientSession:
"""Get or create aiohttp session."""
if self._session is None or self._session.closed:
self._session = aiohttp.ClientSession(
timeout=aiohttp.ClientTimeout(total=self.timeout)
)
return self._session
async def start_recording(self, auto_cleanup: bool = True) -> bool:
"""
Start screen recording.
Args:
auto_cleanup: If True, automatically end previous recording if one is in progress
"""
try:
session = await self._get_session()
url = f"{self.base_url}/start_recording"
async with session.post(url) as response:
if response.status == 200:
logger.info("Screen recording started")
return True
elif response.status == 400 and auto_cleanup:
# Check if error is due to recording already in progress
error_text = await response.text()
if "already in progress" in error_text.lower():
logger.warning("Recording already in progress, stopping previous recording...")
# Try to end the previous recording
video_bytes = await self.end_recording()
if video_bytes:
logger.info("Previous recording ended successfully, retrying start...")
else:
logger.warning("Failed to end previous recording, but will retry start anyway...")
# Retry starting recording (without auto_cleanup to avoid infinite loop)
return await self.start_recording(auto_cleanup=False)
else:
logger.error(f"Failed to start recording: HTTP {response.status} - {error_text}")
return False
else:
error_text = await response.text()
logger.error(f"Failed to start recording: HTTP {response.status} - {error_text}")
return False
except Exception as e:
logger.error(f"Failed to start recording: {e}")
return False
async def end_recording(self, dest: Optional[str] = None) -> Optional[bytes]:
"""
End screen recording and optionally save to file.
"""
try:
session = await self._get_session()
url = f"{self.base_url}/end_recording"
# Use longer timeout for end_recording (file may be large)
async with session.post(url, timeout=aiohttp.ClientTimeout(total=60)) as response:
if response.status == 200:
video_bytes = await response.read()
# Save to file if destination provided
if dest:
try:
with open(dest, "wb") as f:
f.write(video_bytes)
logger.info(f"Recording saved to: {dest}")
except Exception as e:
logger.error(f"Failed to save recording file: {e}")
return None
logger.info("Screen recording ended")
return video_bytes
else:
error_text = await response.text()
logger.error(f"Failed to end recording: HTTP {response.status} - {error_text}")
return None
except Exception as e:
logger.error(f"Failed to end recording: {e}")
return None
async def close(self):
"""Close the HTTP session."""
if self._session and not self._session.closed:
await self._session.close()
# Give aiohttp time to finish cleanup callbacks
import asyncio
await asyncio.sleep(0.25)
logger.debug("Recording client session closed")
async def __aenter__(self):
"""Context manager entry."""
return self
async def __aexit__(self, exc_type, exc_val, exc_tb):
"""Context manager exit."""
await self.close()
return False
class RecordingContextManager:
    """
    Async context manager that records the screen for the duration of a block.

    Entering starts a recording through an internal RecordingClient and yields
    that client; exiting ends the recording (saving it to output_path when one
    is known) and always closes the client.
    """

    def __init__(
        self,
        base_url: Optional[str] = None,
        output_path: Optional[str] = None,
        timeout: Optional[int] = None
    ):
        """
        Initialize recording context manager.

        Args:
            base_url: Base URL of the local server (default: resolved by RecordingClient)
            output_path: Path to save recording (default: config.recording.screen_recording_path
                when the config can be loaded and the value is set)
            timeout: Request timeout in seconds (default: RecordingClient's own default)
        """
        # Load output_path from config if not provided
        if output_path is None:
            try:
                from anytool.config import get_config
                configured_path = get_config().recording.screen_recording_path
                if configured_path:
                    output_path = configured_path
            except Exception as e:
                # Best effort: a missing/broken config only means "do not save to disk".
                logger.debug(f"Could not read recording output path from config: {e}")

        # Forward timeout only when the caller supplied one. Passing None through
        # would end up as ClientTimeout(total=None), i.e. no timeout at all.
        client_kwargs = {"base_url": base_url}
        if timeout is not None:
            client_kwargs["timeout"] = timeout
        self.client = RecordingClient(**client_kwargs)
        self.output_path = output_path
        self.recording_started = False

    async def __aenter__(self) -> RecordingClient:
        """Start recording on context entry and return the underlying client."""
        success = await self.client.start_recording()
        if success:
            self.recording_started = True
            logger.info("Recording context started")
        else:
            logger.warning("Failed to start recording in context")
        return self.client

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Stop recording on context exit; exceptions from the body are not suppressed."""
        try:
            if self.recording_started:
                try:
                    await self.client.end_recording(dest=self.output_path)
                    logger.info("Recording context ended")
                except Exception as e:
                    logger.error(f"Failed to end recording in context: {e}")
        finally:
            # Runs even for BaseException (e.g. task cancellation) so the HTTP
            # session is never left open.
            await self.client.close()
        return False
================================================
FILE: anytool/platform/screenshot.py
================================================
"""
Screenshot client for capturing screens via HTTP API.
This module provides a screenshot client that captures screenshots by calling
the local_server's /screenshot endpoint.
Always uses HTTP API (like RecordingClient):
- Local: http://127.0.0.1:5000/screenshot
- Remote: http://remote-vm:5000/screenshot
"""
import aiohttp
from typing import Optional
from anytool.utils.logging import Logger
from .config import get_client_base_url
logger = Logger.get_logger(__name__)
class ScreenshotClient:
    """HTTP client for the local server's /screenshot and /screen_size endpoints."""

    def __init__(
        self,
        base_url: Optional[str] = None,
        timeout: int = 10
    ):
        """
        Set up the client; no connection is opened here.

        Args:
            base_url: Base URL of local_server; when None it is resolved
                through get_client_base_url()
            timeout: Total request timeout (seconds)
        """
        resolved_url = get_client_base_url() if base_url is None else base_url
        # Endpoints are appended as "/name", so drop any trailing slash.
        self.base_url = resolved_url.rstrip("/")
        self.timeout = timeout
        self._session = None
        logger.debug(f"ScreenshotClient initialized: {self.base_url}")

    async def _get_session(self) -> aiohttp.ClientSession:
        """Return the live aiohttp session, creating one if missing or closed."""
        current = self._session
        if current is not None and not current.closed:
            return current
        self._session = aiohttp.ClientSession(
            timeout=aiohttp.ClientTimeout(total=self.timeout)
        )
        return self._session

    @staticmethod
    def _is_valid_image_response(content_type: str, data: Optional[bytes]) -> bool:
        """
        Decide whether a response body looks like a PNG or JPEG image.

        Args:
            content_type: HTTP Content-Type header
            data: Response data bytes

        Returns:
            True when data is non-empty bytes and either starts with the PNG or
            JPEG signature, or the content type names image/png or image/jpeg
        """
        if not isinstance(data, (bytes, bytearray)) or not data:
            return False

        png_signature = b"\x89PNG\r\n\x1a\n"
        jpeg_signature = b"\xff\xd8\xff"
        if data[:8] == png_signature or data[:3] == jpeg_signature:
            return True

        # Signature did not match: trust the declared content type instead.
        return bool(content_type) and (
            "image/png" in content_type or "image/jpeg" in content_type
        )

    async def capture(self) -> Optional[bytes]:
        """
        Fetch a screenshot with GET {base_url}/screenshot.

        Returns:
            Image bytes, or None on any failure (HTTP error, invalid image, exception)
        """
        try:
            http = await self._get_session()
            endpoint = f"{self.base_url}/screenshot"
            logger.debug(f"Requesting screenshot: {endpoint}")
            async with http.get(endpoint) as resp:
                if resp.status != 200:
                    body = await resp.text()
                    logger.error(f"Failed to capture screenshot: HTTP {resp.status} - {body}")
                    return None

                mime = resp.headers.get("Content-Type", "")
                image = await resp.read()
                if not self._is_valid_image_response(mime, image):
                    logger.error("Invalid screenshot format received")
                    return None

                logger.debug(f"Screenshot captured: {len(image)} bytes")
                return image
        except Exception as e:
            logger.error(f"Failed to capture screenshot: {e}")
            return None

    async def capture_to_file(self, output_path: str) -> bool:
        """
        Capture a screenshot and write it to output_path.

        Parent directories are created as needed.

        Returns:
            True if the file was written, False otherwise
        """
        try:
            image = await self.capture()
            if not image:
                return False
            import os
            target_dir = os.path.dirname(output_path) or '.'
            os.makedirs(target_dir, exist_ok=True)
            with open(output_path, 'wb') as out:
                out.write(image)
            logger.info(f"Screenshot saved to: {output_path}")
            return True
        except Exception as e:
            logger.error(f"Failed to save screenshot to file: {e}")
            return False

    async def get_screen_size(self) -> tuple[int, int]:
        """
        Fetch the screen size with GET {base_url}/screen_size.

        Returns:
            (width, height); (1920, 1080) when the request fails, and the same
            values fill in for keys missing from the response
        """
        fallback = (1920, 1080)
        try:
            http = await self._get_session()
            endpoint = f"{self.base_url}/screen_size"
            async with http.get(endpoint) as resp:
                if resp.status != 200:
                    logger.warning("Failed to get screen size, using default")
                    return fallback
                payload = await resp.json()
                width = payload.get('width', 1920)
                height = payload.get('height', 1080)
                logger.debug(f"Screen size: {width}x{height}")
                return (width, height)
        except Exception as e:
            logger.error(f"Failed to get screen size: {e}")
            return fallback

    async def close(self):
        """Close the HTTP session if one is open."""
        if not self._session or self._session.closed:
            return
        await self._session.close()
        logger.debug("Screenshot client session closed")

    async def __aenter__(self):
        """Enter the async context; the client itself is the context value."""
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Close the session on exit; exceptions are not suppressed."""
        await self.close()
        return False
class AutoScreenshotWrapper:
    """
    Wrapper that automatically captures screenshots after backend calls.

    This wrapper can be used to wrap any backend tool/session and automatically
    capture a screenshot after each execute() / _arun() call. Every other
    attribute is delegated to the wrapped tool.

    Usage:
        # Wrap a backend tool
        wrapped_tool = AutoScreenshotWrapper(
            tool=gui_tool,
            screenshot_client=screenshot_client,
            on_screenshot=lambda screenshot: recorder.record_step(...)
        )
        # Use wrapped tool normally
        result = await wrapped_tool.execute(...)
        # Screenshot is automatically captured and handled
    """

    def __init__(
        self,
        tool,
        screenshot_client: Optional["ScreenshotClient"] = None,
        on_screenshot=None,
        enabled: bool = True
    ):
        """
        Initialize auto-screenshot wrapper.

        Args:
            tool: The tool/session to wrap
            screenshot_client: Screenshot client to use (created if None)
            on_screenshot: Callback function(screenshot_bytes) called after each
                screenshot; may be a plain function or a coroutine function
            enabled: Whether auto-screenshot is enabled
        """
        self._tool = tool
        if screenshot_client is None:
            screenshot_client = ScreenshotClient()
        self._screenshot_client = screenshot_client
        self._on_screenshot = on_screenshot
        self._enabled = enabled

    def __getattr__(self, name):
        """Delegate attribute access to wrapped tool."""
        # __getattr__ only runs after normal lookup failed. If "_tool" itself is
        # missing (instance built without __init__, e.g. by copy/unpickle),
        # reading self._tool below would re-enter this method without end.
        if name == "_tool":
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {name!r}"
            )
        return getattr(self._tool, name)

    async def _capture_and_notify(self):
        """
        Capture a screenshot and hand it to the callback.

        Does nothing while disabled. Failures are logged as warnings and never
        propagate, so a screenshot problem cannot break the wrapped call.
        """
        if not self._enabled:
            return
        import inspect
        try:
            screenshot = await self._screenshot_client.capture()
            if screenshot and self._on_screenshot:
                outcome = self._on_screenshot(screenshot)
                # Support both sync callbacks (result is a plain value) and
                # async callbacks / lambdas that return a coroutine.
                if inspect.isawaitable(outcome):
                    await outcome
        except Exception as e:
            logger.warning(f"Failed to auto-capture screenshot: {e}")

    async def execute(self, *args, **kwargs):
        """
        Execute tool and auto-capture screenshot.

        Returns:
            Whatever the wrapped tool's execute() returned
        """
        # Execute original method
        result = await self._tool.execute(*args, **kwargs)
        # Capture screenshot after execution
        await self._capture_and_notify()
        return result

    async def _arun(self, *args, **kwargs):
        """
        Run tool and auto-capture screenshot.

        Returns:
            Whatever the wrapped tool's _arun() returned
        """
        # Execute original method
        result = await self._tool._arun(*args, **kwargs)
        # Capture screenshot after execution
        await self._capture_and_notify()
        return result

    def enable(self):
        """Enable auto-screenshot."""
        self._enabled = True

    def disable(self):
        """Disable auto-screenshot."""
        self._enabled = False
================================================
FILE: anytool/platform/system_info.py
================================================
import aiohttp
from typing import Optional, Dict, Any
from anytool.utils.logging import Logger
from .config import get_client_base_url
logger = Logger.get_logger(__name__)
class SystemInfoClient:
    """
    HTTP client for read-only system queries against the local server:
    - Platform info (GET /platform)
    - Screen size (GET /screen_size)
    - Cursor position (GET /cursor_position)
    """

    def __init__(
        self,
        base_url: Optional[str] = None,
        timeout: int = 10
    ):
        """
        Set up the client; no connection is opened here.

        Args:
            base_url: Base URL of the local server; when None it is resolved
                through get_client_base_url()
            timeout: Total request timeout in seconds
        """
        resolved_url = get_client_base_url() if base_url is None else base_url
        # Endpoints are appended as "/name", so drop any trailing slash.
        self.base_url = resolved_url.rstrip("/")
        self.timeout = timeout
        self._session: Optional[aiohttp.ClientSession] = None
        self._cached_info: Optional[Dict[str, Any]] = None

    async def _get_session(self) -> aiohttp.ClientSession:
        """Return the live aiohttp session, creating one if missing or closed."""
        current = self._session
        if current is not None and not current.closed:
            return current
        self._session = aiohttp.ClientSession(
            timeout=aiohttp.ClientTimeout(total=self.timeout)
        )
        return self._session

    async def get_system_info(self, use_cache: bool = True) -> Optional[Dict[str, Any]]:
        """
        Fetch the JSON document served at /platform.

        Args:
            use_cache: When True, return a previously fetched non-empty result
                if there is one, and remember the result of a fresh fetch

        Returns:
            The decoded JSON payload, or None on failure
        """
        if use_cache and self._cached_info:
            logger.debug("Using cached system info")
            return self._cached_info

        try:
            http = await self._get_session()
            endpoint = f"{self.base_url}/platform"
            async with http.get(endpoint) as resp:
                if resp.status != 200:
                    body = await resp.text()
                    logger.error(f"Failed to get system info: HTTP {resp.status} - {body}")
                    return None

                info = await resp.json()
                if use_cache:
                    self._cached_info = info
                logger.debug(f"System info retrieved: {info.get('system')}")
                return info
        except Exception as e:
            logger.error(f"Failed to get system info: {e}")
            return None

    async def get_screen_size(self) -> Optional[Dict[str, int]]:
        """
        Fetch the screen size from /screen_size.

        Returns:
            Dict with 'width' and 'height', or None on failure
        """
        try:
            http = await self._get_session()
            endpoint = f"{self.base_url}/screen_size"
            async with http.get(endpoint) as resp:
                if resp.status != 200:
                    body = await resp.text()
                    logger.error(f"Failed to get screen size: HTTP {resp.status} - {body}")
                    return None

                payload = await resp.json()
                logger.debug(f"Screen size: {payload.get('width')}x{payload.get('height')}")
                # Only the two known keys are passed on to the caller.
                return {axis: payload.get(axis) for axis in ("width", "height")}
        except Exception as e:
            logger.error(f"Failed to get screen size: {e}")
            return None

    async def get_cursor_position(self) -> Optional[Dict[str, int]]:
        """
        Fetch the cursor position from /cursor_position.

        Returns:
            Dict with 'x' and 'y', or None on failure
        """
        try:
            http = await self._get_session()
            endpoint = f"{self.base_url}/cursor_position"
            async with http.get(endpoint) as resp:
                if resp.status != 200:
                    body = await resp.text()
                    logger.error(f"Failed to get cursor position: HTTP {resp.status} - {body}")
                    return None

                payload = await resp.json()
                # Only the two known keys are passed on to the caller.
                return {axis: payload.get(axis) for axis in ("x", "y")}
        except Exception as e:
            logger.error(f"Failed to get cursor position: {e}")
            return None

    def clear_cache(self):
        """Forget the cached /platform result."""
        self._cached_info = None
        logger.debug("System info cache cleared")

    async def close(self):
        """Close the HTTP session if one is open."""
        if not self._session or self._session.closed:
            return
        await self._session.close()
        logger.debug("System info client session closed")

    async def __aenter__(self):
        """Enter the async context; the client itself is the context value."""
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Close the session on exit; exceptions are not suppressed."""
        await self.close()
        return False
async def get_system_info(base_url: Optional[str] = None) -> Optional[Dict[str, Any]]:
    """
    One-shot helper: fetch system information with a temporary SystemInfoClient.

    The client's cache is bypassed and the client is closed when the call finishes.

    Args:
        base_url: Base URL of the local server (None lets the client resolve it)

    Returns:
        The system information payload, or None on failure
    """
    async with SystemInfoClient(base_url=base_url) as one_shot:
        info = await one_shot.get_system_info(use_cache=False)
    return info
async def get_screen_size(base_url: Optional[str] = None) -> Optional[Dict[str, int]]:
    """
    One-shot helper: fetch the screen size with a temporary SystemInfoClient.

    The client is closed when the call finishes.

    Args:
        base_url: Base URL of the local server (None lets the client resolve it)

    Returns:
        Dict with 'width' and 'height', or None on failure
    """
    async with SystemInfoClient(base_url=base_url) as one_shot:
        size = await one_shot.get_screen_size()
    return size
================================================
FILE: anytool/prompts/__init__.py
================================================
from anytool.prompts.grounding_agent_prompts import GroundingAgentPrompts
__all__ = ["GroundingAgentPrompts"]
================================================
FILE: anytool/prompts/grounding_agent_prompts.py
================================================
from typing import List
class GroundingAgentPrompts:
    """
    Prompt templates used by the grounding agent.

    Holds the system prompt plus static builders for the per-iteration,
    visual-analysis, final-summary and workspace messages.
    """

    # Completion sentinel the model is told to emit when the task is done.
    # It must be a non-empty, distinctive string: with an empty value the
    # system prompt would describe an invisible token, and any
    # "TASK_COMPLETE in response" check would be true for every response.
    TASK_COMPLETE = "<COMPLETE>"

    SYSTEM_PROMPT = f"""You are a Grounding Agent. Execute tasks using tools.
# Tool Execution
- Select appropriate tools from descriptions and schemas
- Provide correct parameters
- Call multiple tools if needed
- Tools execute immediately, results appear in next iteration
- If you need results to decide next action, wait for next iteration
# Tool Selection Tips
- **MCP tools** and **Shell tools** are typically faster and more accurate when applicable
- **GUI tools** offer finer-grained control and can handle tasks not covered by MCP/shell tools
- Choose based on the task requirements and tool availability; prefer MCP/shell when they fit well
# Visual Analysis Control
GUI tools auto-analyze screenshots to extract information.
To skip analysis when NOT needed, add parameter:
```json
{{"task_description": "...", "skip_visual_analysis": true}}
```
**Decision Rule:**
- Task goal is OPERATIONAL (open/navigate/click/show): Skip analysis
- Task goal requires KNOWLEDGE EXTRACTION (read/extract/save data): Keep analysis
**Examples:**
- "Open settings page": Operational only, skip analysis
- "Open settings and record all values": Needs knowledge, keep analysis
- "Navigate to GitHub homepage": Operational only, skip analysis
- "Search Python tutorials and save top 5 titles": Needs knowledge, keep analysis
**Key principle:** If you need to extract information FROM the screen for subsequent steps or user reporting, keep analysis (don't skip).
**Note:** Only GUI tools support this parameter. Other backend tools ignore it.
# Task Completion
After each iteration, evaluate if the task is complete:
**If task is COMPLETE:**
- Write a response summarizing what was accomplished
- Include the completion token `{TASK_COMPLETE}` on a new line at the end of your response
- Example response format:
```
I have successfully completed the task. The file has been created at /path/to/file.txt with the requested content.
{TASK_COMPLETE}
```
**If task is NOT complete:**
- Continue by calling the appropriate tools
- Do NOT output `{TASK_COMPLETE}`
- Tool results will appear in the next iteration
The token `{TASK_COMPLETE}` signals that no further iterations are needed."""

    @staticmethod
    def iteration_summary(
        instruction: str,
        iteration: int,
        max_iterations: int
    ) -> str:
        """
        Build iteration summary prompt for LLMClient auto-summary.

        The LLM is asked to extract information directly from the tool
        results already present in the conversation history.

        Args:
            instruction: The original task text
            iteration: Current iteration number
            max_iterations: Total number of iterations allowed

        Returns:
            The prompt string
        """
        return f"""Based on the original task and the tool execution results in the conversation above, generate a structured iteration summary.
**Original Task:**
{instruction}
**Progress:** Iteration {iteration} of {max_iterations}
**Generate Summary in This Format:**
## Iteration {iteration} Progress
Actions taken:
Knowledge obtained (COMPLETE and SPECIFIC):
- File locations:
- Visual content:
- Data retrieved:
- URLs/Links:
- System state:
Errors encountered:
CRITICAL GUIDELINES:
- This summary is for preserving knowledge for subsequent iterations
- Extract ALL concrete information from tool outputs in the conversation above
- Filenames, paths, URLs - use exact values from tool outputs
- Visual content - extract actual text/data visible, not just "saw something"
- Search results - include specific data, not vague descriptions
- The next iteration cannot see current tool outputs - this summary is the ONLY source of knowledge"""

    @staticmethod
    def visual_analysis(
        tool_name: str,
        num_screenshots: int,
        task_description: str = ""
    ) -> str:
        """
        Build prompt for visual analysis of screenshots.

        Args:
            tool_name: Tool name that generated the screenshots
            num_screenshots: Number of screenshots
            task_description: Original task description for context; when empty,
                the task-context paragraph is omitted

        Returns:
            The prompt string
        """
        # Singular/plural wording depends on the screenshot count.
        screenshot_text = "screenshot" if num_screenshots == 1 else f"{num_screenshots} screenshots"
        these_text = "this screenshot" if num_screenshots == 1 else "these screenshots"
        task_context = f"""
**Original Task**: {task_description}
Focus on extracting information RELEVANT to this task. Prioritize content that helps accomplish the goal.
""" if task_description else ""
        return f"""Extract the KNOWLEDGE and INFORMATION from {these_text}. This will be passed to the next iteration so it can continue working with the information (search, analyze, save, etc.). Without this extraction, the visual content would only be viewable by humans and unusable for subsequent operations.
{task_context}
**EXTRACT all visible knowledge content** (prioritize task-relevant information):
1. **Text content**: Articles, documentation, code, messages, descriptions - extract the actual text
2. **Data points**: Numbers, statistics, measurements, values, percentages - be specific
3. **List items**: Names, titles, entries in lists/search results/files - list them out
4. **Structured data**: Information from tables, charts, forms - describe what they contain
5. **Key information**: URLs, paths, names, IDs, dates, labels - anything useful for next steps
**IGNORE interface elements**:
- Buttons, menus, toolbars, navigation bars
- UI design, layout, colors, styling
- Non-informational visual elements
**Goal**: Extract usable knowledge that enables the next agent to work with this information programmatically. Be SPECIFIC and COMPLETE, but FOCUS on what's relevant to the task.
{screenshot_text.capitalize()} from tool '{tool_name}'"""

    @staticmethod
    def final_summary(
        instruction: str,
        iterations: int
    ) -> str:
        """
        Build prompt for generating final summary across all iterations.

        Args:
            instruction: The original task text
            iterations: Number of iterations that were run

        Returns:
            The prompt string
        """
        return f"""Based on the complete conversation history above (including all {iterations} iteration summaries and tool executions), generate a comprehensive final summary.
## Final Task Summary
Task: {instruction}
What was accomplished:
Key information obtained:
- Files:
- Data:
- Findings:
Issues encountered:
Result: <"Success" or "Incomplete">
Guidelines:
- Consolidate information from ALL iteration summaries
- Include concrete deliverables (file paths, data, etc.)
- Be comprehensive but concise
- Focus on what the user cares about"""

    @staticmethod
    def workspace_directory(workspace_dir: str) -> str:
        """
        Build workspace directory information for cross-iteration/cross-backend data sharing.

        Args:
            workspace_dir: Path of the working directory

        Returns:
            The benchmark wording when the path contains "/root", otherwise
            the normal-mode wording
        """
        # Check if this is a benchmark scenario (LiveMCPBench /root mapping)
        # In benchmark mode, paths in query are already converted by caller (e.g., map_path_to_local)
        # A "LiveMCPBench/root" path also contains "/root", so one test covers both.
        is_benchmark = "/root" in workspace_dir
        if is_benchmark:
            # Benchmark mode: all task files are in workspace directory
            return f"""**Working Directory**: `{workspace_dir}`
- All task files (input/output) are located in this directory
- Read from and write to this directory for all file operations"""
        else:
            # Normal mode: workspace is for intermediate results
            return f"""**Working Directory**: `{workspace_dir}`
- Persist intermediate results here; later iterations/backends can read what you saved earlier
- Note: User's personal files are NOT here - search in ~/Desktop, ~/Documents, ~/Downloads, etc."""

    @staticmethod
    def workspace_matching_files(matching_files: List[str]) -> str:
        """
        Build alert for files matching task requirements.

        Args:
            matching_files: File names to list (each is wrapped in backticks)
        """
        files_str = ', '.join([f"`{f}`" for f in matching_files])
        return f"""**Workspace Alert**: Files matching task requirements found: {files_str}
- Read these files to verify if they satisfy the task
- If satisfied, mark task as completed
- If not satisfied, modify or recreate as needed"""

    @staticmethod
    def workspace_recent_files(total_files: int, recent_files: List[str]) -> str:
        """
        Build info for recently modified files.

        Args:
            total_files: Total number of files in the workspace
            recent_files: Recently modified file names; only the first 15 are
                listed, though the reported count is the full length
        """
        recent_list = ', '.join([f"`{f}`" for f in recent_files[:15]])
        return f"""**Workspace Info**: {total_files} files exist, {len(recent_files)} recently modified
Recent files: {recent_list}
Consider checking recent files before creating new ones"""

    @staticmethod
    def workspace_file_list(files: List[str]) -> str:
        """
        Build list of all existing files.

        Args:
            files: File names; only the first 15 are listed, followed by an
                "(and N more)" note when there are more
        """
        files_list = ', '.join([f"`{f}`" for f in files[:15]])
        if len(files) > 15:
            files_list += f" (and {len(files) - 15} more)"
        return f"**Workspace Info**: {len(files)} existing file(s): {files_list}"

    @staticmethod
    def iteration_feedback(
        iteration: int,
        llm_summary: str,
        add_guidance: bool = True
    ) -> str:
        """
        Build feedback message to pass iteration summary to next iteration.

        Args:
            iteration: Number of the iteration that just finished
            llm_summary: Summary text produced for that iteration
            add_guidance: When True, append instructions for the next iteration,
                including the completion token

        Returns:
            The feedback message
        """
        content = f"""## Iteration {iteration} Summary
{llm_summary}"""
        if add_guidance:
            content += f"""
---
Now continue with iteration {iteration + 1}. You can see the full conversation history above. Based on all progress so far, decide whether to:
- Call more tools if the task is not yet complete
- Output {GroundingAgentPrompts.TASK_COMPLETE} if the task is fully accomplished"""
        return content
================================================
FILE: anytool/recording/__init__.py
================================================
"""
RecordingManager
├── internal management of platform.RecordingClient
├── internal management of platform.ScreenshotClient
├── internal management of TrajectoryRecorder
└── internal management of ActionRecorder
"""
# Auto-record the tool execution
from .manager import RecordingManager
# Low-level components (advanced users)
from .recorder import TrajectoryRecorder
from .action_recorder import ActionRecorder
# Utility functions
from .utils import (
load_trajectory_from_jsonl,
load_metadata,
format_trajectory_for_export,
analyze_trajectory,
load_recording_session,
filter_trajectory,
extract_errors,
generate_summary_report,
)
from .action_recorder import (
load_agent_actions,
analyze_agent_actions,
format_agent_actions,
)
__all__ = [
# Manager
'RecordingManager',
# Recorders
'TrajectoryRecorder',
'ActionRecorder',
# Trajectory utils
'load_trajectory_from_jsonl',
'load_metadata',
'format_trajectory_for_export',
'analyze_trajectory',
'load_recording_session',
'filter_trajectory',
'extract_errors',
'generate_summary_report',
# Agent action utils
'load_agent_actions',
'analyze_agent_actions',
'format_agent_actions',
]
================================================
FILE: anytool/recording/action_recorder.py
================================================
"""
Agent Action Recorder
Records agent decision-making processes, reasoning, and outputs.
Focuses on high-level agent behaviors rather than low-level tool executions.
"""
import datetime
import json
from typing import Any, Dict, Optional
from pathlib import Path
from anytool.utils.logging import Logger
logger = Logger.get_logger(__name__)
class ActionRecorder:
    """
    Records agent actions and decision-making processes.

    This recorder captures the 'thinking' layer of the agent:
    - Task planning and decomposition
    - Tool selection reasoning
    - Evaluation decisions

    Each action is appended as one JSON object per line to
    <trajectory_dir>/agent_actions.jsonl.
    """

    def __init__(self, trajectory_dir: Path):
        """
        Initialize action recorder.

        Args:
            trajectory_dir: Directory to save action records (a Path; plain
                string paths are accepted too). Created if it does not exist.
        """
        # Coerce so that a str argument does not fail on the "/" join below.
        self.trajectory_dir = Path(trajectory_dir)
        self.actions_file = self.trajectory_dir / "agent_actions.jsonl"
        self.step_counter = 0
        # Ensure directory exists
        self.trajectory_dir.mkdir(parents=True, exist_ok=True)

    async def record_action(
        self,
        agent_name: str,
        action_type: str,
        input_data: Optional[Dict[str, Any]] = None,
        reasoning: Optional[Dict[str, Any]] = None,
        output_data: Optional[Dict[str, Any]] = None,
        metadata: Optional[Dict[str, Any]] = None,
        related_tool_steps: Optional[list] = None,
        correlation_id: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
        Record an agent action.

        Args:
            agent_name: Name of the agent performing the action
            action_type: Type of action (plan | execute | evaluate | monitor)
            input_data: Input data the agent received (strings truncated to 1000 chars)
            reasoning: Agent's reasoning process (strings truncated to 2000 chars)
            output_data: Agent's output/decision (strings truncated to 1000 chars)
            metadata: Additional metadata (LLM model, tokens, duration, etc.), stored as given
            related_tool_steps: List of tool execution step numbers related to this action
            correlation_id: Optional correlation ID to link related events;
                generated from the step number and timestamp when omitted

        Returns:
            The record that was appended to the JSONL file. Empty/None optional
            sections are left out of the record.
        """
        self.step_counter += 1
        timestamp = datetime.datetime.now().isoformat()
        action_info = {
            "step": self.step_counter,
            "timestamp": timestamp,
            "agent_name": agent_name,
            "agent_type": self._infer_agent_type(agent_name),
            "action_type": action_type,
            "correlation_id": correlation_id or f"action_{self.step_counter}_{timestamp}",
        }
        # Add input (with smart truncation)
        if input_data:
            action_info["input"] = self._truncate_data(input_data, max_length=1000)
        # Add reasoning (keep structured)
        if reasoning:
            action_info["reasoning"] = self._truncate_data(reasoning, max_length=2000)
        # Add output (keep structured)
        if output_data:
            action_info["output"] = self._truncate_data(output_data, max_length=1000)
        # Add metadata
        if metadata:
            action_info["metadata"] = metadata
        # Add related tool steps for correlation
        if related_tool_steps:
            action_info["related_tool_steps"] = related_tool_steps
        # Append to JSONL file
        await self._append_to_file(action_info)
        logger.debug(
            f"Recorded {action_type} action from {agent_name} (step {self.step_counter})"
        )
        return action_info

    def _infer_agent_type(self, agent_name: str) -> str:
        """
        Map an agent name to a coarse type by case-insensitive substring match.

        The first match in the order host, grounding, eval, coordinator wins;
        anything else is "unknown".
        """
        name_lower = agent_name.lower()
        for agent_type in ("host", "grounding", "eval", "coordinator"):
            if agent_type in name_lower:
                return agent_type
        return "unknown"

    def _truncate_data(self, data: Any, max_length: int) -> Any:
        """
        Return a copy of data in which every string longer than max_length is
        cut to max_length characters and suffixed with "... [truncated]".

        Dicts and lists are walked recursively (new containers are returned);
        every other value is passed through unchanged.
        """
        if isinstance(data, str):
            if len(data) > max_length:
                return data[:max_length] + "... [truncated]"
            return data
        if isinstance(data, dict):
            return {
                key: self._truncate_data(value, max_length)
                for key, value in data.items()
            }
        if isinstance(data, list):
            return [self._truncate_data(item, max_length) for item in data]
        return data

    async def _append_to_file(self, action_info: Dict[str, Any]):
        """
        Append action to JSONL file.

        Values that json cannot encode natively (e.g. Path or datetime objects
        in metadata) are written as str(value) instead of raising TypeError,
        so one unusual value does not lose the whole record.
        """
        # Serialize first: if encoding fails, nothing partial is written.
        line = json.dumps(action_info, ensure_ascii=False, default=str)
        with open(self.actions_file, "a", encoding="utf-8") as f:
            f.write(line)
            f.write("\n")

    def get_step_count(self) -> int:
        """Get current step count."""
        return self.step_counter
def load_agent_actions(trajectory_dir: str) -> list:
    """
    Read every recorded agent action from a trajectory directory.

    The file <trajectory_dir>/agent_actions.jsonl is parsed one JSON object
    per non-blank line.

    Returns:
        The list of parsed actions; an empty list when the file is missing or
        when reading/parsing fails (the failure is logged)
    """
    source = Path(trajectory_dir) / "agent_actions.jsonl"
    if not source.exists():
        logger.warning(f"Agent actions file not found: {source}")
        return []

    try:
        with open(source, "r", encoding="utf-8") as handle:
            stripped_lines = (raw.strip() for raw in handle)
            loaded = [json.loads(entry) for entry in stripped_lines if entry]
        logger.info(f"Loaded {len(loaded)} agent actions from {source}")
        return loaded
    except Exception as e:
        logger.error(f"Failed to load agent actions from {source}: {e}")
        return []
def analyze_agent_actions(actions: list) -> Dict[str, Any]:
    """
    Count recorded agent actions overall, per agent name and per action type.

    Actions lacking "agent_name" or "action_type" are counted under "unknown".

    Returns:
        Dict with "total_actions", "by_agent" and "by_type"
    """
    per_agent: Dict[str, int] = {}
    per_type: Dict[str, int] = {}
    summary: Dict[str, Any] = {
        "total_actions": 0,
        "by_agent": per_agent,
        "by_type": per_type,
    }
    if not actions:
        return summary

    for record in actions:
        # Same tallying step for both groupings.
        for tally, field in ((per_agent, "agent_name"), (per_type, "action_type")):
            label = record.get(field, "unknown")
            tally[label] = tally.get(label, 0) + 1

    summary["total_actions"] = len(actions)
    return summary
def format_agent_actions(actions: list, format_type: str = "compact") -> str:
    """
    Format agent actions for display.

    Args:
        actions: Action dicts as produced by the action recorder.
        format_type: ``"compact"`` renders one line per action
            (``Step N: [agent] type: thought``); ``"detailed"`` renders a
            multi-line section per action including the JSON-dumped
            ``reasoning``, ``output`` and ``metadata`` when present.

    Returns:
        The formatted text, or ``"No agent actions recorded"`` when
        ``actions`` is empty.

    Raises:
        ValueError: If ``format_type`` is neither ``"compact"`` nor
            ``"detailed"`` (only checked when there are actions to format).
    """
    if not actions:
        return "No agent actions recorded"

    if format_type == "compact":
        lines = []
        for action in actions:
            step = action.get("step", "?")
            agent = action.get("agent_name", "?")
            action_type = action.get("action_type", "?")

            # Use the reasoning "thought" as a short preview when available.
            # Reasoning that is not a dict (e.g. free text) has no "thought".
            key_info = ""
            reasoning = action.get("reasoning")
            thought = reasoning.get("thought", "") if isinstance(reasoning, dict) else ""
            if thought:
                thought = str(thought)
                # Only mark the preview as shortened when text was actually cut.
                preview = thought[:60] + "..." if len(thought) > 60 else thought
                key_info = f": {preview}"

            lines.append(f"Step {step}: [{agent}] {action_type}{key_info}")
        return "\n".join(lines)

    elif format_type == "detailed":
        lines = []
        for action in actions:
            lines.append(f"\n{'='*60}")
            lines.append(f"Step {action.get('step', '?')}: {action.get('agent_name', '?')}")
            lines.append(f"Type: {action.get('action_type', '?')}")
            lines.append(f"Time: {action.get('timestamp', '?')}")

            if action.get("reasoning"):
                lines.append("\nReasoning:")
                lines.append(json.dumps(action["reasoning"], indent=2, ensure_ascii=False))

            if action.get("output"):
                lines.append("\nOutput:")
                lines.append(json.dumps(action["output"], indent=2, ensure_ascii=False))

            if action.get("metadata"):
                lines.append("\nMetadata:")
                lines.append(json.dumps(action["metadata"], indent=2, ensure_ascii=False))
        return "\n".join(lines)

    else:
        raise ValueError(f"Unknown format type: {format_type}")
================================================
FILE: anytool/recording/manager.py
================================================
import datetime
import json
import ast
import types
from typing import Any, Dict, List, Optional
from pathlib import Path
from anytool.utils.logging import Logger
from .recorder import TrajectoryRecorder
from .action_recorder import ActionRecorder
logger = Logger.get_logger(__name__)
class RecordingManager:
# Global instance management (singleton pattern)
_global_instance: Optional['RecordingManager'] = None
def __init__(
self,
enabled: bool = True,
task_id: str = "",
log_dir: str = "./logs/recordings",
backends: Optional[List[str]] = None,
enable_screenshot: bool = True,
enable_video: bool = False,
enable_conversation_log: bool = True,
auto_save_interval: int = 10,
server_url: Optional[str] = None,
agent_name: str = "GroundingAgent",
):
"""
Initialize automatic recording manager
Args:
enabled: whether to enable recording
task_id: task ID (for naming recording directory)
log_dir: log directory path
backends: list of backends to record (None = all)
(optional: "mcp", "gui", "shell", "system", "web")
enable_screenshot: whether to enable screenshot (through platform.ScreenshotClient)
enable_video: whether to enable video recording (through platform.RecordingClient)
enable_conversation_log: whether to save LLM conversations to conversations.jsonl (default: True)
auto_save_interval: automatic save interval (steps)
server_url: local server address (None = read from config/environment variables)
agent_name: name of the agent performing the recording (default: "GroundingAgent")
"""
self.enabled = enabled
self.task_id = task_id
self.log_dir = log_dir
self.backends = set(backends) if backends else {"mcp", "gui", "shell", "system", "web"}
self.enable_screenshot = enable_screenshot
self.enable_video = enable_video
self.enable_conversation_log = enable_conversation_log
self.auto_save_interval = auto_save_interval
self.server_url = server_url
self.agent_name = agent_name
# internal state
self._recorder: Optional[TrajectoryRecorder] = None
self._action_recorder: Optional[ActionRecorder] = None
self._is_started = False
self._step_counter = 0
# registered LLM clients (for automatic recording)
self._registered_llm_clients = []
# Store original methods for restoration
self._original_methods = {}
# video/screenshot clients (internal management)
self._recording_client = None
self._screenshot_client = None
# Register as global instance
RecordingManager._global_instance = self
@classmethod
def is_recording(cls) -> bool:
"""
Check if there is an active recording session
Returns:
bool: True if recording is active
"""
return cls._global_instance is not None and cls._global_instance._is_started
@classmethod
async def record_retrieved_tools(
    cls,
    task_instruction: str,
    tools: List[Any],
    search_debug_info: Optional[Dict[str, Any]] = None,
):
    """
    Save the tools retrieved for a task under the ``retrieved_tools`` metadata key.

    Does nothing unless a global instance exists, has been started and owns
    a recorder.

    Args:
        task_instruction: The task instruction used for retrieval (only the
            first 500 characters are stored)
        tools: List of retrieved tools
        search_debug_info: Debug info from search (similarity scores, LLM selections)
    """
    manager = cls._global_instance
    if not (manager and manager._is_started and manager._recorder):
        return

    def describe(tool) -> Dict[str, Any]:
        # Name always; backend and server name only when the tool exposes them
        entry = {"name": getattr(tool, "name", str(tool))}
        if hasattr(tool, "backend_type"):
            entry["backend"] = (
                tool.backend_type.value
                if hasattr(tool.backend_type, "value")
                else str(tool.backend_type)
            )
        if hasattr(tool, "_runtime_info") and tool._runtime_info:
            entry["server_name"] = tool._runtime_info.server_name
        return entry

    described_tools = [describe(tool) for tool in tools]

    metadata = {
        "instruction": task_instruction[:500],
        "count": len(tools),
        "tools": described_tools,
    }

    if search_debug_info:
        # Copy only the known debug fields, falling back to neutral defaults
        debug_fields = (
            ("search_mode", ""),
            ("total_candidates", 0),
            ("mcp_count", 0),
            ("non_mcp_count", 0),
            ("llm_filter", {}),
            ("tool_scores", []),
        )
        metadata["search_debug"] = {
            field: search_debug_info.get(field, fallback)
            for field, fallback in debug_fields
        }

    await manager._recorder.add_metadata("retrieved_tools", metadata)
    logger.info(f"Recorded {len(tools)} retrieved tools (with search debug info: {search_debug_info is not None})")
@classmethod
async def record_iteration_context(
cls,
iteration: int,
messages_input: List[Dict[str, Any]],
messages_output: List[Dict[str, Any]],
llm_response_summary: Dict[str, Any],
max_content_length: int = 5000,
):
"""
Record a single iteration's LLM conversation to conversations.jsonl (real-time).
Args:
iteration: Iteration number
messages_input: Messages sent to LLM
messages_output: Messages after LLM response
llm_response_summary: Summary of LLM response
max_content_length: Max length for message content truncation
"""
instance = cls._global_instance
if not instance or not instance._is_started or not instance._recorder:
return
# Check if conversation recording is enabled
if not getattr(instance, 'enable_conversation_log', True):
return
def truncate_message_content(messages: List[Dict]) -> List[Dict]:
"""Truncate message content to avoid huge log files."""
result = []
for msg in messages:
new_msg = {"role": msg.get("role", "unknown")}
content = msg.get("content", "")
if isinstance(content, str):
if len(content) > max_content_length:
new_msg["content"] = content[:max_content_length] + f"... [truncated, total {len(content)} chars]"
else:
new_msg["content"] = content
elif isinstance(content, list):
# Handle multi-part content (e.g., with images)
new_content = []
for item in content:
if isinstance(item, dict):
if item.get("type") == "image":
new_content.append({"type": "image", "note": "[image data omitted]"})
elif item.get("type") == "text":
text = item.get("text", "")
if len(text) > max_content_length:
new_content.append({
"type": "text",
"text": text[:max_content_length] + f"... [truncated, total {len(text)} chars]"
})
else:
new_content.append(item)
else:
new_content.append(item)
else:
new_content.append(item)
new_msg["content"] = new_content
else:
new_msg["content"] = str(content)[:max_content_length]
if "tool_calls" in msg:
new_msg["tool_calls"] = msg["tool_calls"]
result.append(new_msg)
return result
# Build record
import datetime
record = {
"iteration": iteration,
"timestamp": datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S"),
"llm_response_summary": llm_response_summary,
"messages_input": truncate_message_content(messages_input),
"messages_output": truncate_message_content(messages_output),
}
# Append to conversations.jsonl (real-time)
conv_file = instance._recorder.trajectory_dir / "conversations.jsonl"
try:
with open(conv_file, "a", encoding="utf-8") as f:
f.write(json.dumps(record, ensure_ascii=False))
f.write("\n")
except Exception as e:
logger.debug(f"Failed to write conversation log: {e}")
@classmethod
async def record_tool_execution(
cls,
tool_name: str,
backend: str,
parameters: Dict[str, Any],
result: Any,
server_name: Optional[str] = None,
is_success: bool = True,
metadata: Optional[Dict[str, Any]] = None,
):
"""
Record tool execution (internal method, called by BaseTool automatically)
Args:
tool_name: Name of the tool
backend: Backend type (gui, shell, mcp, etc.)
parameters: Tool parameters
result: Tool execution result (content or error message)
server_name: Server name for MCP backend
is_success: Whether the tool execution was successful (default: True for backward compatibility)
metadata: Tool result metadata (e.g. intermediate_steps for GUI)
"""
if not cls._global_instance or not cls._global_instance._is_started:
return
instance = cls._global_instance
# Check if should record this backend
if backend not in instance.backends:
return
# Create mock tool_call and result objects for compatibility with existing _record_* methods
class MockFunctionCall:
def __init__(self, name, arguments):
self.name = name
self.arguments = arguments
class MockToolCall:
def __init__(self, name, arguments):
self.function = MockFunctionCall(name, arguments)
class MockResult:
def __init__(self, content, is_success=True, metadata=None):
self.content = content
self.is_success = is_success
self.is_error = not is_success
self.error = content if not is_success else None
self.metadata = metadata or {}
tool_call = MockToolCall(tool_name, parameters)
mock_result = MockResult(result, is_success=is_success, metadata=metadata)
try:
if backend == "mcp":
server = server_name or "unknown"
await instance._record_mcp(tool_call, mock_result, server)
elif backend == "gui":
await instance._record_gui(tool_call, mock_result)
elif backend == "shell":
await instance._record_shell(tool_call, mock_result)
elif backend == "system":
await instance._record_system(tool_call, mock_result)
elif backend == "web":
await instance._record_web(tool_call, mock_result)
instance._step_counter += 1
except Exception as e:
logger.debug(f"Failed to record tool execution: {e}")
@staticmethod
def _parse_arguments(arg_data):
"""Safely parse tool_call.function.arguments which may be JSON string.
Handles:
1. Proper JSON strings with true/false/null
2. Python literal strings (produced by OpenAI) using ast.literal_eval
3. Already-dict objects (returned by SDK)
"""
if not isinstance(arg_data, str):
return arg_data or {}
# First, try JSON
try:
return json.loads(arg_data)
except json.JSONDecodeError:
pass
# Fallback to Python literal
try:
return ast.literal_eval(arg_data)
except Exception:
logger.debug("Failed to parse arguments, returning raw string")
return {"raw": arg_data}
async def start(self, task_id: Optional[str] = None):
"""Start automatic recording
Args:
task_id: If provided, override the current task_id for this recording session. This allows
external callers (e.g. Coordinator) to specify a meaningful task identifier without
having to recreate the RecordingManager instance.
Raises:
Exception: any error raised during setup is logged and re-raised.
"""
# Allow dynamic update of task_id before recording actually starts
# (note: the override is applied even when the call turns out to be a no-op below)
if task_id:
self.task_id = task_id
# No-op when recording is disabled or a session is already running
if not self.enabled or self._is_started:
return
try:
# check server availability (only when video or screenshot is enabled)
# The check only logs its outcome; it does not abort the start.
if self.enable_video or self.enable_screenshot:
await self._check_server_availability()
self._recorder = TrajectoryRecorder(
task_name=self.task_id,
log_dir=self.log_dir,
enable_screenshot=self.enable_screenshot,
enable_video=self.enable_video,
server_url=self.server_url,
)
# create action recorder for agent decision tracking
# It writes into the same trajectory directory as the recorder above.
self._action_recorder = ActionRecorder(
trajectory_dir=Path(self._recorder.get_trajectory_dir())
)
# create video client (internal management)
if self.enable_video:
from anytool.platform import RecordingClient
self._recording_client = RecordingClient(base_url=self.server_url)
success = await self._recording_client.start_recording()
# A failed video start is only warned about; the session still starts.
if success:
logger.info("Video recording started")
else:
logger.warning("Video recording failed to start")
# create screenshot client (internal management)
if self.enable_screenshot:
from anytool.platform import ScreenshotClient
self._screenshot_client = ScreenshotClient(base_url=self.server_url)
logger.debug("Screenshot client ready")
# save initial metadata
await self._recorder.add_metadata("task_id", self.task_id)
await self._recorder.add_metadata("backends", list(self.backends))
await self._recorder.add_metadata("start_time", datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S"))
# Capture and save initial screenshot if enabled
# Best effort: a capture failure is logged at debug level and ignored.
if self.enable_screenshot and self._screenshot_client:
try:
init_shot = await self._screenshot_client.capture()
if init_shot:
await self._recorder.save_init_screenshot(init_shot)
logger.debug("Initial screenshot saved")
except Exception as e:
logger.debug(f"Failed to capture initial screenshot: {e}")
# Only now is the session considered active (is_recording() becomes True)
self._is_started = True
logger.info(f"Recording started: {self._recorder.get_trajectory_dir()}")
except Exception as e:
# NOTE(review): on failure the recorder/clients created above are left in place
# while _is_started stays False, so stop() will not clean them up — confirm intended.
logger.error(f"Recording failed to start: {e}")
raise
async def _check_server_availability(self):
"""Check if local server is available"""
# Purely informational: the outcome is only logged. Nothing is returned and
# no exception leaves this method, so callers cannot tell success from failure.
try:
from anytool.platform import SystemInfoClient
# Use context manager to ensure aiohttp session is closed, avoiding warning of unclosed session
async with SystemInfoClient(base_url=self.server_url) as client:
info = await client.get_system_info()
# A truthy response is treated as "connected"; its 'platform' entry is logged.
if info:
logger.info(f"Server connected ({info.get('platform', 'unknown')})")
else:
logger.warning("Server not responding, video/screenshot functionality unavailable")
except Exception:
# Import errors and connection errors end up here alike.
logger.warning("Cannot connect to server, video/screenshot functionality unavailable")
async def stop(self):
"""Stop automatic recording"""
if not self.enabled or not self._is_started:
return
try:
# stop video recording and save
if self._recording_client:
try:
video_path = None
if self._recorder:
video_path = str(Path(self._recorder.get_trajectory_dir()) / "screen_recording.mp4")
video_bytes = await self._recording_client.end_recording(dest=video_path)
if video_bytes and video_path:
video_size_mb = len(video_bytes) / (1024 * 1024)
logger.info(f"Video recording saved: {video_path} ({video_size_mb:.2f} MB)")
except Exception as e:
logger.warning(f"Video recording failed to save: {e}")
# close RecordingClient session, avoid unclosed session warning
try:
if self._recording_client:
await self._recording_client.close()
except Exception as e:
logger.debug(f"Failed to close RecordingClient session: {e}")
# close screenshot client
if self._screenshot_client:
try:
await self._screenshot_client.close()
except Exception as e:
logger.debug(f"Screenshot client failed to close: {e}")
finally:
self._screenshot_client = None
# finalize trajectory recording
if self._recorder:
# save final metadata
await self._recorder.add_metadata("end_time", datetime.datetime.now().isoformat())
await self._recorder.add_metadata("total_steps", self._step_counter)
# generate summary
await self.generate_summary()
# finalize recording
await self._recorder.finalize()
logger.info(f"Recording completed: {self._recorder.get_trajectory_dir()}")
# Restore original methods for registered LLM clients
for client in self._registered_llm_clients:
client_id = id(client)
if client_id in self._original_methods:
try:
# Restore original complete method
original_method = self._original_methods[client_id]
client.complete = original_method
except Exception as e:
logger.debug(f"Failed to restore original method for LLM client: {e}")
# Clear registered clients and original methods
self._registered_llm_clients.clear()
self._original_methods.clear()
self._is_started = False
self._recorder = None
self._action_recorder = None
except Exception as e:
logger.error(f"Recording failed to stop: {e}")
def register_to_llm(self, llm_client):
if not self.enabled:
return
# Check if already registered to avoid double-wrapping
if id(llm_client) in self._original_methods:
logger.warning(f"LLM client {llm_client} is already registered, skipping")
return
# Save original complete method for restoration
original_complete = llm_client.complete
self._original_methods[id(llm_client)] = original_complete
# Wrap complete method
async def wrapped_complete(self_client, *args, **kwargs):
# Call original method
response = await original_complete(*args, **kwargs)
# Automatically record tool calls
if response.get("tool_results"):
await self._auto_record_tool_results(response["tool_results"])
return response
# Replace method with properly bound method
llm_client.complete = types.MethodType(wrapped_complete, llm_client)
self._registered_llm_clients.append(llm_client)
async def _auto_record_tool_results(self, tool_results: List[Dict]):
"""
Internal method: automatically record tool execution results from LLM client
This is called by register_to_llm() wrapper. Tool results should contain
backend and server_name information.
"""
if not self._recorder or not self._is_started:
return
for tool_result in tool_results:
# Get necessary information from tool_result
tool_call = tool_result.get("tool_call")
result = tool_result.get("result")
backend = tool_result.get("backend")
server_name = tool_result.get("server_name")
if not tool_call or not result:
logger.warning("Tool result missing 'tool_call' or 'result', skipping")
continue
if not backend:
logger.warning(
f"Tool result missing 'backend' field, skipping recording. "
f"Tool: {tool_call.function.name}. "
f"Ensure your LLM client provides backend information in tool_results."
)
continue
# Extract metadata for embedding intermediate_steps (GUI)
result_metadata = result.metadata if hasattr(result, 'metadata') else None
await RecordingManager.record_tool_execution(
tool_name=tool_call.function.name,
backend=backend,
parameters=self._parse_arguments(tool_call.function.arguments),
result=result.content if hasattr(result, 'content') else str(result),
server_name=server_name,
is_success=result.is_success if hasattr(result, 'is_success') else True,
metadata=result_metadata,
)
async def _record_mcp(self, tool_call, result, server: str):
tool_name = tool_call.function.name
parameters = self._parse_arguments(tool_call.function.arguments)
command = f"{server}.{tool_name}"
result_str = str(result.content) if result.is_success else str(result.error)
result_brief = result_str[:200] + "..." if len(result_str) > 200 else result_str
is_actual_success = result.is_success and not result_str.startswith("ERROR:")
step_info = await self._recorder.record_step(
backend="mcp",
tool=tool_name,
command=command,
result={
"status": "success" if is_actual_success else "error",
"output": result_brief,
},
parameters=parameters,
extra={
"server": server,
},
auto_screenshot=self.enable_screenshot
)
# Add agent_name to step_info
step_info["agent_name"] = self.agent_name
async def _record_gui(self, tool_call, result):
# Record a GUI tool call as a trajectory step with tool name "gui_agent".
# The step command defaults to "gui_agent" and is replaced by a summary of a
# successful action found in result.metadata["action_history"], when there is one.
tool_name = tool_call.function.name
parameters = self._parse_arguments(tool_call.function.arguments)
# Extract actual pyautogui command (from action_history)
command = "gui_agent"
if result.is_success and hasattr(result, 'metadata') and result.metadata:
action_history = result.metadata.get("action_history", [])
if action_history:
# Get last successful execution action
# The history is walked newest-first; the first match wins.
for action in reversed(action_history):
planned_action = action.get("planned_action", {})
execution_result = action.get("execution_result", {})
# Raw pyautogui commands are used verbatim as the step command.
if planned_action.get("action_type") == "PYAUTOGUI_COMMAND":
cmd = planned_action.get("command", "")
if cmd and execution_result.get("status") == "success":
command = cmd
break
elif execution_result.get("status") == "success":
action_type = planned_action.get("action_type", "")
# WAIT / DONE / FAIL do not replace the default command.
if action_type and action_type not in ["WAIT", "DONE", "FAIL"]:
params = planned_action.get("parameters", {})
if params:
# Only the first two parameters are shown to keep the command short.
param_str = ", ".join([f"{k}={v}" for k, v in list(params.items())[:2]])
command = f"{action_type}({param_str})"
else:
command = action_type
# NOTE(review): presumably this break belongs to the action_type check above,
# so WAIT/DONE/FAIL entries are skipped rather than ending the search — confirm.
break
result_str = str(result.content) if result.is_success else str(result.error)
is_actual_success = result.is_success
if result.is_success:
# A nominally successful result is downgraded to "error" when one of these
# markers appears within the first 200 characters of its text.
first_200_chars = result_str[:200] if result_str else ""
critical_failure_patterns = ["Task failed", "CRITICAL ERROR:", "FATAL:"]
has_critical_failure = any(pattern in first_200_chars for pattern in critical_failure_patterns)
is_actual_success = not has_critical_failure
# Extract intermediate_steps from metadata for embedding in traj.jsonl
extra = {}
if hasattr(result, 'metadata') and result.metadata:
intermediate_steps = result.metadata.get("intermediate_steps")
if intermediate_steps:
extra["intermediate_steps"] = intermediate_steps
# Unlike the MCP recorder, the output is passed on untruncated here.
step_info = await self._recorder.record_step(
backend="gui",
tool="gui_agent",
command=command,
result={
"status": "success" if is_actual_success else "error",
"output": result_str,
},
parameters=parameters,
auto_screenshot=self.enable_screenshot,
extra=extra if extra else None,
)
# Tag the returned step dict with the recording agent.
step_info["agent_name"] = self.agent_name
async def _record_shell(self, tool_call, result):
tool_name = tool_call.function.name
parameters = self._parse_arguments(tool_call.function.arguments)
task = parameters.get("task", tool_name)
exit_code = 0 if result.is_success else 1
stdout = str(result.content) if result.is_success else ""
stderr = str(result.error) if result.is_error else ""
command = task
if hasattr(result, 'metadata') and result.metadata:
code_history = result.metadata.get("code_history", [])
if code_history:
# Try to find the last successful execution
found_success = False
for code_info in reversed(code_history):
if code_info.get("status") == "success":
lang = code_info.get("lang", "bash")
code = code_info.get("code", "")
# String format code block: ```lang\ncode\n```
command = f"```{lang}\n{code}\n```"
found_success = True
break
# If no successful execution found, use last code block
if not found_success and code_history:
last_code = code_history[-1]
lang = last_code.get("lang", "bash")
code = last_code.get("code", "")
command = f"```{lang}\n{code}\n```"
stdout_brief = stdout[:200] + "..." if len(stdout) > 200 else stdout
stderr_brief = stderr[:200] + "..." if len(stderr) > 200 else stderr
is_actual_success = result.is_success
if result.is_success:
first_200_chars = stdout[:200] if stdout else ""
critical_failure_patterns = ["Task failed after", "[TASK_FAILED:"]
has_critical_failure = any(pattern in first_200_chars for pattern in critical_failure_patterns)
is_actual_success = not has_critical_failure
step_info = await self._recorder.record_step(
backend="shell",
tool="shell_agent",
command=command,
result={
"status": "success" if is_actual_success else "error",
"exit_code": exit_code,
"stdout": stdout_brief,
"stderr": stderr_brief,
},
auto_screenshot=self.enable_screenshot
)
step_info["agent_name"] = self.agent_name
async def _record_system(self, tool_call, result):
tool_name = tool_call.function.name
parameters = self._parse_arguments(tool_call.function.arguments)
command = tool_name
if parameters:
key_params = []
for key in ['path', 'file', 'directory', 'name', 'provider', 'backend']:
if key in parameters and parameters[key]:
key_params.append(f"{parameters[key]}")
if key_params:
command = f"{tool_name}({', '.join(key_params[:2])})"
result_str = str(result.content) if result.is_success else str(result.error)
result_brief = result_str[:200] + "..." if len(result_str) > 200 else result_str
is_actual_success = result.is_success
if result.is_success and result_str:
is_actual_success = not result_str.startswith("ERROR:")
step_info = await self._recorder.record_step(
backend="system",
tool=tool_name,
command=command,
result={
"status": "success" if is_actual_success else "error",
"output": result_brief,
},
auto_screenshot=self.enable_screenshot
)
step_info["agent_name"] = self.agent_name
async def _record_web(self, tool_call, result):
tool_name = tool_call.function.name
parameters = self._parse_arguments(tool_call.function.arguments)
query = parameters.get("query", "")
command = query if query else "deep_research"
result_str = str(result.content) if result.is_success else str(result.error)
is_actual_success = result.is_success
if result.is_success and result_str:
is_actual_success = not result_str.startswith("ERROR:")
step_info = await self._recorder.record_step(
backend="web",
tool="deep_research_agent",
command=command,
result={
"status": "success" if is_actual_success else "error",
"output": result_str, # Full output preserved for training/replay
},
auto_screenshot=self.enable_screenshot
)
# Add agent_name to step_info
step_info["agent_name"] = self.agent_name
async def add_metadata(self, key: str, value: Any):
if self._recorder:
await self._recorder.add_metadata(key, value)
async def save_plan(self, plan: Dict[str, Any], agent_name: str = "GroundingAgent"):
    """
    Write an agent plan into the ``plans`` folder of the trajectory directory.

    The plan is wrapped with a version (second-resolution timestamp),
    creation time and author, then written twice: to
    ``plan_<version>.json`` and to ``current_plan.json``, which always holds
    the latest plan. Failures are logged and not raised.

    Args:
        plan: The plan data (usually containing task_updates or plan steps)
        agent_name: Name of the agent creating the plan
    """
    if not (self._recorder and self._is_started):
        logger.warning("Cannot save plan: recording not started")
        return

    try:
        plans_root = Path(self._recorder.get_trajectory_dir()) / "plans"
        plans_root.mkdir(exist_ok=True)

        version = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        payload = {
            "version": version,
            "created_at": datetime.datetime.now().isoformat(),
            "created_by": agent_name,
            "plan": plan
        }

        versioned_file = plans_root / f"plan_{version}.json"
        latest_file = plans_root / "current_plan.json"

        # Same payload goes to the versioned file first, then to the "latest" file
        for target in (versioned_file, latest_file):
            with open(target, 'w', encoding='utf-8') as handle:
                json.dump(payload, handle, indent=2, ensure_ascii=False)

        logger.debug(f"Saved plan to recording: {versioned_file.name}")

    except Exception as e:
        logger.error(f"Failed to save plan: {e}")
async def log_decision(
    self,
    agent_name: str,
    decision: str,
    context: Optional[Dict[str, Any]] = None
):
    """
    Append an agent decision to ``decisions.log`` in the trajectory directory.

    Each entry is a ``[timestamp] agent: decision`` line, followed by a
    JSON-encoded context line when ``context`` is given. Failures are logged
    and not raised.

    Args:
        agent_name: Name of the agent making the decision
        decision: Description of the decision
        context: Additional context information
    """
    if not (self._recorder and self._is_started):
        logger.warning("Cannot log decision: recording not started")
        return

    try:
        log_file = Path(self._recorder.get_trajectory_dir()) / "decisions.log"

        timestamp = datetime.datetime.now().isoformat()
        pieces = [f"[{timestamp}] {agent_name}: {decision}"]
        if context:
            pieces.append(f"\n Context: {json.dumps(context, ensure_ascii=False)}")
        pieces.append("\n")

        with open(log_file, 'a', encoding='utf-8') as handle:
            handle.write("".join(pieces))

        logger.debug(f"Logged decision from {agent_name}")

    except Exception as e:
        logger.error(f"Failed to log decision: {e}")
async def record_agent_action(
    self,
    agent_name: str,
    action_type: str,
    input_data: Optional[Dict[str, Any]] = None,
    reasoning: Optional[Dict[str, Any]] = None,
    output_data: Optional[Dict[str, Any]] = None,
    metadata: Optional[Dict[str, Any]] = None,
    related_tool_steps: Optional[list] = None,
    correlation_id: Optional[str] = None,
) -> Optional[Dict[str, Any]]:
    """
    Pass an agent action on to the action recorder.

    Args:
        agent_name: Name of the agent performing the action
        action_type: Type of action (plan | execute | evaluate | monitor)
        input_data: Input data the agent received (simplified)
        reasoning: Agent's reasoning process (structured)
        output_data: Agent's output/decision (structured)
        metadata: Additional metadata (LLM model, tokens, duration, etc.)
        related_tool_steps: List of tool execution step numbers related to this action
        correlation_id: Optional correlation ID to link related events

    Returns:
        Whatever the action recorder returns for the action, or None when
        recording has not been started or the recorder raised (the error is
        logged).
    """
    if not (self._action_recorder and self._is_started):
        logger.debug("Cannot record agent action: recording not started")
        return None

    fields = {
        "agent_name": agent_name,
        "action_type": action_type,
        "input_data": input_data,
        "reasoning": reasoning,
        "output_data": output_data,
        "metadata": metadata,
        "related_tool_steps": related_tool_steps,
        "correlation_id": correlation_id,
    }

    try:
        action_info = await self._action_recorder.record_action(**fields)
        logger.debug(f"Recorded agent action: {agent_name} - {action_type}")
        return action_info
    except Exception as e:
        logger.error(f"Failed to record agent action: {e}")
        return None
async def generate_summary(self) -> Dict[str, Any]:
    """
    Build a summary of the recording session and write it to ``summary.json``.

    The summary combines the task id, the recorder's start/end times,
    statistics over ``traj.jsonl`` and statistics over the recorded agent
    actions.

    Returns:
        The summary dict, or an empty dict when recording has not been
        started or anything fails (the error is logged).
    """
    if not (self._recorder and self._is_started):
        logger.warning("Cannot generate summary: recording not started")
        return {}

    try:
        from .action_recorder import load_agent_actions, analyze_agent_actions
        from .utils import load_trajectory_from_jsonl, analyze_trajectory

        traj_dir = self._recorder.get_trajectory_dir()

        # Load, then analyse, both recorded streams
        trajectory = load_trajectory_from_jsonl(f"{traj_dir}/traj.jsonl")
        agent_actions = load_agent_actions(traj_dir)
        traj_stats = analyze_trajectory(trajectory)
        action_stats = analyze_agent_actions(agent_actions)

        # (summary key, stats key, default) for each section
        trajectory_fields = (
            ("total_steps", "total_steps", 0),
            ("success_count", "success_count", 0),
            ("success_rate", "success_rate", 0),
            ("by_backend", "backends", {}),
            ("by_tool", "tools", {}),
        )
        action_fields = (
            ("total_actions", "total_actions", 0),
            ("by_agent", "by_agent", {}),
            ("by_type", "by_type", {}),
        )

        recorder_meta = self._recorder.metadata
        summary = {
            "task_id": self.task_id,
            "start_time": recorder_meta.get("start_time", ""),
            "end_time": recorder_meta.get("end_time", ""),
            "trajectory": {
                out_key: traj_stats.get(stat_key, fallback)
                for out_key, stat_key, fallback in trajectory_fields
            },
            "agent_actions": {
                out_key: action_stats.get(stat_key, fallback)
                for out_key, stat_key, fallback in action_fields
            },
        }

        summary_file = Path(traj_dir) / "summary.json"
        with open(summary_file, 'w', encoding='utf-8') as handle:
            json.dump(summary, handle, indent=2, ensure_ascii=False)

        logger.info(f"Generated summary: {summary_file}")
        return summary

    except Exception as e:
        logger.error(f"Failed to generate summary: {e}")
        return {}
async def __aenter__(self):
await self.start()
return self
async def __aexit__(self, exc_type, exc_val, exc_tb):
await self.stop()
return False
@property
def recording_status(self) -> bool:
return self._is_started
@property
def trajectory_dir(self) -> Optional[str]:
if self._recorder:
return str(self._recorder.get_trajectory_dir())
return None
@property
def recording_client(self):
return self._recording_client
@property
def screenshot_client(self):
return self._screenshot_client
@property
def step_count(self) -> int:
"""Get current step count"""
return self._step_counter
# Names exported by ``from <this module> import *``.
__all__ = [
'RecordingManager',
]
================================================
FILE: anytool/recording/recorder.py
================================================
import datetime
import json
from typing import Any, Dict, List, Optional
from pathlib import Path
from anytool.utils.logging import Logger
logger = Logger.get_logger(__name__)
class TrajectoryRecorder:
    """Persist a tool-execution trajectory to a per-run directory.

    The run directory is ``<log_dir>/<task_name>_<timestamp>`` (or just
    ``<timestamp>`` when no task name is given) and holds:

    * ``metadata.json`` - run-level metadata, rewritten on every update
    * ``traj.jsonl`` - one JSON object per recorded step, appended in real time
    * ``screenshots/`` - per-step PNG files (only when screenshots are enabled)
    * ``recording.mp4`` - target path handed to the video recorder

    Call ``finalize()`` when done, or use the instance as an async context
    manager so that it is called automatically.
    """

    def __init__(
        self,
        task_name: str = "",
        log_dir: str = "./logs/trajectories",
        enable_screenshot: bool = True,
        enable_video: bool = False,
        server_url: Optional[str] = None,
    ):
        """
        Initialize trajectory recorder

        Args:
            task_name: task name (optional, will be saved in metadata)
            log_dir: log directory
            enable_screenshot: whether to save screenshots (through platform.ScreenshotClient)
            enable_video: whether to enable video recording (through platform.RecordingClient)
            server_url: local_server address (None = read from config/environment variables)
        """
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        # Naming rule: prefix with task_name when provided, otherwise timestamp only
        if task_name:
            folder_name = f"{task_name}_{timestamp}"
        else:
            folder_name = timestamp
        self.trajectory_dir = Path(log_dir) / folder_name
        self.trajectory_dir.mkdir(parents=True, exist_ok=True)

        # Create screenshots directory
        if enable_screenshot:
            self.screenshots_dir = self.trajectory_dir / "screenshots"
            self.screenshots_dir.mkdir(exist_ok=True)
        else:
            self.screenshots_dir = None

        # Config
        self.task_name = task_name
        self.enable_screenshot = enable_screenshot
        self.enable_video = enable_video
        self.server_url = server_url

        # Trajectory data
        self.steps: List[Dict] = []
        self.step_counter = 0

        # Metadata
        self.metadata = {
            "task_name": task_name,
            "start_time": datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S"),
            "enable_screenshot": enable_screenshot,
            "enable_video": enable_video,
        }

        # Video recorder (lazy initialization)
        self._video_recorder = None

        # Save initial metadata
        self._save_metadata()

    async def record_step(
        self,
        backend: str,
        tool: str,
        command: str,
        result: Optional[Dict[str, Any]] = None,
        parameters: Optional[Dict[str, Any]] = None,
        screenshot: Optional[bytes] = None,
        extra: Optional[Dict[str, Any]] = None,
        auto_screenshot: bool = False,
    ) -> Dict[str, Any]:
        """
        Record one step operation

        Args:
            backend: backend type (gui/shell/mcp/web/system)
            tool: tool name (name of BaseTool)
            command: human-readable core command
            result: execution result
            parameters: tool parameters
            screenshot: screenshot bytes (if provided)
            extra: extra information (e.g. server field for MCP); not modified
            auto_screenshot: whether to automatically capture screenshot (through platform.ScreenshotClient)

        Returns:
            The step record that was appended to ``self.steps`` and ``traj.jsonl``.
        """
        self.step_counter += 1
        step_num = self.step_counter
        timestamp = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S")

        # Work on a private copy: keys promoted to the top level are popped
        # below, and the caller's dict must be left untouched.
        extra = dict(extra) if extra else {}

        step_info = {
            "step": step_num,
            "timestamp": timestamp,
            "backend": backend,
        }

        # MCP needs to record server (between backend and tool)
        if "server" in extra:
            step_info["server"] = extra.pop("server")

        # General fields
        step_info["tool"] = tool  # BaseTool name
        step_info["command"] = command  # human-readable core command

        # parameters are written to the top level
        if parameters:
            step_info["parameters"] = parameters
        elif "parameters" in extra:
            step_info["parameters"] = extra.pop("parameters")

        # Execution result is stored as given (empty dict when absent)
        step_info["result"] = result or {}

        # Other extra information (e.g. coordinates/url) only added when needed
        if extra:
            step_info.update(extra)

        # Automatic screenshot (if enabled and no screenshot provided)
        if auto_screenshot and screenshot is None and self.enable_screenshot:
            screenshot = await self._capture_screenshot()

        # Save screenshot
        if screenshot and self.enable_screenshot and self.screenshots_dir:
            screenshot_filename = f"step_{step_num:03d}.png"
            screenshot_path = self.screenshots_dir / screenshot_filename
            with open(screenshot_path, "wb") as f:
                f.write(screenshot)
            step_info["screenshot"] = f"screenshots/{screenshot_filename}"

        # Add to trajectory
        self.steps.append(step_info)

        # Save to traj.jsonl in real time
        await self._append_to_traj_file(step_info)
        return step_info

    async def _capture_screenshot(self) -> Optional[bytes]:
        """Capture screenshot automatically through platform.ScreenshotClient.

        Best effort: any failure (import, client creation, capture) yields None.
        """
        try:
            from anytool.platform import ScreenshotClient

            # Lazily create the client; a failed creation is remembered as
            # None so it is not retried on every step.
            if not hasattr(self, '_screenshot_client'):
                try:
                    self._screenshot_client = ScreenshotClient(base_url=self.server_url)
                except Exception:
                    self._screenshot_client = None
                    return None
            if self._screenshot_client is None:
                return None
            return await self._screenshot_client.capture()
        except Exception:
            return None

    async def save_init_screenshot(self, screenshot: bytes, filename: str = "init.png"):
        """Save initial screenshot to screenshots dir and update metadata."""
        if not (self.enable_screenshot and self.screenshots_dir and screenshot):
            return
        try:
            filepath = self.screenshots_dir / filename
            with open(filepath, "wb") as f:
                f.write(screenshot)
            # Update metadata
            self.metadata["init_screenshot"] = f"screenshots/{filename}"
            self._save_metadata()
        except Exception as e:
            logger.debug(f"Failed to save initial screenshot: {e}")

    async def _append_to_traj_file(self, step_info: Dict[str, Any]):
        """Add step to traj.jsonl file"""
        traj_file = self.trajectory_dir / "traj.jsonl"
        with open(traj_file, "a", encoding="utf-8") as f:
            # default=str: values JSON cannot encode (bytes, custom objects)
            # are stored as their string form instead of aborting the step.
            f.write(json.dumps(step_info, ensure_ascii=False, default=str))
            f.write("\n")

    def _save_metadata(self):
        """Save metadata to metadata.json"""
        metadata_file = self.trajectory_dir / "metadata.json"
        with open(metadata_file, "w", encoding="utf-8") as f:
            json.dump(self.metadata, f, indent=2, ensure_ascii=False, default=str)

    async def start_video_recording(self):
        """Start video recording (through platform.RecordingClient)"""
        if not self.enable_video:
            return
        try:
            from anytool.recording.video import VideoRecorder

            video_path = self.trajectory_dir / "recording.mp4"
            self._video_recorder = VideoRecorder(str(video_path), base_url=self.server_url)
            success = await self._video_recorder.start()
            if not success:
                self._video_recorder = None
        except Exception as e:
            logger.warning(f"Video recording failed to start: {e}")
            self._video_recorder = None

    async def stop_video_recording(self):
        """Stop video recording (best effort; failures are logged, not raised)"""
        if self._video_recorder:
            try:
                await self._video_recorder.stop()
            except Exception as e:
                logger.debug(f"Failed to stop video recording: {e}")
            finally:
                self._video_recorder = None

    async def add_metadata(self, key: str, value: Any):
        """Add metadata"""
        self.metadata[key] = value
        self._save_metadata()

    async def finalize(self):
        """Finalize recording, save final information"""
        self.metadata["end_time"] = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
        self.metadata["total_steps"] = self.step_counter

        # Backend statistics
        backend_counts = {}
        for step in self.steps:
            backend = step.get("backend", "unknown")
            backend_counts[backend] = backend_counts.get(backend, 0) + 1
        self.metadata["backend_counts"] = backend_counts

        try:
            self._save_metadata()
        finally:
            # Release external resources even if the metadata write failed.
            # Close internal ScreenshotClient, avoid unclosed session warning
            await self._cleanup_screenshot_client()
            # Stop video recording
            await self.stop_video_recording()

        logger.info(f"Recording completed: {self.trajectory_dir} (steps: {self.step_counter})")

    async def _cleanup_screenshot_client(self):
        """Cleanup screenshot client resources"""
        if hasattr(self, '_screenshot_client') and self._screenshot_client:
            try:
                await self._screenshot_client.close()
            except Exception as e:
                logger.debug(f"Failed to close screenshot client: {e}")
            finally:
                self._screenshot_client = None

    def __del__(self):
        """Warn when a recorder with an active video recorder is garbage collected."""
        # Note: This is a safety net. Best practice is to call finalize() explicitly.
        # hasattr guard: __init__ may have raised before the attribute was set.
        if hasattr(self, '_video_recorder') and self._video_recorder:
            logger.warning(
                f"TrajectoryRecorder for {self.trajectory_dir} was not finalized properly. "
                "Consider calling finalize() or using async context manager."
            )

    def get_trajectory_dir(self) -> str:
        """Get trajectory directory path"""
        return str(self.trajectory_dir)

    async def __aenter__(self):
        """Async context manager entry"""
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit - ensures finalize() is called"""
        await self.finalize()
        return False
async def record_gui_step(
    recorder: TrajectoryRecorder,
    command: str,
    task_description: str,
    result: Dict[str, Any] = None,
    screenshot: Optional[bytes] = None,
    max_steps: int = 10,
    tool: str = "gui_agent",
) -> Dict[str, Any]:
    """
    Record one step executed by the GUI backend.

    Args:
        recorder: recorder instance that receives the step
        command: the pyautogui command that was actually run
            (e.g. "pyautogui.moveTo(960, 540)")
        task_description: description of the task being performed
        result: execution result
        screenshot: screenshot bytes to store with the step
        max_steps: maximum number of steps (stored in the step parameters)
        tool: tool name

    Returns:
        The step record produced by ``recorder.record_step``.
    """
    # task_description and max_steps are stored as the step's parameters.
    return await recorder.record_step(
        backend="gui",
        tool=tool,
        command=command,
        result=result,
        parameters={
            "task_description": task_description,
            "max_steps": max_steps,
        },
        screenshot=screenshot,
    )
async def record_shell_step(
    recorder: TrajectoryRecorder,
    command: str,
    exit_code: int,
    stdout: Optional[str] = None,
    stderr: Optional[str] = None,
    screenshot: Optional[bytes] = None,
    tool: str = "shell_agent",
) -> Dict[str, Any]:
    """
    Record one step executed by the shell backend.

    Args:
        recorder: recorder instance that receives the step
        command: command that was executed
        exit_code: process exit code; 0 is recorded as "success", anything else as "error"
        stdout: standard output (only the first 200 characters are stored)
        stderr: standard error (only the first 200 characters are stored)
        screenshot: screenshot bytes to store with the step
        tool: tool name

    Returns:
        The step record produced by ``recorder.record_step``.
    """

    def _brief(text):
        # Keep short/empty/None values untouched; clip long ones to 200 chars.
        if text and len(text) > 200:
            return text[:200] + "..."
        return text

    if exit_code == 0:
        status = "success"
    else:
        status = "error"

    return await recorder.record_step(
        backend="shell",
        tool=tool,
        command=command,
        result={
            "status": status,
            "exit_code": exit_code,
            "stdout": _brief(stdout),
            "stderr": _brief(stderr),
        },
        screenshot=screenshot,
    )
async def record_mcp_step(
    recorder: TrajectoryRecorder,
    server: str,
    tool_name: str,
    parameters: Dict[str, Any],
    result: Any,
    screenshot: Optional[bytes] = None,
) -> Dict[str, Any]:
    """
    Record one step executed through an MCP server.

    The command is recorded as ``<server>.<tool_name>`` and the result is
    stored as a string clipped to 200 characters with status "success".

    Args:
        recorder: recorder instance that receives the step
        server: MCP server name
        tool_name: tool name
        parameters: tool parameters
        result: execution result (stringified before storing)
        screenshot: screenshot bytes to store with the step

    Returns:
        The step record produced by ``recorder.record_step``.
    """
    text = str(result)
    if len(text) > 200:
        output = text[:200] + "..."
    else:
        output = text

    return await recorder.record_step(
        backend="mcp",
        tool=tool_name,
        command=f"{server}.{tool_name}",
        result={"status": "success", "output": output},
        parameters=parameters,
        screenshot=screenshot,
        extra={"server": server},
    )
async def record_web_step(
    recorder: TrajectoryRecorder,
    query: str,
    result: Dict[str, Any],
    screenshot: Optional[bytes] = None,
    tool: str = "deep_research_agent",
) -> Dict[str, Any]:
    """
    Record one step executed by the web backend (deep research).

    Args:
        recorder: recorder instance that receives the step
        query: search query; stored verbatim as the step's command
        result: execution result
        screenshot: screenshot bytes to store with the step
        tool: tool name

    Returns:
        The step record produced by ``recorder.record_step``.
    """
    step_record = await recorder.record_step(
        backend="web",
        tool=tool,
        command=query,
        result=result,
        screenshot=screenshot,
    )
    return step_record
================================================
FILE: anytool/recording/utils.py
================================================
import json
import os
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from anytool.utils.logging import Logger
logger = Logger.get_logger(__name__)
def load_trajectory_from_jsonl(jsonl_path: str) -> List[Dict[str, Any]]:
    """Load trajectory steps from a JSON Lines file.

    Blank lines are ignored. A line that is not valid JSON (for example a
    truncated final line left by an interrupted write) is skipped with a
    warning so that the remaining steps are still returned.

    Args:
        jsonl_path: path to the ``traj.jsonl`` file

    Returns:
        Parsed steps in file order. An empty list when the file does not
        exist or cannot be read.
    """
    # Check if file exists first
    if not os.path.exists(jsonl_path):
        logger.debug(f"No trajectory file found at {jsonl_path} (this is normal for knowledge-only tasks)")
        return []

    trajectory = []
    skipped = 0
    try:
        with open(jsonl_path, "r", encoding="utf-8") as f:
            for line_no, line in enumerate(f, start=1):
                line = line.strip()
                if not line:
                    continue
                try:
                    trajectory.append(json.loads(line))
                except json.JSONDecodeError as e:
                    # One bad record must not discard the whole trajectory.
                    skipped += 1
                    logger.warning(f"Skipping malformed line {line_no} in {jsonl_path}: {e}")
    except Exception as e:
        logger.error(f"Failed to load trajectory from {jsonl_path}: {e}")
        return []

    if skipped:
        logger.warning(f"Skipped {skipped} malformed line(s) in {jsonl_path}")
    logger.info(f"Loaded {len(trajectory)} steps from {jsonl_path}")
    return trajectory
def load_metadata(trajectory_dir: str) -> Optional[Dict[str, Any]]:
    """Read ``metadata.json`` from a trajectory directory.

    Args:
        trajectory_dir: directory expected to contain ``metadata.json``

    Returns:
        The parsed metadata, or None (after logging a warning) when the
        file cannot be opened or parsed.
    """
    metadata_path = os.path.join(trajectory_dir, "metadata.json")
    try:
        with open(metadata_path, "r", encoding="utf-8") as handle:
            return json.load(handle)
    except Exception as e:
        logger.warning(f"Failed to load metadata from {metadata_path}: {e}")
    return None
def format_trajectory_for_export(
    trajectory: List[Dict[str, Any]],
    format_type: str = "compact"
) -> str:
    """Render a trajectory as text in one of the supported formats.

    Args:
        trajectory: list of step records
        format_type: "compact", "detailed" or "markdown"

    Returns:
        The formatted trajectory.

    Raises:
        ValueError: if ``format_type`` is not one of the supported names.
    """
    # Reject unsupported names up front, then dispatch.
    if format_type not in ("compact", "detailed", "markdown"):
        raise ValueError(f"Unknown format type: {format_type}")
    if format_type == "compact":
        return _format_compact(trajectory)
    if format_type == "detailed":
        return _format_detailed(trajectory)
    return _format_markdown(trajectory)
def _format_compact(trajectory: List[Dict[str, Any]]) -> str:
"""Compact format: one line per step."""
lines = []
for step in trajectory:
step_num = step.get("step", "?")
backend = step.get("backend", "?")
server = step.get("server")
tool = step.get("tool", "?")
result_status = "success" if step.get("result", {}).get("status") == "success" else "error"
# Include server name for MCP backend
backend_str = f"{backend}@{server}" if server else backend
lines.append(f"Step {step_num}: [{backend_str}] {tool} -> {result_status}")
return "\n".join(lines)
def _format_detailed(trajectory: List[Dict[str, Any]]) -> str:
"""Detailed format: multiple lines per step with parameters."""
lines = []
for step in trajectory:
step_num = step.get("step", "?")
timestamp = step.get("timestamp", "?")
backend = step.get("backend", "?")
server = step.get("server")
tool = step.get("tool", "?")
command = step.get("command", "?")
parameters = step.get("parameters", {})
result = step.get("result", {})
from anytool.utils.display import Box, BoxStyle
box = Box(width=66, style=BoxStyle.ROUNDED, color='bl')
lines.append("")
lines.append(box.top_line(0))
lines.append(box.text_line(f"Step {step_num} ({timestamp})", align='center', indent=0, text_color='c'))
lines.append(box.separator_line(0))
lines.append(box.text_line(f"Backend: {backend}", indent=0))
if server:
lines.append(box.text_line(f"Server: {server}", indent=0))
lines.append(box.text_line(f"Tool: {tool}", indent=0))
lines.append(box.text_line(f"Command: {command}", indent=0))
lines.append(box.separator_line(0))
# Parameters and result can be multi-line
param_str = json.dumps(parameters, indent=2)
for param_line in param_str.split('\n'):
lines.append(box.text_line(param_line, indent=0))
lines.append(box.separator_line(0))
result_str = json.dumps(result, indent=2)
for result_line in result_str.split('\n'):
lines.append(box.text_line(result_line, indent=0))
lines.append(box.bottom_line(0))
return "\n".join(lines)
def _format_markdown(trajectory: List[Dict[str, Any]]) -> str:
"""Markdown format: table format."""
lines = [
"# Trajectory",
"",
"| Step | Backend | Server | Tool | Status | Screenshot |",
"|------|---------|--------|------|--------|------------|"
]
for step in trajectory:
step_num = step.get("step", "?")
backend = step.get("backend", "?")
server = step.get("server", "-")
tool = step.get("tool", "?")
result_status = "✓" if step.get("result", {}).get("status") == "success" else "✗"
screenshot = "📷" if step.get("screenshot") else ""
lines.append(f"| {step_num} | {backend} | {server} | {tool} | {result_status} | {screenshot} |")
return "\n".join(lines)
def analyze_trajectory(trajectory: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Analyze trajectory and return statistics.

    Args:
        trajectory: list of step records

    Returns:
        Dictionary with:
            total_steps: number of steps
            success_count: steps whose result status is "success"
            success_rate: success_count / total_steps (0.0 when empty)
            backends: step count per backend
            tools: step count per tool
        For an empty trajectory the legacy ``action_types`` key (always an
        empty dict) is also present, for callers written against the old
        empty-result shape.
    """
    if not trajectory:
        # Same keys as the populated result so consumers need no special case.
        return {
            "total_steps": 0,
            "success_count": 0,
            "success_rate": 0.0,
            "backends": {},
            "tools": {},
            "action_types": {},  # legacy alias of "tools"
        }

    total_steps = len(trajectory)
    success_count = 0
    backends = {}
    tools = {}
    for step in trajectory:
        # Count successes
        if step.get("result", {}).get("status") == "success":
            success_count += 1
        # Count backends
        backend = step.get("backend", "unknown")
        backends[backend] = backends.get(backend, 0) + 1
        # Count tools
        tool = step.get("tool", "unknown")
        tools[tool] = tools.get(tool, 0) + 1

    return {
        "total_steps": total_steps,
        "success_count": success_count,
        "success_rate": success_count / total_steps,
        "backends": backends,
        "tools": tools
    }
def load_recording_session(recording_dir: str) -> Dict[str, Any]:
    """
    Load everything recorded in a session directory.

    Reads, when present: ``traj.jsonl`` (plus derived statistics),
    ``metadata.json``, ``plans/plan_*.json`` (in sorted file-name order)
    and ``decisions.log``.

    Args:
        recording_dir: Path to recording directory

    Returns:
        An empty dict when the directory does not exist, otherwise:
        {
            "trajectory": List[Dict],
            "metadata": Dict or None,
            "plans": List[Dict],
            "decisions": List[str],
            "statistics": Dict
        }
    """
    root = Path(recording_dir)
    if not root.exists():
        logger.error(f"Recording directory not found: {recording_dir}")
        return {}

    trajectory: List[Dict[str, Any]] = []
    statistics: Dict[str, Any] = {}
    metadata = None
    plans: List[Any] = []
    decisions: List[str] = []

    # Trajectory and the statistics derived from it
    traj_path = root / "traj.jsonl"
    if traj_path.exists():
        trajectory = load_trajectory_from_jsonl(str(traj_path))
        statistics = analyze_trajectory(trajectory)

    # Metadata
    if (root / "metadata.json").exists():
        metadata = load_metadata(str(root))

    # Plans: an unreadable plan file is skipped with a warning
    plans_root = root / "plans"
    if plans_root.exists():
        for plan_path in sorted(plans_root.glob("plan_*.json")):
            try:
                with open(plan_path, 'r', encoding='utf-8') as handle:
                    plans.append(json.load(handle))
            except Exception as e:
                logger.warning(f"Failed to load plan {plan_path}: {e}")

    # Decisions log: kept as raw lines, newline characters included
    decisions_path = root / "decisions.log"
    if decisions_path.exists():
        try:
            with open(decisions_path, 'r', encoding='utf-8') as handle:
                decisions = handle.readlines()
        except Exception as e:
            logger.warning(f"Failed to load decisions: {e}")

    return {
        "trajectory": trajectory,
        "metadata": metadata,
        "plans": plans,
        "decisions": decisions,
        "statistics": statistics,
    }
def filter_trajectory(
    trajectory: List[Dict[str, Any]],
    backend: Optional[str] = None,
    tool: Optional[str] = None,
    status: Optional[str] = None,
    time_range: Optional[Tuple[str, str]] = None
) -> List[Dict[str, Any]]:
    """Select the steps that match every filter that was supplied.

    Args:
        trajectory: list of step records
        backend: keep only steps with this backend
        tool: keep only steps with this tool name
        status: keep only steps whose result status equals this value
        time_range: ``(start, end)`` timestamps, compared as strings, inclusive

    Returns:
        The matching steps in their original order. When no filter is
        given the input list itself is returned.
    """
    selected = trajectory
    if backend:
        selected = list(filter(lambda s: s.get("backend") == backend, selected))
    if tool:
        selected = list(filter(lambda s: s.get("tool") == tool, selected))
    if status:
        selected = list(filter(lambda s: s.get("result", {}).get("status") == status, selected))
    if time_range:
        earliest, latest = time_range
        # A step without a timestamp compares as the empty string.
        selected = list(filter(lambda s: earliest <= s.get("timestamp", "") <= latest, selected))
    return selected
def extract_errors(trajectory: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Return the steps whose result status is exactly "error", in order."""
    failed = []
    for entry in trajectory:
        outcome = entry.get("result", {})
        if outcome.get("status") == "error":
            failed.append(entry)
    return failed
def generate_summary_report(recording_dir: str, output_file: Optional[str] = None) -> str:
    """Build a Markdown summary of a recording session.

    Sections (each only when data is available): metadata, statistics,
    plans, the first five errors and the first ten decisions.

    Args:
        recording_dir: Path to recording directory
        output_file: when given, the report is also written to this path;
            a write failure is logged and does not affect the return value

    Returns:
        The report text, or an error string when the session cannot be loaded.
    """
    session = load_recording_session(recording_dir)
    if not session:
        return "Error: Could not load recording session"

    lines = []
    lines.append("# Recording Session Summary\n")

    # Metadata section
    if session["metadata"]:
        lines.append("## Metadata")
        metadata = session["metadata"]
        # TrajectoryRecorder metadata stores "task_name" and "backend_counts";
        # fall back to those when "task_id" / "backends" are absent.
        task_label = metadata.get('task_id')
        if task_label is None:
            task_label = metadata.get('task_name') or 'N/A'
        backends = metadata.get('backends')
        if backends is None:
            backends = metadata.get('backend_counts', {})
        lines.append(f"- **Task ID**: {task_label}")
        lines.append(f"- **Start Time**: {metadata.get('start_time', 'N/A')}")
        lines.append(f"- **End Time**: {metadata.get('end_time', 'N/A')}")
        lines.append(f"- **Total Steps**: {metadata.get('total_steps', 0)}")
        lines.append(f"- **Backends**: {', '.join(str(b) for b in backends)}")
        lines.append("")

    # Statistics section
    if session["statistics"]:
        lines.append("## Statistics")
        stats = session["statistics"]
        lines.append(f"- **Total Steps**: {stats.get('total_steps', 0)}")
        lines.append(f"- **Success Count**: {stats.get('success_count', 0)}")
        lines.append(f"- **Success Rate**: {stats.get('success_rate', 0):.2%}")
        lines.append("")
        lines.append("### Backend Distribution")
        for backend, count in stats.get('backends', {}).items():
            lines.append(f"- {backend}: {count}")
        lines.append("")
        lines.append("### Tool Distribution")
        # Most used tools first
        for tool, count in sorted(stats.get('tools', {}).items(), key=lambda x: x[1], reverse=True):
            lines.append(f"- {tool}: {count}")
        lines.append("")

    # Plans section
    if session["plans"]:
        lines.append(f"## Plans ({len(session['plans'])} total)")
        for i, plan in enumerate(session["plans"], 1):
            lines.append(f"### Plan {i}")
            lines.append(f"- Created: {plan.get('created_at', 'N/A')}")
            lines.append(f"- Created by: {plan.get('created_by', 'N/A')}")
            plan_data = plan.get('plan', {})
            if 'task_updates' in plan_data:
                lines.append(f"- Tasks: {len(plan_data['task_updates'])}")
            lines.append("")

    # Errors section
    if session["trajectory"]:
        errors = extract_errors(session["trajectory"])
        if errors:
            lines.append(f"## Errors ({len(errors)} total)")
            for error in errors[:5]:  # Show first 5 errors
                lines.append(f"- Step {error.get('step')}: {error.get('backend')} - {error.get('tool')}")
                result = error.get('result', {})
                error_msg = result.get('output')
                if error_msg is None:
                    # Shell steps record their message under "stderr".
                    error_msg = result.get('stderr') or 'No error message'
                # The stored value may not be a string; slicing needs one.
                error_msg = str(error_msg)
                lines.append(f" ```\n {error_msg[:200]}\n ```")
            if len(errors) > 5:
                lines.append(f" ... and {len(errors) - 5} more errors")
            lines.append("")

    # Decisions section
    if session["decisions"]:
        lines.append(f"## Decisions ({len(session['decisions'])} total)")
        for decision in session["decisions"][:10]:  # Show first 10 decisions
            lines.append(f" {decision.strip()}")
        if len(session["decisions"]) > 10:
            lines.append(f" ... and {len(session['decisions']) - 10} more decisions")
        lines.append("")

    report = "\n".join(lines)

    # Save to file if requested
    if output_file:
        try:
            with open(output_file, 'w', encoding='utf-8') as f:
                f.write(report)
            logger.info(f"Report saved to {output_file}")
        except Exception as e:
            logger.error(f"Failed to save report: {e}")
    return report
def compare_recordings(recording_dir1: str, recording_dir2: str) -> Dict[str, Any]:
    """Compare the trajectory statistics of two recording directories.

    Args:
        recording_dir1: path to the first recording directory
        recording_dir2: path to the second recording directory

    Returns:
        A dict with a ``session1`` and a ``session2`` overview (path, total
        steps, success rate, backends) and ``differences`` computed as
        second minus first. Missing statistics count as zero.
    """
    first = load_recording_session(recording_dir1)
    second = load_recording_session(recording_dir2)
    first_stats = first.get("statistics", {})
    second_stats = second.get("statistics", {})

    def _overview(path, stats):
        # Per-session slice of the comparison.
        return {
            "path": path,
            "total_steps": stats.get("total_steps", 0),
            "success_rate": stats.get("success_rate", 0),
            "backends": stats.get("backends", {})
        }

    step_delta = second_stats.get("total_steps", 0) - first_stats.get("total_steps", 0)
    rate_delta = second_stats.get("success_rate", 0) - first_stats.get("success_rate", 0)
    return {
        "session1": _overview(recording_dir1, first_stats),
        "session2": _overview(recording_dir2, second_stats),
        "differences": {
            "step_diff": step_delta,
            "success_rate_diff": rate_delta
        }
    }
================================================
FILE: anytool/recording/video.py
================================================
"""
Video Recorder
Communicates with local_server through platform.RecordingClient
Supports local and remote recording (through configuration LOCAL_SERVER_URL)
"""
from pathlib import Path
from typing import Optional
from anytool.utils.logging import Logger
from anytool.platform import RecordingClient
logger = Logger.get_logger(__name__)
class VideoRecorder:
    """Screen recorder driven through ``platform.RecordingClient``.

    ``start()`` begins a recording and ``stop()`` ends it, asking the client
    to store the video at ``output_path``. Both return True on success and
    False otherwise; neither raises.
    """

    def __init__(
        self,
        output_path: str,
        base_url: Optional[str] = None,
    ):
        """
        Initialize video recorder

        Args:
            output_path: output video path
            base_url: local_server address (None = read from config/environment variables)
        """
        self.output_path = Path(output_path)
        self.base_url = base_url
        self.is_recording = False
        self._client: Optional[RecordingClient] = None

    async def _close_client(self):
        """Close and drop the recording client, if any (best effort)."""
        client, self._client = self._client, None
        if client is None:
            return
        try:
            await client.close()
        except Exception as e:
            logger.debug(f"Failed to close recording client: {e}")

    async def start(self):
        """Start recording screen.

        Returns:
            True when recording started; False when already recording or
            when the start request failed.
        """
        if self.is_recording:
            return False
        started = False
        try:
            if self._client is None:
                self._client = RecordingClient(base_url=self.base_url)
            success = await self._client.start_recording()
            if success:
                started = True
                self.is_recording = True
                logger.info("Video recording started")
                return True
            logger.warning("Video recording failed to start")
            return False
        except Exception as e:
            logger.warning(f"Video recording failed to start: {e}")
            return False
        finally:
            # A client that never started recording would otherwise be left
            # open when the caller discards this recorder after a failure.
            if not started:
                await self._close_client()

    async def stop(self):
        """Stop recording screen and save to local.

        Returns:
            True when the video was received; False when not recording or
            when stopping failed. The recorder is idle afterwards either way.
        """
        if not self.is_recording:
            return False
        try:
            if self._client is None:
                # Nothing can be stopped without a client.
                return False
            video_bytes = await self._client.end_recording(dest=str(self.output_path))
            if video_bytes:
                video_size_mb = len(video_bytes) / (1024 * 1024)
                logger.info(f"Video recording stopped ({video_size_mb:.2f} MB)")
                return True
            logger.warning("Video recording failed to stop")
            return False
        except Exception as e:
            logger.warning(f"Video recording failed to stop: {e}")
            return False
        finally:
            # The client is released below, so this instance can no longer
            # control the recording; reset the flag so start() works again.
            self.is_recording = False
            await self._close_client()
__all__ = ['VideoRecorder']
================================================
FILE: anytool/recording/viewer.py
================================================
"""
Recording Viewer
Convenient tools for viewing and analyzing recording sessions.
"""
import json
from pathlib import Path
from typing import Optional, Dict, Any, List
from anytool.utils.logging import Logger
from .utils import load_recording_session, generate_summary_report
from .action_recorder import load_agent_actions, analyze_agent_actions, format_agent_actions
logger = Logger.get_logger(__name__)
class RecordingViewer:
    """
    Viewer for analyzing recording sessions.

    Provides convenient methods to:
    - Load and display recordings
    - Analyze agent behaviors
    - Generate reports
    """

    def __init__(self, recording_dir: str):
        """
        Initialize viewer with a recording directory.

        Args:
            recording_dir: Path to recording directory

        Raises:
            ValueError: if the directory does not exist.
        """
        self.recording_dir = Path(recording_dir)
        if not self.recording_dir.exists():
            raise ValueError(f"Recording directory not found: {recording_dir}")
        # Load session data
        self.session = load_recording_session(str(self.recording_dir))
        logger.info(f"Loaded recording from {recording_dir}")

    @staticmethod
    def _text(record: Dict[str, Any], key: str, default: str = "") -> str:
        """Return ``record[key]`` as a string; ``default`` when missing or null."""
        value = record.get(key, default)
        return default if value is None else str(value)

    def show_summary(self) -> str:
        """
        Display a summary of the recording.

        Returns:
            Formatted summary string, or "No metadata available" when the
            session has no metadata.
        """
        if not self.session.get("metadata"):
            return "No metadata available"
        metadata = self.session["metadata"]
        stats = self.session.get("statistics", {})

        # TrajectoryRecorder metadata stores "task_name"; use it when no
        # "task_id" was recorded.
        task_label = metadata.get('task_id')
        if task_label is None:
            task_label = metadata.get('task_name') or 'N/A'

        lines = []
        lines.append("=" * 70)
        lines.append("RECORDING SUMMARY")
        lines.append("=" * 70)
        lines.append(f"Task ID: {task_label}")
        lines.append(f"Start: {metadata.get('start_time', 'N/A')}")
        lines.append(f"End: {metadata.get('end_time', 'N/A')}")
        lines.append(f"Total Steps: {metadata.get('total_steps', 0)}")
        lines.append("")
        lines.append("Statistics:")
        lines.append(f" - Success Rate: {stats.get('success_rate', 0):.2%}")
        lines.append(f" - Success Count: {stats.get('success_count', 0)}/{stats.get('total_steps', 0)}")
        lines.append("")
        if stats.get("backends"):
            lines.append("Backend Usage:")
            for backend, count in sorted(stats["backends"].items(), key=lambda x: x[1], reverse=True):
                lines.append(f" - {backend}: {count}")
        lines.append("=" * 70)
        return "\n".join(lines)

    def show_agent_actions(self, format_type: str = "compact", agent_name: Optional[str] = None) -> str:
        """Format the recorded agent actions, optionally for a single agent.

        Args:
            format_type: format name passed through to ``format_agent_actions``
            agent_name: when given, only actions of this agent are shown
        """
        actions = load_agent_actions(str(self.recording_dir))
        if agent_name:
            actions = [a for a in actions if a.get("agent_name") == agent_name]
        if not actions:
            return f"No agent actions found{' for ' + agent_name if agent_name else ''}"
        # Add header
        header = f"\nAGENT ACTIONS ({len(actions)} total)"
        if agent_name:
            header += f" - {agent_name}"
        header += "\n" + "=" * 70
        # Format actions
        formatted = format_agent_actions(actions, format_type)
        return header + "\n" + formatted

    def analyze_agents(self) -> str:
        """Summarize action counts per agent and per action type."""
        actions = load_agent_actions(str(self.recording_dir))
        stats = analyze_agent_actions(actions)
        # Read once with a default so a missing total cannot raise KeyError.
        total = stats.get('total_actions', 0)

        lines = []
        lines.append("\nAGENT ANALYSIS")
        lines.append("=" * 70)
        lines.append(f"Total Actions: {total}")
        lines.append("")
        lines.append("By Agent:")
        for agent, count in sorted(stats.get('by_agent', {}).items(), key=lambda x: x[1], reverse=True):
            percentage = (count / total * 100) if total > 0 else 0
            lines.append(f" - {agent}: {count} ({percentage:.1f}%)")
        lines.append("")
        lines.append("By Action Type:")
        for action_type, count in sorted(stats.get('by_type', {}).items(), key=lambda x: x[1], reverse=True):
            percentage = (count / total * 100) if total > 0 else 0
            lines.append(f" - {action_type}: {count} ({percentage:.1f}%)")
        return "\n".join(lines)

    def generate_full_report(self, output_file: Optional[str] = None) -> str:
        """Build the Markdown summary report; see ``generate_summary_report``."""
        return generate_summary_report(str(self.recording_dir), output_file)

    def export_to_json(self, output_file: str):
        """Write the loaded session dictionary to ``output_file`` as JSON."""
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(self.session, f, indent=2, ensure_ascii=False)
        logger.info(f"Exported session to {output_file}")

    def show_timeline(self, max_events: int = 50) -> str:
        """Merge agent actions and tool executions into one list ordered by timestamp.

        Args:
            max_events: maximum number of events to print
        """
        # Load all events
        actions = load_agent_actions(str(self.recording_dir))
        trajectory = self.session.get("trajectory", [])

        # Combine all events with unified format
        timeline = []

        # Add agent actions
        for action in actions:
            agent_label = self._text(action, "agent_type", "?").upper()
            timeline.append({
                # Always a string so sorting and slicing below cannot fail.
                "timestamp": self._text(action, "timestamp"),
                "type": "agent_action",
                "agent_name": action.get("agent_name", ""),
                "agent_type": action.get("agent_type", "unknown"),
                "action_type": action.get("action_type", ""),
                "step": action.get("step"),
                "correlation_id": action.get("correlation_id", ""),
                "description": f"[{agent_label}] {action.get('action_type', '?')}",
                "related_tool_steps": action.get("related_tool_steps", []),
            })

        # Add tool executions
        for traj_step in trajectory:
            backend_label = self._text(traj_step, "backend", "?").upper()
            timeline.append({
                "timestamp": self._text(traj_step, "timestamp"),
                "type": "tool_execution",
                "backend": traj_step.get("backend", ""),
                "tool": traj_step.get("tool", ""),
                "step": traj_step.get("step"),
                "agent_name": traj_step.get("agent_name", ""),
                "description": f"[TOOL:{backend_label}] {traj_step.get('tool', '?')}",
                "status": (traj_step.get("result") or {}).get("status", ""),
            })

        # Sort by timestamp
        timeline.sort(key=lambda x: x.get("timestamp", ""))

        # Format output
        lines = []
        lines.append("\nUNIFIED TIMELINE")
        lines.append("=" * 100)
        lines.append(f"Total events: {len(timeline)} (showing first {max_events})")
        lines.append("")
        for item in timeline[:max_events]:
            timestamp = item.get("timestamp", "N/A")
            # Time-of-day part of an ISO timestamp, else the last 8 characters
            time_str = timestamp.split("T")[1][:8] if "T" in timestamp else timestamp[-8:]

            # Format line with type indicator
            type_marker = {
                "agent_action": "🤖",
                "tool_execution": "🔧"
            }.get(item.get("type"), "•")
            desc = item.get("description", "")
            agent = item.get("agent_name", "")
            agent_type = item.get("agent_type", "")
            line = f"{time_str} {type_marker} {desc}"

            # Add agent info if available
            if agent and agent_type:
                line += f" (by {agent}/{agent_type})"
            elif agent:
                line += f" (by {agent})"
            lines.append(line)

            # Show correlations
            correlations = []
            if item.get("related_tool_steps"):
                correlations.append(f"→ tool steps: {item['related_tool_steps']}")
            if item.get("related_action_step"):
                correlations.append(f"→ action step: {item['related_action_step']}")
            for corr in correlations:
                lines.append(f" {corr}")

        if len(timeline) > max_events:
            lines.append(f"\n... and {len(timeline) - max_events} more events")
        return "\n".join(lines)

    def show_agent_flow(self, agent_name: Optional[str] = None) -> str:
        """
        Show the flow of a specific agent's actions and related events.

        Args:
            agent_name: when given, only actions of this agent are shown
        """
        actions = load_agent_actions(str(self.recording_dir))
        if agent_name:
            actions = [a for a in actions if a.get("agent_name") == agent_name]

        lines = []
        lines.append(f"\nAGENT FLOW{' - ' + agent_name if agent_name else ''}")
        lines.append("=" * 100)

        # Sort by timestamp (null/missing timestamps sort first)
        actions.sort(key=lambda x: self._text(x, "timestamp"))

        for action in actions:
            raw_timestamp = self._text(action, "timestamp")
            timestamp = raw_timestamp.split("T")[1][:8] if "T" in raw_timestamp else "N/A"
            agent_type = self._text(action, "agent_type", "?").upper()
            action_type = action.get("action_type", "?")
            step = action.get("step", "?")
            lines.append(f"{timestamp} [{agent_type}] Action #{step}: {action_type}")

            # Show reasoning if available
            reasoning = action.get("reasoning")
            if isinstance(reasoning, dict):
                thought = reasoning.get("thought", "")
                if thought:
                    lines.append(f" 💭 {str(thought)[:80]}...")

            # Show output
            if action.get("output"):
                output = action["output"]
                if isinstance(output, dict):
                    for key in ["message", "status", "evaluation"]:
                        if key in output:
                            lines.append(f" 📤 {key}: {str(output[key])[:60]}")
            lines.append("")
        return "\n".join(lines)
def view_recording(recording_dir: str):
    """
    Print a quick overview of a recording to stdout.

    Shows the summary, the agent analysis and the compact agent action
    list. Any failure is logged and printed instead of being raised.
    """
    try:
        viewer = RecordingViewer(recording_dir)
        # Literal strings are printed as-is; callables are rendered lazily
        # so earlier sections are already on screen if a later one fails.
        sections = (
            viewer.show_summary,
            "\n",
            viewer.analyze_agents,
            "\n",
            "Agent Actions (compact):",
            lambda: viewer.show_agent_actions(format_type="compact"),
        )
        for section in sections:
            print(section() if callable(section) else section)
    except Exception as e:
        logger.error(f"Failed to view recording: {e}")
        print(f"Error: {e}")
def compare_recordings(recording_dir1: str, recording_dir2: str) -> str:
    """
    Compare two recordings side by side.

    Args:
        recording_dir1: path to the first recording directory
        recording_dir2: path to the second recording directory

    Returns:
        A text report with each recording's task and step count and the
        differences (second minus first), or ``"Error: ..."`` on failure.
    """

    def _task_label(meta):
        # TrajectoryRecorder metadata stores "task_name"; use it when no
        # "task_id" was recorded.
        label = meta.get('task_id')
        if label is None:
            label = meta.get('task_name') or 'N/A'
        return label

    try:
        viewer1 = RecordingViewer(recording_dir1)
        viewer2 = RecordingViewer(recording_dir2)

        lines = []
        lines.append("=" * 70)
        lines.append("RECORDING COMPARISON")
        lines.append("=" * 70)
        lines.append("")

        # Compare metadata. The session always has a "metadata" key, holding
        # None when no metadata file was loaded, so a .get default is not enough.
        meta1 = viewer1.session.get("metadata") or {}
        meta2 = viewer2.session.get("metadata") or {}
        lines.append("Recording 1:")
        lines.append(f" Task: {_task_label(meta1)}")
        lines.append(f" Steps: {meta1.get('total_steps', 0)}")
        lines.append("")
        lines.append("Recording 2:")
        lines.append(f" Task: {_task_label(meta2)}")
        lines.append(f" Steps: {meta2.get('total_steps', 0)}")
        lines.append("")

        # Compare statistics
        stats1 = viewer1.session.get("statistics") or {}
        stats2 = viewer2.session.get("statistics") or {}
        lines.append("Differences:")
        lines.append(f" Steps: {meta2.get('total_steps', 0) - meta1.get('total_steps', 0):+d}")
        lines.append(f" Success Rate: {stats2.get('success_rate', 0) - stats1.get('success_rate', 0):+.2%}")
        return "\n".join(lines)
    except Exception as e:
        logger.error(f"Failed to compare recordings: {e}")
        return f"Error: {e}"
# CLI interface: print an overview of the recording directory given as the
# first command-line argument.
if __name__ == "__main__":
    import sys

    if len(sys.argv) < 2:
        # Name the required argument so the usage line is actionable.
        print("Usage: python -m anytool.recording.viewer <recording_dir>")
        sys.exit(1)
    recording_dir = sys.argv[1]
    view_recording(recording_dir)
================================================
FILE: anytool/tool_layer.py
================================================
from __future__ import annotations
import asyncio
import traceback
import uuid
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
from anytool.agents import GroundingAgent
from anytool.llm import LLMClient
from anytool.grounding.core.grounding_client import GroundingClient
from anytool.config import get_config, load_config
from anytool.config.loader import get_agent_config
from anytool.recording import RecordingManager
from anytool.utils.logging import Logger
# Module-level logger obtained through the project's Logger facade and named
# after this module.
logger = Logger.get_logger(__name__)
@dataclass
class AnyToolConfig:
    """
    Settings bundle consumed by ``AnyTool``.

    Every field has a default, so ``AnyToolConfig()`` is usable as-is. The
    only validation performed is that ``llm_model`` is non-empty.
    """

    # ---- LLM ----
    llm_model: str = "openrouter/anthropic/claude-sonnet-4.5"
    llm_enable_thinking: bool = False
    llm_timeout: float = 120.0
    llm_max_retries: int = 3
    llm_rate_limit_delay: float = 0.0
    # Extra keyword arguments forwarded verbatim to the main LLM client
    llm_kwargs: Dict[str, Any] = field(default_factory=dict)

    # ---- Task-specific models (None means: fall back to llm_model) ----
    tool_retrieval_model: Optional[str] = None  # used by the tool-retrieval LLM filter
    visual_analysis_model: Optional[str] = None  # used for visual analysis

    # ---- Grounding ----
    grounding_config_path: Optional[str] = None
    grounding_max_iterations: int = 20
    grounding_system_prompt: Optional[str] = None

    # ---- Backends ----
    # None selects all backends: ["shell", "gui", "mcp", "web", "system"]
    backend_scope: Optional[List[str]] = None

    # ---- Workspace ----
    workspace_dir: Optional[str] = None

    # ---- Recording ----
    enable_recording: bool = False
    recording_backends: Optional[List[str]] = None
    recording_log_dir: str = "./logs/recordings"
    enable_screenshot: bool = True
    enable_video: bool = True
    enable_conversation_log: bool = True  # LLM conversations go to conversations.jsonl

    # ---- Logging ----
    log_level: str = "INFO"
    log_to_file: bool = False
    log_file_path: Optional[str] = None

    def __post_init__(self):
        """Reject an empty ``llm_model``; otherwise just log the chosen model.

        Raises:
            ValueError: if ``llm_model`` is empty or otherwise falsy.
        """
        if self.llm_model:
            logger.debug(f"AnyToolConfig initialized with model: {self.llm_model}")
            return
        raise ValueError("llm_model is required")
class AnyTool:
    """
    Facade that wires an LLM client, the grounding client, an optional
    recording manager and a ``GroundingAgent`` together and runs tasks
    through them.

    Use either ``async with AnyTool(config) as tool: ...`` or call
    ``initialize()`` / ``cleanup()`` explicitly. ``execute()`` refuses to
    start while another task is still running on the same instance.
    """

    def __init__(self, config: Optional[AnyToolConfig] = None):
        """Store *config* (``AnyToolConfig()`` when omitted); components are built in ``initialize()``."""
        self.config = config or AnyToolConfig()
        self._llm_client: Optional[LLMClient] = None
        self._grounding_client: Optional[GroundingClient] = None
        self._grounding_agent: Optional[GroundingAgent] = None
        self._recording_manager: Optional[RecordingManager] = None
        self._initialized = False
        self._running = False
        logger.debug("AnyTool instance created")

    async def initialize(self) -> None:
        """
        Build the LLM client, grounding client, optional recording manager and
        the grounding agent.

        Calling it on an already-initialized instance only logs a warning.
        If any step fails, ``cleanup()`` is awaited and the original exception
        is re-raised.
        """
        if self._initialized:
            logger.warning("AnyTool already initialized")
            return

        logger.info("Initializing AnyTool...")
        try:
            self._llm_client = LLMClient(
                model=self.config.llm_model,
                enable_thinking=self.config.llm_enable_thinking,
                rate_limit_delay=self.config.llm_rate_limit_delay,
                max_retries=self.config.llm_max_retries,
                timeout=self.config.llm_timeout,
                **self.config.llm_kwargs
            )
            logger.info(f"✓ LLM Client: {self.config.llm_model}")

            # Load grounding config.
            # load_config accepts several files and deep-merges them, later
            # files overriding earlier ones, so a custom file is layered on
            # top of the defaults.
            if self.config.grounding_config_path:
                from anytool.config.loader import CONFIG_DIR
                from anytool.config.constants import CONFIG_GROUNDING, CONFIG_SECURITY

                grounding_config = load_config(
                    CONFIG_DIR / CONFIG_GROUNDING,
                    CONFIG_DIR / CONFIG_SECURITY,
                    self.config.grounding_config_path
                )
                logger.info(f"Merged custom grounding config: {self.config.grounding_config_path}")
            else:
                # Default configs only
                grounding_config = get_config()

            self._grounding_client = GroundingClient(config=grounding_config)
            await self._grounding_client.initialize_all_providers()

            backends = list(self._grounding_client.list_providers().keys())
            logger.info(f"✓ Grounding Client: {len(backends)} backends")
            logger.debug(f" Available backends: {[b.value for b in backends]}")

            if self.config.enable_recording:
                # task_id is filled in per task by execute()
                self._recording_manager = RecordingManager(
                    enabled=True,
                    task_id="",
                    log_dir=self.config.recording_log_dir,
                    backends=self.config.recording_backends,
                    enable_screenshot=self.config.enable_screenshot,
                    enable_video=self.config.enable_video,
                    enable_conversation_log=self.config.enable_conversation_log,
                    agent_name="AnyTool",
                )
                # Inject recording_manager to grounding_client for GUI intermediate steps
                self._grounding_client.recording_manager = self._recording_manager
                # Register to LLM client for auto-recording tool results
                self._recording_manager.register_to_llm(self._llm_client)
                logger.info(f"✓ Recording enabled: {len(self._recording_manager.backends or [])} backends")

            agent_config = get_agent_config("GroundingAgent")
            if agent_config:
                # max_iterations: the config-file value wins when present;
                # AnyToolConfig.grounding_max_iterations is only the fallback.
                max_iterations = agent_config.get("max_iterations", self.config.grounding_max_iterations)
                # backend_scope: AnyToolConfig > config file > all backends
                backend_scope = self.config.backend_scope or agent_config.get("backend_scope") or ["gui", "shell", "mcp", "web", "system"]
                visual_analysis_timeout = agent_config.get("visual_analysis_timeout", 30.0)
                # Keep self.config in sync so later log messages report the effective value
                self.config.grounding_max_iterations = max_iterations
                logger.info(f"Loaded GroundingAgent config from config_agents.json (max_iterations={max_iterations}, visual_analysis_timeout={visual_analysis_timeout}s)")
            else:
                # Fall back to AnyToolConfig values
                max_iterations = self.config.grounding_max_iterations
                backend_scope = self.config.backend_scope or ["gui", "shell", "mcp", "web", "system"]
                visual_analysis_timeout = 30.0
                logger.warning(f"config_agents.json not found, using default config (max_iterations={max_iterations})")

            # Create separate LLM client for tool retrieval if configured
            tool_retrieval_llm = None
            if self.config.tool_retrieval_model:
                tool_retrieval_llm = LLMClient(
                    model=self.config.tool_retrieval_model,
                    timeout=self.config.llm_timeout,
                    max_retries=self.config.llm_max_retries,
                )
                logger.info(f"✓ Tool retrieval LLM: {self.config.tool_retrieval_model}")

            self._grounding_agent = GroundingAgent(
                name="AnyTool-GroundingAgent",
                backend_scope=backend_scope,
                llm_client=self._llm_client,
                grounding_client=self._grounding_client,
                recording_manager=self._recording_manager,
                system_prompt=self.config.grounding_system_prompt,
                max_iterations=max_iterations,
                visual_analysis_timeout=visual_analysis_timeout,
                tool_retrieval_llm=tool_retrieval_llm,
                visual_analysis_model=self.config.visual_analysis_model,
            )
            logger.info(f"✓ GroundingAgent: {', '.join(backend_scope)}")

            self._initialized = True
            logger.info("="*60)
            logger.info("AnyTool ready to use!")
            logger.info("="*60)
        except Exception as e:
            logger.error(f"Failed to initialize AnyTool: {e}")
            await self.cleanup()
            raise

    async def execute(
        self,
        task: str,
        context: Optional[Dict[str, Any]] = None,
        workspace_dir: Optional[str] = None,
        max_iterations: Optional[int] = None,
        task_id: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
        Execute a task with AnyTool.

        Args:
            task: Task instruction
            context: Additional context. It is copied, never modified in place.
            workspace_dir: Working directory. Falls back to
                ``config.workspace_dir``, then the recording trajectory
                directory, then a per-task directory under the system temp dir.
            max_iterations: Max iterations override
            task_id: External task ID for recording/logging. If None, generates a random one.
                This allows external callers (e.g., OSWorld) to specify their own task ID
                so recordings can be easily matched with benchmark results.

        Returns:
            The agent's result dict extended with ``task_id`` and
            ``execution_time``. If execution raises, a dict with
            ``status="error"``, the error text and a traceback is returned
            instead of propagating the exception.

        Raises:
            RuntimeError: if the instance is not initialized or is already
                running a task.
        """
        if not self._initialized:
            raise RuntimeError(
                "AnyTool not initialized. "
                "Call await tool_layer.initialize() first or use async with."
            )
        if self._running:
            raise RuntimeError("AnyTool is already running a task.")

        logger.info("="*60)
        logger.info(f"Task: {task[:100]}...")
        logger.info("="*60)

        self._running = True
        start_time = asyncio.get_running_loop().time()

        # Use external task_id if provided, otherwise generate one
        if task_id is None:
            task_id = f"task_{uuid.uuid4().hex[:8]}"
        logger.info(f"Task ID: {task_id}")

        try:
            # Work on a copy: the keys added below must not leak into the
            # caller's dict (which may be reused across tasks).
            execution_context = dict(context) if context else {}
            execution_context["task_id"] = task_id
            execution_context["instruction"] = task
            if max_iterations is not None:
                execution_context["max_iterations"] = max_iterations

            if self._recording_manager:
                # A session left running by a previous task is closed first
                if self._recording_manager.recording_status:
                    await self._recording_manager.stop()
                    logger.debug("Stopped previous recording session")
                self._recording_manager.task_id = task_id
                await self._recording_manager.start()
                logger.info(f"Recording started: {task_id}")

            # Workspace priority: argument > config > recording dir > temp dir
            if workspace_dir:
                execution_context["workspace_dir"] = workspace_dir
                logger.info(f"Workspace: {workspace_dir}")
            elif self.config.workspace_dir:
                execution_context["workspace_dir"] = self.config.workspace_dir
                logger.info(f"Workspace: {self.config.workspace_dir}")
            elif self._recording_manager and self._recording_manager.trajectory_dir:
                execution_context["workspace_dir"] = self._recording_manager.trajectory_dir
                logger.info(f"Workspace: {execution_context['workspace_dir']}")
            else:
                import tempfile
                from pathlib import Path

                workspace = Path(tempfile.gettempdir()) / "anytool_workspace" / task_id
                workspace.mkdir(parents=True, exist_ok=True)
                execution_context["workspace_dir"] = str(workspace)
                logger.info(f"Workspace: {execution_context['workspace_dir']}")

            logger.info(f"Executing with GroundingAgent (max {max_iterations or self.config.grounding_max_iterations} iterations)...")
            result = await self._grounding_agent.process(execution_context)

            execution_time = asyncio.get_running_loop().time() - start_time
            final_result = {
                **result,
                "task_id": task_id,
                "execution_time": execution_time,
            }

            status = result.get('status', 'unknown')
            iterations = result.get('iterations', 0)
            tool_count = len(result.get('tool_executions', []))

            logger.info("="*60)
            if status == "success":
                logger.info(
                    f"Task completed successfully! "
                    f"({iterations} iterations, {tool_count} tool calls, {execution_time:.2f}s)"
                )
            elif status == "incomplete":
                logger.warning(
                    f"Task incomplete after {iterations} iterations. "
                    f"Consider increasing max_iterations."
                )
            else:
                logger.error(f"Task failed: {result.get('error', 'Unknown error')}")
            logger.info("="*60)

            return final_result
        except Exception as e:
            execution_time = asyncio.get_running_loop().time() - start_time
            tb = traceback.format_exc(limit=10)
            logger.error(f"Task execution failed: {e}", exc_info=True)
            return {
                "status": "error",
                "error": str(e),
                "traceback": tb,
                "response": f"Task execution error: {str(e)}",
                "execution_time": execution_time,
                "task_id": task_id,
                "iterations": 0,
                "tool_executions": [],
            }
        finally:
            try:
                if self._recording_manager and self._recording_manager.recording_status:
                    try:
                        await self._recording_manager.stop()
                        logger.debug(f"Recording stopped: {task_id}")
                    except Exception as e:
                        logger.warning(f"Failed to stop recording: {e}")
                # Trigger quality evolution periodically
                await self._maybe_evolve_quality()
            finally:
                # Release the busy flag no matter what happened above;
                # otherwise one failure would block every later execute().
                self._running = False

    async def _maybe_evolve_quality(self) -> None:
        """Trigger quality evolution when the quality manager says it is due.

        Best-effort: any failure is logged at debug level and swallowed.
        """
        if not self._grounding_client or not self._grounding_client.quality_manager:
            return
        try:
            # The quality manager decides when evolution is due
            if self._grounding_client.quality_manager.should_evolve():
                report = await self._grounding_client.evolve_quality()
                if report.get("recommendations"):
                    logger.info(f"Quality evolution: {report['recommendations']}")
        except Exception as e:
            logger.debug(f"Quality evolution skipped: {e}")

    async def cleanup(self) -> None:
        """
        Close all sessions and release resources.

        Automatically called when using context manager. Each step is
        attempted independently, so a failure while closing sessions no
        longer prevents the recording from being stopped or the state flags
        from being reset. Never raises.
        """
        logger.info("Cleaning up AnyTool resources...")
        failed = False

        try:
            if self._grounding_client:
                await self._grounding_client.close_all_sessions()
                logger.debug("All grounding sessions closed")
        except Exception as e:
            failed = True
            logger.error(f"Error during cleanup: {e}", exc_info=True)

        try:
            if self._recording_manager and self._recording_manager.recording_status:
                await self._recording_manager.stop()
                logger.debug("Recording manager stopped")
        except Exception as e:
            logger.warning(f"Failed to stop recording: {e}")

        self._initialized = False
        self._running = False
        if not failed:
            logger.info("AnyTool cleanup complete")

    def is_initialized(self) -> bool:
        """Return True once ``initialize()`` has completed and until ``cleanup()``."""
        return self._initialized

    def is_running(self) -> bool:
        """Return True while ``execute()`` is in progress."""
        return self._running

    def get_config(self) -> AnyToolConfig:
        """Return the live configuration object (not a copy)."""
        return self.config

    def list_backends(self) -> List[str]:
        """Return the ``value`` of every provider key known to the grounding client.

        Raises:
            RuntimeError: if the instance is not initialized.
        """
        if not self._initialized:
            raise RuntimeError("AnyTool not initialized")
        return [backend.value for backend in self._grounding_client.list_providers().keys()]

    def list_sessions(self) -> List[str]:
        """Return the grounding client's session list.

        Raises:
            RuntimeError: if the instance is not initialized.
        """
        if not self._initialized:
            raise RuntimeError("AnyTool not initialized")
        return self._grounding_client.list_sessions()

    async def __aenter__(self):
        """Context manager entry"""
        await self.initialize()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit"""
        await self.cleanup()
        # Never suppress an exception raised inside the ``async with`` body
        return False

    def __repr__(self) -> str:
        """Return ``<AnyTool(status=..., backends=...)>`` for debugging."""
        status = "initialized" if self._initialized else "not initialized"
        if self._running:
            status = "running"
        backends = ", ".join(self.config.backend_scope) if self.config.backend_scope else "all"
        return f"<AnyTool(status={status}, backends={backends})>"
================================================
FILE: anytool/utils/cli_display.py
================================================
"""CLI Display utilities for AnyTool startup and interaction"""
from anytool.tool_layer import AnyToolConfig
from anytool.utils.display import Box, BoxStyle, colorize
class CLIDisplay:
    """Static helpers that print the boxed panels of the AnyTool CLI.

    Every method writes straight to stdout via ``print`` and returns None.
    Panels are drawn with ``Box`` at width 70 and a 4-space indent.
    """

    @staticmethod
    def print_banner():
        """Print the framed "AnyTool" title banner with its subtitle."""
        box = Box(width=70, style=BoxStyle.ROUNDED, color='c')
        print()
        print(box.top_line(indent=4))
        print(box.empty_line(indent=4))
        title = colorize("AnyTool", 'c', bold=True)
        print(box.text_line(title, align='center', indent=4, text_color=''))
        subtitle = "Universal Tool-Use Layer for AI Agents"
        print(box.text_line(subtitle, align='center', indent=4, text_color='gr'))
        print(box.empty_line(indent=4))
        print(box.bottom_line(indent=4))
        print()

    @staticmethod
    def print_configuration(config: AnyToolConfig):
        """Print the model, iteration limit and LLM timeout taken from *config*.

        The panel has a header row, a separator and a bottom border; no top
        border is drawn.
        """
        box = Box(width=70, style=BoxStyle.ROUNDED, color='bl')
        print(box.text_line(colorize("◉ System Configuration", 'c', bold=True), align='center', indent=4, text_color=''))
        print(box.separator_line(indent=4))
        configs = [
            ("AI Model", config.llm_model, 'bl'),
            ("Max Iterations", str(config.grounding_max_iterations), 'c'),
            ("LLM Timeout", f"{config.llm_timeout}s", 'c'),
        ]
        for label, value, color in configs:
            # The label is plain text, so the :20s width pads it correctly
            line = f" {label:20s} {colorize(value, color)}"
            print(box.text_line(line, indent=4, text_color=''))
        print(box.bottom_line(indent=4))
        print()

    @staticmethod
    def print_initialization_progress(steps: list, show_header: bool = True):
        """Print one row per ``(step, status)`` pair in *steps*.

        ``"ok"`` gets a check mark, ``"error"`` a cross, anything else a
        ``[...]`` marker. The header rows are skipped when *show_header* is
        False.
        """
        box = Box(width=70, style=BoxStyle.ROUNDED, color='g')
        if show_header:
            print(box.text_line(colorize("► Initializing Components", 'g', bold=True),
                                align='center', indent=4, text_color=''))
            print(box.separator_line(indent=4))
        for step, status in steps:
            if status == "ok":
                icon = colorize("✓", 'g')
            elif status == "error":
                icon = colorize("✗", 'rd')
            else:
                icon = colorize("[...]", 'y')
            line = f" {icon} {step}"
            print(box.text_line(line, indent=4, text_color=''))
        print(box.bottom_line(indent=4))
        print()

    @staticmethod
    def print_result_summary(result: dict):
        """Print a status/time/iteration summary, then the user-facing response.

        Keys read from *result*: ``status``, ``execution_time``,
        ``iterations``, ``completed_tasks``, ``evaluation_results`` and
        ``user_response``. Missing keys fall back to defaults; the response
        panel is printed only when ``user_response`` is truthy.
        """
        box = Box(width=70, style=BoxStyle.ROUNDED, color='c')
        print()
        print(box.text_line(colorize("◈ Execution Summary", 'c', bold=True),
                            align='center', indent=4, text_color=''))
        print(box.separator_line(indent=4))

        status = result.get("status", "unknown")
        status_colors = {
            "completed": 'g',
            # "success" / "incomplete" are the statuses AnyTool.execute()
            # distinguishes; without entries here they rendered in the gray
            # fallback colour.
            "success": 'g',
            "incomplete": 'y',
            "timeout": 'y',
            "error": 'rd',
            "max_iterations_reached": 'y',
        }
        status_color = status_colors.get(status, 'gr')
        status_display = colorize(status.upper(), status_color, bold=True)
        exec_time = result.get('execution_time', 0)

        result_lines = [
            f" Status: {status_display}",
            f" Execution Time: {colorize(f'{exec_time:.2f}s', 'c')}",
            f" Iterations: {colorize(str(result.get('iterations', 0)), 'y')}",
            f" Completed Tasks: {colorize(str(result.get('completed_tasks', 0)), 'g')}",
        ]
        if result.get('evaluation_results'):
            result_lines.append(f" Evaluations: {colorize(str(len(result['evaluation_results'])), 'bl')}")
        for line in result_lines:
            print(box.text_line(line, indent=4, text_color=''))
        print(box.bottom_line(indent=4))
        print()

        # Print user response (the actual answer/result)
        if result.get('user_response'):
            response_box = Box(width=70, style=BoxStyle.ROUNDED, color='g')
            print(response_box.text_line(colorize("◈ Result", 'g', bold=True),
                                         align='center', indent=4, text_color=''))
            print(response_box.separator_line(indent=4))
            user_response = result['user_response']
            for line in user_response.split('\n'):
                # Blank lines are dropped; the rest is trimmed and re-indented
                if line.strip():
                    display_line = f" {line.strip()}"
                    print(response_box.text_line(display_line, indent=4, text_color=''))
            print(response_box.bottom_line(indent=4))
            print()

    @staticmethod
    def print_interactive_header():
        """Print the interactive-mode welcome panel with its command list."""
        box = Box(width=70, style=BoxStyle.ROUNDED, color='c')
        print(box.text_line(colorize("⌨ Interactive Mode", 'c', bold=True),
                            align='center', indent=4, text_color=''))
        print(box.separator_line(indent=4))
        help_lines = [
            "",
            colorize(" Ready to execute your tasks!", 'g'),
            "",
            colorize(" Available Commands:", 'c', bold=True),
            " " + colorize("status", 'bl') + " → View system status",
            " " + colorize("help", 'bl') + " → Show available commands",
            " " + colorize("quit", 'bl') + " → Exit interactive mode",
            "",
            colorize(" ▸ Enter your task description below:", 'gr'),
            "",
        ]
        for line in help_lines:
            print(box.text_line(line, indent=4, text_color=''))
        print(box.bottom_line(indent=4))
        print()

    @staticmethod
    def print_task_header(query: str, title: str = "▶ Executing Task"):
        """Print a panel titled *title* that echoes the task text *query*."""
        box = Box(width=70, style=BoxStyle.ROUNDED, color='g')
        print()
        print(box.text_line(colorize(title, 'g', bold=True), align='center', indent=4, text_color=''))
        print(box.separator_line(indent=4))
        print(box.text_line("", indent=4, text_color=''))
        print(box.text_line(f" {query}", indent=4, text_color=''))
        print(box.text_line("", indent=4, text_color=''))
        print(box.bottom_line(indent=4))

    @staticmethod
    def print_system_ready():
        """Print the "System Ready" panel listing what the real-time UI shows."""
        box = Box(width=70, style=BoxStyle.ROUNDED, color='g')
        print(box.text_line(colorize("◈ System Ready", 'g', bold=True),
                            align='center', indent=4, text_color=''))
        print(box.separator_line(indent=4))
        print(box.text_line("", indent=4, text_color=''))
        print(box.text_line(colorize(" Real-time UI will display:", 'c'), indent=4, text_color=''))
        print(box.text_line(" § Agent activities and status", indent=4, text_color=''))
        print(box.text_line(" ⊕ Grounding backend operations", indent=4, text_color=''))
        print(box.text_line(" ⊞ Execution logs", indent=4, text_color=''))
        print(box.text_line("", indent=4, text_color=''))
        print(box.bottom_line(indent=4))
        print()

    @staticmethod
    def print_status(agent):
        """Print the initialized/running state reported by ``agent.get_status()``.

        NOTE(review): *agent* must expose ``get_status()`` returning a mapping
        with ``initialized`` and ``running`` keys (``agents`` is optional) —
        confirm which object callers pass here.
        """
        box = Box(width=70, style=BoxStyle.ROUNDED, color='bl')
        print()
        print(box.text_line(colorize("System Status", 'bl', bold=True),
                            align='center', indent=4, text_color=''))
        print(box.separator_line(indent=4))
        status = agent.get_status()
        status_lines = [
            f"Initialized: {colorize('Yes' if status['initialized'] else 'No', 'g' if status['initialized'] else 'rd')}",
            f"Running: {colorize('Yes' if status['running'] else 'No', 'y' if status['running'] else 'g')}",
        ]
        if "agents" in status:
            status_lines.append(f"Agents: {colorize(', '.join(status['agents']), 'c')}")
        for line in status_lines:
            print(box.text_line(line, indent=4, text_color=''))
        print(box.bottom_line(indent=4))
        print()

    @staticmethod
    def print_help():
        """Print the command reference as two aligned columns."""
        box = Box(width=70, style=BoxStyle.ROUNDED, color='y')
        print()
        print(box.text_line(colorize("Available Commands", 'y', bold=True),
                            align='center', indent=4, text_color=''))
        print(box.separator_line(indent=4))
        # (command, colour, description); None marks a separator row
        help_items = [
            ("status", 'c', "Show system status"),
            ("help", 'c', "Show this help message"),
            ("quit/exit", 'c', "Exit interactive mode"),
            None,
            ("Other input", 'gr', "Execute as task"),
        ]
        for item in help_items:
            if item is None:
                print(box.separator_line(indent=4))
                continue
            name, color, desc = item
            # Pad the plain name BEFORE colorizing. Applying :20s to an
            # already-colorized string counts the invisible ANSI escape
            # characters toward the width, which left the description column
            # ragged.
            padded_name = colorize(f"{name:20s}", color)
            print(box.text_line(f" {padded_name} {desc}", indent=4, text_color=''))
        print(box.bottom_line(indent=4))
        print()
================================================
FILE: anytool/utils/display.py
================================================
from typing import Optional, List
from enum import Enum
import re
class Colors:
    """ANSI escape sequences used for terminal styling."""

    # Text attributes
    RESET = '\033[0m'
    BOLD = '\033[1m'
    DIM = '\033[2m'

    # Bright foreground colors (codes 90-97)
    RED = '\033[91m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    BLUE = '\033[94m'
    MAGENTA = '\033[95m'
    CYAN = '\033[96m'
    WHITE = '\033[97m'
    GRAY = '\033[90m'

    # Softer variants expressed as 256-color palette entries (38;5;N)
    GREEN_SOFT = "\033[38;5;78m"
    BLUE_SOFT = "\033[38;5;39m"
    CYAN_SOFT = "\033[38;5;51m"
    YELLOW_SOFT = "\033[38;5;222m"
    RED_SOFT = "\033[38;5;204m"
    MAGENTA_SOFT = "\033[38;5;141m"
    GRAY_SOFT = "\033[38;5;246m"
class BoxStyle(Enum):
    """Border styles a ``Box`` can be drawn with."""

    # Rounded corners: ╭─╮╰╯
    ROUNDED = "rounded"
    # Square corners: ┌─┐└┘
    SQUARE = "square"
    # Double-line border: ╔═╗╚╝
    DOUBLE = "double"
    # Borderless style using plain === rules
    SIMPLE = "simple"
# Border glyphs per style. Keys: tl/tr/bl/br = corners, h = horizontal rule,
# v = vertical rule. BoxStyle.SIMPLE has no entry here.
BOX_CHARS = {
    BoxStyle.ROUNDED: dict(tl='╭', tr='╮', bl='╰', br='╯', h='─', v='│'),
    BoxStyle.SQUARE: dict(tl='┌', tr='┐', bl='└', br='┘', h='─', v='│'),
    BoxStyle.DOUBLE: dict(tl='╔', tr='╗', bl='╚', br='╝', h='═', v='║'),
}
def strip_ansi(text: str) -> str:
    """
    Remove ANSI escape sequences from *text*.

    Matches ESC followed either by a single byte in ``@-Z`` / ``\\-_`` or by a
    bracketed CSI sequence (``ESC [ ... final-byte``).

    Args:
        text: Text that may contain ANSI escape sequences

    Returns:
        The text with every matched sequence removed
    """
    return re.sub(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])', '', text)
def colorize(text: str, color: str = '', bold: bool = False) -> str:
    """
    Wrap *text* in ANSI styling and terminate it with a reset code.

    Args:
        text: Text to style.
        color: Short key ('r', 'b', 'd', 'g', 'bl', 'c', 'y', 'rd', 'm', 'gr')
            or any other string, which is emitted verbatim as the colour code
            (so a raw escape sequence or '' also works).
        bold: Prefix the bold attribute when True.

    Returns:
        ``<bold?><code><text><reset>``; the unstyled *text* if styling fails.
    """
    try:
        color_map = {
            'r': Colors.RESET,
            'b': Colors.BOLD,
            'd': Colors.DIM,
            'g': Colors.GREEN_SOFT,
            'bl': Colors.BLUE_SOFT,
            'c': Colors.CYAN_SOFT,
            'y': Colors.YELLOW_SOFT,
            'rd': Colors.RED_SOFT,
            'm': Colors.MAGENTA_SOFT,
            'gr': Colors.GRAY_SOFT,
        }
        prefix = Colors.BOLD if bold else ''
        # Unknown keys fall through unchanged and are used as the code itself
        code = color_map.get(color, color)
        return f"{prefix}{code}{text}{Colors.RESET}"
    except Exception:
        # Best-effort styling: fall back to plain text on ordinary errors.
        # Deliberately not a bare ``except`` so that KeyboardInterrupt and
        # SystemExit still propagate.
        return text
class Box:
    """Renders the individual lines of a text box as strings.

    ``width`` is the number of horizontal rule characters between the
    corners; text content is laid out in ``width - 2 * padding`` columns.
    With ``BoxStyle.SIMPLE`` no vertical borders are drawn.
    """

    def __init__(self,
                 width: int = 68,
                 style: BoxStyle = BoxStyle.ROUNDED,
                 color: str = 'bl',
                 padding: int = 2):
        self.width = width
        self.style = style
        self.color = color
        self.padding = padding
        # Styles without a glyph table (SIMPLE) fall back to the rounded glyphs
        self.chars = BOX_CHARS.get(style, BOX_CHARS[BoxStyle.ROUNDED])

    def top_line(self, indent: int = 2) -> str:
        """Return the top border (a ``=`` rule for SIMPLE)."""
        margin = " " * indent
        if self.style == BoxStyle.SIMPLE:
            rule = "=" * self.width
        else:
            rule = self.chars['tl'] + self.chars['h'] * self.width + self.chars['tr']
        return colorize(margin + rule, self.color)

    def bottom_line(self, indent: int = 2) -> str:
        """Return the bottom border (a ``=`` rule for SIMPLE)."""
        margin = " " * indent
        if self.style == BoxStyle.SIMPLE:
            rule = "=" * self.width
        else:
            rule = self.chars['bl'] + self.chars['h'] * self.width + self.chars['br']
        return colorize(margin + rule, self.color)

    def separator_line(self, indent: int = 2) -> str:
        """Return an inner horizontal rule (a ``-`` rule for SIMPLE)."""
        margin = " " * indent
        if self.style == BoxStyle.SIMPLE:
            rule = "-" * self.width
        else:
            rule = " " + self.chars['h'] * self.width
        return colorize(margin + rule, self.color)

    def empty_line(self, indent: int = 2) -> str:
        """Return a bordered blank row, or an empty string for SIMPLE."""
        if self.style == BoxStyle.SIMPLE:
            return ""
        margin = " " * indent
        edge = self.chars['v']
        return colorize(margin + edge + " " * self.width + edge, self.color)

    def text_line(self, text: str, align: str = 'left', indent: int = 2, text_color: str = '') -> str:
        """Return one bordered row containing *text*.

        Args:
            text: Content; may already contain ANSI codes, which are ignored
                when measuring its width.
            align: 'center', 'right', or anything else for left alignment.
            indent: Number of spaces placed before the left border.
            text_color: When non-empty, *text* is passed through ``colorize``
                with this colour; otherwise it is used as given.
        """
        margin = " " * indent
        inner_width = self.width - 2 * self.padding

        # Width is measured on the text without ANSI codes
        visible_len = len(strip_ansi(text))
        shown = colorize(text, text_color) if text_color else text

        # A negative count yields an empty pad, so over-long text is not truncated
        slack = inner_width - visible_len
        if align == 'center':
            before = slack // 2
            after = slack - before
            content = " " * before + shown + " " * after
        elif align == 'right':
            content = " " * slack + shown
        else:  # left
            content = shown + " " * slack

        gutter = " " * self.padding
        if self.style == BoxStyle.SIMPLE:
            return margin + gutter + content

        left_edge = colorize(margin + self.chars['v'], self.color)
        right_edge = colorize(self.chars['v'], self.color)
        return left_edge + gutter + content + gutter + right_edge

    def build(self,
              title: Optional[str] = None,
              lines: List[str] = None,
              footer: Optional[str] = None,
              indent: int = 2) -> str:
        """Return a complete box as one newline-joined string.

        The optional *title* (centered) and *footer* (centered) are each set
        off by blank rows; *lines* are rendered left-aligned in between.
        """
        rows = [self.top_line(indent)]

        if title:
            rows += [
                self.empty_line(indent),
                self.text_line(title, align='center', indent=indent, text_color='c'),
                self.empty_line(indent),
            ]

        if lines:
            rows.extend(self.text_line(entry, indent=indent) for entry in lines)

        if footer:
            rows += [
                self.empty_line(indent),
                self.text_line(footer, align='center', indent=indent, text_color='gr'),
            ]

        rows.append(self.bottom_line(indent))
        return "\n".join(rows)
def print_box(title: Optional[str] = None,
              lines: List[str] = None,
              footer: Optional[str] = None,
              width: int = 68,
              style: BoxStyle = BoxStyle.ROUNDED,
              color: str = 'bl',
              indent: int = 2):
    """Print a complete box built by ``Box.build`` from the given parts."""
    rendered = Box(width=width, style=style, color=color).build(
        title=title,
        lines=lines,
        footer=footer,
        indent=indent,
    )
    print(rendered)
def print_banner(title: str,
                 subtitle: Optional[str] = None,
                 width: int = 66,
                 style: BoxStyle = BoxStyle.ROUNDED,
                 color: str = 'bl',
                 indent: int = 2):
    """Print a framed banner: centered *title*, optional *subtitle*, blank lines around it."""
    frame = Box(width=width, style=style, color=color)

    rows = [
        frame.top_line(indent),
        frame.empty_line(indent),
        frame.text_line(title, align='center', indent=indent, text_color='c'),
    ]
    if subtitle:
        rows.append(frame.text_line(subtitle, align='center', indent=indent, text_color='gr'))
    rows.append(frame.empty_line(indent))
    rows.append(frame.bottom_line(indent))

    print()
    for row in rows:
        print(row)
    print()
def print_section(title: str,
                  content: List[str],
                  color: str = 'c',
                  indent: int = 2):
    """Print a bold ``- title`` heading after a blank line, then each entry of *content*."""
    margin = " " * indent
    heading = colorize('- ' + title, color, bold=True)
    print("\n" + margin + heading)
    for entry in content:
        print(margin + " " + entry)
def print_separator(width: int = 68, color: str = 'bl', indent: int = 2):
    """Print a horizontal rule of *width* ``─`` characters, indented and colored."""
    rule = " " * indent + "─" * width
    print(colorize(rule, color))
================================================
FILE: anytool/utils/logging.py
================================================
import logging
import os
import sys
import threading
import json
from pathlib import Path
from datetime import datetime
from typing import Optional
from colorama import init
# Initialise colorama once at import time.
# NOTE(review): autoreset=True is expected to reset the terminal style after
# each write so colors do not leak into later output — confirm against the
# colorama documentation.
init(autoreset=True)
def _load_log_level_from_config() -> int:
"""
Load log_level from config_grounding.json and convert to ANYTOOL_DEBUG value.
Returns: 0 (WARNING), 1 (INFO), or 2 (DEBUG)
"""
try:
config_path = Path(__file__).parent.parent / "config" / "config_grounding.json"
if config_path.exists():
with open(config_path, 'r', encoding='utf-8') as f:
config = json.load(f)
log_level = config.get("log_level", "INFO").upper()
# Convert log level string to ANYTOOL_DEBUG value
level_map = {
"DEBUG": 2,
"INFO": 1,
"WARNING": 0,
"ERROR": 0,
"CRITICAL": 0
}
return level_map.get(log_level, 1) # Default to INFO
except Exception:
# If any error occurs, silently return default INFO level
pass
return 1 # Default to INFO
# Verbosity switch: 0=WARNING, 1=INFO, 2=DEBUG. Can be overridden later by
# Logger.set_debug() or by an environment variable.
# The initial value is read from config_grounding.json so both stay consistent.
ANYTOOL_DEBUG = _load_log_level_from_config()
# Default log directory and file-name pattern.
# The directory is "logs" two levels above this file's directory. The path is
# built with ".." components and is not absolute here; it is normalised with
# os.path.abspath() when the default log file path is generated.
DEFAULT_LOG_DIR = os.path.join(os.path.dirname(__file__), "..", "..", "logs")
DEFAULT_LOG_FILE_PATTERN = "anytool_{timestamp}.log"
class FlushFileHandler(logging.FileHandler):
    """``FileHandler`` variant that flushes after every record.

    Keeps the log file current while the process is still running.
    """

    def emit(self, record):
        """Write *record* through the base handler, then flush right away."""
        logging.FileHandler.emit(self, record)
        self.flush()
class ColoredFormatter(logging.Formatter):
    """Formatter that wraps each formatted line in a per-level ANSI color."""

    COLORS = {
        'DEBUG': '\033[1;36m',     # bold cyan
        'INFO': '\033[1;32m',      # bold green
        'WARNING': '\033[1;33m',   # bold yellow
        'ERROR': '\033[1;31m',     # bold red
        'CRITICAL': '\033[1;35m',  # bold magenta
        'RESET': '\033[0m',
    }

    def format(self, record: logging.LogRecord) -> str:
        """Format *record* normally, then surround it with color and reset codes.

        Level names without an entry in ``COLORS`` use the reset code as
        their prefix.
        """
        plain = super().format(record)
        reset = self.COLORS['RESET']
        start = self.COLORS.get(record.levelname, reset)
        return f"{start}{plain}{reset}"
class Logger:
"""
Thread-safe logger facade that:
1. Configures handlers only once (lazy initialization).
2. Ensures all subsequent loggers obtained via ``Logger.get_logger()``
inherit the configured handlers.
3. Dynamically adapts log levels according to ``ANYTOOL_DEBUG``.
"""
_ROOT_NAME = "anytool" # Package root name
# Standard format: time with milliseconds | level | file:line number | message
_LOG_FORMAT = (
"%(asctime)s.%(msecs)03d [%(levelname)-8s] %(filename)s:%(lineno)d - %(message)s"
)
_lock = threading.Lock()
_configured = False
_registered: dict[str, logging.Logger] = {}
@staticmethod
def _get_default_log_file() -> str:
"""Generate default log file path with timestamp (to seconds)
Log files are organized by the running script name:
- logs//anytool_2025-10-24_15-30-00.log
"""
# Get the name of the main script
script_name = "anytool" # Default name
try:
import __main__
if hasattr(__main__, "__file__") and __main__.__file__:
# Extract script name without extension
script_path = os.path.basename(__main__.__file__)
script_name = os.path.splitext(script_path)[0]
except Exception:
# If can't get script name, use default
pass
# Create log directory: logs//
log_dir = os.path.join(DEFAULT_LOG_DIR, script_name)
timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
filename = DEFAULT_LOG_FILE_PATTERN.format(timestamp=timestamp)
return os.path.abspath(os.path.join(log_dir, filename))
@classmethod
def get_logger(cls, name: Optional[str] = None) -> logging.Logger:
"""Return a logger with *name* (defaults to ``anytool``).
The first call triggers :meth:`configure` automatically."""
if name is None:
name = cls._ROOT_NAME
# Check if configuration is needed to avoid recursive calls.
need_config = False
with cls._lock:
logger = cls._registered.get(name)
if logger is None:
logger = logging.getLogger(name)
logger.propagate = True
cls._registered[name] = logger
if not cls._configured:
need_config = True
if need_config:
cls.configure()
return logger
@classmethod
def configure(
cls,
*,
level: Optional[int] = None,
fmt: Optional[str] = None,
log_to_console: bool = True,
log_to_file: Optional[str] = "auto",
use_colors: bool = True,
force_color: bool = False,
force: bool = False,
attach_to_root: bool = False,
) -> None:
"""
Configure the logging system. Usually called automatically
on first use; pass ``force=True`` to reconfigure explicitly.
Args:
level: Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
fmt: Log format string
log_to_console: Whether to output to console
log_to_file: Log file path ("auto" auto-generate by date, None disable, or specify path)
use_colors: Whether to use colors on console
force_color: Force use of colors (even if not supported)
force: Whether to force reconfiguration
attach_to_root: Whether to attach to root logger
If *attach_to_root* is ``True``, handlers are attached to the *root*
logger (``""``). This makes every logger—regardless of its name—
inherit the handlers (handy for standalone scripts) but will also
surface logs from third-party libraries. Choose with care.
"""
with cls._lock:
if cls._configured and not force:
# Already configured and no need to force reconfiguration, only update level.
if level is not None:
cls._update_level(level)
return
resolved_level = cls._resolve_level(level)
fmt_str = fmt or cls._LOG_FORMAT
# Handle log_to_file parameter
actual_log_file = None
if log_to_file == "auto":
actual_log_file = cls._get_default_log_file()
elif log_to_file is not None:
actual_log_file = log_to_file
# Select the logger to attach handlers to (root logger or anytool).
target_logger = (
logging.getLogger() if attach_to_root else logging.getLogger(cls._ROOT_NAME)
)
target_logger.setLevel(resolved_level)
# Clean up old handlers.
for h in target_logger.handlers[:]:
target_logger.removeHandler(h)
# Construct Formatter
date_fmt = "%Y-%m-%d %H:%M:%S"
color_supported = force_color or (use_colors and cls._stdout_supports_color())
console_formatter = (
ColoredFormatter(fmt_str, datefmt=date_fmt) if color_supported
else logging.Formatter(fmt_str, datefmt=date_fmt)
)
file_formatter = logging.Formatter(fmt_str, datefmt=date_fmt)
# Console Handler
if log_to_console:
ch = logging.StreamHandler(sys.stdout)
ch.setLevel(resolved_level)
ch.setFormatter(console_formatter)
target_logger.addHandler(ch)
# File Handler (with real-time flush)
if actual_log_file:
dir_path = os.path.dirname(actual_log_file)
if dir_path:
os.makedirs(dir_path, exist_ok=True)
fh = FlushFileHandler(actual_log_file, encoding="utf-8")
fh.setLevel(resolved_level)
fh.setFormatter(file_formatter)
target_logger.addHandler(fh)
# Record log file location
if not cls._configured:
print(f"Log file enabled: {actual_log_file}")
cls._configured = True
@classmethod
def set_debug(cls, debug_level: int = 2) -> None:
    """Switch the verbosity of all registered loggers at runtime.

    Args:
        debug_level: 0 = WARNING, 1 = INFO, 2 = DEBUG. Values outside the
            0..2 range are clamped to the nearest bound.
    """
    global ANYTOOL_DEBUG
    capped = min(debug_level, 2)
    ANYTOOL_DEBUG = max(0, capped)
    # Passing None makes _resolve_level derive the level from ANYTOOL_DEBUG.
    derived_level = cls._resolve_level(None)
    cls._update_level(derived_level)
@classmethod
def add_file_handler(
    cls,
    filepath: str,
    logger_name: Optional[str] = None
) -> None:
    """
    Attach an additional file handler to a logger.

    Args:
        filepath: Destination log file; missing parent directories are created.
        logger_name: Name of the logger to extend. Falls back to the
            package logger (``cls._ROOT_NAME``) when omitted or empty.
    """
    target = cls.get_logger(logger_name or cls._ROOT_NAME)

    parent_dir = os.path.dirname(filepath)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    file_handler = FlushFileHandler(filepath, encoding="utf-8")
    # The handler mirrors the logger's level at the time it is attached.
    file_handler.setLevel(target.level)
    plain_formatter = logging.Formatter(cls._LOG_FORMAT, datefmt="%Y-%m-%d %H:%M:%S")
    file_handler.setFormatter(plain_formatter)
    target.addHandler(file_handler)
@classmethod
def reset_configuration(cls) -> None:
    """Remove and close all handlers on registered loggers, then forget them.

    Every handler detached from a registered logger is also closed so that
    file handlers release their file descriptors instead of leaking them
    until interpreter exit. Afterwards the registry is emptied and the
    class is marked as unconfigured, so the next ``configure`` call runs a
    full setup again.
    """
    with cls._lock:
        for lg in cls._registered.values():
            # Iterate over a copy: removeHandler mutates lg.handlers.
            for h in lg.handlers[:]:
                lg.removeHandler(h)
                try:
                    h.close()
                except Exception:
                    # Best effort: a handler that fails to flush/close must
                    # not prevent the remaining handlers from being reset.
                    pass
        cls._registered.clear()
        cls._configured = False
@staticmethod
def _stdout_supports_color() -> bool:
return sys.stdout.isatty() and not os.getenv("NO_COLOR")
@classmethod
def _resolve_level(cls, level: Optional[int]) -> int:
    """Translate a user-supplied level into a ``logging`` level.

    Args:
        level: ``None`` to derive the level from ``ANYTOOL_DEBUG``
            (2 -> DEBUG, 1 -> INFO, anything else -> WARNING); otherwise a
            value such as ``logging.INFO``, ``"INFO"`` or ``20``.

    Returns:
        The matching attribute of the ``logging`` module when the
        upper-cased string form of ``level`` names one, else ``level``
        itself unchanged.
    """
    if level is None:
        by_debug_flag = {2: logging.DEBUG, 1: logging.INFO}
        return by_debug_flag.get(ANYTOOL_DEBUG, logging.WARNING)

    # Allow passing logging.INFO / "INFO" / 20 etc.
    attribute_name = str(level).upper()
    return getattr(logging, attribute_name, level)
@classmethod
def _update_level(cls, level: int) -> None:
    """Apply ``level`` to every registered logger and to each of its handlers."""
    for registered_logger in cls._registered.values():
        registered_logger.setLevel(level)
        for attached_handler in registered_logger.handlers:
            attached_handler.setLevel(level)
# Import-time bootstrap of the logging system.
#
# Step 1: pick up the debug level from the ``ANYTOOL_DEBUG`` (preferred) or
# legacy ``DEBUG`` environment variable. An empty ``ANYTOOL_DEBUG`` is falsy
# and therefore falls through to ``DEBUG``.
_env_debug = os.getenv("ANYTOOL_DEBUG") or os.getenv("DEBUG")
if _env_debug is not None:
    try:
        # Numeric values are passed straight to set_debug (which clamps them).
        Logger.set_debug(int(_env_debug))
    except ValueError:
        # Non-numeric value: treat common truthy spellings as full debug (2),
        # everything else as level 0.
        Logger.set_debug(2 if _env_debug.strip().lower() in {"1", "true", "yes"} else 0)

# Step 2: initialize the logger system, attached to the root logger so that
# every logger inherits the configuration. This ensures any logger obtained
# via Logger.get_logger() will work correctly.
Logger.configure(attach_to_root=True)

# Step 3: module-level logger used for AnyTool's own internal logging.
logger = Logger.get_logger()
logger.debug("AnyTool logging initialized")
================================================
FILE: anytool/utils/telemetry/__init__.py
================================================
================================================
FILE: anytool/utils/telemetry/events.py
================================================
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Any
class BaseTelemetryEvent(ABC):
    """Abstract contract implemented by every telemetry event.

    Subclasses must provide both read-only properties below; the class
    cannot be instantiated until they do.
    """

    @property
    @abstractmethod
    def name(self) -> str:
        """Identifier under which the event is tracked."""

    @property
    @abstractmethod
    def properties(self) -> dict[str, Any]:
        """Key/value payload that is sent together with the event."""
@dataclass
class MCPAgentExecutionEvent(BaseTelemetryEvent):
    """Telemetry record describing one complete MCP agent execution.

    Fields without a default are required and must be supplied in the order
    declared here; the result/context fields default to ``None``.
    """

    # --- Execution method and context ---
    execution_method: str  # "run" or "astream"
    query: str  # The actual user query
    success: bool

    # --- Agent configuration ---
    model_provider: str
    model_name: str
    server_count: int
    server_identifiers: list[dict[str, str]]
    total_tools_available: int
    tools_available_names: list[str]
    max_steps_configured: int
    memory_enabled: bool
    use_server_manager: bool

    # --- Execution parameters ---
    max_steps_used: int | None
    manage_connector: bool
    external_history_used: bool

    # --- Execution results ---
    steps_taken: int | None = None
    tools_used_count: int | None = None
    tools_used_names: list[str] | None = None
    response: str | None = None  # The actual response
    execution_time_ms: int | None = None
    error_type: str | None = None

    # --- Context ---
    conversation_history_length: int | None = None

    @property
    def name(self) -> str:
        """Fixed event name used for tracking."""
        return "mcp_agent_execution"

    @property
    def properties(self) -> dict[str, Any]:
        """Flatten the event into the dictionary sent to the telemetry backends.

        Besides the raw fields, two derived values are added:
        ``query_length`` and ``response_length`` (``None`` when the response
        is missing or empty). Optional fields are always present in the
        result, even when they are ``None``.
        """
        # Core execution info
        payload: dict[str, Any] = {
            "execution_method": self.execution_method,
            "query": self.query,
            "query_length": len(self.query),
            "success": self.success,
        }

        # Agent configuration
        payload.update(
            {
                "model_provider": self.model_provider,
                "model_name": self.model_name,
                "server_count": self.server_count,
                "server_identifiers": self.server_identifiers,
                "total_tools_available": self.total_tools_available,
                "tools_available_names": self.tools_available_names,
                "max_steps_configured": self.max_steps_configured,
                "memory_enabled": self.memory_enabled,
                "use_server_manager": self.use_server_manager,
            }
        )

        # Execution parameters (always include, even if None)
        payload.update(
            {
                "max_steps_used": self.max_steps_used,
                "manage_connector": self.manage_connector,
                "external_history_used": self.external_history_used,
            }
        )

        # Execution results and context (always include, even if None)
        reply = self.response
        payload.update(
            {
                "steps_taken": self.steps_taken,
                "tools_used_count": self.tools_used_count,
                "tools_used_names": self.tools_used_names,
                "response": reply,
                "response_length": len(reply) if reply else None,
                "execution_time_ms": self.execution_time_ms,
                "error_type": self.error_type,
                "conversation_history_length": self.conversation_history_length,
            }
        )
        return payload
================================================
FILE: anytool/utils/telemetry/telemetry.py
================================================
import logging
import os
import platform
import uuid
from collections.abc import Callable
from functools import wraps
from pathlib import Path
from typing import Any
from posthog import Posthog
from scarf import ScarfEventLogger
from mcp_use.logging import MCP_USE_DEBUG
from mcp_use.telemetry.events import (
BaseTelemetryEvent,
MCPAgentExecutionEvent,
)
from mcp_use.telemetry.utils import get_package_version
logger = logging.getLogger(__name__)
def singleton(cls):
    """Class decorator that turns ``cls`` into a lazily created singleton.

    The decorated name becomes a factory function: the first call builds
    the instance with the given arguments, every later call returns that
    same instance and ignores its arguments.
    """
    cached = None

    def wrapper(*args, **kwargs):
        nonlocal cached
        if cached is None:
            cached = cls(*args, **kwargs)
        return cached

    return wrapper
def requires_telemetry(func: Callable) -> Callable:
    """Method decorator that turns the call into a no-op when telemetry is off.

    Telemetry counts as disabled when both ``self._posthog_client`` and
    ``self._scarf_client`` are falsy; the wrapped method is then skipped and
    ``None`` is returned.
    """

    @wraps(func)
    def wrapper(self, *args, **kwargs):
        any_client_active = self._posthog_client or self._scarf_client
        if any_client_active:
            return func(self, *args, **kwargs)
        return None

    return wrapper
def get_cache_home() -> Path:
    """Return the platform-appropriate base directory for cache files.

    Resolution order:
      1. ``XDG_CACHE_HOME`` when set to an absolute path (any platform).
      2. Windows: ``LOCALAPPDATA``, then ``APPDATA``, then ``~/AppData/Local``.
      3. macOS: ``~/Library/Caches``.
      4. Anything else: ``~/.cache``.
    """
    # XDG_CACHE_HOME for Linux and manually set envs
    xdg_override: str | None = os.getenv("XDG_CACHE_HOME")
    if xdg_override:
        xdg_path = Path(xdg_override)
        if xdg_path.is_absolute():
            return xdg_path

    os_name = platform.system()
    if os_name == "Darwin":  # macOS
        return Path.home() / "Library" / "Caches"
    if os_name != "Windows":  # Linux or other Unix
        return Path.home() / ".cache"

    windows_base = os.getenv("LOCALAPPDATA") or os.getenv("APPDATA")
    if windows_base:
        return Path(windows_base)
    return Path.home() / "AppData" / "Local"
@singleton
class Telemetry:
    """
    Service for capturing anonymized telemetry data via PostHog and Scarf.
    If the environment variable `MCP_USE_ANONYMIZED_TELEMETRY=false`, telemetry will be disabled.

    The class is wrapped by ``@singleton``, so ``Telemetry()`` always returns
    the same instance. Every public method is a silent no-op when neither
    backend client could be created (see ``requires_telemetry``), and all
    backend/file-system failures are logged at debug level instead of being
    raised to the caller.
    """

    # Persistent anonymous user id and last-reported package version.
    USER_ID_PATH = str(get_cache_home() / "mcp_use_3" / "telemetry_user_id")
    VERSION_DOWNLOAD_PATH = str(get_cache_home() / "mcp_use" / "download_version")
    PROJECT_API_KEY = "phc_lyTtbYwvkdSbrcMQNPiKiiRWrrM1seyKIMjycSvItEI"
    HOST = "https://eu.i.posthog.com"
    SCARF_GATEWAY_URL = "https://mcpuse.gateway.scarf.sh/events"
    # Fallback id used when the id file cannot be read or written.
    UNKNOWN_USER_ID = "UNKNOWN_USER_ID"

    _curr_user_id = None

    def __init__(self):
        """Create the PostHog and Scarf clients unless telemetry is disabled.

        A backend whose client fails to initialize is set to ``None`` and the
        other backend keeps working.
        """
        telemetry_disabled = os.getenv("MCP_USE_ANONYMIZED_TELEMETRY", "true").lower() == "false"

        if telemetry_disabled:
            self._posthog_client = None
            self._scarf_client = None
            logger.debug("Telemetry disabled")
        else:
            logger.info("Anonymized telemetry enabled. Set MCP_USE_ANONYMIZED_TELEMETRY=false to disable.")

            # Initialize PostHog
            try:
                self._posthog_client = Posthog(
                    project_api_key=self.PROJECT_API_KEY,
                    host=self.HOST,
                    disable_geoip=False,
                    enable_exception_autocapture=True,
                )

                # Silence posthog's logging unless debug mode (level 2)
                if MCP_USE_DEBUG < 2:
                    posthog_logger = logging.getLogger("posthog")
                    posthog_logger.disabled = True

            except Exception as e:
                logger.warning(f"Failed to initialize PostHog telemetry: {e}")
                self._posthog_client = None

            # Initialize Scarf
            try:
                self._scarf_client = ScarfEventLogger(
                    endpoint_url=self.SCARF_GATEWAY_URL,
                    timeout=3.0,
                    verbose=MCP_USE_DEBUG >= 2,
                )

                # Silence scarf's logging unless debug mode (level 2)
                if MCP_USE_DEBUG < 2:
                    scarf_logger = logging.getLogger("scarf")
                    scarf_logger.disabled = True

            except Exception as e:
                logger.warning(f"Failed to initialize Scarf telemetry: {e}")
                self._scarf_client = None

    @staticmethod
    def _version_key(version: str) -> tuple[int, ...]:
        """Best-effort numeric sort key for a dotted version string.

        ``"0.10.1"`` becomes ``(0, 10, 1)``. Parsing stops at the first
        component that does not start with a digit, and only the leading
        digits of each component are used (``"1.0.0rc1"`` -> ``(1, 0, 0)``).
        Non-version strings such as ``"unknown"`` yield ``()``, which sorts
        below every real version.
        """
        parts: list[int] = []
        for piece in version.strip().split("."):
            digits = ""
            for ch in piece:
                if not ch.isdigit():
                    break
                digits += ch
            if not digits:
                break
            parts.append(int(digits))
        return tuple(parts)

    @property
    def user_id(self) -> str:
        """Get or create a persistent anonymous user ID.

        The id is cached on the instance after the first lookup. It is read
        from ``USER_ID_PATH``; when that file is missing or contains no
        usable id, a fresh UUID4 is generated and written there. Any failure
        results in ``UNKNOWN_USER_ID``.
        """
        if self._curr_user_id:
            return self._curr_user_id

        try:
            stored_id = ""
            if os.path.exists(self.USER_ID_PATH):
                with open(self.USER_ID_PATH) as f:
                    stored_id = f.read().strip()

            if stored_id:
                self._curr_user_id = stored_id
            else:
                # First run, or the file exists but is empty/whitespace-only
                # (e.g. an interrupted earlier write): create a new id rather
                # than reporting events under an empty distinct id.
                logger.debug(f"Creating user ID path: {self.USER_ID_PATH}")
                os.makedirs(os.path.dirname(self.USER_ID_PATH), exist_ok=True)
                new_user_id = str(uuid.uuid4())
                with open(self.USER_ID_PATH, "w") as f:
                    f.write(new_user_id)
                self._curr_user_id = new_user_id
                logger.debug(f"User ID path created: {self.USER_ID_PATH}")

            # Always check for version-based download tracking. The id is
            # already cached at this point, so the nested user_id access
            # inside track_package_download returns immediately.
            self.track_package_download(
                {
                    "triggered_by": "user_id_property",
                }
            )
        except Exception as e:
            logger.debug(f"Failed to get/create user ID: {e}")
            self._curr_user_id = self.UNKNOWN_USER_ID

        return self._curr_user_id

    @requires_telemetry
    def capture(self, event: BaseTelemetryEvent) -> None:
        """Capture a telemetry event on every active backend.

        PostHog receives the full event properties plus the package version;
        Scarf only receives the package version, user id and event name.
        """
        # Send to PostHog
        if self._posthog_client:
            try:
                # Add package version to all events
                properties = event.properties.copy()
                properties["mcp_use_version"] = get_package_version()

                self._posthog_client.capture(distinct_id=self.user_id, event=event.name, properties=properties)
            except Exception as e:
                logger.debug(f"Failed to track PostHog event {event.name}: {e}")

        # Send to Scarf
        if self._scarf_client:
            try:
                # Add package version and user_id to all events
                properties = {}
                properties["mcp_use_version"] = get_package_version()
                properties["user_id"] = self.user_id
                properties["event"] = event.name

                self._scarf_client.log_event(properties=properties)
            except Exception as e:
                logger.debug(f"Failed to track Scarf event {event.name}: {e}")

    @requires_telemetry
    def track_package_download(self, properties: dict[str, Any] | None = None) -> None:
        """Track package download event specifically for Scarf analytics.

        An event is sent the first time a version is seen on this machine
        (no version file yet) and whenever the installed version is newer
        than the one stored in ``VERSION_DOWNLOAD_PATH``.

        Args:
            properties: Extra properties merged into the event; the dict
                passed in is not modified.
        """
        if self._scarf_client:
            try:
                current_version = get_package_version()
                should_track = False
                first_download = False

                # Check if version file exists
                if not os.path.exists(self.VERSION_DOWNLOAD_PATH):
                    # First download
                    should_track = True
                    first_download = True

                    # Create directory and save version
                    os.makedirs(os.path.dirname(self.VERSION_DOWNLOAD_PATH), exist_ok=True)
                    with open(self.VERSION_DOWNLOAD_PATH, "w") as f:
                        f.write(current_version)
                else:
                    # Read saved version
                    with open(self.VERSION_DOWNLOAD_PATH) as f:
                        saved_version = f.read().strip()

                    # Compare numerically per component: a plain string
                    # comparison would rank "0.9.0" above "0.10.0" and miss
                    # the upgrade.
                    if self._version_key(current_version) > self._version_key(saved_version):
                        should_track = True
                        first_download = False

                        # Update saved version
                        with open(self.VERSION_DOWNLOAD_PATH, "w") as f:
                            f.write(current_version)

                if should_track:
                    logger.debug(f"Tracking package download event with properties: {properties}")
                    # Add package version and user_id to event
                    event_properties = (properties or {}).copy()
                    event_properties["mcp_use_version"] = current_version
                    event_properties["user_id"] = self.user_id
                    event_properties["event"] = "package_download"
                    event_properties["first_download"] = first_download

                    self._scarf_client.log_event(properties=event_properties)
            except Exception as e:
                logger.debug(f"Failed to track Scarf package_download event: {e}")

    @requires_telemetry
    def track_agent_execution(
        self,
        execution_method: str,
        query: str,
        success: bool,
        model_provider: str,
        model_name: str,
        server_count: int,
        server_identifiers: list[dict[str, str]],
        total_tools_available: int,
        tools_available_names: list[str],
        max_steps_configured: int,
        memory_enabled: bool,
        use_server_manager: bool,
        max_steps_used: int | None,
        manage_connector: bool,
        external_history_used: bool,
        steps_taken: int | None = None,
        tools_used_count: int | None = None,
        tools_used_names: list[str] | None = None,
        response: str | None = None,
        execution_time_ms: int | None = None,
        error_type: str | None = None,
        conversation_history_length: int | None = None,
    ) -> None:
        """Track comprehensive agent execution.

        The arguments map one-to-one onto the fields of
        ``MCPAgentExecutionEvent``; the resulting event is handed to
        ``capture``.
        """
        event = MCPAgentExecutionEvent(
            execution_method=execution_method,
            query=query,
            success=success,
            model_provider=model_provider,
            model_name=model_name,
            server_count=server_count,
            server_identifiers=server_identifiers,
            total_tools_available=total_tools_available,
            tools_available_names=tools_available_names,
            max_steps_configured=max_steps_configured,
            memory_enabled=memory_enabled,
            use_server_manager=use_server_manager,
            max_steps_used=max_steps_used,
            manage_connector=manage_connector,
            external_history_used=external_history_used,
            steps_taken=steps_taken,
            tools_used_count=tools_used_count,
            tools_used_names=tools_used_names,
            response=response,
            execution_time_ms=execution_time_ms,
            error_type=error_type,
            conversation_history_length=conversation_history_length,
        )
        self.capture(event)

    @requires_telemetry
    def flush(self) -> None:
        """Flush any queued telemetry events."""
        # Flush PostHog
        if self._posthog_client:
            try:
                self._posthog_client.flush()
                logger.debug("PostHog client telemetry queue flushed")
            except Exception as e:
                logger.debug(f"Failed to flush PostHog client: {e}")

        # Scarf events are sent immediately, no flush needed
        if self._scarf_client:
            logger.debug("Scarf telemetry events sent immediately (no flush needed)")

    @requires_telemetry
    def shutdown(self) -> None:
        """Shutdown telemetry clients and flush remaining events."""
        # Shutdown PostHog
        if self._posthog_client:
            try:
                self._posthog_client.shutdown()
                logger.debug("PostHog client shutdown successfully")
            except Exception as e:
                logger.debug(f"Error shutting down PostHog client: {e}")

        # Scarf doesn't require explicit shutdown
        if self._scarf_client:
            logger.debug("Scarf telemetry client shutdown (no action needed)")
================================================
FILE: anytool/utils/telemetry/utils.py
================================================
"""
Utility functions for extracting model information from LangChain LLMs.
This module provides utilities to extract provider and model information
from LangChain language models for telemetry purposes.
"""
import importlib.metadata
from langchain_core.language_models.base import BaseLanguageModel
def get_package_version() -> str:
    """Return the installed ``mcp-use`` distribution version.

    Returns:
        The version string reported by ``importlib.metadata``, or
        ``"unknown"`` when the distribution is not installed.
    """
    try:
        installed = importlib.metadata.version("mcp-use")
    except importlib.metadata.PackageNotFoundError:
        installed = "unknown"
    return installed
def get_model_provider(llm: BaseLanguageModel) -> str:
    """Identify the provider of a LangChain LLM.

    Returns the object's ``_llm_type`` attribute when present; otherwise the
    lower-cased class name is used as a stand-in.
    """
    class_name_fallback = llm.__class__.__name__.lower()
    return getattr(llm, "_llm_type", class_name_fallback)
def get_model_name(llm: BaseLanguageModel) -> str:
    """Determine the model name of a LangChain LLM.

    Lookup order:
      1. The first of ``model``, ``model_name``, ``model_id`` or
         ``deployment_name`` found in ``llm._identifying_params`` (when that
         attribute exists and is a dict), converted with ``str``.
      2. The ``model`` attribute, then the ``model_name`` attribute.
      3. The class name of ``llm``.
    """
    # Common keys that contain model names, in priority order.
    candidate_keys = ("model", "model_name", "model_id", "deployment_name")

    if hasattr(llm, "_identifying_params"):
        params = llm._identifying_params
        if isinstance(params, dict):
            matched_key = next((key for key in candidate_keys if key in params), None)
            if matched_key is not None:
                return str(params[matched_key])

    # Fallback to direct model attributes
    class_name = llm.__class__.__name__
    by_model_name = getattr(llm, "model_name", class_name)
    return getattr(llm, "model", by_model_name)
def extract_model_info(llm: BaseLanguageModel) -> tuple[str, str]:
    """Collect provider and model name of a LangChain LLM in one call.

    Returns:
        ``(provider, model_name)`` as produced by ``get_model_provider`` and
        ``get_model_name``.
    """
    provider = get_model_provider(llm)
    model_name = get_model_name(llm)
    return provider, model_name
================================================
FILE: anytool/utils/ui.py
================================================
"""
AnyTool Terminal UI System
Provides real-time CLI visualization for AnyTool execution flow.
Displays agent activities, grounding backends, and detailed logs.
Uses native ANSI colors and custom box drawing for a clean, lightweight interface.
"""
from typing import Optional, Dict, Any, List, Tuple
from datetime import datetime
from enum import Enum
import asyncio
import sys
import shutil
from anytool.utils.display import Box, BoxStyle, colorize
class AgentStatus(Enum):
    """States an agent can be displayed in by the terminal UI."""

    # Nothing in progress.
    IDLE = "idle"
    # Agent is reasoning.
    THINKING = "thinking"
    # Agent is carrying out an activity.
    EXECUTING = "executing"
    # Agent is blocked on something else.
    WAITING = "waiting"
class AnyToolUI:
    """
    AnyTool Terminal UI

    Provides real-time visualization of:
    - Agent activities and status
    - Grounding backend operations
    - Execution logs
    - System metrics

    Design Philosophy:
    - Lightweight and fast (no heavy dependencies)
    - Clean ANSI-based rendering
    - Minimal CPU overhead
    - Easy to customize
    """

    def __init__(self, enable_live: bool = True, compact: bool = False):
        """
        Initialize UI

        Args:
            enable_live: Whether to enable live display updates
            compact: Use compact layout (for smaller terminals)
        """
        self.enable_live = enable_live
        self.compact = compact

        # Terminal dimensions
        self.term_width, self.term_height = self._get_terminal_size()

        # State tracking
        self.agent_status: Dict[str, AgentStatus] = {}
        self.agent_activities: Dict[str, List[str]] = {}
        self.grounding_operations: List[Dict[str, Any]] = []
        self.grounding_backends: List[Dict[str, Any]] = []  # Backend info (type, servers, etc.)
        self.log_buffer: List[Tuple[str, str, datetime]] = []  # (message, level, timestamp)

        # Metrics
        self.metrics: Dict[str, Any] = {
            "start_time": None,
            "iterations": 0,
            "completed_tasks": 0,
            "llm_calls": 0,
            "grounding_calls": 0,
        }

        # Live display state
        self._live_running = False
        self._live_task: Optional[asyncio.Task] = None
        self._last_render: List[str] = []

    def _get_terminal_size(self) -> Tuple[int, int]:
        """Get terminal size as (columns, lines), falling back to 80x24."""
        try:
            size = shutil.get_terminal_size((80, 24))
            return size.columns, size.lines
        except Exception:
            # Only ordinary errors are swallowed; KeyboardInterrupt and
            # SystemExit must still propagate.
            return 80, 24

    def _clear_screen(self):
        """Clear screen (only when live display is enabled)"""
        if self.enable_live:
            # Clear entire screen and move cursor to top-left
            sys.stdout.write('\033[2J\033[H')
            sys.stdout.flush()

    def _move_cursor_home(self):
        """Move cursor to home position"""
        sys.stdout.write('\033[H')
        sys.stdout.flush()

    def _hide_cursor(self):
        """Hide cursor"""
        sys.stdout.write('\033[?25l')
        sys.stdout.flush()

    def _show_cursor(self):
        """Show cursor"""
        sys.stdout.write('\033[?25h')
        sys.stdout.flush()

    # Banner and Startup
    def print_banner(self):
        """Print startup banner"""
        box = Box(width=70, style=BoxStyle.ROUNDED, color='c')

        print()
        print(box.top_line(indent=4))
        print(box.empty_line(indent=4))

        # Title
        title = colorize("AnyTool", 'c', bold=True)
        print(box.text_line(title, align='center', indent=4, text_color=''))

        # Subtitle
        subtitle = "Universal Tool-Use Layer for AI Agents"
        print(box.text_line(subtitle, align='center', indent=4, text_color='gr'))

        print(box.empty_line(indent=4))
        print(box.bottom_line(indent=4))
        print()

    def print_initialization(self, steps: List[Tuple[str, str]]):
        """
        Print initialization steps

        Args:
            steps: List of (component_name, status) tuples; a status of
                "ok" is shown with a check mark, anything else with a cross
        """
        box = Box(width=70, style=BoxStyle.ROUNDED, color='bl')
        print(box.text_line("Initializing Components", align='center', indent=4, text_color='c'))
        print(box.separator_line(indent=4))

        for component, status in steps:
            icon = colorize("✓", 'g') if status == "ok" else colorize("✗", 'rd')
            line = f"{icon} {component}"
            print(box.text_line(line, indent=4))

        print(box.bottom_line(indent=4))
        print()

    async def start_live_display(self):
        """Start live display (no-op when disabled or already running)"""
        if not self.enable_live or self._live_running:
            return

        self._live_running = True
        self.metrics["start_time"] = datetime.now()

        self._clear_screen()
        self._hide_cursor()

        # Start update loop
        self._live_task = asyncio.create_task(self._live_update_loop())

    async def stop_live_display(self):
        """Stop live display and restore the cursor"""
        if not self._live_running:
            return

        self._live_running = False

        if self._live_task:
            self._live_task.cancel()
            try:
                await self._live_task
            except asyncio.CancelledError:
                pass

        self._show_cursor()
        print()  # Add newline after live display

    async def _live_update_loop(self):
        """Live update loop: re-render every 2 seconds until stopped."""
        try:
            while self._live_running:
                try:
                    self.render()
                except Exception as e:
                    print(f"UI render error: {e}")
                # Sleep after every pass, including failed ones, so a
                # persistently failing render cannot turn this into a busy
                # loop that floods the terminal.
                await asyncio.sleep(2.0)
        except asyncio.CancelledError:
            # Cancellation (from stop_live_display) ends the loop quietly.
            pass

    def render(self):
        """Render entire UI"""
        if not self.enable_live or not self._live_running:
            return

        # Clear and redraw
        self._clear_screen()

        lines = []

        # Header
        lines.extend(self._render_header())
        lines.append("")

        # Stack all panels vertically
        lines.extend(self._render_agents())
        lines.append("")
        lines.extend(self._render_grounding())
        lines.append("")
        lines.extend(self._render_logs())

        output = "\n".join(lines)
        sys.stdout.write(output)
        sys.stdout.flush()

    def update_display(self):
        """Update display (alias for render())"""
        self.render()

    def _render_header(self) -> List[str]:
        """Render header section (status line plus separator)"""
        lines = []

        # Calculate elapsed time
        elapsed = "0s"
        if self.metrics["start_time"]:
            delta = datetime.now() - self.metrics["start_time"]
            # total_seconds() includes whole days; timedelta.seconds alone
            # would wrap back to zero after 24 hours of runtime.
            minutes, seconds = divmod(int(delta.total_seconds()), 60)
            if minutes > 0:
                elapsed = f"{minutes}m{seconds}s"
            else:
                elapsed = f"{seconds}s"

        status_text = (
            f"▶ {colorize('RUNNING', 'g')} | "
            f"Time: {colorize(elapsed, 'c')} | "
            f"Iter: {colorize(str(self.metrics['iterations']), 'y')} | "
            f"Tasks: {colorize(str(self.metrics['completed_tasks']), 'g')} | "
            f"LLM: {colorize(str(self.metrics['llm_calls']), 'bl')} | "
            f"Grounding: {colorize(str(self.metrics['grounding_calls']), 'm')}"
        )

        lines.append(" " + status_text)
        lines.append(" " + "─" * 60)

        return lines

    def _render_agents(self) -> List[str]:
        """Render agents section"""
        lines = []
        lines.append(" " + colorize("§ Agents", 'c', bold=True))

        # Agent info
        agents = [
            ("GroundingAgent", 'c', self.agent_status.get("GroundingAgent", AgentStatus.IDLE)),
        ]

        # Status icon per agent state
        status_icons = {
            AgentStatus.IDLE: "○",
            AgentStatus.THINKING: "◐",
            AgentStatus.EXECUTING: "◉",
            AgentStatus.WAITING: "◷",
        }

        for agent_name, color, status in agents:
            icon = status_icons.get(status, "○")

            # Recent activity (latest entry, truncated to 40 characters)
            activities = self.agent_activities.get(agent_name, [])
            activity = activities[-1][:40] if activities else "idle"

            # Format line
            line = f" {colorize(icon, 'y')} {colorize(agent_name, color):<20s} {activity}"
            lines.append(line)

        return lines

    def _render_grounding(self) -> List[str]:
        """Render grounding operations section"""
        lines = []
        lines.append(" " + colorize("⊕ Grounding Backends", 'c', bold=True))

        # Backend type icon
        type_icons = {
            "gui": "■",
            "shell": "$",
            "mcp": "◆",
            "system": "●",
            "web": "◉",
        }

        # Show backend types and servers
        if self.grounding_backends:
            for backend_info in self.grounding_backends:
                backend_name = backend_info.get("name", "unknown")
                backend_type = backend_info.get("type", "unknown")
                servers = backend_info.get("servers", [])

                icon = type_icons.get(backend_type, "○")

                # Format backend line
                if backend_type == "mcp" and servers:
                    servers_str = ", ".join([s[:15] for s in servers])
                    line = f" {icon} {colorize(backend_name, 'y')} ({backend_type}): {colorize(servers_str, 'gr')}"
                else:
                    line = f" {icon} {colorize(backend_name, 'y')} ({backend_type})"
                lines.append(line)

        # Show last 3 operations
        recent_ops = self.grounding_operations[-3:] if self.grounding_operations else []

        if recent_ops:
            lines.append(" " + colorize("Recent Operations:", 'gr'))
            for op in recent_ops:
                backend = op.get("backend", "unknown")
                action = op.get("action", "unknown")[:40]
                status = op.get("status", "pending")

                # Status icon
                if status == "success":
                    icon = colorize("✓", 'g')
                elif status == "pending":
                    icon = colorize("⏳", 'y')
                else:
                    icon = colorize("✗", 'rd')

                line = f" {icon} {colorize(backend, 'bl')}: {action}"
                lines.append(line)

        return lines

    def _render_logs(self) -> List[str]:
        """Render logs section"""
        lines = []
        lines.append(" " + colorize("⊞ Recent Events", 'c', bold=True))

        # Show last 5 logs
        recent_logs = self.log_buffer[-5:] if self.log_buffer else []

        if recent_logs:
            for message, level, timestamp in recent_logs:
                time_str = timestamp.strftime("%H:%M:%S")

                # Truncate long messages
                msg_display = message[:55]

                log_line = f" {colorize(time_str, 'gr')} | {msg_display}"
                lines.append(log_line)

        return lines

    def update_agent_status(self, agent_name: str, status: AgentStatus):
        """Update agent status"""
        self.agent_status[agent_name] = status

    def add_agent_activity(self, agent_name: str, activity: str):
        """Add agent activity"""
        if agent_name not in self.agent_activities:
            self.agent_activities[agent_name] = []

        self.agent_activities[agent_name].append(activity)

        # Keep only last 10 activities
        if len(self.agent_activities[agent_name]) > 10:
            self.agent_activities[agent_name] = self.agent_activities[agent_name][-10:]

    def update_grounding_backends(self, backends: List[Dict[str, Any]]):
        """
        Update grounding backends information

        Args:
            backends: List of backend info dicts with keys:
                - name: backend name
                - type: backend type (gui, shell, mcp, system, web)
                - servers: list of server names (for mcp)
        """
        self.grounding_backends = backends

    def add_grounding_operation(self, backend: str, action: str, status: str = "pending"):
        """Add grounding operation and count it in the metrics"""
        self.grounding_operations.append({
            "backend": backend,
            "action": action,
            "status": status,
            "timestamp": datetime.now(),
        })
        self.metrics["grounding_calls"] += 1

        # Keep only last 100 operations, like the log buffer; only the most
        # recent three are ever rendered, so long sessions would otherwise
        # accumulate entries without bound.
        if len(self.grounding_operations) > 100:
            self.grounding_operations = self.grounding_operations[-100:]

    def add_log(self, message: str, level: str = "info"):
        """Add log message"""
        self.log_buffer.append((message, level, datetime.now()))

        # Keep only last 100 logs
        if len(self.log_buffer) > 100:
            self.log_buffer = self.log_buffer[-100:]

    def update_metrics(self, **kwargs):
        """Update metrics"""
        self.metrics.update(kwargs)

    def print_summary(self, result: Dict[str, Any]):
        """
        Print execution summary

        Args:
            result: Result dict; the keys read are ``status``,
                ``execution_time``, ``iterations``, ``completed_tasks`` and
                (optionally) ``evaluation_results``
        """
        box = Box(width=70, style=BoxStyle.ROUNDED, color='c')

        print()
        print(box.text_line(colorize("◈ Execution Summary", 'c', bold=True), align='center', indent=4, text_color=''))
        print(box.separator_line(indent=4))

        # Status
        status = result.get("status", "unknown")
        status_display = {
            "completed": colorize("COMPLETED", 'g', bold=True),
            "timeout": colorize("TIMEOUT", 'y', bold=True),
            "error": colorize("ERROR", 'rd', bold=True),
        }
        status_text = status_display.get(status, status)

        # Formatted up front: reusing the same quote character inside a
        # nested f-string only parses on Python 3.12+.
        execution_time_text = f"{result.get('execution_time', 0):.2f}s"
        iterations_text = str(result.get('iterations', 0))
        completed_text = str(result.get('completed_tasks', 0))

        print(box.text_line(f" Status: {status_text}", indent=4, text_color=''))
        print(box.text_line(f" Execution Time: {colorize(execution_time_text, 'c')}", indent=4, text_color=''))
        print(box.text_line(f" Iterations: {colorize(iterations_text, 'y')}", indent=4, text_color=''))
        print(box.text_line(f" Completed Tasks: {colorize(completed_text, 'g')}", indent=4, text_color=''))

        if result.get('evaluation_results'):
            evaluations_text = str(len(result['evaluation_results']))
            print(box.text_line(f" Evaluations: {colorize(evaluations_text, 'bl')}", indent=4, text_color=''))

        print(box.bottom_line(indent=4))
        print()
def create_ui(enable_live: bool = True, compact: bool = False) -> AnyToolUI:
    """
    Factory for the AnyTool terminal UI.

    Args:
        enable_live: Whether to enable live display updates
        compact: Use compact layout for smaller terminals

    Returns:
        A new AnyToolUI configured with the given options
    """
    ui_options = {"enable_live": enable_live, "compact": compact}
    return AnyToolUI(**ui_options)
================================================
FILE: anytool/utils/ui_integration.py
================================================
"""
AnyTool UI Integration
Integrates the UI system with AnyTool core components.
Provides hooks and callbacks to update UI in real-time.
"""
import asyncio
import logging
from typing import Optional
from anytool.utils.ui import AnyToolUI, AgentStatus
from anytool.utils.logging import Logger
logger = Logger.get_logger(__name__)
class UIIntegration:
"""
UI Integration for AnyTool
Connects AnyTool components with the UI system to provide real-time
visualization of agent activities and execution flow.
"""
def __init__(self, ui: AnyToolUI):
"""
Initialize UI integration
Args:
ui: AnyToolUI instance
"""
self.ui = ui
self._update_task: Optional[asyncio.Task] = None
self._running = False
# Tracked components
self._llm_client = None
self._grounding_client = None
def attach_llm_client(self, llm_client):
"""
Attach LLM client
Args:
llm_client: LLMClient instance
"""
self._llm_client = llm_client
logger.debug("UI attached to LLMClient")
def attach_grounding_client(self, grounding_client):
"""
Attach grounding client
Args:
grounding_client: GroundingClient instance
"""
self._grounding_client = grounding_client
logger.debug("UI attached to GroundingClient")
async def start_monitoring(self, poll_interval: float = 0.5):
"""
Start monitoring and updating UI
Args:
poll_interval: Update interval in seconds
"""
if self._running:
logger.warning("UI monitoring already running")
return
self._running = True
# Immediately update UI once before starting the loop
await self._update_ui()
self._update_task = asyncio.create_task(
self._monitor_loop(poll_interval)
)
logger.info("UI monitoring started")
async def stop_monitoring(self):
"""Stop monitoring"""
if not self._running:
return
self._running = False
if self._update_task:
self._update_task.cancel()
try:
await self._update_task
except asyncio.CancelledError:
pass
logger.info("UI monitoring stopped")
async def _monitor_loop(self, poll_interval: float):
"""
Main monitoring loop
Args:
poll_interval: Update interval in seconds
"""
while self._running:
try:
await self._update_ui()
await asyncio.sleep(poll_interval)
except asyncio.CancelledError:
break
except Exception as e:
logger.error(f"UI update error: {e}", exc_info=True)
async def _update_ui(self):
"""Update UI with current state"""
# Update grounding backends info
if self._grounding_client:
backends = []
try:
# Get list of providers
providers = self._grounding_client.list_providers()
for backend_type, provider in providers.items():
backend_name = backend_type.value if hasattr(backend_type, 'value') else str(backend_type)
backend_info = {
"name": backend_name,
"type": backend_name, # gui, shell, mcp, system, web
"servers": []
}
# For MCP provider, get server names
if backend_name == "mcp":
try:
# Try to get MCP sessions from provider
if hasattr(provider, '_sessions'):
backend_info["servers"] = list(provider._sessions.keys())
except Exception:
pass
backends.append(backend_info)
self.ui.update_grounding_backends(backends)
except Exception as e:
logger.debug(f"Failed to update grounding backends: {e}")
# Refresh display
self.ui.update_display()
# Event handlers - to be called by agents
def on_agent_start(self, agent_name: str, activity: str):
"""
Called when agent starts an activity
Args:
agent_name: Agent name
activity: Activity description
"""
self.ui.update_agent_status(agent_name, AgentStatus.EXECUTING)
self.ui.add_agent_activity(agent_name, activity)
self.ui.add_log(f"{agent_name}: {activity}", level="info")
def on_agent_thinking(self, agent_name: str):
"""
Called when agent is thinking
Args:
agent_name: Agent name
"""
self.ui.update_agent_status(agent_name, AgentStatus.THINKING)
def on_agent_complete(self, agent_name: str, result: str = ""):
"""
Called when agent completes an activity
Args:
agent_name: Agent name
result: Result description
"""
self.ui.update_agent_status(agent_name, AgentStatus.IDLE)
if result:
self.ui.add_log(f"{agent_name}: {result}", level="success")
def on_llm_call(self, model: str, prompt_length: int):
"""
Called when LLM is called
Args:
model: Model name
prompt_length: Prompt length
"""
self.ui.update_metrics(
llm_calls=self.ui.metrics.get("llm_calls", 0) + 1
)
self.ui.add_log(f"LLM call: {model} (prompt: {prompt_length} chars)", level="debug")
def on_grounding_call(self, backend: str, action: str):
"""
Called when grounding backend is called
Args:
backend: Backend name
action: Action description
"""
self.ui.add_grounding_operation(backend, action, status="pending")
self.ui.add_log(f"Grounding [{backend}]: {action}", level="info")
def on_grounding_complete(self, backend: str, action: str, success: bool):
    """
    Record the outcome of a grounding backend operation.

    The most recent operation that matches the backend and action and is
    still "pending" is marked "success" or "error"; older pending matches
    are left untouched. A log line carrying a check or cross mark is then
    written at the matching level.

    Args:
        backend: Backend name
        action: Action description
        success: Whether operation succeeded
    """
    ui = self.ui
    outcome = "success" if success else "error"

    # Walk newest-to-oldest so only the latest pending match is resolved.
    latest_pending = next(
        (
            op
            for op in reversed(ui.grounding_operations)
            if op["backend"] == backend
            and op["action"] == action
            and op["status"] == "pending"
        ),
        None,
    )
    if latest_pending is not None:
        latest_pending["status"] = outcome

    mark = "✓" if success else "✗"
    ui.add_log(f"Grounding [{backend}]: {action} {mark}", level=outcome)
def on_iteration(self, iteration: int):
    """
    Publish the current iteration number to the UI metrics.

    Args:
        iteration: Iteration number
    """
    ui = self.ui
    ui.update_metrics(iterations=iteration)
def on_error(self, message: str):
    """
    Report an error to the UI log.

    The message is prefixed with "ERROR: " and logged at error level.

    Args:
        message: Error message
    """
    log_line = f"ERROR: {message}"
    self.ui.add_log(log_line, level="error")
class UILoggingHandler:
    """
    Forwards log records to the UI log panel.

    NOTE(review): this class exposes an ``emit(record)`` method but does not
    subclass ``logging.Handler``; confirm how callers attach it to a logger.
    """

    # Python logging level name -> UI log level (CRITICAL is shown as error).
    _UI_LEVELS = {
        "DEBUG": "debug",
        "INFO": "info",
        "WARNING": "warning",
        "ERROR": "error",
        "CRITICAL": "error",
    }

    # Case-insensitive substrings that mark a message as noise to be dropped.
    _NOISY_FRAGMENTS = ("processing card", "workflow poll")

    def __init__(self, ui: AnyToolUI):
        """
        Store the UI that will receive forwarded records.

        Args:
            ui: AnyToolUI instance
        """
        self.ui = ui

    def emit(self, record):
        """
        Forward one log record to the UI unless it is filtered as noise.

        Unknown level names fall back to "info".

        Args:
            record: Log record
        """
        ui_level = self._UI_LEVELS.get(record.levelname, "info")
        text = record.getMessage()

        lowered = text.lower()
        for fragment in self._NOISY_FRAGMENTS:
            if fragment in lowered:
                return

        self.ui.add_log(text, level=ui_level)
def create_integration(ui: AnyToolUI) -> UIIntegration:
    """
    Build a UIIntegration bound to the given UI.

    Args:
        ui: AnyToolUI instance

    Returns:
        UIIntegration instance
    """
    integration = UIIntegration(ui)
    return integration
================================================
FILE: pyproject.toml
================================================
# Build the package with setuptools through its PEP 517 backend.
[build-system]
build-backend = "setuptools.build_meta"
requires = [
    "setuptools>=68.0",
    "wheel",
]
# Core package metadata (PEP 621).
[project]
name = "anytool"
version = "0.1.0"
description = "AnyTool: Universal Tool-Use Layer for AI Agents"
readme = "README.md"
requires-python = ">=3.10"
license = {text = "MIT"}
authors = [
    {name = "lingruixu@HKUDS", email = "lingruixu.db@gmail.com"}
]
# Runtime dependencies installed on every platform. Keep this list in sync
# with requirements.txt.
dependencies = [
    "litellm>=1.70.0",
    "python-dotenv>=1.0.0",
    "openai>=1.0.0",
    "jsonschema>=4.25.0",
    "mcp>=1.0.0",
    "anthropic>=0.71.0",
    "pillow>=12.0.0",
    # Listed as a core dependency in requirements.txt; it was missing here,
    # so installs driven by pyproject.toml did not pull it in.
    "colorama",
    "flask>=3.1.0",
    "pyautogui>=0.9.54",
    "pydantic>=2.12.0",
    "requests>=2.32.0",
]
# Optional extras. The platform extras hold OS-specific packages; install
# only the one matching the host OS, e.g. `pip install "anytool[linux]"`.
[project.optional-dependencies]
macos = [
    "pyobjc-core>=12.0",
    "pyobjc-framework-cocoa>=12.0",
    "pyobjc-framework-quartz>=12.0",
    "atomacos>=3.2.0",
]
linux = [
    "python-xlib>=0.33",
    "pyatspi>=2.38.0",
    "numpy>=1.24.0",
]
windows = [
    "pywinauto>=0.6.8",
    "pywin32>=306",
    "PyGetWindow>=0.0.9",
]
dev = [
    "pytest>=7.0.0",
    "pytest-asyncio>=0.21.0",
    "black>=23.0.0",
    "flake8>=6.0.0",
    "mypy>=1.0.0",
]
# Each platform extra is gated by an environment marker so that
# `anytool[all]` resolves on any OS: only the extra for the current platform
# is selected, plus the dev tools. Without the markers every platform's
# packages would be requested at once, which cannot be satisfied on a
# single machine.
all = [
    "anytool[macos]; sys_platform == 'darwin'",
    "anytool[linux]; sys_platform == 'linux'",
    "anytool[windows]; sys_platform == 'win32'",
    "anytool[dev]",
]
# Project links published with the package metadata.
[project.urls]
Repository = "https://github.com/HKUDS/AnyTool"
# Quoted because the label contains a space.
"Bug Tracker" = "https://github.com/HKUDS/AnyTool/issues"
# Console entry points, in "module.path:callable" form.
[project.scripts]
# `anytool` invokes run_main from anytool/__main__.py.
anytool = "anytool.__main__:run_main"
# `anytool-server` invokes main from the anytool.local_server.main module.
anytool-server = "anytool.local_server.main:main"
# Package discovery: search the repository root and pick up `anytool` and
# every package whose name starts with it.
[tool.setuptools.packages.find]
where = ["."]
include = ["anytool*"]
# Non-Python files shipped inside the `anytool` package. Paths are glob
# patterns relative to the package directory.
[tool.setuptools.package-data]
anytool = [
    "config/*.json",
    "config/*.json.example",
    "local_server/config.json",
    "local_server/README.md",
]
================================================
FILE: requirements.txt
================================================
# AnyTool core dependencies
litellm>=1.70.0
python-dotenv>=1.0.0
openai>=1.0.0
jsonschema>=4.25.0
mcp>=1.0.0
anthropic>=0.71.0
pillow>=12.0.0
colorama
# Local server dependencies (cross-platform)
flask>=3.1.0
pyautogui>=0.9.54
pydantic>=2.12.0
requests>=2.32.0
# The platform-specific groups below are intentionally commented out, so
# `pip install -r requirements.txt` installs only the cross-platform set
# above. The same packages are declared as the `macos`, `linux`, and
# `windows` extras in pyproject.toml. Each commented line carries a
# `sys_platform` marker that limits it to its platform if uncommented.
# # macOS-specific dependencies (local server)
# pyobjc-core>=12.0; sys_platform == 'darwin'
# pyobjc-framework-cocoa>=12.0; sys_platform == 'darwin'
# pyobjc-framework-quartz>=12.0; sys_platform == 'darwin'
# atomacos>=3.2.0; sys_platform == 'darwin'
# # Linux-specific dependencies (local server)
# python-xlib>=0.33; sys_platform == 'linux'
# pyatspi>=2.38.0; sys_platform == 'linux'
# numpy>=1.24.0; sys_platform == 'linux'
# # Windows-specific dependencies (local server)
# pywinauto>=0.6.8; sys_platform == 'win32'
# pywin32>=306; sys_platform == 'win32'
# PyGetWindow>=0.0.9; sys_platform == 'win32'