Repository: HKUDS/AnyTool
Branch: main
Commit: 506430fec133
Files: 132
Total size: 1.1 MB

Directory structure:
gitextract_sqfmt1l8/

├── .gitignore
├── COMMUNICATION.md
├── LICENSE
├── README.md
├── anytool/
│   ├── __init__.py
│   ├── __main__.py
│   ├── agents/
│   │   ├── __init__.py
│   │   ├── base.py
│   │   └── grounding_agent.py
│   ├── config/
│   │   ├── __init__.py
│   │   ├── config_agents.json
│   │   ├── config_dev.json.example
│   │   ├── config_grounding.json
│   │   ├── config_mcp.json.example
│   │   ├── config_security.json
│   │   ├── constants.py
│   │   ├── grounding.py
│   │   ├── loader.py
│   │   └── utils.py
│   ├── grounding/
│   │   ├── backends/
│   │   │   ├── __init__.py
│   │   │   ├── gui/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── anthropic_client.py
│   │   │   │   ├── anthropic_utils.py
│   │   │   │   ├── config.py
│   │   │   │   ├── provider.py
│   │   │   │   ├── session.py
│   │   │   │   ├── tool.py
│   │   │   │   └── transport/
│   │   │   │       ├── actions.py
│   │   │   │       ├── connector.py
│   │   │   │       └── local_connector.py
│   │   │   ├── mcp/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── client.py
│   │   │   │   ├── config.py
│   │   │   │   ├── installer.py
│   │   │   │   ├── provider.py
│   │   │   │   ├── session.py
│   │   │   │   ├── tool_cache.py
│   │   │   │   ├── tool_converter.py
│   │   │   │   └── transport/
│   │   │   │       ├── connectors/
│   │   │   │       │   ├── __init__.py
│   │   │   │       │   ├── base.py
│   │   │   │       │   ├── http.py
│   │   │   │       │   ├── sandbox.py
│   │   │   │       │   ├── stdio.py
│   │   │   │       │   ├── utils.py
│   │   │   │       │   └── websocket.py
│   │   │   │       └── task_managers/
│   │   │   │           ├── __init__.py
│   │   │   │           ├── sse.py
│   │   │   │           ├── stdio.py
│   │   │   │           ├── streamable_http.py
│   │   │   │           └── websocket.py
│   │   │   ├── shell/
│   │   │   │   ├── __init__.py
│   │   │   │   ├── provider.py
│   │   │   │   ├── session.py
│   │   │   │   └── transport/
│   │   │   │       ├── connector.py
│   │   │   │       └── local_connector.py
│   │   │   └── web/
│   │   │       ├── __init__.py
│   │   │       ├── provider.py
│   │   │       └── session.py
│   │   └── core/
│   │       ├── exceptions.py
│   │       ├── grounding_client.py
│   │       ├── provider.py
│   │       ├── quality/
│   │       │   ├── __init__.py
│   │       │   ├── manager.py
│   │       │   ├── store.py
│   │       │   └── types.py
│   │       ├── search_tools.py
│   │       ├── security/
│   │       │   ├── __init__.py
│   │       │   ├── e2b_sandbox.py
│   │       │   ├── policies.py
│   │       │   └── sandbox.py
│   │       ├── session.py
│   │       ├── system/
│   │       │   ├── __init__.py
│   │       │   ├── provider.py
│   │       │   └── tool.py
│   │       ├── tool/
│   │       │   ├── __init__.py
│   │       │   ├── base.py
│   │       │   ├── local_tool.py
│   │       │   └── remote_tool.py
│   │       ├── transport/
│   │       │   ├── connectors/
│   │       │   │   ├── __init__.py
│   │       │   │   ├── aiohttp_connector.py
│   │       │   │   └── base.py
│   │       │   └── task_managers/
│   │       │       ├── __init__.py
│   │       │       ├── aiohttp_connection_manager.py
│   │       │       ├── async_ctx.py
│   │       │       ├── base.py
│   │       │       ├── noop.py
│   │       │       └── placeholder.py
│   │       └── types.py
│   ├── llm/
│   │   ├── __init__.py
│   │   └── client.py
│   ├── local_server/
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── config.json
│   │   ├── feature_checker.py
│   │   ├── health_checker.py
│   │   ├── main.py
│   │   ├── platform_adapters/
│   │   │   ├── __init__.py
│   │   │   ├── linux_adapter.py
│   │   │   ├── macos_adapter.py
│   │   │   ├── pyxcursor.py
│   │   │   └── windows_adapter.py
│   │   ├── requirements.txt
│   │   ├── run.sh
│   │   └── utils/
│   │       ├── __init__.py
│   │       ├── accessibility.py
│   │       └── screenshot.py
│   ├── platform/
│   │   ├── __init__.py
│   │   ├── config.py
│   │   ├── recording.py
│   │   ├── screenshot.py
│   │   └── system_info.py
│   ├── prompts/
│   │   ├── __init__.py
│   │   └── grounding_agent_prompts.py
│   ├── recording/
│   │   ├── __init__.py
│   │   ├── action_recorder.py
│   │   ├── manager.py
│   │   ├── recorder.py
│   │   ├── utils.py
│   │   ├── video.py
│   │   └── viewer.py
│   ├── tool_layer.py
│   └── utils/
│       ├── cli_display.py
│       ├── display.py
│       ├── logging.py
│       ├── telemetry/
│       │   ├── __init__.py
│       │   ├── events.py
│       │   ├── telemetry.py
│       │   └── utils.py
│       ├── ui.py
│       └── ui_integration.py
├── pyproject.toml
└── requirements.txt

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# OS files
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db
Desktop.ini

# IDE files
.vscode/
.idea/
.pytest_cache/

# Distribution / packaging
dist/
build/
*.egg-info/
*.egg

# Environment files
.env

# MCP files
anytool/config/config_mcp.json

# Logs
logs/

# Embedding cache
.anytool/
embedding_cache/
tool_quality/

# MCP tool cache
mcp_tool_cache.json
mcp_tool_cache_sanitized.json

# Config files
anytool/config/config_dev.json

# LLM keys
anytool/llm/remote_client/

# Local server temp files
anytool/local_server/temp/

examples/

================================================
FILE: COMMUNICATION.md
================================================
We provide QR codes for joining the HKUDS discussion groups on **WeChat** and **Feishu**.

You can join by scanning the QR codes below:

<img src="https://github.com/HKUDS/.github/blob/main/profile/QR.png" alt="WeChat QR Code" width="400"/>

================================================
FILE: LICENSE
================================================
MIT License

Copyright (c) 2025 HKUDS

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


================================================
FILE: README.md
================================================
<div align="center">

<picture>
    <img src="assets/AnyTool_logo.png" width="800px" style="border: none; box-shadow: none;" alt="AnyTool Logo">
</picture>

## AnyTool: Universal Tool-Use Layer for AI Agents

### ✨ **One Line of Code to Supercharge any Agent with <br>Fast, Scalable and Powerful Tool Use** ✨

[![Platform](https://img.shields.io/badge/Platform-macOS%20%7C%20Linux%20%7C%20Windows-99C9BF.svg)](https://github.com/HKUDS/AnyTool/)
[![Python](https://img.shields.io/badge/Python-3.12+-FCE7D6.svg)](https://www.python.org/)
[![License](https://img.shields.io/badge/License-MIT-C1E5F5.svg)](https://opensource.org/licenses/MIT/)
[![Feishu](https://img.shields.io/badge/Feishu-Group-E9DBFC?style=flat&logo=wechat&logoColor=white)](./COMMUNICATION.md) 
[![WeChat](https://img.shields.io/badge/WeChat-Group-C5EAB4?style=flat&logo=wechat&logoColor=white)](./COMMUNICATION.md)

| ⚡ **Fast - Lightning Tool Retrieval** &nbsp;|&nbsp; 📈 **Scalable - Self-Evolving Tool Orchestration** &nbsp;|&nbsp; 🌍 **Powerful - Universal Tool Automation** |

</div>

## 🎯 What is AnyTool?

AnyTool is a **Universal Tool-Use Layer** that transforms how AI agents interact with tools. It solves three fundamental challenges that prevent reliable agent automation: **overwhelming tool contexts**, **unreliable community tools**, and **limited capability coverage** -- delivering the first truly intelligent tool orchestration system for production AI agents.

## 💡 Research Highlights

⚡ **Fast - Lightning Tool Retrieval**
- **Smart Context Management**: Progressive tool filtering delivers exact tools in milliseconds through a multi-stage pipeline, eliminating context pollution while maintaining speed.

- **Zero-Waste Processing**: Pre-computed embeddings and lazy initialization eliminate redundant processing - tools are instantly ready across all executions.

📈 **Scalable - Self-Evolving Tool Orchestration**
- **Adaptive MCP Tool Selection**: Smart caching and selective re-indexing maintain constant performance from 10 to 10,000 tools with optimal resource usage.
  
- **Self-Evolving Tool Optimization**: System continuously improves through persistent memory, becoming more efficient as your tool ecosystem expands.

🌍 **Powerful - Universal Tool Automation**
- **Quality-Aware Selection**: Built-in reliability tracking and safety controls deliver production-ready automation through persistent learning and execution safeguards.

- **Universal Tool-Use Capability**: Multi-backend architecture seamlessly extends beyond web APIs to system operations, GUI automation, and deep research through a unified interface.

## ⚡ Easy-to-Use and Effortless Integration

One line to get intelligent tool orchestration. Zero-config setup transforms complex multi-tool workflows into a single API call.

```python
from anytool import AnyTool

# One line to get intelligent tool orchestration
async with AnyTool() as tool_layer:
    result = await tool_layer.execute(
        "Research trending AI coding tools from GitHub and tech news, "
        "collect their features and user feedback, analyze adoption patterns, "
        "then create a comparison report with insights"
    )
```

---

## 📋 Table of Contents

- [🎯 Quick Start](#-quick-start)
- [🚀 Technical Innovation & Implementation](#technical-innovation--implementation)
- [🔧 Configuration Guide](#-configuration-guide)
- [📖 Code Structure](#-code-structure)
- [🔗 Related Projects](#-related-projects)

---

## 🎯 Quick Start

### 1. Environment Setup

```bash
# Clone repository
git clone https://github.com/HKUDS/AnyTool.git
cd AnyTool

# Create and activate conda environment (includes ffmpeg for video recording)
conda create -n anytool python=3.12 ffmpeg -c conda-forge -y
conda activate anytool

# Install dependencies
pip install -r requirements.txt
```

> [!NOTE]
> Create a `.env` file and add your API keys (refer to `anytool/.env.example`).

### 2. Execution Mode: Local vs Server

AnyTool's Shell and GUI backends support two execution modes. You can configure the mode in `anytool/config/config_grounding.json`:

```jsonc
{
  "shell": { "mode": "local", ... },  // or "server"
  "gui":   { "mode": "local", ... }   // or "server"
}
```

#### Local Mode (Default — no server needed)

In **local mode**, Shell and GUI operations are executed directly in-process via `subprocess` / `asyncio`. This is the simplest setup — **no local server required**. Just use AnyTool as normal; see [Quick Integration](#3-quick-integration) for usage examples.

> [!TIP]
> **Use local mode when** you are running AnyTool on the same machine you want to control (your own laptop / desktop). This is the recommended mode for most users.

#### Server Mode (for remote VMs / isolation)

In **server mode**, Shell and GUI operations are sent over HTTP to a running `local_server` Flask service. This is required when:

- **Controlling a remote VM** — the agent runs on your host, while the server runs inside the VM.
- **Process isolation / sandboxing** — you want script execution in a separate process for security or stability.
- **Multi-machine deployments** — the agent and the execution environment are on different machines.

To use server mode, set `"mode": "server"` in `config_grounding.json`, then install platform-specific dependencies and start the server:

> [!IMPORTANT]
> **Platform-specific setup required**: Different operating systems need different dependencies for desktop control. Please install the required dependencies for your OS before starting the local server:

<details>
<summary><b>macOS Setup</b></summary>

```bash
# Install macOS-specific dependencies
pip install pyobjc-core pyobjc-framework-cocoa pyobjc-framework-quartz atomacos
```

**Permissions Required**: macOS will automatically prompt for permissions when you first run the local server. Grant the following:
- **Accessibility** (for GUI control)
- **Screen Recording** (for screenshots and video capture)

> If prompts don't appear, manually grant permissions in System Settings → Privacy & Security.
</details>

<details>
<summary><b>Linux Setup</b></summary>

```bash
# Install Linux-specific dependencies
pip install python-xlib pyatspi numpy

# Install system packages
sudo apt install at-spi2-core python3-tk scrot
```

> [!NOTE]
> **Optional dependencies:**
> - Accessibility: `pyatspi` + `at-spi2-core`
> - Window management: `wmctrl`
> - Cursor in screenshots: `libx11-dev` + `libxfixes-dev`

</details>

<details>
<summary><b>Windows Setup</b></summary>

```bash
# Install Windows-specific dependencies
pip install pywinauto pywin32 PyGetWindow
```
</details>

After installing the platform-specific dependencies, start the local server:

```bash
python -m anytool.local_server.main
```

> [!NOTE]
> See [`anytool/local_server/README.md`](anytool/local_server/README.md) for complete API documentation and advanced configuration.

#### Mode Comparison

| | Local Mode (`"local"`) | Server Mode (`"server"`) |
|---|---|---|
| **Setup** | Zero — just run your agent | Start `local_server` first |
| **Use case** | Same-machine development | Remote VMs, sandboxing, multi-machine |
| **Shell execution** | `asyncio.subprocess` in-process | HTTP → Flask → `subprocess` |
| **GUI execution** | Direct pyautogui / ScreenshotHelper | HTTP → Flask → pyautogui |
| **Dependencies** | Only core AnyTool | Core + Flask + platform deps |
| **Network** | None required | HTTP between agent ↔ server |

### 3. Quick Integration

AnyTool is a **plug-and-play Universal Tool-Use Layer** for any AI agent. The task passed to `execute()` can come from your agent's planning module, user input, or any workflow system.

```python
import asyncio
from anytool import AnyTool
from anytool.tool_layer import AnyToolConfig

async def main():
    config = AnyToolConfig(
        enable_recording=True,
        recording_backends=["gui", "shell", "mcp", "web"],
        enable_screenshot=True,
        enable_video=True,
    )
    
    async with AnyTool(config=config) as tool_layer:
        result = await tool_layer.execute(
            "Research trending AI coding tools from GitHub and tech news, "
            "collect their features and user feedback, analyze adoption patterns, "
            "then create a comparison report with insights"
        )
        print(result["response"])

asyncio.run(main())
```

> [!TIP]
> **MCP Server Configuration**: For tasks requiring specific tools, add relevant MCP servers to `anytool/config/config_mcp.json`. Unsure which servers to add? Simply add all potentially useful ones — AnyTool's Smart Tool RAG will automatically select the appropriate tools for your task. See [MCP Configuration](#mcp-configuration) for details.

---

## Technical Innovation & Implementation

### 🧩 Challenge 1: MCP Tool Context Overload

**The Problem**. Current MCP agents suffer from a fundamental design flaw: they load ALL configured servers and tools at every execution step, creating an overwhelming action space. This creates three critical issues:
- ⚡ **Slow Performance with Massive Context Loading**<br>
  Complete tool set from all pre-configured servers loaded simultaneously at every step, degrading execution speed
  
- 🎯 **Poor Accuracy from Blind Tool Setup**<br>
  Users cannot preview tools before connecting, leading to over-setup "just in case" and confusing tool selection
  
- 💸 **Resource Waste with No Memory**<br>
  Same tools reloaded at every execution step with no caching, causing redundant loading

### ✅ AnyTool's Solution: Tool Context Management Framework

**Motivation**: "Load Everything" → "Retrieve What's Needed"<br>
**Improvement**: Faster tool selection, cleaner context, and efficient resource usage through smart retrieval and memory.

#### **Technical Innovation**:<br>
**🎯 Multi-Stage Tool Retrieval Pipeline**
- **Progressive MCP Tool Filtering**: server selection → tool name matching → tool semantic search → LLM ranking
- **Reduces MCP Tool Search Space**: Each stage narrows down the candidate tools to optimize precision and speed

**💾 Long-Term Tool Memory**
- **Save Once, Use Forever**: Pre-compute tool embeddings once and save them to disk for instant reuse
- **Zero Waste Processing**: No more redundant processing - tools are ready to use immediately across all execution steps

**🧠 Adaptive Tool Selection**
- **Adaptive MCP Tool Ranking**: LLM-based tool selection refinement triggered only when MCP tool results are large or ambiguous
- **Tool Selection Efficiency**: Balances MCP tool accuracy with computational efficiency

**🚀 On-Demand Resource Management**
- **Lazy MCP Server Startup**: MCP server initialization triggered only when specific tools are needed
- **Selective Tool Updates**: Incremental re-indexing of only changed MCP tools, not the entire tool set

---

### 🚨 Challenge 2: MCP Tool Quality Issues

**The Problem**. Current MCP servers suffer from community contribution challenges that create three critical issues:
- 🔍 **Poor Tool Descriptions**<br>
  Misleading claims, non-existent advertised tools, and vague capability specifications lead to wrong tool selection.
  
- 📊 **No Reliability Signals**<br>
  Cannot assess MCP tool quality before use, causing blind selection decisions.
  
- ⚠️ **Security and Safety Gaps**<br>
  Unvetted community tools may execute dangerous operations without proper safeguards.

### ✅ **AnyTool Solution: Self-Contained Quality Management**

**Motivation**: "Blind Tool Trust" → "Smart Quality Assessment"<br>
**Improvement**: Reliable tool selection, safe execution, and autonomous recovery through quality tracking and safety controls.

#### **Technical Innovation:**<br>
**🎯 Quality-Aware Tool Selection**
- **Description Quality Check**: LLM-based evaluation of MCP tool description clarity and completeness.
- **Performance-Based Ranking**: Track call/success rates for each MCP tool in persistent memory to prioritize reliable options.

**💾 Learning-Based Tool Memory**
- **Track Tool Performance**: Remember which MCP tools work well and which fail over time.
- **Smart Tool Prioritization**: Automatically rank tools based on past success rates and description quality.

**🛡️ Safety-First Execution**
- **Block Dangerous Operations**: Prevent arbitrary code execution and require user approval for sensitive MCP tool operations.
- **Execution Safeguards**: Built-in safety controls for all MCP tool executions.

**🚀 Self-Healing Tool Management**
- **Autonomous Tool Switching**: Switch failed MCP tools locally without restarting expensive planning loops.
- **Local Failure Recovery**: Automatically switch to alternative MCP tools on failure without escalating to upper-level agents.
  
---

### 🔄 Challenge 3: Limited MCP Capability Scope

**The Problem**. The current MCP ecosystem focuses primarily on Web APIs and online services, creating significant automation gaps that prevent comprehensive task completion:

- **🖥️ Missing System Operations**<br>
  No native support for file manipulation, process management, or command execution on local systems.

- **🖱️ No Desktop Automation**<br>
  Cannot control GUI applications that lack APIs, limiting automation to web-only scenarios.

- **📊 Incomplete Tool Coverage**<br>
  Limited server categories in the community and incomplete tool sets within existing servers create workflow bottlenecks.

### ✅ AnyTool Solution: Universal Capability Extension<br>(MCP + System Commands + GUI Control ≈ Universal Task Completion)

**Motivation**: "Web-Only MCP" → "Universal Task Completion"<br>
**Improvement**: Complete automation coverage through multi-backend architecture that seamlessly extends MCP capabilities beyond web APIs.

**🏗️ Multi-Backend Architecture**
- **MCP Backend**: Community servers for Web APIs and online services
- **Shell Backend**: Bash/Python execution for system-level operations and file management
- **GUI Backend**: Pixel-level automation for any visual application without API requirements
- **Web Backend**: Deep web research and data extraction capabilities

**💡 Self-Evolving Capability Discovery**
- **Intelligent Gap Detection**: Planning agent identifies when MCP tools are insufficient for task requirements
- **Automatic Backend Selection**: Shell/GUI backends automatically fill capability gaps without manual intervention
- **Dynamic Capability Expansion**: Previously impossible tasks become achievable through backend combination

**🎭 Unified Tool Orchestration**
- **Uniform Tool Schema**: All backends expose identical interface for seamless agent tool selection
- **Transparent Backend Switching**: Agents select optimal tools across backend types without knowing implementation details
- **Intelligent Tool Routing**: Automatic routing to the most appropriate backend based on task requirements

**🚀 Seamless Integration Layer**
- **Single Tool Interface**: Unified API that abstracts away backend complexity from AI agents.
- **Cross-Backend Coordination**: Enable complex workflows that span multiple backend capabilities.
- **Consistent Safety Controls**: Apply security and safety measures uniformly across all backend types.

---

## 🔧 Configuration Guide

### Configuration Overview

AnyTool uses a layered configuration system:

- **`config_dev.json`** (highest priority): Local development overrides. Overrides all other configurations.
- **`config_agents.json`**: Agent definitions and backend access control
- **`config_mcp.json`**: MCP server registry
- **`config_grounding.json`**: Backend-specific settings and Smart Tool RAG configuration
- **`config_security.json`**: Security policies with runtime user confirmation for sensitive operations

---

### Agent Configuration

**Path**: `anytool/config/config_agents.json`

**Purpose**: Define agent roles, control backend access scope, and set execution limits to prevent infinite loops.

**Example configuration**:

```json
{
  "agents": [
    {
      "name": "GroundingAgent",
      "class_name": "GroundingAgent",
      "backend_scope": ["gui", "shell", "mcp", "system", "web"],
      "max_iterations": 20
    }
  ]
}
```

**Key Fields**:

| Field | Description | Options/Example |
|-------|-------------|-----------------|
| `backend_scope` | Accessible backends | `[]` or any combination of `["gui", "shell", "mcp", "system", "web"]` |
| `max_iterations` | Maximum execution cycles | Any integer (e.g., `15`, `20`, `50`) or `null` (unlimited) |

---

### MCP Configuration

**Path**: `anytool/config/config_mcp.json` (copy from `config_mcp.json.example`)

**Purpose**: Register MCP servers with connection details. AnyTool automatically discovers tools from all registered servers and makes them available through Smart Tool RAG.

**Example configuration**:

```json
{
  "mcpServers": {
    "github": {
      "command": "npx",
      "args": ["-y", "@modelcontextprotocol/server-github"],
      "env": {
        "GITHUB_PERSONAL_ACCESS_TOKEN": "${GITHUB_TOKEN}"
      }
    }
  }
}
```

---

<details>
<summary><b>Runtime Configuration (AnyToolConfig)</b></summary>

### Runtime Configuration (AnyToolConfig)

**Complete example**:

```python
from anytool import AnyTool
from anytool.tool_layer import AnyToolConfig

config = AnyToolConfig(
    # LLM Configuration
    llm_model="anthropic/claude-sonnet-4-5",
    llm_enable_thinking=False,
    llm_timeout=120.0,
    llm_max_retries=3,
    llm_rate_limit_delay=0.0,
    llm_kwargs={},  # Additional LiteLLM parameters
    
    # Separate models for specific tasks (None = use llm_model)
    tool_retrieval_model=None,   # Model for tool retrieval LLM filter
    visual_analysis_model=None,  # Model for visual analysis
    
    # Grounding Configuration
    grounding_config_path=None,  # Path to custom config file
    grounding_max_iterations=20,
    grounding_system_prompt=None,  # Custom system prompt
    
    # Backend Configuration
    backend_scope=["gui", "shell", "mcp", "web", "system"],
    
    # Workspace Configuration
    workspace_dir=None,  # Auto-create temp dir if None
    
    # Recording Configuration
    enable_recording=True,
    recording_backends=["gui", "shell", "mcp"],
    recording_log_dir="./logs/recordings",
    enable_screenshot=True,
    enable_video=True,
    enable_conversation_log=True,  # Save LLM conversations to conversations.jsonl
    
    # Logging Configuration
    log_level="INFO",
    log_to_file=False,
    log_file_path=None,
)

async with AnyTool(config=config) as tool_layer:
    result = await tool_layer.execute("Your task here")
    # Or with external task_id for benchmark integration:
    # result = await tool_layer.execute("Your task", task_id="my-task-001")
```

</details>

---

<details>
<summary><b>Other Configuration Files</b></summary>

### Backend Configuration

**Path**: `anytool/config/config_grounding.json`

**Purpose**: Configure backend-specific behaviors, timeouts, Smart Tool RAG system for efficient tool selection, and Tool Quality Tracking for self-evolving tool intelligence.

**Key Fields**:

| Backend | Field | Description | Options/Default |
|---------|-------|-------------|-----------------|
| **shell** | `timeout` | Command timeout (seconds) | Any integer (default: `60`) |
| | `conda_env` | Auto-activate conda environment | Environment name or `null` (default: `"anytool"`) |
| | `working_dir` | Working directory for command execution | Any valid path (default: current directory) |
| | `default_shell` | Shell to use | `"/bin/bash"`, `"/bin/zsh"`, etc. |
| **gui** | `timeout` | Operation timeout (seconds) | Any integer (default: `90`) |
| | `screenshot_on_error` | Capture screenshot on failure | `true` or `false` (default: `true`) |
| | `driver_type` | GUI automation driver | `"pyautogui"` or other supported drivers |
| **mcp** | `timeout` | Request timeout (seconds) | Any integer (default: `30`) |
| | `sandbox` | Run in E2B sandbox | `true` or `false` (default: `false`) |
| | `eager_sessions` | Pre-connect all servers at startup | `true` or `false` (default: `false`, lazy connection) |
| **tool_search** | `search_mode` | Tool retrieval strategy | `"semantic"`, `"hybrid"` (semantic + LLM filter), or `"llm"` (default: `"hybrid"`) |
| | `max_tools` | Maximum tools to return from search | Any integer (default: `40`) |
| | `enable_llm_filter` | Enable LLM-based tool pre-filtering | `true` or `false` (default: `true`) |
| | `llm_filter_threshold` | Enable LLM filter when tools exceed this count | Any integer (default: `50`) |
| | `enable_cache_persistence` | Persist embedding cache to disk | `true` or `false` (default: `true`) |
| **tool_quality** | `enabled` | Enable tool quality tracking | `true` or `false` (default: `true`) |
| | `enable_persistence` | Persist quality data to disk | `true` or `false` (default: `true`) |
| | `cache_dir` | Directory for quality cache | Path string (default: `.anytool/tool_quality` in project directory) |
| | `auto_evaluate_descriptions` | Automatically evaluate tool descriptions using LLM | `true` or `false` (default: `true`) |
| | `enable_quality_ranking` | Incorporate quality scores in tool ranking | `true` or `false` (default: `true`) |
| | `evolve_interval` | Trigger self-evolution every N tool executions | Any integer 1-100 (default: `5`) |

---

### Security Configuration

**Path**: `anytool/config/config_security.json`

**Purpose**: Define security policies with command filtering and access control.

**Key Fields**:

| Section | Field | Description | Options |
|---------|-------|-------------|---------|
| **global** | `allow_shell_commands` | Enable shell command execution | `true` or `false` (default: `true`) |
| | `allow_network_access` | Enable network operations | `true` or `false` (default: `true`) |
| | `allow_file_access` | Enable file system operations | `true` or `false` (default: `true`) |
| | `blocked_commands` | Platform-specific command blacklist | Object with `common`, `linux`, `darwin`, `windows` arrays |
| | `sandbox_enabled` | Enable sandboxing for all operations | `true` or `false` (default: `false`) |
| **backend** | `shell`, `mcp`, `gui`, `web` | Per-backend security overrides | Same fields as global, backend-specific |

**Example blocked commands**: `rm -rf`, `shutdown`, `reboot`, `mkfs`, `dd`, `format`, `iptables`

**Behavior**: 
- Blocked commands are **rejected automatically**
- Sandbox mode isolates operations in secure environments (E2B sandbox for MCP)

---

### Developer Configuration

**Path**: `anytool/config/config_dev.json` (copy from `config_dev.json.example`)

**Loading Priority**: `config_grounding.json` → `config_security.json` → `config_dev.json` (values in `config_dev.json` override those from the files loaded before it)

</details>

---

## 📖 Code Structure

### 📖 Quick Overview

> **Legend**: ⚡ Core modules | 🔧 Supporting modules

```
AnyTool/
├── anytool/
│   ├── __init__.py                       # Package exports
│   ├── __main__.py                       # CLI entry point (python -m anytool)
│   ├── tool_layer.py                     # AnyTool main class
│   │
│   ├── ⚡ agents/                         # Agent System
│   ├── ⚡ grounding/                      # Unified Backend System
│   │   ├── core/                         # Core abstractions
│   │   └── backends/                     # Backend implementations
│   │       ├── shell/                    # Shell command execution
│   │       ├── gui/                      # Anthropic Computer Use
│   │       ├── mcp/                      # Model Context Protocol
│   │       └── web/                      # Web search & browsing
│   │
│   ├── 🔧 prompts/                       # Prompt Templates
│   ├── 🔧 llm/                           # LLM Integration
│   ├── 🔧 config/                        # Configuration System
│   ├── 🔧 local_server/                  # GUI Backend Server
│   ├── 🔧 recording/                     # Execution Recording
│   ├── 🔧 platform/                      # Platform Integration
│   └── 🔧 utils/                         # Utilities
│
├── .anytool/                             # Runtime cache
│   ├── embedding_cache/                  # Tool embeddings for Smart Tool RAG
│   └── tool_quality/                     # Persistent tool quality tracking data
│
├── logs/                                 # Execution logs
│
├── requirements.txt                      # Python dependencies
├── pyproject.toml                        # Package configuration
└── README.md
```

---

### 📂 Detailed Module Structure

<details open>
<summary><b>⚡ agents/</b> - Agent System</summary>

```
agents/
├── __init__.py
├── base.py                         # Base agent class with common functionality
└── grounding_agent.py              # Execution Agent (tool calling & iteration control)
```

**Key Responsibilities**: Task execution with intelligent tool selection and iteration control.

</details>

<details open>
<summary><b>⚡ grounding/</b> - Unified Backend System (Core Integration Layer)</summary>

**Key Responsibilities**: Unified tool abstraction, backend routing, session pooling, Smart Tool RAG, and Self-Evolving Quality Tracking.

#### Core Abstractions

```
grounding/core/
├── grounding_client.py             # Unified interface across all backends
├── provider.py                     # Abstract provider base class
├── session.py                      # Session lifecycle management
├── search_tools.py                 # Smart Tool RAG for semantic search
├── exceptions.py                   # Custom exception definitions
├── types.py                        # Shared type definitions
│
├── tool/                           # Tool abstraction layer
│   ├── base.py                     # Tool base class
│   ├── local_tool.py               # Local tool implementation
│   └── remote_tool.py              # Remote tool implementation
│
├── quality/                        # Self-evolving tool quality tracking
│   ├── manager.py                  # Quality manager with adaptive ranking
│   ├── store.py                    # Persistent quality data storage
│   └── types.py                    # Quality record data types
│
├── security/                       # Security & sandboxing 🔧
│   ├── policies.py                 # Security policy enforcement
│   ├── sandbox.py                  # Sandbox abstraction
│   └── e2b_sandbox.py              # E2B sandbox integration
│
├── system/                         # System-level provider
│   ├── provider.py
│   └── tool.py
│
└── transport/                      # Transport layer abstractions 🔧
    ├── connectors/
    │   ├── base.py
    │   └── aiohttp_connector.py
    └── task_managers/
        ├── base.py
        ├── async_ctx.py
        ├── aiohttp_connection_manager.py
        └── placeholder.py
```

#### Backend Implementations

<details>
<summary><b>Shell Backend</b> - Command execution via local server</summary>

```
backends/shell/
├── provider.py                     # Shell provider implementation
├── session.py                      # Shell session management
└── transport/
    └── connector.py                # HTTP connector to local server
```

</details>

<details>
<summary><b>GUI Backend</b> - Anthropic Computer Use integration</summary>

```
backends/gui/
├── provider.py                     # GUI provider implementation
├── session.py                      # GUI session management
├── tool.py                         # GUI-specific tools
├── anthropic_client.py             # Anthropic API client wrapper
├── anthropic_utils.py              # Utility functions
├── config.py                       # GUI configuration
└── transport/
    ├── connector.py                # Computer Use API connector
    └── actions.py                  # Action execution logic
```

</details>

<details>
<summary><b>MCP Backend</b> - Model Context Protocol servers</summary>

```
backends/mcp/
├── provider.py                     # MCP provider implementation
├── session.py                      # MCP session management
├── client.py                       # MCP client
├── config.py                       # MCP configuration loader
├── installer.py                    # MCP server installer
├── tool_converter.py               # Convert MCP tools to unified format
├── tool_cache.py                   # MCP tool cache for offline tool discovery
└── transport/
    ├── connectors/                 # Multiple transport types
    │   ├── base.py
    │   ├── stdio.py                # Standard I/O connector
    │   ├── http.py                 # HTTP connector
    │   ├── websocket.py            # WebSocket connector
    │   ├── sandbox.py              # Sandboxed connector
    │   └── utils.py
    └── task_managers/              # Protocol-specific managers
        ├── stdio.py
        ├── sse.py
        ├── streamable_http.py
        └── websocket.py
```

</details>

<details>
<summary><b>Web Backend</b> - Search and browsing</summary>

```
backends/web/
├── provider.py                     # Web provider implementation
└── session.py                      # Web session management
```

</details>

</details>

<details>
<summary><b>🔧 prompts/</b> - Prompt Templates</summary>

```
prompts/
├── __init__.py
└── grounding_agent_prompts.py     # Grounding agent system & tool selection prompts
```

</details>

<details>
<summary><b>🔧 llm/</b> - LLM Integration</summary>

```
llm/
├── __init__.py
└── client.py                       # LiteLLM wrapper with retry logic
```

</details>

<details>
<summary><b>🔧 config/</b> - Configuration System</summary>

```
config/
├── __init__.py
├── loader.py                       # Configuration file loader
├── constants.py                    # System constants
├── grounding.py                    # Grounding configuration dataclasses
├── utils.py                        # Configuration utilities
│
├── config_grounding.json           # Backend-specific settings
├── config_agents.json              # Agent configurations
├── config_mcp.json.example         # MCP server definitions (copy to config_mcp.json)
├── config_security.json            # Security policies
└── config_dev.json.example         # Development config template
```

</details>

<details>
<summary><b>🔧 local_server/</b> - GUI Backend Server</summary>

```
local_server/
├── __init__.py
├── main.py                         # Flask application entry point
├── config.json                     # Server configuration
├── feature_checker.py              # Platform feature detection
├── health_checker.py               # Server health monitoring
├── platform_adapters/              # OS-specific implementations
│   ├── macos_adapter.py            # macOS automation (atomacos, pyobjc)
│   ├── linux_adapter.py            # Linux automation (pyatspi, xlib)
│   ├── windows_adapter.py          # Windows automation (pywinauto)
│   └── pyxcursor.py                # Custom cursor handling
├── utils/
│   ├── accessibility.py            # Accessibility tree utilities
│   └── screenshot.py               # Screenshot capture
└── README.md
```

**Purpose**: Lightweight Flask service enabling computer control (GUI, Shell, Files, Screen capture).

</details>

<details>
<summary><b>🔧 recording/</b> - Execution Recording</summary>

```
recording/
├── __init__.py
├── recorder.py                     # Main recording manager
├── manager.py                      # Recording lifecycle management
├── action_recorder.py              # Action-level logging
├── video.py                        # Video capture integration
├── viewer.py                       # Trajectory viewer and analyzer
└── utils.py                        # Recording utilities
```

**Purpose**: Execution audit with trajectory recording and video capture.

</details>

<details>
<summary><b>🔧 platform/</b> - Platform Integration</summary>

```
platform/
├── __init__.py
├── config.py                       # Platform-specific configuration
├── recording.py                    # Recording integration
├── screenshot.py                   # Screenshot utilities
└── system_info.py                  # System information gathering
```

</details>

<details>
<summary><b>🔧 utils/</b> - Shared Utilities</summary>

```
utils/
├── logging.py                      # Structured logging system
├── ui.py                           # Terminal UI components
├── display.py                      # Display formatting utilities
├── cli_display.py                  # CLI-specific display
├── ui_integration.py               # UI integration helpers
└── telemetry/                      # Usage analytics (opt-in)
    ├── __init__.py
    ├── events.py
    ├── telemetry.py
    └── utils.py
```

</details>

<details>
<summary><b>📊 logs/</b> - Execution Logs & Recordings</summary>

```
logs/
├── <script_name>/                        # Main application logs
│   └── anytool_YYYY-MM-DD_HH-MM-SS.log   # Timestamped log files
│
└── recordings/                           # Execution recordings
    └── task_<id>/                        # Individual recording session
        ├── trajectory.json               # Complete execution trajectory
        ├── screenshots/                  # Visual execution record (GUI backend)
        │   ├── tool_<name>_<timestamp>.png
        │   ├── tool_<name>_<timestamp>.png
        │   └── ...                       # Sequential screenshots
        ├── workspace/                    # Task workspace
        │   └── [generated files]         # Files created during execution
        └── screen_recording.mp4          # Video recording (if enabled)
```

**Recording Control**: Enable via `AnyToolConfig(enable_recording=True)`, filter backends with `recording_backends=["gui", "shell", ...]`

</details>

---

## 🔗 Related Projects

AnyTool builds upon excellent open-source projects, and we sincerely thank their authors and contributors:

- **[OSWorld](https://github.com/xlang-ai/OSWorld)**: Comprehensive benchmark for evaluating computer-use agents across diverse operating system tasks.
- **[mcp-use](https://github.com/mcp-use/mcp-use)**: Platform that simplifies MCP agent development with client SDKs.

---

<div align="center">

**🌟 If this project helps you, please give us a Star!**

**🤖 Empower AI agents with intelligent tool orchestration!**  

</div>

---

<p align="center">
  <em> ❤️ Thanks for visiting ✨ AnyTool!</em><br><br>
  <img src="https://visitor-badge.laobi.icu/badge?page_id=HKUDS.AnyTool&style=for-the-badge&color=00d4ff" alt="Views">
</p>


================================================
FILE: anytool/__init__.py
================================================
from importlib import import_module as _imp
from typing import Dict as _Dict, Any as _Any, TYPE_CHECKING as _TYPE_CHECKING

if _TYPE_CHECKING:
    from anytool.tool_layer import AnyTool as AnyTool, AnyToolConfig as AnyToolConfig
    from anytool.agents import GroundingAgent as GroundingAgent
    from anytool.llm import LLMClient as LLMClient
    from anytool.recording import RecordingManager as RecordingManager

# Package version string, exposed as ``anytool.__version__``.
__version__ = "0.1.0"

# Public names of the package. Apart from ``__version__``, every entry has a
# matching key in ``_attr_to_module`` below and is therefore imported lazily
# by the module-level ``__getattr__`` rather than at package import time.
__all__ = [
    # Version
    "__version__",
    
    # Main API
    "AnyTool",
    "AnyToolConfig",

    # Core Components
    "GroundingAgent",
    "GroundingClient",
    "LLMClient",
    "BaseTool",
    "ToolResult",
    "BackendType",

    # Recording System
    "RecordingManager",
    "RecordingViewer",
]

# Map attribute → sub-module that provides it
# (keys are the public attribute names; values are the dotted module paths
# that ``__getattr__`` imports to resolve them).
_attr_to_module: _Dict[str, str] = {
    # Main API
    "AnyTool": "anytool.tool_layer",
    "AnyToolConfig": "anytool.tool_layer",

    # Core Components
    "GroundingAgent": "anytool.agents",
    "GroundingClient": "anytool.grounding.core.grounding_client",
    "LLMClient": "anytool.llm",
    "BaseTool": "anytool.grounding.core.tool.base",
    "ToolResult": "anytool.grounding.core.types",
    "BackendType": "anytool.grounding.core.types",

    # Recording System
    "RecordingManager": "anytool.recording",
    "RecordingViewer": "anytool.recording.viewer",
}


def __getattr__(name: str) -> _Any:
    """Resolve a lazily exported attribute the first time it is requested.

    Looks ``name`` up in ``_attr_to_module``, imports the providing
    sub-module, fetches the attribute from it and stores it in this module's
    globals so later accesses bypass this hook. Deferring the import keeps the
    initial ``import anytool`` lightweight and postpones any
    ``ModuleNotFoundError`` from optional or heavy dependencies until the
    feature is actually used.

    Args:
        name: Attribute requested on the ``anytool`` package.

    Returns:
        The object of that name from the mapped sub-module.

    Raises:
        AttributeError: If ``name`` is not a lazily exported attribute.
    """
    provider = _attr_to_module.get(name)
    if provider is None:
        raise AttributeError(f"module 'anytool' has no attribute '{name}'")

    resolved = getattr(_imp(provider), name)
    # Cache on the module so the import machinery is not consulted again.
    globals()[name] = resolved
    return resolved


def __dir__():
    """Return the sorted names of current globals plus every lazily exported attribute."""
    names = list(globals())
    names.extend(_attr_to_module)
    return sorted(names)

================================================
FILE: anytool/__main__.py
================================================
import asyncio
import argparse
import sys
import logging
from typing import Optional

from anytool.tool_layer import AnyTool, AnyToolConfig
from anytool.utils.logging import Logger
from anytool.utils.ui import create_ui, AnyToolUI
from anytool.utils.ui_integration import UIIntegration
from anytool.utils.cli_display import CLIDisplay
from anytool.utils.display import colorize

# Module-level logger for the CLI entry point, obtained from the project's Logger helper.
logger = Logger.get_logger(__name__)


class UIManager:
    """Coordinate the optional live UI and temporary log suppression around a task.

    Both collaborators may be ``None`` (UI disabled). In that case starting and
    stopping the live display are no-ops and result summaries are printed via
    ``CLIDisplay`` instead of the UI.
    """

    # Loggers silenced (raised to CRITICAL) while the live display is active.
    _SUPPRESSED_LOGGERS = ("anytool", "anytool.grounding", "anytool.agents")

    def __init__(self, ui: Optional[AnyToolUI], ui_integration: Optional[UIIntegration]):
        """Store the UI collaborators.

        Args:
            ui: Live UI instance, or ``None`` when the UI is disabled.
            ui_integration: Monitor feeding the UI, or ``None`` when disabled.
        """
        self.ui = ui
        self.ui_integration = ui_integration
        # logger name -> level it had before suppression
        self._original_log_levels = {}

    async def start_live_display(self):
        """Announce, silence the project loggers, then start the display and monitoring."""
        if not self.ui or not self.ui_integration:
            return

        print()
        print(colorize("  ▣ Starting real-time visualization...", 'c'))
        print()
        await asyncio.sleep(1)

        self._suppress_logs()

        await self.ui.start_live_display()
        await self.ui_integration.start_monitoring(poll_interval=2.0)

    async def stop_live_display(self):
        """Stop monitoring and the display, then restore the saved logger levels."""
        if not self.ui or not self.ui_integration:
            return

        await self.ui_integration.stop_monitoring()
        await self.ui.stop_live_display()

        self._restore_logs()

    def print_summary(self, result: dict):
        """Print the task result through the UI when present, else through ``CLIDisplay``."""
        if self.ui:
            self.ui.print_summary(result)
        else:
            CLIDisplay.print_result_summary(result)

    def _suppress_logs(self):
        """Raise the project loggers to CRITICAL, remembering their prior levels."""
        for name in self._SUPPRESSED_LOGGERS:
            log = logging.getLogger(name)
            # setdefault keeps the level recorded by the first suppression, so a
            # repeated call without an intervening restore cannot overwrite the
            # real level with CRITICAL and leave logging silenced for good.
            self._original_log_levels.setdefault(name, log.level)
            log.setLevel(logging.CRITICAL)

    def _restore_logs(self):
        """Put every suppressed logger back to its recorded level and forget the record."""
        for name, level in self._original_log_levels.items():
            logging.getLogger(name).setLevel(level)
        self._original_log_levels.clear()


async def _execute_task(anytool: AnyTool, query: str, ui_manager: UIManager):
    """Run one query with the live display active and print its summary.

    Args:
        anytool: Tool layer whose ``execute`` coroutine runs the query.
        query: Task text to execute.
        ui_manager: Manager that starts/stops the live display and prints results.

    Returns:
        Whatever ``anytool.execute(query)`` returned.

    Raises:
        Any exception raised by ``anytool.execute``; the live display is stopped
        (and log levels restored) before it propagates.
    """
    await ui_manager.start_live_display()
    try:
        result = await anytool.execute(query)
    finally:
        # Always tear the display down: otherwise a failed task would leave the
        # live view running and the project loggers suppressed.
        await ui_manager.stop_live_display()
    ui_manager.print_summary(result)
    return result


async def interactive_mode(anytool: AnyTool, ui_manager: UIManager):
    """Run the interactive prompt loop until the user exits.

    Reads one line at a time with the blocking built-in ``input``. Empty lines
    are ignored; ``exit``/``quit``/``q`` leave the loop; ``status`` and ``help``
    print information; anything else is executed as a task. Errors raised while
    handling a line are logged and printed, and the loop continues.

    Args:
        anytool: Tool layer used to execute queries and report status.
        ui_manager: Manager for the live display and result summaries.
    """
    CLIDisplay.print_interactive_header()

    while True:
        try:
            prompt = colorize(">>> ", 'c', bold=True)
            query = input(f"\n{prompt}").strip()

            if not query:
                continue

            command = query.lower()

            if command in ['exit', 'quit', 'q']:
                print("\nExiting...")
                break

            if command == 'status':
                _print_status(anytool)
                continue

            if command == 'help':
                CLIDisplay.print_help()
                continue

            CLIDisplay.print_task_header(query)
            await _execute_task(anytool, query, ui_manager)

        except KeyboardInterrupt:
            print("\n\nInterrupt signal detected, exiting...")
            break
        except EOFError:
            # input() raises EOFError once stdin is exhausted (Ctrl-D, closed
            # pipe). Every later read fails the same way, so treating it as a
            # generic error would spin forever; leave the loop instead.
            print("\nExiting...")
            break
        except Exception as e:
            logger.error(f"Error: {e}", exc_info=True)
            print(f"\nError: {e}")


async def single_query_mode(anytool: AnyTool, query: str, ui_manager: UIManager):
    """Print the single-query header and execute ``query`` once."""
    header_title = "▶ Single Query Execution"
    CLIDisplay.print_task_header(query, title=header_title)
    await _execute_task(anytool, query, ui_manager)


def _print_status(anytool: AnyTool):
    """Render a boxed overview of the tool layer's state on stdout.

    Always shows whether the layer is initialized and running plus the
    configured model; once initialized, also lists the backends and the number
    of active sessions.
    """
    from anytool.utils.display import Box, BoxStyle

    frame = Box(width=70, style=BoxStyle.ROUNDED, color='bl')

    # Header
    print()
    title = colorize("System Status", 'bl', bold=True)
    print(frame.text_line(title, align='center', indent=4, text_color=''))
    print(frame.separator_line(indent=4))

    # Body rows
    initialized_label = colorize(
        'Yes' if anytool.is_initialized() else 'No',
        'g' if anytool.is_initialized() else 'rd',
    )
    running_label = colorize(
        'Yes' if anytool.is_running() else 'No',
        'y' if anytool.is_running() else 'g',
    )
    model_label = colorize(anytool.config.llm_model, 'c')
    rows = [
        f"Initialized: {initialized_label}",
        f"Running: {running_label}",
        f"Model: {model_label}",
    ]

    if anytool.is_initialized():
        backend_names = ', '.join(anytool.list_backends())
        rows.append(f"Backends: {colorize(backend_names, 'c')}")

        session_count = str(len(anytool.list_sessions()))
        rows.append(f"Active Sessions: {colorize(session_count, 'y')}")

    for row in rows:
        print(frame.text_line(f"  {row}", indent=4, text_color=''))

    # Footer
    print(frame.bottom_line(indent=4))
    print()


def _create_argument_parser() -> argparse.ArgumentParser:
    """Create command-line argument parser"""
    parser = argparse.ArgumentParser(
        description='AnyTool - Universal Tool-Use Layer for AI Agents',
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    w
    # Subcommands
    subparsers = parser.add_subparsers(dest='command', help='Available commands')
    
    # refresh-cache subcommand
    cache_parser = subparsers.add_parser(
        'refresh-cache',
        help='Refresh MCP tool cache (starts all servers once)'
    )
    cache_parser.add_argument(
        '--config', '-c', type=str,
        help='MCP configuration file path'
    )
    
    # Basic arguments (for run mode)
    parser.add_argument('--config', '-c', type=str, help='Configuration file path (JSON format)')
    parser.add_argument('--query', '-q', type=str, help='Single query mode: execute query directly')
    
    # LLM arguments
    parser.add_argument('--model', '-m', type=str, help='LLM model name')
    
    # Logging arguments
    parser.add_argument('--log-level', type=str, choices=['DEBUG', 'INFO', 'WARNING', 'ERROR'], help='Log level')
    
    # Execution arguments
    parser.add_argument('--max-iterations', type=int, help='Maximum iteration count')
    parser.add_argument('--timeout', type=float, help='LLM API call timeout (seconds)')
    
    # UI arguments
    parser.add_argument('--interactive', '-i', action='store_true', help='Force interactive mode')
    parser.add_argument('--no-ui', action='store_true', help='Disable visualization UI')
    parser.add_argument('--ui-compact', action='store_true', help='Use compact UI layout')
    
    return parser


async def refresh_mcp_cache(config_path: Optional[str] = None):
    """Refresh MCP tool cache by starting servers one by one and saving tool metadata.

    For every server reported by the MCP provider this either reuses the entry
    already present in the tool cache, or opens a session, lists the server's
    tools, stores their name/description/parameters in the cache and closes the
    session again. Failures (including a per-server timeout) are recorded in
    the cache and summarised on stdout; they do not abort the run.

    Args:
        config_path: Optional configuration file passed to ``load_config``.
            When omitted, ``get_config()`` supplies the configuration.

    Returns:
        None. Progress and the final summary are printed to stdout.
    """
    from anytool.grounding.backends.mcp import MCPProvider, get_tool_cache
    from anytool.grounding.core.types import SessionConfig, BackendType
    from anytool.config import load_config, get_config
    
    print("Refreshing MCP tool cache...")
    print("Servers will be started one by one (start -> get tools -> close).")
    print()
    
    # Load config
    if config_path:
        config = load_config(config_path)
    else:
        config = get_config()
    
    # Get MCP config
    # (falls back to an empty dict; objects exposing ``model_dump`` are
    # converted to a plain dict so the key assignment below works)
    mcp_config = getattr(config, 'mcp', None) or {}
    if hasattr(mcp_config, 'model_dump'):
        mcp_config = mcp_config.model_dump()
    
    # Skip dependency checks for refresh-cache (servers are pre-validated)
    mcp_config["check_dependencies"] = False
    
    # Create provider
    provider = MCPProvider(config=mcp_config)
    await provider.initialize()
    
    servers = provider.list_servers()
    total = len(servers)
    print(f"Found {total} MCP servers configured")
    print()
    
    cache = get_tool_cache()
    cache.set_server_order(servers)  # Preserve config order when saving
    # Running totals for the final summary.
    total_tools = 0
    success_count = 0
    skipped_count = 0
    failed_servers = []  # list of (server_name, error_message)
    
    # Load existing cache to skip already processed servers
    existing_cache = cache.get_all_tools()
    
    # Timeout for each server (in seconds)
    SERVER_TIMEOUT = 60
    
    # Process servers one by one
    for i, server_name in enumerate(servers, 1):
        # Skip if already cached (resume support)
        if server_name in existing_cache:
            cached_tools = existing_cache[server_name]
            total_tools += len(cached_tools)
            skipped_count += 1
            print(f"[{i}/{total}] {server_name}... ⏭ cached ({len(cached_tools)} tools)")
            continue
        
        print(f"[{i}/{total}] {server_name}...", end=" ", flush=True)
        # The same id names the session at creation and in every close call below.
        session_id = f"mcp-{server_name}"
        
        try:
            # Create session and get tools with timeout protection
            # NOTE: asyncio.timeout() is only available on Python 3.11+.
            async with asyncio.timeout(SERVER_TIMEOUT):
                # Create session for this server
                cfg = SessionConfig(
                    session_name=session_id,
                    backend_type=BackendType.MCP,
                    connection_params={"server": server_name},
                )
                session = await provider.create_session(cfg)
                
                # Get tools from this server
                tools = await session.list_tools()
            
            # Convert to metadata format
            tool_metadata = []
            for tool in tools:
                tool_metadata.append({
                    "name": tool.schema.name,
                    "description": tool.schema.description or "",
                    "parameters": tool.schema.parameters or {},
                })
            
            # Save to cache (incremental)
            cache.save_server(server_name, tool_metadata)
            
            # Close session immediately to free resources
            await provider.close_session(session_id)
            
            total_tools += len(tools)
            success_count += 1
            print(f"✓ {len(tools)} tools")
        
        except asyncio.TimeoutError:
            error_msg = f"Timeout after {SERVER_TIMEOUT}s"
            failed_servers.append((server_name, error_msg))
            print(f"✗ {error_msg}")
            
            # Save failed server info to cache
            cache.save_failed_server(server_name, error_msg)
            
            # Try to close session if it was created
            # (best effort: errors while closing are deliberately ignored)
            try:
                await provider.close_session(session_id)
            except Exception:
                pass
            
        except Exception as e:
            error_msg = str(e)
            failed_servers.append((server_name, error_msg))
            # Only the first 50 characters are shown inline; the full message
            # is what gets passed to the cache below.
            print(f"✗ {error_msg[:50]}")
            
            # Save failed server info to cache
            cache.save_failed_server(server_name, error_msg)
            
            # Try to close session if it was created
            # (best effort: errors while closing are deliberately ignored)
            try:
                await provider.close_session(session_id)
            except Exception:
                pass
    
    # Final summary
    print()
    print(f"{'='*50}")
    print(f"✓ Collected {total_tools} tools from {success_count + skipped_count}/{total} servers")
    if skipped_count > 0:
        print(f"  (skipped {skipped_count} cached, processed {success_count} new)")
    print(f"✓ Cache saved to: {cache.cache_path}")
    
    if failed_servers:
        print(f"✗ Failed servers ({len(failed_servers)}):")
        # Show at most ten failures, each truncated to 60 characters.
        for name, err in failed_servers[:10]:
            print(f"  - {name}: {err[:60]}")
        if len(failed_servers) > 10:
            print(f"  ... and {len(failed_servers) - 10} more (see cache file for details)")
    
    print()
    print("Done! Future list_tools() calls will use cache (no server startup).")


def _load_config(args) -> AnyToolConfig:
    """Build the runtime configuration from an optional JSON file and CLI flags.

    CLI values take precedence over values read from the file. When
    ``--log-level`` is given it is also applied to the logger immediately.
    Any failure while reading or constructing the configuration is logged and
    terminates the process with exit status 1.

    Args:
        args: Parsed command-line namespace.

    Returns:
        The resulting ``AnyToolConfig``.
    """
    # (config field, CLI value, whether the flag was supplied)
    candidates = (
        ('llm_model', args.model, bool(args.model)),
        ('grounding_max_iterations', args.max_iterations, args.max_iterations is not None),
        ('llm_timeout', args.timeout, args.timeout is not None),
        ('log_level', args.log_level, bool(args.log_level)),
    )
    overrides = {field: value for field, value, supplied in candidates if supplied}

    try:
        if not args.config:
            # No file: defaults plus whatever the CLI supplied.
            config = AnyToolConfig(**overrides)
            print("✓ Using default configuration")
        else:
            import json
            with open(args.config, 'r', encoding='utf-8') as handle:
                file_settings = json.load(handle)

            # CLI flags win over file values.
            file_settings.update(overrides)
            config = AnyToolConfig(**file_settings)

            print(f"✓ Loaded from config file: {args.config}")

        if overrides:
            print(f"✓ CLI overrides: {', '.join(overrides)}")

        if args.log_level:
            Logger.set_level(args.log_level)

        return config

    except Exception as exc:
        logger.error(f"Failed to load configuration: {exc}")
        sys.exit(1)


def _setup_ui(args) -> tuple[Optional[AnyToolUI], Optional[UIIntegration]]:
    """Print the banner and create the UI objects unless ``--no-ui`` was given.

    Returns:
        ``(ui, ui_integration)`` when the UI is enabled, otherwise ``(None, None)``.
    """
    if not args.no_ui:
        live_ui = create_ui(enable_live=True, compact=args.ui_compact)
        live_ui.print_banner()
        return live_ui, UIIntegration(live_ui)

    # UI disabled: plain CLI banner only.
    CLIDisplay.print_banner()
    return None, None


async def _initialize_anytool(config: AnyToolConfig, args) -> AnyTool:
    """Create and initialize the tool layer, reporting progress on stdout.

    When no config file was given on the command line, the project loggers are
    raised to WARNING for the duration of ``initialize()`` and then returned to
    the level each of them had before, also when initialization fails.

    Args:
        config: Runtime configuration for the tool layer.
        args: Parsed command-line namespace (only ``args.config`` is read).

    Returns:
        The initialized ``AnyTool`` instance.
    """
    anytool = AnyTool(config)

    init_steps = [("Initializing AnyTool...", "loading")]
    CLIDisplay.print_initialization_progress(init_steps, show_header=False)

    # Remember each logger's own level; restoring all of them to the level of
    # the "anytool" logger would clobber levels configured on the children.
    saved_levels = {}
    if not args.config:
        for log_name in ["anytool", "anytool.grounding", "anytool.agents"]:
            log = Logger.get_logger(log_name)
            saved_levels[log_name] = log.level
            log.setLevel(logging.WARNING)

    try:
        await anytool.initialize()
    finally:
        # Restore log levels even if initialization raised.
        for log_name, level in saved_levels.items():
            Logger.get_logger(log_name).setLevel(level)

    # Print initialization results
    backends = anytool.list_backends()
    init_steps = [
        ("LLM Client", "ok"),
        (f"Grounding Backends ({len(backends)} available)", "ok"),
        ("Grounding Agent", "ok"),
    ]

    if config.enable_recording:
        init_steps.append(("Recording Manager", "ok"))

    CLIDisplay.print_initialization_progress(init_steps, show_header=True)

    return anytool


async def main():
    """
    Async CLI entry point.

    Parses the command line, handles the ``refresh-cache`` subcommand, then
    loads configuration, sets up the UI, initializes AnyTool and runs either a
    single query or the interactive loop.

    Returns:
        ``0`` on normal completion (including after a keyboard interrupt or the
        ``refresh-cache`` subcommand), ``1`` when an unexpected exception occurs.
    """
    cli_args = _create_argument_parser().parse_args()

    # Subcommands short-circuit the normal startup path.
    if cli_args.command == 'refresh-cache':
        await refresh_mcp_cache(cli_args.config)
        return 0

    config = _load_config(cli_args)
    ui, ui_integration = _setup_ui(cli_args)
    CLIDisplay.print_configuration(config)

    anytool = None

    try:
        anytool = await _initialize_anytool(config, cli_args)

        # Wire the UI to the clients only when a UI was created.
        if ui_integration:
            ui_integration.attach_llm_client(anytool._llm_client)
            ui_integration.attach_grounding_client(anytool._grounding_client)
            CLIDisplay.print_system_ready()

        ui_manager = UIManager(ui, ui_integration)

        # No query on the command line means interactive mode.
        if not cli_args.query:
            await interactive_mode(anytool, ui_manager)
        else:
            await single_query_mode(anytool, cli_args.query, ui_manager)

    except KeyboardInterrupt:
        print("\n\nInterrupt signal detected")
    except Exception as e:
        logger.error(f"Error: {e}", exc_info=True)
        print(f"\nError: {e}")
        return 1
    finally:
        # Cleanup only applies once initialization has produced an instance.
        if anytool:
            print("\nCleaning up resources...")
            await anytool.cleanup()

    print("\nGoodbye!")
    return 0


def run_main():
    """
    Synchronous wrapper around ``main()``.

    Runs the coroutine with ``asyncio.run`` and exits the process with its
    return value; a keyboard interrupt exits with status 0.
    """
    try:
        sys.exit(asyncio.run(main()))
    except KeyboardInterrupt:
        print("\n\nProgram interrupted")
        sys.exit(0)


# Script entry point: start the CLI only when this module is executed directly.
if __name__ == "__main__":
    run_main()

================================================
FILE: anytool/agents/__init__.py
================================================
from anytool.agents.base import BaseAgent, AgentStatus, AgentRegistry
from anytool.agents.grounding_agent import GroundingAgent

# Public names exported by this package's __init__ (all imported above).
__all__ = [
    "BaseAgent",
    "AgentStatus",
    "AgentRegistry",
    "GroundingAgent",
]

================================================
FILE: anytool/agents/base.py
================================================
from __future__ import annotations

import json
from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, Dict, List, Optional, Type, Any

from anytool.utils.logging import Logger

if TYPE_CHECKING:
    from anytool.llm import LLMClient
    from anytool.grounding.core.grounding_client import GroundingClient
    from anytool.recording import RecordingManager

logger = Logger.get_logger(__name__)


class BaseAgent(ABC):
    """
    Abstract base class for agents.

    Holds the agent's name, backend scope, optional LLM / grounding / recording
    collaborators, a step counter and a status string. Subclasses must
    implement ``process`` and ``construct_messages``.
    """

    def __init__(
        self,
        name: str,
        backend_scope: Optional[List[str]] = None,
        llm_client: Optional[LLMClient] = None,
        grounding_client: Optional[GroundingClient] = None,
        recording_manager: Optional[RecordingManager] = None,
    ) -> None:
        """
        Initialize the BaseAgent.
        
        Args:
            name: Unique name for the agent
            backend_scope: List of backend types this agent can access (e.g., ["gui", "shell", "mcp", "web", "system"])
            llm_client: LLM client for agent reasoning (optional, can be set later)
            grounding_client: Reference to GroundingClient for tool execution
            recording_manager: RecordingManager for recording execution
        """
        self._name = name
        self._grounding_client: Optional[GroundingClient] = grounding_client
        self._backend_scope = backend_scope or []
        self._llm_client = llm_client
        self._recording_manager: Optional[RecordingManager] = recording_manager
        self._step = 0
        self._status = AgentStatus.ACTIVE

        # Called through the instance, so the classmethod receives the concrete subclass.
        self._register_self()
        logger.info(f"Initialized {self.__class__.__name__}: {name}")

    @property
    def name(self) -> str:
        """Get the agent name."""
        return self._name

    @property
    def grounding_client(self) -> Optional[GroundingClient]:
        """Get the grounding client."""
        return self._grounding_client

    @property
    def backend_scope(self) -> List[str]:
        """Get the list of backend names this agent may use."""
        return self._backend_scope

    @property
    def llm_client(self) -> Optional[LLMClient]:
        """Get the LLM client (may be None until one is assigned)."""
        return self._llm_client

    @llm_client.setter
    def llm_client(self, client: LLMClient) -> None:
        self._llm_client = client

    @property
    def recording_manager(self) -> Optional[RecordingManager]:
        """Get the recording manager."""
        return self._recording_manager

    @property
    def step(self) -> int:
        """Get the current step counter (starts at 0)."""
        return self._step

    @property
    def status(self) -> str:
        """Get the current status string (initialized to AgentStatus.ACTIVE)."""
        return self._status

    @abstractmethod
    async def process(self, context: Dict[str, Any]) -> Dict[str, Any]:
        """Handle one request described by ``context`` and return a result dict."""
        pass

    @abstractmethod
    def construct_messages(self, context: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Construct messages for LLM reasoning.
        Context must contain 'instruction' key.
        """
        pass

    async def get_llm_response(
        self,
        messages: List[Dict[str, Any]],
        tools: Optional[List] = None,
        **kwargs
    ) -> Dict[str, Any]:
        """
        Forward ``messages`` (and optional ``tools``) to the LLM client's ``complete``.

        Raises:
            ValueError: If no LLM client has been set.
            Exception: Any error from the LLM client is logged and re-raised.
        """
        if not self._llm_client:
            raise ValueError(f"LLM client not initialized for agent {self.name}")

        try:
            response = await self._llm_client.complete(
                messages=messages,
                tools=tools,
                **kwargs
            )
            return response
        except Exception as e:
            logger.error(f"{self.name}: LLM call failed: {e}", exc_info=True)
            raise

    def response_to_dict(self, response: str) -> Dict[str, Any]:
        """
        Parse an LLM text response as JSON.

        A response wrapped in a Markdown code fence (``` or ```json) is
        unwrapped first. If parsing fails only because extra text follows a
        valid JSON value, that leading value is returned and the extra text is
        logged.

        Returns:
            The decoded JSON value, or ``{"error": ..., "raw": ...}`` when the
            text cannot be parsed.
        """
        try:
            stripped = response.strip()
            if stripped.startswith("```"):
                # Drop the opening fence line, then cut at the first closing fence.
                lines = stripped.split('\n')[1:]
                end_idx = len(lines)
                for i, line in enumerate(lines):
                    if line.strip() == '```':
                        end_idx = i
                        break
                response = '\n'.join(lines[:end_idx])

            return json.loads(response)
        except json.JSONDecodeError as e:
            # If parsing fails, try to find and extract just the JSON object/array
            if "Extra data" in str(e):
                try:
                    # raw_decode (unlike json.loads) does not skip leading
                    # whitespace, so strip it before decoding.
                    candidate = response.lstrip()
                    obj, idx = json.JSONDecoder().raw_decode(candidate)
                    logger.warning(
                        f"{self.name}: Successfully extracted JSON but found extra text after position {idx}. "
                        f"Extra text: {candidate[idx:idx+100]}..."
                    )
                    return obj
                except Exception as e2:
                    logger.error(f"{self.name}: Failed to extract JSON even with raw_decode: {e2}")

            logger.error(f"{self.name}: Failed to parse response: {e}")
            logger.error(f"{self.name}: Response content: {response[:500]}")
            return {"error": "Failed to parse response", "raw": response}

    def increment_step(self) -> None:
        """Advance the step counter by one."""
        self._step += 1

    @classmethod
    def _register_self(cls) -> None:
        """Register the agent class in the registry upon instantiation."""
        # Skip BaseAgent itself and classes whose name is already registered.
        if cls.__name__ != "BaseAgent" and cls.__name__ not in AgentRegistry._registry:
            AgentRegistry.register(cls.__name__, cls)

    def __repr__(self) -> str:
        return f"<{self.__class__.__name__}(name={self.name}, step={self.step}, status={self.status})>"


class AgentStatus:
    """Constants for agent status."""
    # Plain string values; BaseAgent.__init__ starts every agent as ACTIVE.
    ACTIVE = "active"
    IDLE = "idle"
    WAITING = "waiting"


class AgentRegistry:
    """
    Class-level lookup table mapping a name to an agent class.

    All state lives in the shared ``_registry`` dict, so every operation is a
    classmethod and no instance is required.
    """

    _registry: Dict[str, Type[BaseAgent]] = {}

    @classmethod
    def register(cls, name: str, agent_cls: Type[BaseAgent]) -> None:
        """Store ``agent_cls`` under ``name``; an existing entry is replaced after a warning."""
        already_present = name in cls._registry
        if already_present:
            logger.warning(f"Agent class '{name}' already registered, overwriting")
        cls._registry.update({name: agent_cls})
        logger.debug(f"Registered agent class: {name}")

    @classmethod
    def get_cls(cls, name: str) -> Type[BaseAgent]:
        """Return the class stored under ``name``; raise ValueError when it is unknown."""
        if name in cls._registry:
            return cls._registry[name]
        raise ValueError(f"No agent class registered under '{name}'")

    @classmethod
    def list_registered(cls) -> List[str]:
        """Return the registered names as a new list."""
        return [registered_name for registered_name in cls._registry]

    @classmethod
    def clear(cls) -> None:
        """Remove every entry from the registry."""
        cls._registry.clear()
        logger.debug("Agent registry cleared")

================================================
FILE: anytool/agents/grounding_agent.py
================================================
from __future__ import annotations

import copy
import json
from typing import TYPE_CHECKING, Any, Dict, List, Optional

from anytool.agents.base import BaseAgent
from anytool.grounding.core.types import BackendType, ToolResult
from anytool.platform.screenshot import ScreenshotClient
from anytool.prompts import GroundingAgentPrompts
from anytool.utils.logging import Logger

if TYPE_CHECKING:
    from anytool.llm import LLMClient
    from anytool.grounding.core.grounding_client import GroundingClient
    from anytool.recording import RecordingManager

logger = Logger.get_logger(__name__)


class GroundingAgent(BaseAgent):
    def __init__(
        self,
        name: str = "GroundingAgent",
        backend_scope: Optional[List[str]] = None,
        llm_client: Optional[LLMClient] = None,
        grounding_client: Optional[GroundingClient] = None,
        recording_manager: Optional[RecordingManager] = None,
        system_prompt: Optional[str] = None,
        max_iterations: int = 15,
        visual_analysis_timeout: float = 30.0,
        tool_retrieval_llm: Optional[LLMClient] = None,
        visual_analysis_model: Optional[str] = None,
    ) -> None:
        """
        Set up a Grounding Agent.

        Args:
            name: Agent name
            backend_scope: Backends the agent may use; a falsy value selects
                ["gui", "shell", "mcp", "web", "system"]
            llm_client: LLM client used for reasoning
            grounding_client: GroundingClient used for tool execution
            recording_manager: RecordingManager used to record execution
            system_prompt: Custom system prompt; a falsy value selects the default prompt
            max_iterations: Upper bound on LLM reasoning iterations per request
            visual_analysis_timeout: Timeout in seconds for visual analysis LLM calls
            tool_retrieval_llm: LLM client for tool retrieval (None = use llm_client)
            visual_analysis_model: Model name for visual analysis
        """
        default_scope = ["gui", "shell", "mcp", "web", "system"]
        effective_scope = backend_scope if backend_scope else default_scope
        super().__init__(
            name=name,
            backend_scope=effective_scope,
            llm_client=llm_client,
            grounding_client=grounding_client,
            recording_manager=recording_manager
        )

        if system_prompt:
            self._system_prompt = system_prompt
        else:
            self._system_prompt = self._default_system_prompt()
        self._max_iterations = max_iterations
        self._visual_analysis_timeout = visual_analysis_timeout
        self._tool_retrieval_llm = tool_retrieval_llm
        self._visual_analysis_model = visual_analysis_model

        # Summarize the effective settings in the log.
        logger.info(f"Grounding Agent initialized: {name}")
        logger.info(f"Backend scope: {self._backend_scope}")
        logger.info(f"Max iterations: {self._max_iterations}")
        logger.info(f"Visual analysis timeout: {self._visual_analysis_timeout}s")
        if tool_retrieval_llm:
            logger.info(f"Tool retrieval model: {tool_retrieval_llm.model}")
        if visual_analysis_model:
            logger.info(f"Visual analysis model: {visual_analysis_model}")
    
    def _truncate_messages(
        self, 
        messages: List[Dict[str, Any]], 
        keep_recent: int = 8,
        max_tokens_estimate: int = 120000
    ) -> List[Dict[str, Any]]:
        if len(messages) <= keep_recent + 2:  # +2 for system and initial user
            return messages
        
        total_text = json.dumps(messages, ensure_ascii=False)
        estimated_tokens = len(total_text) // 4
        
        if estimated_tokens < max_tokens_estimate:
            return messages
        
        logger.info(f"Truncating message history: {len(messages)} messages, "
                   f"~{estimated_tokens:,} tokens -> keeping recent {keep_recent} rounds")
        
        system_messages = []
        user_instruction = None
        conversation_messages = []
        
        for msg in messages:
            role = msg.get("role")
            if role == "system":
                system_messages.append(msg)
            elif role == "user" and user_instruction is None:
                user_instruction = msg
            else:
                conversation_messages.append(msg)
        
        recent_messages = conversation_messages[-(keep_recent * 2):] if conversation_messages else []
        
        truncated = system_messages.copy()
        if user_instruction:
            truncated.append(user_instruction)
        truncated.extend(recent_messages)
        
        logger.info(f"After truncation: {len(truncated)} messages, "
                   f"~{len(json.dumps(truncated, ensure_ascii=False))//4:,} tokens (estimated)")
        
        return truncated
    
    async def process(self, context: Dict[str, Any]) -> Dict[str, Any]:
        """
        Process a task execution request with multi-round iteration control.

        Each round calls the LLM client once (optionally letting it execute
        tools), records the round, and stops when the assistant content contains
        ``GroundingAgentPrompts.TASK_COMPLETE``, when the iteration cap is
        reached, or after too many consecutive empty responses.

        Args:
            context: Request data. Must contain a non-empty ``"instruction"``.
                ``"max_iterations"`` and ``"auto_execute"`` are read when present;
                ``"workspace_artifacts"`` is written when the workspace has files.

        Returns:
            The dict built by ``_build_final_result`` on success, or a dict with
            ``"status": "error"`` and an ``"error"`` message on failure.
        """
        instruction = context.get("instruction", "")
        if not instruction:
            logger.error("Grounding Agent: No instruction provided")
            return {"error": "No instruction provided", "status": "error"}

        # Store current instruction for visual analysis context
        self._current_instruction = instruction

        logger.info(f"Grounding Agent: Processing instruction at step {self.step}")

        # Check for files that already exist in the workspace
        workspace_info = await self._check_workspace_artifacts(context)
        if workspace_info["has_files"]:
            context["workspace_artifacts"] = workspace_info
            logger.info(f"Workspace has {len(workspace_info['files'])} existing files: {workspace_info['files']}")

        # Get available tools (auto-search with cap)
        tools = await self._get_available_tools(instruction)

        # Get search debug info (similarity scores, LLM selections)
        search_debug_info = None
        if self.grounding_client:
            search_debug_info = self.grounding_client.get_last_search_debug_info()

        # Build retrieved tools list for return value
        retrieved_tools_list = []
        for tool in tools:
            tool_info = {
                "name": getattr(tool, "name", str(tool)),
                "description": getattr(tool, "description", ""),
            }
            if hasattr(tool, "backend_type"):
                tool_info["backend"] = tool.backend_type.value if hasattr(tool.backend_type, "value") else str(tool.backend_type)
            if hasattr(tool, "_runtime_info") and tool._runtime_info:
                tool_info["server_name"] = tool._runtime_info.server_name

            # Add similarity score if available (first matching entry wins)
            if search_debug_info and search_debug_info.get("tool_scores"):
                for score_info in search_debug_info["tool_scores"]:
                    if score_info["name"] == tool_info["name"]:
                        tool_info["similarity_score"] = score_info["score"]
                        break

            retrieved_tools_list.append(tool_info)

        # Record retrieved tools
        if self._recording_manager:
            from anytool.recording import RecordingManager
            await RecordingManager.record_retrieved_tools(
                task_instruction=instruction,
                tools=tools,
                search_debug_info=search_debug_info,
            )

        # Initialize iteration state
        max_iterations = context.get("max_iterations", self._max_iterations)
        current_iteration = 0
        all_tool_results = []
        iteration_contexts = []
        consecutive_empty_responses = 0  # Track consecutive empty LLM responses
        MAX_CONSECUTIVE_EMPTY = 5  # Exit after this many empty responses

        # Build initial messages
        messages = self.construct_messages(context)

        try:
            while current_iteration < max_iterations:
                current_iteration += 1
                logger.info(f"Grounding Agent: Iteration {current_iteration}/{max_iterations}")

                # Truncate message history to prevent context length issues
                # Start truncating after 5 iterations to keep context manageable
                if current_iteration >= 5:
                    messages = self._truncate_messages(
                        messages, 
                        keep_recent=8,  # keep the 8 most recent conversation rounds
                        max_tokens_estimate=120000  # Claude Sonnet 4.5 has a 200K context limit; stay conservative at 120K
                    )

                messages_input_snapshot = copy.deepcopy(messages)

                # [DISABLED] Iteration summary generation
                # Tool results (including visual analysis) are already in context,
                # LLM can make decisions directly without separate summary.
                # To re-enable, uncomment below and pass iteration_summary_prompt to complete()
                # iteration_summary_prompt = GroundingAgentPrompts.iteration_summary(
                #     instruction=instruction,
                #     iteration=current_iteration,
                #     max_iterations=max_iterations
                # ) if context.get("auto_execute", True) else None

                # Call LLMClient for single round
                # LLM will decide whether to call tools or finish with <COMPLETE>
                llm_response = await self._llm_client.complete(
                    messages=messages,
                    tools=tools if context.get("auto_execute", True) else None,
                    execute_tools=context.get("auto_execute", True),
                    summary_prompt=None,  # Disabled
                    tool_result_callback=self._visual_analysis_callback
                )

                # Update messages with LLM response
                messages = llm_response["messages"]

                # Collect tool results
                tool_results_this_iteration = llm_response.get("tool_results", [])
                if tool_results_this_iteration:
                    all_tool_results.extend(tool_results_this_iteration)

                # [DISABLED] Iteration summary logging
                # llm_summary = llm_response.get("iteration_summary")
                # if llm_summary:
                #     logger.info(f"Iteration {current_iteration} summary: {llm_summary[:150]}...")

                # A present-but-null "message" or "content" (e.g. a response that
                # only carries tool calls) must read as empty rather than None,
                # otherwise len() below raises and aborts the whole run.
                assistant_message = llm_response.get("message") or {}
                assistant_content = assistant_message.get("content") or ""

                has_tool_calls = llm_response.get('has_tool_calls', False)
                logger.info(f"Iteration {current_iteration} - Has tool calls: {has_tool_calls}, "
                          f"Tool results: {len(tool_results_this_iteration)}, "
                          f"Content length: {len(assistant_content)} chars")

                if len(assistant_content) > 0:
                    logger.info(f"Iteration {current_iteration} - Assistant content preview: {repr(assistant_content[:300])}")
                    consecutive_empty_responses = 0  # Reset counter on valid response
                else:
                    if not has_tool_calls:
                        consecutive_empty_responses += 1
                        logger.warning(f"Iteration {current_iteration} - NO tool calls and NO content "
                                     f"(empty response {consecutive_empty_responses}/{MAX_CONSECUTIVE_EMPTY})")

                        if consecutive_empty_responses >= MAX_CONSECUTIVE_EMPTY:
                            logger.error(f"Exiting due to {MAX_CONSECUTIVE_EMPTY} consecutive empty LLM responses. "
                                       "This may indicate API issues, rate limiting, or context too long.")
                            break
                    else:
                        consecutive_empty_responses = 0  # Reset if we have tool calls

                # Snapshot messages after LLM call (accumulated context)
                messages_output_snapshot = copy.deepcopy(messages)

                # Record iteration context
                iteration_context = {
                    "iteration": current_iteration,
                    "messages_input": messages_input_snapshot,
                    "messages_output": messages_output_snapshot,
                    "llm_response_summary": {
                        "assistant_content": assistant_content,
                        "has_tool_calls": has_tool_calls,
                        # "iteration_summary": llm_summary,  # Disabled with iteration summary
                        "tool_calls_count": len(tool_results_this_iteration),
                    },
                }
                iteration_contexts.append(iteration_context)

                # Real-time save to conversations.jsonl
                # NOTE(review): unlike record_retrieved_tools above, this call is not
                # guarded by self._recording_manager - confirm that is intended.
                from anytool.recording import RecordingManager
                await RecordingManager.record_iteration_context(
                    iteration=current_iteration,
                    messages_input=messages_input_snapshot,
                    messages_output=messages_output_snapshot,
                    llm_response_summary=iteration_context["llm_response_summary"],
                )

                # Check for completion token in assistant content
                # [DISABLED] Also check in iteration summary when enabled
                # is_complete = (
                #     GroundingAgentPrompts.TASK_COMPLETE in assistant_content or
                #     (llm_summary and GroundingAgentPrompts.TASK_COMPLETE in llm_summary)
                # )
                is_complete = GroundingAgentPrompts.TASK_COMPLETE in assistant_content

                if is_complete:
                    # Task is complete - LLM generated completion token
                    logger.info(f"Task completed at iteration {current_iteration} (found {GroundingAgentPrompts.TASK_COMPLETE})")
                    break

                # LLM didn't generate <COMPLETE>, continue to next iteration
                if tool_results_this_iteration:
                    logger.debug(f"Task in progress, LLM called {len(tool_results_this_iteration)} tools")
                else:
                    logger.debug(f"Task in progress, LLM did not generate <COMPLETE>")

                # Remove previous iteration guidance to avoid accumulation
                messages = [
                    msg for msg in messages 
                    if not (
                        msg.get("role") == "system"
                        and "Iteration" in (msg.get("content") or "")
                        and "complete" in (msg.get("content") or "")
                    )
                ]

                guidance_msg = {
                    "role": "system",
                    "content": f"Iteration {current_iteration} complete. "
                               f"Check if task is finished - if yes, output {GroundingAgentPrompts.TASK_COMPLETE}. "
                               f"If not, continue with next action."
                }
                messages.append(guidance_msg)

                # [DISABLED] Full iteration feedback with summary
                # self._remove_previous_guidance(messages)
                # feedback_msg = self._build_iteration_feedback(
                #     iteration=current_iteration,
                #     llm_summary=llm_summary,
                #     add_guidance=True
                # )
                # if feedback_msg:
                #     messages.append(feedback_msg)
                #     logger.debug(f"Added iteration {current_iteration} feedback with guidance")

            # Build final result
            result = await self._build_final_result(
                instruction=instruction,
                messages=messages,
                all_tool_results=all_tool_results,
                iterations=current_iteration,
                max_iterations=max_iterations,
                iteration_contexts=iteration_contexts,
                retrieved_tools_list=retrieved_tools_list,
                search_debug_info=search_debug_info,
            )

            # Record agent action to recording manager
            if self._recording_manager:
                await self._record_agent_execution(result, instruction)

            # Increment step
            self.increment_step()

            logger.info(f"Grounding Agent: Execution completed with status: {result.get('status')}")
            return result

        except Exception as e:
            # Keep the traceback in the log; the caller only receives str(e).
            logger.error(f"Grounding Agent: Execution failed: {e}", exc_info=True)
            result = {
                "error": str(e),
                "status": "error",
                "instruction": instruction,
                "iteration": current_iteration
            }
            self.increment_step()
            return result
    
    def _default_system_prompt(self) -> str:
        """Return ``GroundingAgentPrompts.SYSTEM_PROMPT``, used when no custom prompt is supplied."""
        stock_prompt = GroundingAgentPrompts.SYSTEM_PROMPT
        return stock_prompt

    def construct_messages(
        self,
        context: Dict[str, Any]
    ) -> List[Dict[str, Any]]:
        """
        Build the initial message list for a request.

        The list starts with the agent's system prompt, optionally followed by
        system messages about the workspace directory and existing workspace
        files, and ends with the user instruction.

        Raises:
            ValueError: If ``context`` has no non-empty ``"instruction"``.
        """
        def as_system(content):
            # Wrap text as a system-role message.
            return {"role": "system", "content": content}

        conversation = [as_system(self._system_prompt)]

        instruction = context.get("instruction", "")
        if not instruction:
            raise ValueError("context must contain 'instruction' field")

        # Workspace directory hint
        workspace_dir = context.get("workspace_dir")
        if workspace_dir:
            conversation.append(
                as_system(GroundingAgentPrompts.workspace_directory(workspace_dir))
            )

        # Existing-file hint: matching files take priority, then recent files
        # (two or more), otherwise the plain file list.
        artifacts = context.get("workspace_artifacts")
        if artifacts and artifacts.get("has_files"):
            all_files = artifacts.get("files", [])
            matching = artifacts.get("matching_files", [])
            recent = artifacts.get("recent_files", [])

            if matching:
                hint = GroundingAgentPrompts.workspace_matching_files(matching)
            elif len(recent) < 2:
                hint = GroundingAgentPrompts.workspace_file_list(all_files)
            else:
                hint = GroundingAgentPrompts.workspace_recent_files(
                    total_files=len(all_files),
                    recent_files=recent
                )

            conversation.append(as_system(hint))

        # The user instruction always comes last.
        conversation.append({"role": "user", "content": instruction})

        return conversation

    async def _get_available_tools(self, task_description: Optional[str]) -> List:
        """
        Retrieve tools with auto-search + cap to control prompt bloat.
        Falls back to returning all tools if search fails.

        Args:
            task_description: Text passed to the grounding client's auto-search.

        Returns:
            The tools chosen by auto-search; on any auto-search failure, the
            concatenation of ``list_tools`` results for each backend in scope
            (backends that fail are skipped). An empty list when no grounding
            client is set.
        """
        grounding_client = self.grounding_client
        if not grounding_client:
            return []

        try:
            # Convert inside the try so an unrecognized scope name triggers the
            # per-backend fallback below instead of propagating to the caller.
            backends = [BackendType(name) for name in self._backend_scope]

            # Use dedicated tool retrieval LLM if configured, otherwise use main LLM
            retrieval_llm = self._tool_retrieval_llm or self._llm_client
            tools = await grounding_client.get_tools_with_auto_search(
                task_description=task_description,
                backend=backends,
                use_cache=True,
                llm_callable=retrieval_llm,
            )
            logger.info(
                f"GroundingAgent selected {len(tools)} tools (auto-search) from {len(backends)} backends"
            )
            return tools
        except Exception as e:
            logger.warning(f"Auto-search tools failed, falling back to full list: {e}")

        # Fallback: fetch all tools (previous behaviour)
        all_tools = []
        for backend_name in self._backend_scope:
            try:
                backend_type = BackendType(backend_name)
                tools = await grounding_client.list_tools(backend=backend_type)
                all_tools.extend(tools)
                logger.debug(f"Retrieved {len(tools)} tools from backend: {backend_name}")
            except Exception as e:
                logger.debug(f"Could not get tools from {backend_name}: {e}")

        logger.info(
            f"GroundingAgent fallback retrieved {len(all_tools)} tools from {len(self._backend_scope)} backends"
        )
        return all_tools

    async def _visual_analysis_callback(
        self,
        result: ToolResult,
        tool_name: str,
        tool_call: Dict,
        backend: str
    ) -> ToolResult:
        """
        Callback for LLMClient to handle visual analysis after tool execution.
        """
        # 1. Check if LLM requested to skip visual analysis
        skip_visual_analysis = False
        try:
            arguments = tool_call.function.arguments
            if isinstance(arguments, str):
                args = json.loads(arguments.strip() or "{}")
            else:
                args = arguments
            
            if isinstance(args, dict) and args.get("skip_visual_analysis"):
                skip_visual_analysis = True
                logger.info(f"Visual analysis skipped for {tool_name} (meta-parameter set by LLM)")
        except Exception as e:
            logger.debug(f"Could not parse tool arguments: {e}")
        
        # 2. If skip requested, return original result
        if skip_visual_analysis:
            return result
        
        # 3. Check if this backend needs visual analysis
        if backend != "gui":
            return result
        
        # 4. Check if tool has visual data
        metadata = getattr(result, 'metadata', None)
        has_screenshots = metadata and (metadata.get("screenshot") or metadata.get("screenshots"))
        
        # 5. If no visual data, try to capture a screenshot
        if not has_screenshots:
            try:
                logger.info(f"No visual data from {tool_name}, capturing screenshot...")
                screenshot_client = ScreenshotClient()
                screenshot_bytes = await screenshot_client.capture()
                
                if screenshot_bytes:
                    # Add screenshot to result metadata
                    if metadata is None:
                        result.metadata = {}
                        metadata = result.metadata
                    metadata["screenshot"] = screenshot_bytes
                    has_screenshots = True
                    logger.info(f"Screenshot captured for visual analysis")
                else:
                    logger.warning("Failed to capture screenshot")
            except Exception as e:
                logger.warning(f"Error capturing screenshot: {e}")
        
        # 6. If still no screenshots, return original result
        if not has_screenshots:
            logger.debug(f"No visual data available for {tool_name}")
            return result
        
        # 7. Perform visual analysis
        return await self._enhance_result_with_visual_context(result, tool_name)
    
    async def _enhance_result_with_visual_context(
        self,
        result: ToolResult,
        tool_name: str
    ) -> ToolResult:
        """
        Append an LLM-written description of the result's screenshots to its content.

        Screenshots are read from ``result.metadata``: a non-empty ``"screenshots"``
        entry takes precedence over the single ``"screenshot"`` entry. At most three
        of them (chosen by ``_select_key_screenshots``) are sent, together with the
        visual-analysis prompt, to the configured model.

        Args:
            result: Tool result whose metadata may carry screenshots.
            tool_name: Name of the tool that produced ``result`` (used in the
                prompt and in log messages).

        Returns:
            A new ``ToolResult`` whose content has the analysis appended and whose
            metadata records it, or the original ``result`` unchanged when there is
            nothing to analyze, the model call times out, or any other error occurs.
        """
        import asyncio
        import base64
        import litellm

        try:
            metadata = getattr(result, 'metadata', None)
            if not metadata:
                return result

            # Gather raw screenshots; the multi-screenshot entry wins when present.
            screenshots_bytes = []
            multiple = metadata.get("screenshots")
            if multiple:
                if isinstance(multiple, list):
                    screenshots_bytes = [shot for shot in multiple if shot]
            elif metadata.get("screenshot"):
                screenshots_bytes = [metadata["screenshot"]]

            if not screenshots_bytes:
                return result

            # Cap the number of images sent to the model.
            selected_screenshots = self._select_key_screenshots(screenshots_bytes, max_count=3)

            # Raw bytes are base64-encoded; anything else is passed through as-is
            # (treated as already-encoded data).
            visual_b64_list = [
                base64.b64encode(item).decode('utf-8') if isinstance(item, bytes) else item
                for item in selected_screenshots
            ]
            num_screenshots = len(visual_b64_list)

            prompt = GroundingAgentPrompts.visual_analysis(
                tool_name=tool_name,
                num_screenshots=num_screenshots,
                task_description=getattr(self, '_current_instruction', '')
            )

            # One text part followed by one image part per screenshot.
            content = [{"type": "text", "text": prompt}]
            content.extend(
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/png;base64,{encoded}"
                    }
                }
                for encoded in visual_b64_list
            )

            # Model preference: dedicated visual model, then the main LLM client's
            # model, then a hard-coded fallback.
            visual_model = self._visual_analysis_model
            if not visual_model:
                if self._llm_client:
                    visual_model = self._llm_client.model
                else:
                    visual_model = "openrouter/anthropic/claude-sonnet-4.5"

            request_timeout = self._visual_analysis_timeout
            # The outer wait_for gets 5 extra seconds on top of the request timeout.
            response = await asyncio.wait_for(
                litellm.acompletion(
                    model=visual_model,
                    messages=[{
                        "role": "user",
                        "content": content
                    }],
                    timeout=request_timeout
                ),
                timeout=request_timeout + 5
            )

            analysis = response.choices[0].message.content.strip()

            # Append the analysis after the tool's own text output.
            base_text = result.content or "(no text output)"
            enhanced_result = ToolResult(
                status=result.status,
                content=f"{base_text}\n\n**Visual content**: {analysis}",
                error=result.error,
                metadata={**metadata, "visual_analyzed": True, "visual_analysis": analysis},
                execution_time=result.execution_time
            )

            logger.info(f"Enhanced {tool_name} result with visual analysis ({num_screenshots} screenshot(s))")
            return enhanced_result

        except asyncio.TimeoutError:
            logger.warning(f"Visual analysis timed out for {tool_name}, returning original result")
            return result
        except Exception as e:
            logger.warning(f"Failed to analyze visual content for {tool_name}: {e}")
            return result
    
    def _select_key_screenshots(
        self, 
        screenshots: List[bytes], 
        max_count: int = 3
    ) -> List[bytes]:
        """
        Select key screenshots if there are too many.
        """
        if len(screenshots) <= max_count:
            return screenshots
        
        selected_indices = set()
        
        # Always include last (final state)
        selected_indices.add(len(screenshots) - 1)
        
        # If room, include first (initial state)
        if max_count >= 2:
            selected_indices.add(0)
        
        # Fill remaining slots with evenly spaced middle screenshots
        remaining_slots = max_count - len(selected_indices)
        if remaining_slots > 0:
            # Calculate spacing
            available_indices = [
                i for i in range(1, len(screenshots) - 1)
                if i not in selected_indices
            ]
            
            if available_indices:
                step = max(1, len(available_indices) // (remaining_slots + 1))
                for i in range(remaining_slots):
                    idx = min((i + 1) * step, len(available_indices) - 1)
                    if idx < len(available_indices):
                        selected_indices.add(available_indices[idx])
        
        # Return screenshots in original order
        selected = [screenshots[i] for i in sorted(selected_indices)]
        
        logger.debug(
            f"Selected {len(selected)} screenshots at indices {sorted(selected_indices)} "
            f"from total of {len(screenshots)}"
        )
        
        return selected

    def _get_workspace_path(self, context: Dict[str, Any]) -> Optional[str]:
        """
        Get workspace directory path from context.
        """
        return context.get("workspace_dir")
    
    def _scan_workspace_files(
        self,
        workspace_path: str,
        recent_threshold: int = 600 # seconds
    ) -> Dict[str, Any]:
        """
        Scan workspace directory and collect file information.
        
        Args:
            workspace_path: Path to workspace directory
            recent_threshold: Threshold in seconds for recent files
            
        Returns:
            Dictionary with file information:
                - files: List of all filenames
                - file_details: Dict mapping filename to file info (size, modified, age_seconds)
                - recent_files: List of recently modified filenames
        """
        import os
        import time
        
        result = {
            "files": [],
            "file_details": {},
            "recent_files": []
        }
        
        if not workspace_path or not os.path.exists(workspace_path):
            return result
        
        # Recording system files to exclude from workspace scanning
        excluded_files = {"metadata.json", "traj.jsonl"}
        
        try:
            current_time = time.time()
            
            for filename in os.listdir(workspace_path):
                filepath = os.path.join(workspace_path, filename)
                if os.path.isfile(filepath) and filename not in excluded_files:
                    result["files"].append(filename)
                    
                    # Get file stats
                    stat = os.stat(filepath)
                    file_info = {
                        "size": stat.st_size,
                        "modified": stat.st_mtime,
                        "age_seconds": current_time - stat.st_mtime
                    }
                    result["file_details"][filename] = file_info
                    
                    # Track recently created/modified files
                    if file_info["age_seconds"] < recent_threshold:
                        result["recent_files"].append(filename)
            
            result["files"] = sorted(result["files"])
        
        except Exception as e:
            logger.debug(f"Error scanning workspace files: {e}")
        
        return result
    
    async def _check_workspace_artifacts(self, context: Dict[str, Any]) -> Dict[str, Any]:
        """
        Check workspace directory for existing artifacts that might be relevant to the task.
        Enhanced to detect if task might already be completed.
        """
        import re
        
        workspace_info = {"has_files": False, "files": [], "file_details": {}, "recent_files": []}
        
        try:
            # Get workspace path
            workspace_path = self._get_workspace_path(context)
            
            # Scan workspace files
            scan_result = self._scan_workspace_files(workspace_path, recent_threshold=600)
            
            if scan_result["files"]:
                workspace_info["has_files"] = True
                workspace_info["files"] = scan_result["files"]
                workspace_info["file_details"] = scan_result["file_details"]
                workspace_info["recent_files"] = scan_result["recent_files"]
                
                logger.info(f"Grounding Agent: Found {len(scan_result['files'])} existing files in workspace "
                           f"({len(scan_result['recent_files'])} recent)")
                
                # Check if instruction mentions specific filenames
                instruction = context.get("instruction", "")
                if instruction:
                    # Look for potential file references in instruction
                    potential_outputs = []
                    # Match common file patterns: filename.ext, "filename", 'filename'
                    file_patterns = re.findall(r'["\']?([a-zA-Z0-9_\-]+\.[a-zA-Z0-9]+)["\']?', instruction)
                    for pattern in file_patterns:
                        if pattern in scan_result["files"]:
                            potential_outputs.append(pattern)
                    
                    if potential_outputs:
                        workspace_info["matching_files"] = potential_outputs
                        logger.info(f"Grounding Agent: Found {len(potential_outputs)} files matching task: {potential_outputs}")
        
        except Exception as e:
            logger.debug(f"Could not check workspace artifacts: {e}")
        
        return workspace_info
    
    def _build_iteration_feedback(
        self,
        iteration: int,
        llm_summary: Optional[str] = None,
        add_guidance: bool = True
    ) -> Optional[Dict[str, str]]:
        """
        Build feedback message to add to next iteration.
        """
        if not llm_summary:
            return None
        
        feedback_content = GroundingAgentPrompts.iteration_feedback(
            iteration=iteration,
            llm_summary=llm_summary,
            add_guidance=add_guidance
        )
        
        return {
            "role": "system",
            "content": feedback_content
        }
    
    def _remove_previous_guidance(self, messages: List[Dict[str, Any]]) -> None:
        """
        Remove guidance section from previous iteration feedback messages.
        """
        for msg in messages:
            if msg.get("role") == "system":
                content = msg.get("content", "")
                # Check if this is an iteration feedback message with guidance
                if "## Iteration" in content and "Summary" in content and "---" in content:
                    # Remove everything from "---" onwards (the guidance part)
                    summary_only = content.split("---")[0].strip()
                    msg["content"] = summary_only

    async def _generate_final_summary(
        self,
        instruction: str,
        messages: List[Dict],
        iterations: int
    ) -> tuple[str, bool, List[Dict]]:
        """
        Ask the LLM for a final summary covering all iterations.

        The conversation is cleaned before being sent: messages with role
        ``"tool"`` are dropped and any ``"tool_calls"`` key is removed from the
        remaining messages. The final-summary prompt is appended as a user message.

        Returns:
            tuple[str, bool, List[Dict]]: (summary_text, success_flag, context_used)
                - summary_text: The generated summary, a generic fallback when the
                  LLM returned nothing, or an error message
                - success_flag: False only when the LLM call raised
                - context_used: Deep copy of the cleaned messages sent to the LLM
        """
        summary_request = {
            "role": "user",
            "content": GroundingAgentPrompts.final_summary(
                instruction=instruction,
                iterations=iterations
            )
        }

        # Drop tool results and strip tool_calls from shallow copies.
        clean_messages = [
            {key: value for key, value in msg.items() if key != "tool_calls"}
            for msg in messages
            if msg.get("role") != "tool"
        ]
        clean_messages.append(summary_request)

        # Snapshot taken before the LLM call so it reflects exactly what was sent.
        context_for_return = copy.deepcopy(clean_messages)

        try:
            # No tools are offered or executed for the summary call.
            summary_response = await self._llm_client.complete(
                messages=clean_messages,
                tools=None,
                execute_tools=False
            )

            final_summary = summary_response.get("message", {}).get("content", "")

            if not final_summary:
                logger.warning("LLM returned empty final summary")
                return f"Task completed after {iterations} iteration(s). Check execution history for details.", True, context_for_return

            logger.info(f"Generated final summary: {final_summary[:200]}...")
            return final_summary, True, context_for_return

        except Exception as e:
            logger.error(f"Error generating final summary: {e}")
            return f"Task completed after {iterations} iteration(s), but failed to generate summary: {str(e)}", False, context_for_return
    

    async def _build_final_result(
        self,
        instruction: str,
        messages: List[Dict],
        all_tool_results: List[Dict],
        iterations: int,
        max_iterations: int,
        iteration_contexts: List[Dict] = None,
        retrieved_tools_list: List[Dict] = None,
        search_debug_info: Dict[str, Any] = None,
    ) -> Dict[str, Any]:
        """
        Build final execution result.
        
        Args:
            instruction: Original instruction
            messages: Complete conversation history (including all iteration summaries)
            all_tool_results: All tool execution results
            iterations: Number of iterations performed
            max_iterations: Maximum allowed iterations
            iteration_contexts: Context snapshots for each iteration
            retrieved_tools_list: List of tools retrieved for this task
            search_debug_info: Debug info from tool search (similarity scores, LLM selections)
        """
        is_complete = self._check_task_completion(messages)
        
        tool_executions = self._format_tool_executions(all_tool_results)
        
        result = {
            "instruction": instruction,
            "step": self.step,
            "iterations": iterations,
            "tool_executions": tool_executions,
            "messages": messages,
            "iteration_contexts": iteration_contexts or [],
            "retrieved_tools_list": retrieved_tools_list or [],
            "search_debug_info": search_debug_info,
            "keep_session": True
        }
        
        if is_complete:
            logger.info("Task completed with <COMPLETE> marker")
            # Use LLM's own completion response directly (no extra LLM call needed)
            # LLM already generates a summary before outputting <COMPLETE>
            last_response = self._extract_last_assistant_message(messages)
            # Remove the <COMPLETE> token from response for cleaner output
            result["response"] = last_response.replace(GroundingAgentPrompts.TASK_COMPLETE, "").strip()
            result["status"] = "success"
            
            # [DISABLED] Extra LLM call to generate final summary
            # final_summary, summary_success, final_summary_context = await self._generate_final_summary(
            #     instruction=instruction,
            #     messages=messages,
            #     iterations=iterations
            # )
            # result["response"] = final_summary
            # result["final_summary_context"] = final_summary_context
        else:
            result["response"] = self._extract_last_assistant_message(messages)
            result["status"] = "incomplete"
            result["warning"] = (
                f"Task reached max iterations ({max_iterations}) without completion. "
                f"This may indicate the task needs more steps or clarification."
            )
        
        return result
    
    def _format_tool_executions(self, all_tool_results: List[Dict]) -> List[Dict]:
        executions = []
        for tr in all_tool_results:
            tool_result_obj = tr.get("result")
            tool_call = tr.get("tool_call")
            
            status = "unknown"
            if hasattr(tool_result_obj, 'status'):
                status_obj = tool_result_obj.status
                status = getattr(status_obj, 'value', status_obj)
            
            # Extract tool_name and arguments from tool_call object (litellm format)
            tool_name = "unknown"
            arguments = {}
            if tool_call is not None:
                if hasattr(tool_call, 'function'):
                    # tool_call is an object with .function attribute
                    tool_name = getattr(tool_call.function, 'name', 'unknown')
                    args_raw = getattr(tool_call.function, 'arguments', '{}')
                    if isinstance(args_raw, str):
                        try:
                            arguments = json.loads(args_raw) if args_raw.strip() else {}
                        except json.JSONDecodeError:
                            arguments = {}
                    else:
                        arguments = args_raw if isinstance(args_raw, dict) else {}
                elif isinstance(tool_call, dict):
                    # Fallback: tool_call is a dict
                    func = tool_call.get("function", {})
                    tool_name = func.get("name", "unknown")
                    args_raw = func.get("arguments", "{}")
                    if isinstance(args_raw, str):
                        try:
                            arguments = json.loads(args_raw) if args_raw.strip() else {}
                        except json.JSONDecodeError:
                            arguments = {}
                    else:
                        arguments = args_raw if isinstance(args_raw, dict) else {}
            
            executions.append({
                "tool_name": tool_name,
                "arguments": arguments,
                "backend": tr.get("backend"),
                "server_name": tr.get("server_name"),
                "status": status,
                "content": tool_result_obj.content if hasattr(tool_result_obj, 'content') else None,
                "error": tool_result_obj.error if hasattr(tool_result_obj, 'error') else None,
                "execution_time": tool_result_obj.execution_time if hasattr(tool_result_obj, 'execution_time') else None,
                "metadata": tool_result_obj.metadata if hasattr(tool_result_obj, 'metadata') else {},
            })
        return executions
    
    def _check_task_completion(self, messages: List[Dict]) -> bool:
        for msg in reversed(messages):
            if msg.get("role") == "assistant":
                content = msg.get("content", "")
                return GroundingAgentPrompts.TASK_COMPLETE in content
        return False
    
    def _extract_last_assistant_message(self, messages: List[Dict]) -> str:
        for msg in reversed(messages):
            if msg.get("role") == "assistant":
                return msg.get("content", "")
        return ""
    
    async def _record_agent_execution(
        self,
        result: Dict[str, Any],
        instruction: str
    ) -> None:
        """
        Record agent execution to recording manager.
        
        Args:
            result: Execution result
            instruction: Original instruction
        """
        if not self._recording_manager:
            return
        
        # Extract tool execution summary
        tool_summary = []
        if result.get("tool_executions"):
            for exec_info in result["tool_executions"]:
                tool_summary.append({
                    "tool": exec_info.get("tool_name", "unknown"),
                    "backend": exec_info.get("backend", "unknown"),
                    "status": exec_info.get("status", "unknown"),
                })
        
        await self._recording_manager.record_agent_action(
            agent_name=self.name,
            action_type="execute",
            input_data={"instruction": instruction},
            reasoning={
                "response": result.get("response", ""),
                "tools_selected": tool_summary,
            },
            output_data={
                "status": result.get("status", "unknown"),
                "iterations": result.get("iterations", 0),
                "num_tool_executions": len(result.get("tool_executions", [])),
            },
            metadata={
                "step": self.step,
                "instruction": instruction,
            }
        )

================================================
FILE: anytool/config/__init__.py
================================================
from .grounding import *
from .loader import *
from .constants import * 
from .utils import *
from . import constants

# Explicit public API of the config package. The hand-listed names below are
# expected to come from the star-imports above; everything exported by the
# constants module (constants.__all__) is appended at the end.
__all__ = [
    # Grounding Config
    "BackendConfig",
    "ShellConfig",
    "WebConfig",
    "MCPConfig",
    "GUIConfig",
    "ToolSearchConfig",
    "SessionConfig",
    "SecurityPolicy",
    "GroundingConfig",
    
    # Loader
    "CONFIG_DIR",
    "load_config",
    "get_config",
    "reset_config",
    "save_config",
    "load_agents_config",
    "get_agent_config",
    
    # Utils
    "get_config_value",
    "load_json_file",
    "save_json_file",
] + constants.__all__

================================================
FILE: anytool/config/config_agents.json
================================================
{
  "agents": [
    {
      "name": "GroundingAgent",
      "class_name": "GroundingAgent",
      "backend_scope": ["gui", "shell", "mcp", "system", "web"],
      "max_iterations": 15,
      "visual_analysis_timeout": 60.0
    }
  ]
}

================================================
FILE: anytool/config/config_dev.json.example
================================================
{
  "comment": "[Optional] Load order: config_grounding.json → config_security.json → config_dev.json (values in config_dev.json override the earlier files)",
  
  "debug": true,
  "log_level": "DEBUG",
  
  "security_policies": {
    "global": {
      "blocked_commands": []
    }
  }
}

================================================
FILE: anytool/config/config_grounding.json
================================================
{
  "shell": {
    "mode": "local",
    "timeout": 60,
    "max_retries": 3,
    "retry_interval": 3.0,
    "default_shell": "/bin/bash",
    "working_dir": null,
    "env": {},
    "conda_env": null,
    "default_port": 5000
  },
  "mcp": {
    "timeout": 30,
    "max_retries": 3,
    "retry_interval": 2.0,
    "sandbox": false,
    "auto_initialize": true,
    "eager_sessions": false,
    "sse_read_timeout": 300.0,
    "check_dependencies": true,
    "auto_install": true
  },
  "gui": {
    "mode": "local",
    "timeout": 90,
    "max_retries": 3,
    "retry_interval": 5.0,
    "driver_type": "pyautogui",
    "failsafe": false,
    "screenshot_on_error": true,
    "pkgs_prefix": "import pyautogui; import time; pyautogui.FAILSAFE = {failsafe}; {command}"
  },
  "tool_search": {
    "embedding_model": "BAAI/bge-small-en-v1.5",
    "max_tools": 40,
    "search_mode": "hybrid",
    "enable_llm_filter": true,
    "llm_filter_threshold": 50,
    "enable_cache_persistence": true,
    "cache_dir": null
  },
  "tool_quality": {
    "enabled": true,
    "enable_persistence": true,
    "cache_dir": null,
    "auto_evaluate_descriptions": true,
    "enable_quality_ranking": true,
    "evolve_interval": 5
  },
  
  "tool_cache_ttl": 600,
  "tool_cache_maxsize": 500,

  "debug": false,
  "log_level": "INFO",
  "enabled_backends": [
    {
      "name": "shell",
      "provider_cls": "anytool.grounding.backends.shell.ShellProvider"
    },
    {
      "name": "web",
      "provider_cls": "anytool.grounding.backends.web.WebProvider"
    },
    {
      "name": "mcp",
      "provider_cls": "anytool.grounding.backends.mcp.MCPProvider"
    },
    {
      "name": "gui",
      "provider_cls": "anytool.grounding.backends.gui.GUIProvider"
    }
  ],
  
  "_comment_system_backend": "Note: 'system' backend is automatically registered and always available. It provides meta-level tools for querying system state. Do not add it to enabled_backends as it requires special initialization."
}

================================================
FILE: anytool/config/config_mcp.json.example
================================================


================================================
FILE: anytool/config/config_security.json
================================================
{
  "security_policies": {
    "global": {
      "allow_shell_commands": true,
      "allow_network_access": true,
      "allow_file_access": true,
      "blocked_commands": {
        "common": ["rm", "-rf", "shutdown", "reboot", "poweroff", "halt"],
        "linux": ["mkfs", "dd", "iptables", "systemctl", "init", "kill", "-9", "pkill"],
        "darwin": ["diskutil", "dd", "pfctl", "launchctl", "killall"],
        "windows": ["del", "format", "rd", "rmdir", "/s", "/q", "taskkill", "/f"]
      },
      "sandbox_enabled": false
    },
    "backend": {
      "shell": {
        "allow_shell_commands": true,
        "allow_file_access": true,
        "blocked_commands": {
          "common": ["rm", "-rf", "shutdown", "reboot", "poweroff", "halt"],
          "linux": [
            "mkfs", "mkfs.ext4", "mkfs.xfs",
            "dd",
            "iptables", "ip6tables", "nftables",
            "systemctl", "service",
            "fdisk", "parted", "gdisk",
            "mount", "umount",
            "chmod", "777",
            "chown", "root",
            "passwd",
            "useradd", "userdel", "usermod",
            "kill", "-9", "pkill", "killall"
          ],
          "darwin": [
            "diskutil",
            "dd",
            "pfctl",
            "launchctl",
            "dscl",
            "chmod", "777",
            "chown", "root",
            "passwd",
            "killall",
            "pmset"
          ],
          "windows": [
            "del", "erase",
            "format",
            "rd", "rmdir", "/s", "/q",
            "diskpart",
            "reg", "delete",
            "net", "user",
            "taskkill", "/f",
            "wmic"
          ]
        },
        "sandbox_enabled": false
      },
      "mcp": {
        "sandbox_enabled": false
      },
      "web": {
        "allow_network_access": true,
        "allowed_domains": []
      }
    }
  }
}

================================================
FILE: anytool/config/constants.py
================================================
from pathlib import Path

# File names of the JSON configuration files.
CONFIG_GROUNDING = "config_grounding.json"
CONFIG_SECURITY = "config_security.json"
CONFIG_MCP = "config_mcp.json"
CONFIG_DEV = "config_dev.json"
CONFIG_AGENTS = "config_agents.json"

# Accepted log level names, ordered from most to least verbose.
LOG_LEVELS = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]

# Project root directory (AnyTool/): three levels above this file
# (constants.py -> config/ -> anytool/ -> project root).
PROJECT_ROOT = Path(__file__).parent.parent.parent


# Names re-exported by `from .constants import *`.
__all__ = [
    "CONFIG_GROUNDING",
    "CONFIG_SECURITY",
    "CONFIG_MCP",
    "CONFIG_DEV",
    "CONFIG_AGENTS",
    "LOG_LEVELS",
    "PROJECT_ROOT",
]

================================================
FILE: anytool/config/grounding.py
================================================
from typing import Dict, Optional, Any, List, Literal
try:
    from pydantic import BaseModel, Field, field_validator
    PYDANTIC_V2 = True
except ImportError:
    from pydantic import BaseModel, Field, validator as field_validator
    PYDANTIC_V2 = False

from anytool.grounding.core.types import (
    SessionConfig, 
    SecurityPolicy,
    BackendType
)
from .constants import LOG_LEVELS


class ConfigMixin:
    """Mixin providing a uniform lookup helper for dict-based and attribute-based configs."""
    
    def get_value(self, key: str, default=None):
        """
        Look up ``key`` on this config object without raising when it is missing.

        Dict instances are queried with ``dict.get``; any other object is
        queried with ``getattr``.
        
        Args:
            key: Configuration key
            default: Value returned when the key/attribute does not exist
        """
        if not isinstance(self, dict):
            return getattr(self, key, default)
        return self.get(key, default)


class BackendConfig(BaseModel, ConfigMixin):
    """Settings shared by every backend: enable flag, timeout and retry count."""
    enabled: bool = Field(default=True, description="Whether the backend is enabled")
    # Timeout is bounded to 1..300 seconds, retries to 0..10.
    timeout: int = Field(default=30, ge=1, le=300, description="Timeout in seconds")
    max_retries: int = Field(default=3, ge=0, le=10, description="Maximum retry attempts")


class ShellConfig(BackendConfig):
    """
    Configuration for the shell backend.
    
    Attributes:
        enabled: Whether shell backend is enabled
        mode: Execution mode - "local" runs scripts in-process via subprocess,
              "server" connects to a running local_server via HTTP
        timeout: Default timeout for shell operations (seconds)
        max_retries: Maximum number of retry attempts for failed operations
        retry_interval: Wait time between retries (seconds)
        default_shell: Path to default shell executable
        working_dir: Default working directory for bash scripts
        env: Default environment variables for shell operations
        conda_env: Conda environment name to activate before execution (optional)
        default_port: Default port for shell server connection (only used in server mode)
    """
    mode: Literal["local", "server"] = Field(
        default="local",
        description="Execution mode: 'local' (in-process subprocess) or 'server' (HTTP local_server)",
    )
    retry_interval: float = Field(
        default=3.0, ge=0.1, le=60.0,
        description="Wait time between retries in seconds",
    )
    default_shell: str = Field(default="/bin/bash", description="Default shell path")
    working_dir: Optional[str] = Field(
        default=None,
        description="Default working directory for bash scripts",
    )
    env: Dict[str, str] = Field(default_factory=dict, description="Default environment variables")
    conda_env: Optional[str] = Field(
        default=None,
        description="Conda environment name to activate (e.g., 'myenv')",
    )
    default_port: int = Field(
        default=5000, ge=1, le=65535,
        description="Default port for shell server",
    )
    
    @field_validator('default_shell')
    @classmethod
    def validate_shell(cls, v):
        """Accept only a non-empty string as the shell path."""
        if v and isinstance(v, str):
            return v
        raise ValueError("Shell path must be a non-empty string")
    
    @field_validator('working_dir')
    @classmethod
    def validate_working_dir(cls, v):
        """Accept ``None`` or a string as the working directory."""
        if v is None or isinstance(v, str):
            return v
        raise ValueError("Working directory must be a string")

class WebConfig(BackendConfig):
    """
    Configuration for the web backend (AI Deep Research).

    The class body declares no fields of its own; it only subclasses
    BackendConfig.

    Attributes:
        enabled: Whether web backend is enabled
        timeout: Default timeout for web operations (seconds)
        max_retries: Maximum number of retry attempts

    Note:
        Web-specific parameters (API key, base URL) are not configured here.
        They are loaded from environment variables or fall back to defaults
        in WebSession:
        - OPENROUTER_API_KEY: API key for deep research (required)
        - Deep research base URL defaults to "https://openrouter.ai/api/v1"
    """


class MCPConfig(BackendConfig):
    """
    MCP backend configuration

    Attributes:
        sandbox: Whether to enable sandbox
        auto_initialize: Whether to auto initialize
        eager_sessions: Whether sessions for all servers are created eagerly on initialization
        retry_interval: Wait time between retries (seconds, 0.1 to 60)
        servers: Per-server configuration keyed by server name
        sse_read_timeout: SSE read timeout for HTTP/Sandbox connectors (seconds, 1 to 3600)
    """
    sandbox: bool = Field(
        False,
        description="Whether to enable sandbox",
    )
    auto_initialize: bool = Field(
        True,
        description="Whether to auto initialize",
    )
    eager_sessions: bool = Field(
        False,
        description="Whether to eagerly create sessions for all servers on initialization",
    )
    retry_interval: float = Field(
        2.0,
        ge=0.1,
        le=60.0,
        description="Wait time between retries in seconds",
    )
    servers: Dict[str, Dict[str, Any]] = Field(
        default_factory=dict,
        description="MCP servers configuration, loaded from config_mcp.json",
    )
    sse_read_timeout: float = Field(
        300.0,
        ge=1.0,
        le=3600.0,
        description="SSE read timeout in seconds for HTTP/Sandbox connectors",
    )


class GUIConfig(BackendConfig):
    """
    GUI backend configuration

    Attributes:
        mode: Execution mode - "local" runs GUI operations in-process,
              "server" connects to a running local_server via HTTP
        retry_interval: Wait time between retries (seconds, 0.1 to 60)
        driver_type: GUI driver type (defaults to "pyautogui")
        failsafe: Whether to enable pyautogui failsafe mode
        screenshot_on_error: Whether to capture a screenshot on error
        pkgs_prefix: Python command template with {failsafe} and {command}
                     placeholders, used for pyautogui setup
    """
    mode: Literal["local", "server"] = Field(
        "local",
        description="Execution mode: 'local' (in-process) or 'server' (HTTP local_server)",
    )
    retry_interval: float = Field(
        5.0,
        ge=0.1,
        le=60.0,
        description="Wait time between retries in seconds",
    )
    driver_type: str = Field(
        "pyautogui",
        description="GUI driver type",
    )
    failsafe: bool = Field(
        False,
        description="Whether to enable pyautogui failsafe mode",
    )
    screenshot_on_error: bool = Field(
        True,
        description="Whether to capture screenshot on error",
    )
    pkgs_prefix: str = Field(
        "import pyautogui; import time; pyautogui.FAILSAFE = {failsafe}; {command}",
        description="Python command prefix for pyautogui setup",
    )


class ToolSearchConfig(BaseModel):
    """
    Tool search and ranking configuration

    Attributes:
        embedding_model: Embedding model name for semantic search
        max_tools: Maximum number of tools returned from a search (1 to 1000)
        search_mode: Default search mode: semantic, keyword, or hybrid
                     (stored lower-cased by the validator below)
        enable_llm_filter: Whether to use LLM for backend/server filtering
        llm_filter_threshold: Tool count above which the LLM filter applies (1 to 1000)
        enable_cache_persistence: Whether to persist embeddings to disk
        cache_dir: Directory for the embedding cache; None selects the default
    """
    embedding_model: str = Field("BAAI/bge-small-en-v1.5", description="Embedding model name for semantic search")
    max_tools: int = Field(20, ge=1, le=1000, description="Maximum number of tools to return from search")
    search_mode: str = Field("hybrid", description="Default search mode: semantic, keyword, or hybrid")
    enable_llm_filter: bool = Field(True, description="Whether to use LLM for backend/server filtering")
    llm_filter_threshold: int = Field(50, ge=1, le=1000, description="Only apply LLM filter when tool count exceeds this threshold")
    enable_cache_persistence: bool = Field(False, description="Whether to persist embeddings to disk")
    cache_dir: Optional[str] = Field(None, description="Directory for embedding cache. None means use default <project_root>/.anytool/embedding_cache")

    @field_validator('search_mode')
    @classmethod
    def validate_search_mode(cls, v):
        """Normalize the search mode to lower case and reject unknown modes."""
        normalized = v.lower()
        valid_modes = ['semantic', 'keyword', 'hybrid']
        if normalized in valid_modes:
            return normalized
        # The message reports the value as supplied, not the normalized form.
        raise ValueError(f"Search mode must be one of {valid_modes}, got: {v}")


class ToolQualityConfig(BaseModel):
    """
    Tool quality tracking configuration

    Attributes:
        enabled: Whether to enable tool quality tracking
        enable_persistence: Whether to persist quality data to disk
        cache_dir: Directory for the quality cache; None selects the default
        auto_evaluate_descriptions: Whether tool descriptions are evaluated automatically using LLM
        enable_quality_ranking: Whether quality scores are incorporated in tool ranking
        evolve_interval: Trigger quality evolution every N tool executions (1 to 100)
    """
    enabled: bool = Field(True, description="Whether to enable tool quality tracking")
    enable_persistence: bool = Field(True, description="Whether to persist quality data to disk")
    cache_dir: Optional[str] = Field(None, description="Directory for quality cache. None means use default <project_root>/.anytool/tool_quality")
    auto_evaluate_descriptions: bool = Field(True, description="Whether to automatically evaluate tool descriptions using LLM")
    enable_quality_ranking: bool = Field(True, description="Whether to incorporate quality scores in tool ranking")
    evolve_interval: int = Field(5, ge=1, le=100, description="Trigger quality evolution every N tool executions")


class GroundingConfig(BaseModel):
    """
    Main configuration for Grounding module.
    
    Contains configuration for all grounding backends and grounding-level settings.
    Note: Local server connection uses defaults or environment variables (LOCAL_SERVER_URL).
    """
    # Backend configurations
    shell: ShellConfig = Field(default_factory=ShellConfig)
    web: WebConfig = Field(default_factory=WebConfig)
    mcp: MCPConfig = Field(default_factory=MCPConfig)
    gui: GUIConfig = Field(default_factory=GUIConfig)
    system: BackendConfig = Field(default_factory=BackendConfig)
    
    # Grounding-level settings
    tool_search: ToolSearchConfig = Field(default_factory=ToolSearchConfig)
    tool_quality: ToolQualityConfig = Field(default_factory=ToolQualityConfig)
    
    enabled_backends: List[Dict[str, str]] = Field(
        default_factory=list,
        description="List of enabled backends, each item: {'name': str, 'provider_cls': str}"
    )
    
    session_defaults: SessionConfig = Field(
        default_factory=lambda: SessionConfig(
            session_name="",
            backend_type=BackendType.SHELL,
            timeout=30,
            auto_reconnect=True,
            health_check_interval=30
        )
    )
    
    tool_cache_ttl: int = Field(
        300,
        ge=1,
        le=3600,
        description="Tool cache time-to-live in seconds"
    )
    tool_cache_maxsize: int = Field(
        300,
        ge=1,
        le=10000,
        description="Maximum number of tool cache entries"
    )
    
    debug: bool = Field(False, description="Debug mode")
    log_level: str = Field("INFO", description="Log level")
    security_policies: Dict[str, Any] = Field(default_factory=dict)
    
    @field_validator('log_level')
    @classmethod
    def validate_log_level(cls, v):
        """Upper-case the log level and reject values not listed in LOG_LEVELS."""
        if v.upper() not in LOG_LEVELS:
            raise ValueError(f"Log level must be one of {LOG_LEVELS}, got: {v}")
        return v.upper()
    
    def get_backend_config(self, backend_type: str) -> BackendConfig:
        """
        Get configuration for specified backend.

        Args:
            backend_type: Backend name; it is lower-cased and looked up as an
                attribute of this model (e.g. "shell", "web", "mcp", "gui", "system").

        Returns:
            The matching backend configuration. If no attribute with that name
            holds a BackendConfig, a warning is logged and a default
            BackendConfig() is returned.
        """
        name = backend_type.lower()
        # A plain hasattr() check would also match non-backend attributes such
        # as `debug`, `tool_search` or pydantic methods and hand them back as
        # if they were a backend config. Only BackendConfig instances qualify.
        candidate = getattr(self, name, None)
        if not isinstance(candidate, BackendConfig):
            # Imported lazily, as in the original implementation.
            from anytool.utils.logging import Logger
            logger = Logger.get_logger(__name__)
            logger.warning(f"Unknown backend type: {backend_type}")
            return BackendConfig()
        return candidate
    
    def get_security_policy(self, backend_type: str) -> SecurityPolicy:
        """
        Build the security policy that applies to a backend.

        The "global" entry of `security_policies` is used as the base and is
        overridden key-by-key by the entry under "backend" for the lower-cased
        backend name. Missing entries are treated as empty.

        Args:
            backend_type: Backend name (case-insensitive).

        Returns:
            SecurityPolicy created from the merged dictionary via
            SecurityPolicy.from_dict.
        """
        global_policy = self.security_policies.get("global", {})
        backend_policy = self.security_policies.get("backend", {}).get(backend_type.lower(), {})
        # Backend-specific keys take precedence over global ones.
        merged_policy = {**global_policy, **backend_policy}
        return SecurityPolicy.from_dict(merged_policy)


# Names exported by `from <this module> import *`.
__all__ = [
    "BackendConfig",
    "ShellConfig",
    "WebConfig",
    "MCPConfig",
    "GUIConfig",
    "ToolSearchConfig",
    "ToolQualityConfig",
    "GroundingConfig",
]

================================================
FILE: anytool/config/loader.py
================================================
import threading
from pathlib import Path
from typing import Union, Iterable, Dict, Any, Optional

from .grounding import GroundingConfig
from .constants import (
    CONFIG_GROUNDING,
    CONFIG_SECURITY,
    CONFIG_DEV,
    CONFIG_MCP,
    CONFIG_AGENTS
)
from anytool.utils.logging import Logger
from .utils import load_json_file, save_json_file as save_json

logger = Logger.get_logger(__name__)


# Directory containing this module; the default JSON configuration files are
# resolved relative to it (see load_config / load_agents_config).
CONFIG_DIR = Path(__file__).parent

# Global configuration singleton
# Populated by load_config() and cleared by reset_config().
_config: GroundingConfig | None = None
# get_config() calls load_config() while already holding this lock, hence RLock.
_config_lock = threading.RLock()  # Use RLock to support recursive locking


def _deep_merge_dict(base: dict, update: dict) -> dict:
    """Deep merge two dictionaries, update's values will override base's values"""
    result = base.copy()
    for key, value in update.items():
        if key in result and isinstance(result[key], dict) and isinstance(value, dict):
            result[key] = _deep_merge_dict(result[key], value)
        else:
            result[key] = value
    return result

def _load_json_file(path: Path) -> Dict[str, Any]:
    """Load single JSON configuration file.
    
    This function wraps the generic load_json_file and adds global configuration specific error handling and logging.

    Args:
        path: Location of the JSON configuration file.

    Returns:
        The parsed JSON object. An empty dict is returned (never an exception)
        when the file does not exist, cannot be read or parsed, or when its
        top-level JSON value is not an object.
    """
    if not path.exists():
        logger.debug(f"Configuration file does not exist, skipping: {path}")
        return {}
    
    try:
        data = load_json_file(path)
    except Exception as e:
        # Best-effort loading: a broken file must not abort configuration loading.
        logger.warning(f"Failed to load configuration file {path}: {e}")
        return {}

    # Valid JSON may still be a list/string/number. Callers merge the result
    # with dict operations (.items(), .get()), so anything but an object would
    # crash them later with an unrelated AttributeError.
    if not isinstance(data, dict):
        logger.warning(
            f"Failed to load configuration file {path}: "
            f"expected a JSON object at top level, got {type(data).__name__}"
        )
        return {}

    logger.info(f"Loaded configuration file: {path}")
    return data

def _load_multiple_files(paths: Iterable[Path]) -> Dict[str, Any]:
    """Load every file in ``paths`` and deep-merge them in order.

    Later files override earlier ones; files that yield no data are skipped.
    """
    combined: Dict[str, Any] = {}
    for config_path in paths:
        loaded = _load_json_file(config_path)
        if not loaded:
            continue
        combined = _deep_merge_dict(combined, loaded)
    return combined

def load_config(*config_paths: Union[str, Path]) -> GroundingConfig:
    """
    Load, merge and validate configuration files, then store the result as
    the global configuration.

    Args:
        *config_paths: Files to load, merged in the given order. When omitted,
            the default grounding, security and (optional) dev files located
            in CONFIG_DIR are used.

    Returns:
        The configuration object now held in the module-level singleton. If
        validation of the merged data fails, a default GroundingConfig is used.
    """
    global _config

    with _config_lock:
        paths = (
            [Path(p) for p in config_paths]
            if config_paths
            else [
                CONFIG_DIR / CONFIG_GROUNDING,
                CONFIG_DIR / CONFIG_SECURITY,
                CONFIG_DIR / CONFIG_DEV,  # Optional: development environment configuration
            ]
        )

        raw_data = _load_multiple_files(paths)

        # MCP servers are handled separately: a top-level "mcpServers" key in
        # the merged data takes precedence over the default config_mcp.json.
        if "mcpServers" in raw_data:
            custom_servers = raw_data.pop("mcpServers")
            raw_data.setdefault("mcp", {})["servers"] = custom_servers
            logger.debug(f"Using custom MCP servers from provided config ({len(raw_data['mcp']['servers'])} servers)")
        else:
            mcp_data = _load_json_file(CONFIG_DIR / CONFIG_MCP)
            if mcp_data and "mcpServers" in mcp_data:
                raw_data.setdefault("mcp", {})["servers"] = mcp_data["mcpServers"]
                logger.debug(f"Loaded MCP servers from default config_mcp.json ({len(raw_data['mcp']['servers'])} servers)")

        # Build the typed configuration; fall back to defaults on invalid data.
        try:
            _config = GroundingConfig.model_validate(raw_data)
        except Exception as e:
            logger.error(f"Validation failed, using default configuration: {e}")
            _config = GroundingConfig()

        # Debug mode wins over the configured log level.
        if _config.debug:
            Logger.set_debug(2)
        elif _config.log_level:
            try:
                Logger.configure(level=_config.log_level)
            except Exception as e:
                logger.warning(f"Failed to set log level {_config.log_level}: {e}")

    return _config

def get_config() -> GroundingConfig:
    """
    Return the global configuration, loading it with default paths on first use.

    Usage:
        - Get configuration in Provider: get_config().get_backend_config('shell')
        - Get security policy in Tool: get_config().get_security_policy('shell')
    """
    global _config

    if _config is not None:
        return _config

    with _config_lock:
        # Re-check under the lock before loading.
        if _config is None:
            load_config()

    return _config

def reset_config() -> None:
    """Reset configuration (for testing)

    Clears the module-level singleton under the config lock, so the next
    get_config() call loads the configuration again.
    """
    global _config
    with _config_lock:
        _config = None

def save_config(config: GroundingConfig, path: Union[str, Path]) -> None:
    """
    Serialize a configuration object and write it to a JSON file.

    Args:
        config: Configuration to persist.
        path: Destination file; handed to save_json_file.
    """
    # mode="json" makes pydantic emit only JSON-compatible values. The default
    # Python mode keeps enum members (e.g. the backend type in session_defaults)
    # and similar objects as-is, which json.dump may not be able to encode.
    save_json(config.model_dump(mode="json"), path)
    logger.info(f"Configuration saved to: {path}")


def load_agents_config() -> Dict[str, Any]:
    """Load the agents configuration file located in CONFIG_DIR."""
    return _load_json_file(CONFIG_DIR / CONFIG_AGENTS)


def get_agent_config(agent_name: str) -> Optional[Dict[str, Any]]:
    """
    Look up one agent's configuration by name.

    Returns:
        The first entry of the "agents" list whose "name" equals ``agent_name``,
        or None (with a warning) when the list or the agent is missing.
    """
    agents_config = load_agents_config()

    if "agents" not in agents_config:
        logger.warning(f"No 'agents' key found in {CONFIG_AGENTS}")
        return None

    found = next(
        (entry for entry in agents_config.get("agents", []) if entry.get("name") == agent_name),
        None,
    )
    if found is None:
        logger.warning(f"Agent '{agent_name}' not found in {CONFIG_AGENTS}")
        return None
    return found


# Names exported by `from <this module> import *`.
__all__ = [
    "CONFIG_DIR",
    "load_config",
    "get_config",
    "reset_config",
    "save_config",
    "load_agents_config",
    "get_agent_config"
]

================================================
FILE: anytool/config/utils.py
================================================
import json
from pathlib import Path
from typing import Any


def get_config_value(config: Any, key: str, default=None):
    """Read ``key`` from a dict or an object, returning ``default`` if absent.

    Dicts are queried with ``.get``; anything else with ``getattr``.
    """
    if not isinstance(config, dict):
        return getattr(config, key, default)
    return config.get(key, default)


def load_json_file(filepath: str | Path) -> dict[str, Any]:
    filepath = Path(filepath) if isinstance(filepath, str) else filepath
    
    with open(filepath, 'r', encoding='utf-8') as f:
        return json.load(f)


def save_json_file(data: dict[str, Any], filepath: str | Path, indent: int = 2) -> None:
    filepath = Path(filepath) if isinstance(filepath, str) else filepath
        
    # Ensure directory exists
    filepath.parent.mkdir(parents=True, exist_ok=True)
    
    with open(filepath, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=indent, ensure_ascii=False)


# Names exported by `from <this module> import *`.
__all__ = ["get_config_value", "load_json_file", "save_json_file"]

================================================
FILE: anytool/grounding/backends/__init__.py
================================================
# Use lazy imports to avoid loading all backends unconditionally

def _lazy_import_provider(provider_name: str):
    """Lazy import provider class"""
    if provider_name == 'mcp':
        from .mcp.provider import MCPProvider
        return MCPProvider
    elif provider_name == 'shell':
        from .shell.provider import ShellProvider
        return ShellProvider
    elif provider_name == 'web':
        from .web.provider import WebProvider
        return WebProvider
    elif provider_name == 'gui':
        from .gui.provider import GUIProvider
        return GUIProvider
    else:
        raise ImportError(f"Unknown provider: {provider_name}")


class _ProviderRegistry:
    """Lazy provider registry"""
    def __getitem__(self, key):
        return _lazy_import_provider(key)
    
    def __contains__(self, key):
        return key in ['mcp', 'shell', 'web', 'gui']

# Shared registry instance: indexing it imports the requested provider on demand.
BACKEND_PROVIDERS = _ProviderRegistry()

# Names exported by `from <this package> import *`.
__all__ = [
    'BACKEND_PROVIDERS',
    '_lazy_import_provider'
]

================================================
FILE: anytool/grounding/backends/gui/__init__.py
================================================
from .provider import GUIProvider
from .session import GUISession
from .transport.connector import GUIConnector
from .transport.local_connector import LocalGUIConnector

# The Anthropic client is optional: if importing it raises ImportError the
# package still loads, just without the Anthropic exports.
try:
    from .anthropic_client import AnthropicGUIClient
    from . import anthropic_utils
    _anthropic_available = True
except ImportError:
    _anthropic_available = False

# Names exported by `from <this package> import *`.
__all__ = [
    # Core Provider and Session
    "GUIProvider",
    "GUISession",
    
    # Transport layer
    "GUIConnector",
    "LocalGUIConnector",
]

# Add Anthropic modules to exports if available
if _anthropic_available:
    __all__.extend(["AnthropicGUIClient", "anthropic_utils"])

================================================
FILE: anytool/grounding/backends/gui/anthropic_client.py
================================================
import base64
import os
import time
from typing import Any, Dict, Optional, Tuple, List
from anytool.utils.logging import Logger
from PIL import Image
import io

logger = Logger.get_logger(__name__)

# Optional dependency: record whether the Anthropic SDK could be imported.
# When it could not, the names imported below stay undefined and only a
# warning is logged here; AnthropicGUIClient.__init__ checks
# ANTHROPIC_AVAILABLE and raises RuntimeError in that case.
try:
    from anthropic import (
        Anthropic,
        AnthropicBedrock,
        AnthropicVertex,
        APIError,
        APIResponseValidationError,
        APIStatusError,
    )
    from anthropic.types.beta import (
        BetaMessageParam,
        BetaTextBlockParam,
    )
    ANTHROPIC_AVAILABLE = True
except ImportError:
    logger.warning("Anthropic SDK not available. Install with: pip install anthropic")
    ANTHROPIC_AVAILABLE = False

# Import utility functions
from .anthropic_utils import (
    APIProvider,
    PROVIDER_TO_DEFAULT_MODEL_NAME,
    COMPUTER_USE_BETA_FLAG,
    PROMPT_CACHING_BETA_FLAG,
    get_system_prompt,
    inject_prompt_caching,
    maybe_filter_to_n_most_recent_images,
    response_to_params,
)

# API retry configuration
# Used by AnthropicGUIClient.plan_action: maximum number of attempts with the
# primary API key, and the pause between two failed attempts.
API_RETRY_TIMES = 10
API_RETRY_INTERVAL = 5  # seconds


class AnthropicGUIClient:
    """
    Anthropic LLM Client for GUI operations.
    Uses Claude Sonnet 4.5 with computer-use-2025-01-24 API.
    
    Features:
    - Vision-based screen understanding
    - Automatic screenshot resizing (configurable display size)
    - Coordinate scaling between display and actual screen
    """
    
    def __init__(
        self,
        model: str = "claude-sonnet-4-5",
        platform: str = "Ubuntu",
        api_key: Optional[str] = None,
        provider: str = "anthropic",
        max_tokens: int = 4096,
        screen_size: Tuple[int, int] = (1920, 1080),
        display_size: Tuple[int, int] = (1024, 768),  # Computer use display size
        pyautogui_size: Optional[Tuple[int, int]] = None,  # PyAutoGUI working size
        only_n_most_recent_images: int = 3,
        enable_prompt_caching: bool = True,
        backup_api_key: Optional[str] = None,
    ):
        """
        Set up client state for Claude Sonnet 4.5 computer-use requests.

        Args:
            model: Model name. Anything other than "claude-sonnet-4-5" is
                replaced by it with a warning.
            platform: Platform type (Ubuntu, Windows, or macOS)
            api_key: Anthropic API key; falls back to the ANTHROPIC_API_KEY env var
            provider: API provider. Anything other than "anthropic" is
                replaced by it with a warning.
            max_tokens: Maximum tokens for response
            screen_size: Actual screenshot resolution (width, height) - physical pixels
            display_size: Display size for computer use tool (width, height).
                Screenshots are resized to this size before being sent to the API.
            pyautogui_size: PyAutoGUI working size (logical pixels). Falls back
                to screen_size when not given. On Retina/HiDPI displays this
                may be screen_size / 2.
            only_n_most_recent_images: Number of recent screenshots to keep in history
            enable_prompt_caching: Whether to enable prompt caching for cost optimization
            backup_api_key: Backup API key; falls back to the
                ANTHROPIC_API_KEY_BACKUP env var

        Raises:
            RuntimeError: If the Anthropic SDK is not installed.
            ValueError: If no API key is passed and ANTHROPIC_API_KEY is unset.
        """
        if not ANTHROPIC_AVAILABLE:
            raise RuntimeError("Anthropic SDK not installed. Install with: pip install anthropic")

        # Only one model is supported; coerce everything else to it.
        supported_model = "claude-sonnet-4-5"
        if model != supported_model:
            logger.warning(f"Model '{model}' not supported. Using 'claude-sonnet-4-5'")
            model = supported_model

        self.model = model
        self.platform = platform

        primary_key = api_key or os.environ.get("ANTHROPIC_API_KEY")
        self.api_key = primary_key
        if not primary_key:
            raise ValueError("Anthropic API key not provided. Set ANTHROPIC_API_KEY env var or pass api_key parameter")

        # Failover key, tried by plan_action once the primary key has failed.
        self.backup_api_key = backup_api_key or os.environ.get("ANTHROPIC_API_KEY_BACKUP")

        # Only one provider is supported; coerce everything else to it.
        supported_provider = "anthropic"
        if provider != supported_provider:
            logger.warning(f"Provider '{provider}' not supported. Using 'anthropic'")
            provider = supported_provider
        self.provider = APIProvider(provider)

        self.max_tokens = max_tokens
        self.screen_size = screen_size
        self.display_size = display_size
        self.pyautogui_size = screen_size if not pyautogui_size else pyautogui_size
        self.only_n_most_recent_images = only_n_most_recent_images
        self.enable_prompt_caching = enable_prompt_caching

        # Conversation history sent with every request.
        self.messages: List[BetaMessageParam] = []

        # Per-axis factor mapping coordinates given in display_size space
        # (what the model sees) to PyAutoGUI's logical pixel space.
        x_scale = self.pyautogui_size[0] / display_size[0]
        y_scale = self.pyautogui_size[1] / display_size[1]
        self.resize_factor = (x_scale, y_scale)

        summary_lines = [
            "Initialized AnthropicGUIClient:",
            f"  Model: {model}",
            f"  Platform: {platform}",
            f"  Screen Size (physical): {screen_size}",
            f"  PyAutoGUI Size (logical): {self.pyautogui_size}",
            f"  Display Size (LLM): {display_size}",
            f"  Resize Factor (LLM->PyAutoGUI): {self.resize_factor}",
            f"  Prompt Caching: {enable_prompt_caching}",
        ]
        logger.info("\n".join(summary_lines))
    
    def _create_client(self, api_key: Optional[str] = None):
        """Build an Anthropic client, using ``self.api_key`` when no key is passed."""
        return Anthropic(api_key=api_key or self.api_key, max_retries=4)
    
    def _resize_screenshot(self, screenshot_bytes: bytes) -> bytes:
        """
        Scale a screenshot to ``self.display_size`` and return it as PNG bytes.

        For computer-use-2025-01-24, the screenshot must match the
        display_width_px x display_height_px declared in the tool definition.

        Args:
            screenshot_bytes: Encoded image data readable by PIL.

        Returns:
            PNG-encoded bytes of the resized image.
        """
        source_image = Image.open(io.BytesIO(screenshot_bytes))
        png_buffer = io.BytesIO()
        source_image.resize(self.display_size, Image.Resampling.LANCZOS).save(png_buffer, format='PNG')
        return png_buffer.getvalue()
    
    def _scale_coordinates(self, x: int, y: int) -> Tuple[int, int]:
        """
        Scale coordinates from display size to actual screen size.
        
        The API returns coordinates in display_size (e.g., 1024x768).
        We need to scale them to actual screen_size (e.g., 1920x1080) for execution.
        
        Args:
            x, y: Coordinates in display size space
            
        Returns:
            Scaled coordinates in actual screen size space
        """
        scaled_x = int(x * self.resize_factor[0])
        scaled_y = int(y * self.resize_factor[1])
        return scaled_x, scaled_y
    
    async def plan_action(
        self,
        task_description: str,
        screenshot: bytes,
        action_history: Optional[List[Dict[str, Any]]] = None,
    ) -> Tuple[Optional[str], List[str]]:
        """
        Plan next action based on task and current screenshot.
        Includes prompt caching, error handling, and backup API key support.
        
        Args:
            task_description: Task to accomplish. Only sent to the model when
                the message history is empty (first call).
            screenshot: Current screenshot (PNG bytes)
            action_history: Previous actions (for context). Currently not read
                by this method; context comes from ``self.messages``.
        
        Returns:
            Tuple of (reasoning, commands). ``commands`` holds the results of
            ``_parse_computer_tool_use`` for every tool_use block in the
            response. On any API failure the result is ``(None, ["FAIL"])``.
        """
        # Local import: the retry pause must not block the event loop, and the
        # surrounding module does not import asyncio at file level.
        import asyncio

        # Resize screenshot
        resized_screenshot = self._resize_screenshot(screenshot)
        screenshot_b64 = base64.b64encode(resized_screenshot).decode('utf-8')
        
        # Initialize messages with first task + screenshot
        if not self.messages:
            # IMPORTANT: Image should come BEFORE text for better model understanding
            # This matches OSWorld's implementation which has proven effectiveness
            self.messages.append({
                "role": "user",
                "content": [
                    {
                        "type": "image",
                        "source": {
                            "type": "base64",
                            "media_type": "image/png",
                            "data": screenshot_b64,
                        },
                    },
                    {"type": "text", "text": task_description},
                ]
            })
        
        # Filter images BEFORE adding new screenshot to control message size
        # This is critical to avoid exceeding the 25MB API limit
        if self.only_n_most_recent_images and len(self.messages) > 1:
            # Reserve 1 slot for the screenshot we're about to add
            maybe_filter_to_n_most_recent_images(
                self.messages,
                max(1, self.only_n_most_recent_images - 1),
                min_removal_threshold=1,  # More aggressive filtering
            )
        
        # Add tool result from previous action if exists
        if self.messages and self.messages[-1]["role"] == "assistant":
            last_content = self.messages[-1]["content"]
            if isinstance(last_content, list) and any(
                block.get("type") == "tool_use" for block in last_content
            ):
                # Only the first tool_use block of the previous turn is answered.
                tool_use_id = next(
                    block["id"] for block in last_content 
                    if block.get("type") == "tool_use"
                )
                self._add_tool_result(tool_use_id, "Success", resized_screenshot)
        
        # Define tools and betas for claude-sonnet-4-5 with computer-use-2025-01-24
        tools = [{
            'name': 'computer',
            'type': 'computer_20250124',
            'display_width_px': self.display_size[0],
            'display_height_px': self.display_size[1],
            'display_number': 1
        }]
        betas = [COMPUTER_USE_BETA_FLAG]
        
        # Prepare system prompt with optional caching
        system = BetaTextBlockParam(
            type="text",
            text=get_system_prompt(self.platform)
        )
        
        # Enable prompt caching if supported and enabled
        if self.enable_prompt_caching:
            betas.append(PROMPT_CACHING_BETA_FLAG)
            inject_prompt_caching(self.messages)
            system["cache_control"] = {"type": "ephemeral"}  # type: ignore
        
        # Model name - use claude-sonnet-4-5 directly
        model_name = "claude-sonnet-4-5"
        
        # Enable thinking for complex computer use tasks
        extra_body = {"thinking": {"type": "enabled", "budget_tokens": 2048}}
        
        # Log request details for debugging
        # Count current images in messages
        total_images = sum(
            1
            for message in self.messages
            for item in (message.get("content", []) if isinstance(message.get("content"), list) else [])
            if isinstance(item, dict) and item.get("type") == "image"
        )
        tool_result_images = sum(
            1
            for message in self.messages
            for item in (message.get("content", []) if isinstance(message.get("content"), list) else [])
            if isinstance(item, dict) and item.get("type") == "tool_result"
            for content in item.get("content", [])
            if isinstance(content, dict) and content.get("type") == "image"
        )
        logger.info(
            f"Anthropic API request:\n"
            f"  Model: {model_name}\n"
            f"  Display Size: {self.display_size}\n"
            f"  Betas: {betas}\n"
            f"  Images: {total_images} ({tool_result_images} in tool_results)\n"
            f"  Messages: {len(self.messages)}"
        )
        
        # Try API call with retry and backup
        client = self._create_client()
        response = None
        
        try:
            # Retry loop with automatic image count reduction on 25MB error
            for attempt in range(API_RETRY_TIMES):
                try:
                    # NOTE(review): this SDK call is synchronous and still
                    # occupies the event loop for the duration of the request.
                    response = client.beta.messages.create(
                        max_tokens=self.max_tokens,
                        messages=self.messages,
                        model=model_name,
                        system=[system],
                        tools=tools,
                        betas=betas,
                        extra_body=extra_body
                    )
                    logger.info(f"API call succeeded on attempt {attempt + 1}")
                    break
                    
                except (APIError, APIStatusError, APIResponseValidationError) as e:
                    error_msg = str(e)
                    logger.warning(f"Anthropic API error (attempt {attempt+1}/{API_RETRY_TIMES}): {error_msg}")
                    
                    # Handle 25MB payload limit error (including HTTP 413)
                    if ("25000000" in error_msg or 
                        "Member must have length less than or equal to" in error_msg or 
                        "request_too_large" in error_msg or 
                        "413" in error_msg):
                        logger.warning("Detected 25MB limit error, reducing image count")
                        current_count = self.only_n_most_recent_images
                        new_count = max(1, current_count // 2)
                        # The reduced limit is kept for later calls as well.
                        self.only_n_most_recent_images = new_count
                        
                        maybe_filter_to_n_most_recent_images(
                            self.messages,
                            new_count,
                            min_removal_threshold=1,  # Aggressive filtering when hitting limit
                        )
                        logger.info(f"Image count reduced from {current_count} to {new_count}")
                    
                    if attempt < API_RETRY_TIMES - 1:
                        # Non-blocking pause: time.sleep() here would freeze
                        # every other coroutine for the whole retry interval.
                        await asyncio.sleep(API_RETRY_INTERVAL)
                    else:
                        # Out of attempts: let the outer handler try the backup key.
                        raise
        
        except (APIError, APIStatusError, APIResponseValidationError) as e:
            logger.error(f"Primary API key failed: {e}")
            
            # Try backup API key if available
            if self.backup_api_key:
                logger.warning("Retrying with backup API key...")
                try:
                    backup_client = self._create_client(self.backup_api_key)
                    response = backup_client.beta.messages.create(
                        max_tokens=self.max_tokens,
                        messages=self.messages,
                        model=model_name,
                        system=[system],
                        tools=tools,
                        betas=betas,
                        extra_body=extra_body
                    )
                    logger.info("Successfully used backup API key")
                except Exception as backup_e:
                    logger.error(f"Backup API key also failed: {backup_e}")
                    return None, ["FAIL"]
            else:
                return None, ["FAIL"]
        
        except Exception as e:
            logger.error(f"Unexpected error: {e}")
            return None, ["FAIL"]
        
        if not response:
            return None, ["FAIL"]
        
        # Parse response using utility function
        response_params = response_to_params(response)
        
        # Extract reasoning and commands
        reasoning = ""
        commands = []
        
        for block in response_params:
            block_type = block.get("type")
            
            # The last text/thinking block seen wins as the reasoning string.
            if block_type == "text":
                reasoning = block.get("text", "")
            elif block_type == "thinking":
                reasoning = block.get("thinking", "")
            elif block_type == "tool_use":
                tool_input = block.get("input", {})
                command = self._parse_computer_tool_use(tool_input)
                if command:
                    commands.append(command)
                else:
                    logger.warning(f"Failed to parse tool_use: {tool_input}")
        
        # Store assistant response
        self.messages.append({
            "role": "assistant",
            "content": response_params
        })
        
        logger.info(f"Parsed {len(commands)} commands from response")
        
        return reasoning, commands
    
    def _add_tool_result(
        self,
        tool_use_id: str,
        result: str,
        screenshot_bytes: Optional[bytes] = None
    ):
        """
        Add tool result to message history.
        IMPORTANT: Put screenshot BEFORE text for consistency with initial message.
        """
        # Build content list with image first (if provided), then text
        content_list = []
        
        # Add screenshot first if provided (consistent with initial message ordering)
        if screenshot_bytes is not None:
            screenshot_b64 = base64.b64encode(screenshot_bytes).decode('utf-8')
            content_list.append({
                "type": "image",
                "source": {
                    "type": "base64",
                    "media_type": "image/png",
                    "data": screenshot_b64
                }
            })
        
        # Then add text result
        content_list.append({"type": "text", "text": result})
        
        tool_result_content = [{
            "type": "tool_result",
            "tool_use_id": tool_use_id,
            "content": content_list
        }]
        
        self.messages.append({
            "role": "user",
            "content": tool_result_content
        })
    
    def _parse_computer_tool_use(self, tool_input: Dict[str, Any]) -> Optional[str]:
        """
        Parse Anthropic computer tool use to pyautogui command.
        
        Args:
            tool_input: Tool input from Anthropic (action, coordinate, text, etc.)
        
        Returns:
            PyAutoGUI command string or control command (DONE, FAIL)
        """
        action = tool_input.get("action")
        if not action:
            return None
        
        # Action conversion
        action_conversion = {
            "left click": "click",
            "right click": "right_click"
        }
        action = action_conversion.get(action, action)
        
        text = tool_input.get("text")
        coordinate = tool_input.get("coordinate")
        scroll_direction = tool_input.get("scroll_direction")
        scroll_amount = tool_input.get("scroll_amount", 5)
        
        # Scale coordinates to actual screen size
        if coordinate:
            coordinate = self._scale_coordinates(coordinate[0], coordinate[1])
        
        # Build commands
        command = ""
        
        if action == "mouse_move":
            if coordinate:
                x, y = coordinate
                command = f"pyautogui.moveTo({x}, {y}, duration=0.5)"
        
        elif action in ("left_click", "click"):
            if coordinate:
                x, y = coordinate
                command = f"pyautogui.click({x}, {y})"
            else:
                command = "pyautogui.click()"
        
        elif action == "right_click":
            if coordinate:
                x, y = coordinate
                command = f"pyautogui.rightClick({x}, {y})"
            else:
                command = "pyautogui.rightClick()"
        
        elif action == "double_click":
            if coordinate:
                x, y = coordinate
                command = f"pyautogui.doubleClick({x}, {y})"
            else:
                command = "pyautogui.doubleClick()"
        
        elif action == "middle_click":
            if coordinate:
                x, y = coordinate
                command = f"pyautogui.middleClick({x}, {y})"
            else:
                command = "pyautogui.middleClick()"
        
        elif action == "left_click_drag":
            if coordinate:
                x, y = coordinate
                command = f"pyautogui.dragTo({x}, {y}, duration=0.5)"
        
        elif action == "key":
            if text:
                keys = text.split('+')
                # Key conversion
                key_conversion = {
                    "page_down": "pagedown",
                    "page_up": "pageup",
                    "super_l": "win",
                    "super": "command",
                    "escape": "esc"
                }
                converted_keys = [key_conversion.get(k.strip().lower(), k.strip().lower()) for k in keys]
                
                # Press and release keys
                for key in converted_keys:
                    command += f"pyautogui.keyDown('{key}'); "
                for key in reversed(converted_keys):
                    command += f"pyautogui.keyUp('{key}'); "
                # Remove trailing semicolon and space
                command = command.rstrip('; ')
        
        elif action == "type":
            if text:
                command = f"pyautogui.typewrite({repr(text)}, interval=0.01)"
        
        elif action == "scroll":
            if scroll_direction in ("up", "down"):
                scroll_value = scroll_amount if scroll_direction == "up" else -scroll_amount
                if coordinate:
                    x, y = coordinate
                    command = f"pyautogui.scroll({scroll_value}, {x}, {y})"
                else:
                    command = f"pyautogui.scroll({scroll_value})"
            elif scroll_direction in ("left", "right"):
                scroll_value = scroll_amount if scroll_direction == "right" else -scroll_amount
                if coordinate:
                    x, y = coordinate
                    command = f"pyautogui.hscroll({scroll_value}, {x}, {y})"
                else:
                    command = f"pyautogui.hscroll({scroll_value})"
        
        elif action == "screenshot":
            # Screenshot is automatically handled by the system
            # Return special marker to indicate no action needed
            return "SCREENSHOT"
        
        elif action == "wait":
            # Wait for specified duration
            duration = tool_input.get("duration", 1)
            command = f"pyautogui.sleep({duration})"
        
        elif action == "done":
            return "DONE"
        
        elif action == "fail":
            return "FAIL"
        
        return command if command else None
    
    def reset(self):
        """Discard the stored conversation so the next request starts from an empty history."""
        self.messages = list()
        logger.info("Reset AnthropicGUIClient message history")

================================================
FILE: anytool/grounding/backends/gui/anthropic_utils.py
================================================
from typing import List, cast
from enum import Enum
from datetime import datetime
from anytool.utils.logging import Logger

logger = Logger.get_logger(__name__)

# The Anthropic SDK is an optional dependency; ANTHROPIC_AVAILABLE records
# whether its beta message types could be imported.
try:
    from anthropic.types.beta import (
        BetaCacheControlEphemeralParam,
        BetaContentBlockParam,
        BetaImageBlockParam,
        BetaMessage,
        BetaMessageParam,
        BetaTextBlock,
        BetaTextBlockParam,
        BetaToolResultBlockParam,
        BetaToolUseBlockParam,
    )
    ANTHROPIC_AVAILABLE = True
except ImportError:
    ANTHROPIC_AVAILABLE = False
    # Function annotations in this module are evaluated when the functions are
    # defined, so these names must exist even without the SDK; otherwise merely
    # importing the module raises NameError. Plain stand-ins are sufficient
    # because the helpers that use these types return early when
    # ANTHROPIC_AVAILABLE is False.
    BetaCacheControlEphemeralParam = dict
    BetaContentBlockParam = dict
    BetaImageBlockParam = dict
    BetaMessageParam = dict
    BetaTextBlockParam = dict
    BetaToolResultBlockParam = dict
    BetaToolUseBlockParam = dict
    BetaMessage = object
    BetaTextBlock = object


# Beta flags: identifiers of the Anthropic beta features used by the GUI client
# Computer-use beta, for claude-sonnet-4-5 with computer-use-2025-01-24
COMPUTER_USE_BETA_FLAG = "computer-use-2025-01-24"
# Prompt-caching beta
PROMPT_CACHING_BETA_FLAG = "prompt-caching-2024-07-31"


class APIProvider(Enum):
    """API Provider enumeration; only the direct Anthropic API is currently enabled."""
    ANTHROPIC = "anthropic"
    # Bedrock and Vertex are commented out (disabled)
    # BEDROCK = "bedrock"
    # VERTEX = "vertex"


# Provider to model name mapping (simplified for claude-sonnet-4-5 only)
# Keys are (provider, model alias) tuples; values are the model name string for
# that provider (presumably the name sent to the API; verify against callers).
PROVIDER_TO_DEFAULT_MODEL_NAME: dict = {
    (APIProvider.ANTHROPIC, "claude-sonnet-4-5"): "claude-sonnet-4-5",
    # Disabled entries for the disabled providers
    # (APIProvider.BEDROCK, "claude-sonnet-4-5"): "us.anthropic.claude-sonnet-4-5-v1:0",
    # (APIProvider.VERTEX, "claude-sonnet-4-5"): "claude-sonnet-4-5-v1",
}


def get_system_prompt(platform: str = "Ubuntu") -> str:
    """
    Build the system prompt for the given platform.

    The platform name is matched case-insensitively: "windows"/"win32" select
    the Windows prompt, "macos"/"darwin"/"mac" the macOS prompt, and anything
    else falls back to the Ubuntu/Linux prompt. Every prompt embeds today's
    date and is wrapped in <SYSTEM_CAPABILITY> tags.

    Args:
        platform: Platform type (Ubuntu, Windows, macOS, or Darwin)

    Returns:
        System prompt string
    """
    family = platform.lower()
    today = datetime.today().strftime('%A, %B %d, %Y')

    # Guidance common to all platforms, placed between the intro and the date
    shared_guidance = [
        "* You can use the computer tool to interact with the desktop: take screenshots, click, type, and control applications.",
        "* To accomplish tasks, you MUST use the computer tool to see the screen and take actions.",
        "* To open browser, please just click on the Chrome icon. Note, Chrome is what is installed on your system.",
        "* When viewing a page it can be helpful to zoom out so that you can see everything on the page. Either that, or make sure you scroll down to see everything before deciding something isn't available.",
        "* DO NOT ask users for clarification during task execution. DO NOT stop to request more information from users. Always take action using available tools.",
        "* When using your computer function calls, they take a while to run and send back to you. Where possible/feasible, try to chain multiple of these calls all into one function calls request.",
    ]
    # Closing guidance common to all platforms
    shared_closing = [
        "* After each action, the system will provide you with a new screenshot showing the result.",
        "* Continue taking actions until the task is complete.",
    ]

    if family in ("windows", "win32"):
        intro = "* You are utilising a Windows virtual machine using x86_64 architecture with internet access."
        platform_notes = [
            "* Home directory of this Windows system is 'C:\\Users\\user'.",
            "* When you want to open some applications on Windows, please use Double Click on it instead of clicking once.",
        ]
        extra_closing = []
    elif family in ("macos", "darwin", "mac"):
        intro = "* You are utilising a macOS system with internet access."
        platform_notes = [
            "* Home directory of this macOS system is typically '/Users/[username]' or can be accessed via '~'.",
            "* On macOS, use Command (⌘) key combinations instead of Ctrl (e.g., Command+C for copy).",
        ]
        # Only the macOS prompt carries this final instruction
        extra_closing = [
            "* When the task is completed, simply describe what you've done in your response WITHOUT using the tool again.",
        ]
    else:  # Ubuntu/Linux
        intro = "* You are utilising an Ubuntu virtual machine using x86_64 architecture with internet access."
        platform_notes = [
            "* Home directory of this Ubuntu system is '/home/user'.",
        ]
        extra_closing = []

    prompt_lines = [
        "<SYSTEM_CAPABILITY>",
        intro,
        *shared_guidance,
        f"* The current date is {today}.",
        *platform_notes,
        *shared_closing,
        *extra_closing,
        "</SYSTEM_CAPABILITY>",
    ]
    return "\n".join(prompt_lines)


def inject_prompt_caching(messages: List[BetaMessageParam]) -> None:
    """
    Set cache breakpoints for the 3 most recent turns.
    One cache breakpoint is left for tools/system prompt, to be shared across sessions.

    Only user messages whose content is a list are considered. Walking from
    newest to oldest, the last content block of each of the first three such
    messages gets an ephemeral ``cache_control`` marker; on the fourth, any
    existing marker is removed and the walk stops.

    Args:
        messages: Message history (modified in place)
    """
    if not ANTHROPIC_AVAILABLE:
        return

    remaining = 3
    for turn in reversed(messages):
        if turn["role"] != "user":
            continue
        blocks = turn["content"]
        if not isinstance(blocks, list):
            continue

        if remaining == 0:
            # Budget spent: clear the marker left on the next older turn.
            # we'll only ever have one extra turn per loop
            blocks[-1].pop("cache_control", None)
            break

        remaining -= 1
        # Use type ignore to bypass TypedDict check until SDK types are updated
        blocks[-1]["cache_control"] = BetaCacheControlEphemeralParam(  # type: ignore
            {"type": "ephemeral"}
        )


def maybe_filter_to_n_most_recent_images(
    messages: List[BetaMessageParam],
    images_to_keep: int,
    min_removal_threshold: int,
) -> None:
    """
    With the assumption that images are screenshots that are of diminishing value as
    the conversation progresses, remove all but the final `images_to_keep` tool_result
    images in place, with a chunk of min_removal_threshold to reduce the amount we
    break the implicit prompt cache.

    Only images nested inside ``tool_result`` blocks are counted and removed;
    the oldest ones are dropped first. Does nothing when the Anthropic SDK is
    unavailable or `images_to_keep` is None.

    Args:
        messages: Message history (modified in place)
        images_to_keep: Number of recent images to keep
        min_removal_threshold: Minimum number of images to remove at once (for cache
            efficiency). Values of 1 or less disable chunking.
    """
    if not ANTHROPIC_AVAILABLE or images_to_keep is None:
        return

    # Every tool_result block in the history, oldest first
    tool_result_blocks = cast(
        list[BetaToolResultBlockParam],
        [
            item
            for message in messages
            for item in (
                message["content"] if isinstance(message["content"], list) else []
            )
            if isinstance(item, dict) and item.get("type") == "tool_result"
        ],
    )

    total_images = sum(
        1
        for tool_result in tool_result_blocks
        for content in tool_result.get("content", [])
        if isinstance(content, dict) and content.get("type") == "image"
    )

    images_to_remove = total_images - images_to_keep
    # for better cache behavior, we want to remove in chunks.
    # Only thresholds above 1 change the count (x % 1 is always 0). Guarding
    # here also avoids a ZeroDivisionError for 0 and stops a negative threshold
    # from raising the removal count above what was requested.
    if min_removal_threshold > 1:
        images_to_remove -= images_to_remove % min_removal_threshold

    if images_to_remove <= 0:
        # Nothing to drop; leave every content list untouched
        return

    for tool_result in tool_result_blocks:
        if isinstance(tool_result.get("content"), list):
            new_content = []
            for content in tool_result.get("content", []):
                if isinstance(content, dict) and content.get("type") == "image":
                    if images_to_remove > 0:
                        images_to_remove -= 1
                        continue
                new_content.append(content)
            tool_result["content"] = new_content


def response_to_params(response: BetaMessage) -> List[BetaContentBlockParam]:
    """
    Convert an Anthropic response into a list of content block params.

    Text blocks are kept only when they are ``BetaTextBlock`` instances with
    non-empty text. Thinking blocks keep their text and, when present, their
    signature. Tool use blocks are reduced to type/id/name/input. Any other
    block is passed through via ``model_dump()``; a block that cannot be
    dumped is logged and skipped.

    Args:
        response: Anthropic API response

    Returns:
        List of content blocks (empty when the SDK is unavailable or the
        response has no content)
    """
    if not ANTHROPIC_AVAILABLE:
        return []
    if not response.content:
        return []

    params: List[BetaContentBlockParam] = []
    for block in response.content:
        # type may be a string or enum, so compare on its string form
        kind = str(getattr(block, "type", ""))

        if kind == "text":
            # Regular text block
            if isinstance(block, BetaTextBlock) and block.text:
                params.append(BetaTextBlockParam(type="text", text=block.text))
            continue

        if kind == "thinking":
            # Thinking block (for Claude 4 and Sonnet 3.7)
            thinking_entry = {
                "type": "thinking",
                "thinking": getattr(block, "thinking", ""),
            }
            if hasattr(block, "signature"):
                thinking_entry["signature"] = getattr(block, "signature", None)
            params.append(cast(BetaContentBlockParam, thinking_entry))
            continue

        if kind == "tool_use":
            # Only include required fields to avoid API errors
            # (e.g., 'caller' field is not permitted by Anthropic API)
            tool_use_entry = {
                "type": "tool_use",
                "id": block.id,
                "name": block.name,
                "input": block.input,
            }
            params.append(cast(BetaToolUseBlockParam, tool_use_entry))
            continue

        # Unknown block type - try to handle generically
        try:
            params.append(cast(BetaContentBlockParam, block.model_dump()))
        except Exception as e:
            logger.warning(f"Failed to parse block type {kind}: {e}")

    return params


================================================
FILE: anytool/grounding/backends/gui/config.py
================================================
from typing import Dict, Any, Optional
import os
import platform as platform_module
from anytool.utils.logging import Logger

logger = Logger.get_logger(__name__)


def build_llm_config(user_config: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """
    Build complete LLM configuration with auto-detection and environment variables.

    Auto-detects:
    - API key from environment variables (ANTHROPIC_API_KEY)
    - Platform from system (macOS/Windows/Ubuntu)
    - Provider defaults to 'anthropic'

    User-provided config values will override auto-detected values.
    A warning is logged only when no API key is resolved from either source,
    and the key itself is never written to the log.

    Args:
        user_config: User-provided configuration (optional)

    Returns:
        Complete LLM configuration dict. ``api_key`` is None when neither the
        user config nor the environment provides one; ``screen_size`` is only
        present when the user config supplies it.

    Example:
        >>> # Auto-detect everything
        >>> config = build_llm_config()

        >>> # Override specific values
        >>> config = build_llm_config({
        ...     "model": "claude-3-5-sonnet-20241022",
        ...     "max_tokens": 8192
        ... })
    """
    if user_config is None:
        user_config = {}

    # Auto-detect platform
    system = platform_module.system()
    if system == "Darwin":
        detected_platform = "macOS"
    elif system == "Windows":
        detected_platform = "Windows"
    else:  # Linux
        detected_platform = "Ubuntu"

    # Auto-detect API key from environment
    env_api_key = os.environ.get("ANTHROPIC_API_KEY")

    # Build configuration with precedence: user_config > auto-detected > defaults
    config = {
        "type": user_config.get("type", "anthropic"),
        "model": user_config.get("model", "claude-sonnet-4-5"),
        "platform": user_config.get("platform", detected_platform),
        "api_key": user_config.get("api_key", env_api_key),
        "provider": user_config.get("provider", "anthropic"),
        "max_tokens": user_config.get("max_tokens", 4096),
        "only_n_most_recent_images": user_config.get("only_n_most_recent_images", 3),
        "enable_prompt_caching": user_config.get("enable_prompt_caching", True),
    }

    # Optional: screen_size (will be auto-detected from screenshot later)
    if "screen_size" in user_config:
        config["screen_size"] = user_config["screen_size"]

    logger.info(f"Built LLM config - Platform: {config['platform']}, Model: {config['model']}")
    if config["api_key"]:
        # Report only that a key exists; no part of the secret goes to the log
        logger.info("API key loaded")
    else:
        # Warn on the resolved value so a key passed via user_config does not
        # trigger a spurious "missing environment variable" warning
        logger.warning(
            "Anthropic API key not found in user config or environment. "
            "Please set it: export ANTHROPIC_API_KEY='your-key'"
        )

    return config

================================================
FILE: anytool/grounding/backends/gui/provider.py
================================================
from typing import Dict, Any, Union
from anytool.grounding.core.types import BackendType, SessionConfig
from anytool.grounding.core.provider import Provider
from anytool.grounding.core.session import BaseSession
from anytool.config import get_config
from anytool.config.utils import get_config_value
from anytool.platform import get_local_server_config
from anytool.utils.logging import Logger
from .transport.connector import GUIConnector
from .transport.local_connector import LocalGUIConnector
from .session import GUISession

logger = Logger.get_logger(__name__)


class GUIProvider(Provider):
    """
    Provider for the GUI desktop environment.

    Talks to desktop_env either through the HTTP API of a running local_server
    or by executing GUI operations directly in-process.

    Modes (read from the "gui" backend configuration):
    - "local": Execute GUI operations directly in-process (no server needed)
    - "server": Connect to a running local_server via HTTP API

    A default session named ``DEFAULT_SID`` is created when the provider is
    initialized.
    """

    DEFAULT_SID = BackendType.GUI.value

    def __init__(self, config: Dict[str, Any] = None):
        """
        Initialize GUI provider.

        Args:
            config: Provider configuration
        """
        super().__init__(BackendType.GUI, config)
        # One connector per session, keyed by session name
        self.connectors: Dict[str, Union[GUIConnector, LocalGUIConnector]] = {}

    async def initialize(self) -> None:
        """
        Create the default session on first call; later calls are no-ops.
        """
        if self.is_initialized:
            return

        logger.info("Initializing GUI provider")
        # Auto-create default session
        default_session_config = SessionConfig(
            session_name=self.DEFAULT_SID,
            backend_type=BackendType.GUI,
            connection_params={}
        )
        await self.create_session(default_session_config)
        self.is_initialized = True

    async def create_session(self, session_config: SessionConfig) -> BaseSession:
        """
        Create a GUI session and register it together with its connector.

        Values in ``session_config.connection_params`` take precedence over
        the GUI backend configuration.

        Args:
            session_config: Session configuration

        Returns:
            GUISession instance
        """
        # Load GUI backend configuration
        backend_cfg = get_config().get_backend_config("gui")

        # Execution mode: "local" unless the backend config says otherwise
        mode = getattr(backend_cfg, "mode", "local")

        params = session_config.connection_params
        timeout = get_config_value(params, 'timeout', backend_cfg.timeout)
        retry_times = get_config_value(params, 'retry_times', backend_cfg.max_retries)
        retry_interval = get_config_value(params, 'retry_interval', backend_cfg.retry_interval)

        # Fill in the failsafe flag now; "{command}" stays a placeholder
        default_prefix = backend_cfg.pkgs_prefix.format(
            failsafe=str(bool(backend_cfg.failsafe)),
            command="{command}",
        )
        pkgs_prefix = get_config_value(params, 'pkgs_prefix', default_prefix)

        # Arguments both connector types accept
        shared_kwargs = {
            "timeout": timeout,
            "retry_times": retry_times,
            "retry_interval": retry_interval,
            "pkgs_prefix": pkgs_prefix,
        }

        if mode == "local":
            logger.info("GUI backend using LOCAL mode (no server required)")
            connector = LocalGUIConnector(**shared_kwargs)
        else:
            logger.info("GUI backend using SERVER mode (connecting to local_server)")
            server_cfg = get_local_server_config()
            connector = GUIConnector(
                vm_ip=get_config_value(params, 'vm_ip', server_cfg['host']),
                server_port=get_config_value(params, 'server_port', server_cfg['port']),
                **shared_kwargs,
            )

        name = session_config.session_name
        session = GUISession(
            connector=connector,
            session_id=name,
            backend_type=BackendType.GUI,
            config=session_config,
        )

        # Store connector and session
        self.connectors[name] = connector
        self._sessions[name] = session

        logger.info(f"Created GUI session: {name} (mode={mode})")
        return session

    async def close_session(self, session_name: str) -> None:
        """
        Disconnect and unregister a session and its connector.

        Unknown names are ignored apart from the final log line.

        Args:
            session_name: Name of the session to close
        """
        # Session first, then connector; each entry is only removed once its
        # disconnect() has returned
        for registry in (self._sessions, self.connectors):
            if session_name in registry:
                await registry[session_name].disconnect()
                del registry[session_name]

        logger.info(f"Closed GUI session: {session_name}")

================================================
FILE: anytool/grounding/backends/gui/session.py
================================================
from typing import Dict, Any, Union
import os
from anytool.grounding.core.session import BaseSession
from anytool.grounding.core.types import BackendType, SessionStatus, SessionConfig
from anytool.utils.logging import Logger
from .transport.connector import GUIConnector
from .transport.local_connector import LocalGUIConnector
from .tool import GUIAgentTool
from .config import build_llm_config

logger = Logger.get_logger(__name__)


class GUISession(BaseSession):
    """
    Session for GUI desktop environment.
    Manages connection and tools for GUI automation.
    """

    def __init__(
        self,
        connector: Union[GUIConnector, LocalGUIConnector],
        session_id: str,
        backend_type: BackendType,
        config: SessionConfig,
        auto_connect: bool = True,
        auto_initialize: bool = True,
    ):
        """
        Initialize GUI session.

        Args:
            connector: GUI connector (HTTP or local in-process)
            session_id: Unique session identifier
            backend_type: Backend type (GUI)
            config: Session configuration
            auto_connect: Auto-connect on context enter
            auto_initialize: Auto-initialize on context enter
        """
        super().__init__(
            connector=connector,
            session_id=session_id,
            backend_type=backend_type,
            auto_connect=auto_connect,
            auto_initialize=auto_initialize,
        )
        self.config = config
        self.gui_connector = connector

    async def initialize(self) -> Dict[str, Any]:
        """
        Initialize session: connect, set up the optional LLM client, and
        register the GUI agent tool.

        Returns:
            Session information dict
        """
        logger.info(f"Initializing GUI session: {self.session_id}")

        # Ensure connected
        if not self.connector.is_connected:
            await self.connect()

        # Create LLM client if configured
        user_llm_config = self.config.connection_params.get("llm_config")
        llm_client = await self._create_llm_client(user_llm_config)

        # Get recording_manager from connection_params if available
        recording_manager = self.config.connection_params.get("recording_manager")

        # Create GUI Agent Tool
        self.tools = [
            GUIAgentTool(
                connector=self.gui_connector,
                llm_client=llm_client,
                recording_manager=recording_manager
            )
        ]

        logger.info(f"Initialized GUI session with {len(self.tools)} tool(s)")

        # Return session info
        # NOTE(review): assumes LocalGUIConnector also exposes vm_ip and
        # server_port (and base_url in connect()); confirm against the local
        # connector.
        session_info = {
            "session_id": self.session_id,
            "backend_type": self.backend_type.value,
            "vm_ip": self.gui_connector.vm_ip,
            "server_port": self.gui_connector.server_port,
            "num_tools": len(self.tools),
            "tools": [tool.name for tool in self.tools],
            "llm_client": "anthropic" if llm_client else "none",
        }

        return session_info

    async def _create_llm_client(self, user_llm_config):
        """Build the Anthropic GUI client, or return None when it is not configured or cannot be created."""
        # Build complete LLM config with auto-detection
        # If user provides llm_config, merge with auto-detected values
        # If user doesn't provide llm_config, try to auto-build one if ANTHROPIC_API_KEY exists
        if not (user_llm_config or os.environ.get("ANTHROPIC_API_KEY")):
            return None

        llm_config = build_llm_config(user_llm_config)
        if llm_config.get("type") != "anthropic":
            return None

        # Check if API key is available
        if not llm_config.get("api_key"):
            logger.warning(
                "Anthropic API key not found. Skipping LLM client initialization. "
                "Set ANTHROPIC_API_KEY environment variable or provide api_key in llm_config."
            )
            return None

        llm_client = None
        try:
            # Imported lazily so a failing import only disables the LLM client
            from .anthropic_client import AnthropicGUIClient

            screen_size = await self._detect_screen_size(llm_config)

            # Detect PyAutoGUI working size (logical pixels)
            pyautogui_size = await self.gui_connector.get_screen_size()
            if pyautogui_size:
                logger.info(f"PyAutoGUI working size (logical): {pyautogui_size}")
            else:
                # If we can't detect PyAutoGUI size, assume it's the same as screen size
                pyautogui_size = screen_size
                logger.warning(f"Could not detect PyAutoGUI size, assuming same as screen: {pyautogui_size}")

            llm_client = AnthropicGUIClient(
                model=llm_config["model"],
                platform=llm_config["platform"],
                api_key=llm_config["api_key"],
                provider=llm_config["provider"],
                screen_size=screen_size,
                pyautogui_size=pyautogui_size,
                max_tokens=llm_config["max_tokens"],
                only_n_most_recent_images=llm_config["only_n_most_recent_images"],
            )
            logger.info(
                f"Initialized Anthropic LLM client - "
                f"Model: {llm_config['model']}, Platform: {llm_config['platform']}"
            )
        except Exception as e:
            logger.warning(f"Failed to initialize Anthropic client: {e}")

        return llm_client

    async def _detect_screen_size(self, llm_config: Dict[str, Any]):
        """Return the screen size from a screenshot, else from the connector, else from config (default 1920x1080)."""
        # Detect actual screen size from screenshot (most accurate)
        # PyAutoGUI may report logical resolution, but we need the actual screenshot size
        try:
            screenshot_bytes = await self.gui_connector.get_screenshot()
            if not screenshot_bytes:
                raise RuntimeError("Could not get screenshot")
            from PIL import Image
            import io
            screen_size = Image.open(io.BytesIO(screenshot_bytes)).size
            logger.info(f"Auto-detected screen size from screenshot: {screen_size}")
            return screen_size
        except Exception as e:
            # Record why the preferred method failed before falling back
            logger.warning(f"Could not detect screen size from screenshot: {e}")

        # Fallback to pyautogui detection
        screen_size = await self.gui_connector.get_screen_size()
        if screen_size:
            logger.info(f"Auto-detected screen size from pyautogui: {screen_size}")
            return screen_size

        # Final fallback to configured value
        screen_size = llm_config.get("screen_size", (1920, 1080))
        logger.warning(f"Could not auto-detect screen size, using configured: {screen_size}")
        return screen_size

    async def connect(self) -> None:
        """Connect to GUI desktop environment; does nothing if already connected."""
        if self.connector.is_connected:
            return

        self.status = SessionStatus.CONNECTING
        logger.info(f"Connecting to desktop_env at {self.gui_connector.base_url}")

        await self.connector.connect()

        self.status = SessionStatus.CONNECTED
        logger.info("Connected to desktop environment")

    async def disconnect(self) -> None:
        """Disconnect from GUI desktop environment; does nothing if not connected."""
        if not self.connector.is_connected:
            return

        logger.info("Disconnecting from desktop environment")
        await self.connector.disconnect()

        self.status = SessionStatus.DISCONNECTED
        logger.info("Disconnected from desktop environment")

    @property
    def is_connected(self) -> bool:
        """Check if session is connected"""
        return self.connector.is_connected

================================================
FILE: anytool/grounding/backends/gui/tool.py
================================================
import base64
from typing import Any, Dict
from anytool.grounding.core.tool.base import BaseTool
from anytool.grounding.core.types import BackendType, ToolResult, ToolStatus
from .transport.connector import GUIConnector
from .transport.actions import ACTION_SPACE, KEYBOARD_KEYS
from anytool.utils.logging import Logger

logger = Logger.get_logger(__name__)


class GUIAgentTool(BaseTool):
    """
    LLM-powered GUI Agent Tool.
    
    This tool acts as an intelligent agent that:
    - Takes a task description as input
    - Observes the desktop via screenshot
    - Uses LLM/VLM to understand and plan actions
    - Outputs action space commands
    - Executes actions through the connector

    Class attributes:
    - _name: Identifier string of the tool ("gui_agent")
    - _description: Usage guidance text for the tool, including advice on
      choosing max_steps
    - backend_type: Always BackendType.GUI
    """
    
    # Presumably exposed by BaseTool as the public tool name — confirm against BaseTool.
    _name = "gui_agent"
    # NOTE(review): the text below states "Default is 25" / "default 25" for
    # max_steps, but _arun declares `max_steps: int = 50`. The string is part of
    # runtime behavior, so it is left untouched here; reconcile the two deliberately.
    _description = """Vision-based GUI automation agent for tasks requiring graphical interface interaction.
    
    Use this tool when the task involves:
    - Operating desktop applications with graphical interfaces (browsers, editors, design tools, etc.)
    - Tasks that require visual understanding of UI elements, layouts, or content
    - Multi-step workflows that need click, drag, type, or other GUI interactions
    - Scenarios where programmatic APIs or command-line tools are unavailable or insufficient
    
    The agent observes screen state through screenshots, uses vision-language models to understand
    the interface, plans appropriate actions, and executes GUI operations autonomously.
    
    IMPORTANT - max_steps Parameter Guidelines:
    - Simple tasks (1-2 actions): 15-20 steps
    - Medium tasks (3-5 actions): 25-35 steps  
    - Complex tasks (6+ actions, like web navigation): 35-50 steps
    - When uncertain, prefer larger values (35+) to avoid premature termination
    - Default is 25, but increase for multi-step workflows
    
    Input: 
    - task_description: Natural language task description
    - max_steps: Maximum actions (default 25, increase for complex tasks)
    
    Output: Task execution results with action history and completion status
    """
    
    # Marks this tool as belonging to the GUI backend.
    backend_type = BackendType.GUI
    
    def __init__(self, connector: GUIConnector, llm_client=None, recording_manager=None, **kwargs):
        """
        Build the tool around a GUI connector.

        Args:
            connector: GUI connector used to communicate with desktop_env
            llm_client: Optional LLM/VLM client for vision-based planning;
                while it is None, planning returns a FAIL action
            recording_manager: Optional RecordingManager used when recording
                intermediate steps
            **kwargs: Forwarded unchanged to BaseTool.__init__
        """
        super().__init__(**kwargs)
        # Log of executed actions; _arun resets it at the start of every task.
        self.action_history = []
        self.recording_manager = recording_manager
        self.llm_client = llm_client
        self.connector = connector
    
    async def _arun(
        self,
        task_description: str,
        max_steps: int = 50,
    ) -> ToolResult:
        """
        Execute a GUI automation task using LLM planning.
        
        This is the main entry point that:
        1. Rejects an empty task description
        2. Resets the per-task action history
        3. Delegates to _execute_task_with_planning, which loops
           screenshot -> plan -> execute until the task is complete or
           max_steps is reached
        
        Args:
            task_description: Natural language description of the task
            max_steps: Maximum number of planning iterations (default 50).
                NOTE: the tool's _description text still advertises a default
                of 25; the signature default is what actually applies.
                Recommended values based on task complexity:
                - Simple (1-2 actions): 15-20
                - Medium (3-5 actions): 25-35
                - Complex (6+ actions, web navigation, multi-app): 35-50
                When in doubt, use higher values to avoid premature termination
        
        Returns:
            ToolResult with task execution status. Any exception raised by the
            planning loop is converted into an ERROR result whose metadata
            holds the task description and the actions executed so far.
        """
        if not task_description:
            return ToolResult(
                status=ToolStatus.ERROR,
                error="task_description is required"
            )
        
        logger.info(f"Starting GUI task: {task_description}")
        # Each task starts with a clean history; earlier runs are discarded.
        self.action_history = []
        
        # Execute task with LLM planning loop
        try:
            return await self._execute_task_with_planning(
                task_description=task_description,
                max_steps=max_steps,
            )
        
        except Exception as e:
            # Keep the traceback in the log: the ToolResult only carries str(e).
            logger.error(f"Task execution failed: {e}", exc_info=True)
            return ToolResult(
                status=ToolStatus.ERROR,
                error=str(e),
                metadata={
                    "task_description": task_description,
                    "actions_executed": len(self.action_history),
                    "action_history": self.action_history,
                }
            )
    
    async def _execute_task_with_planning(
        self,
        task_description: str,
        max_steps: int,
    ) -> ToolResult:
        """
        Run the observe -> plan -> execute loop until the task ends.
        
        Each iteration:
        1. Observe: capture a screenshot through the connector
        2. Plan: ask _plan_next_action for the next action
        3. Stop on DONE or FAIL; on WAIT skip execution and re-observe
        4. Execute: run the action and append it to self.action_history
        
        WAIT iterations count toward max_steps but are not added to
        self.action_history. A failed execution does not abort the loop; the
        next iteration re-plans from a fresh screenshot.
        
        Args:
            task_description: Task to complete
            max_steps: Maximum planning iterations
        
        Returns:
            ToolResult with SUCCESS when the planner reports DONE, and ERROR
            when the planner reports FAIL, a screenshot cannot be captured, or
            max_steps iterations pass without DONE. Metadata carries the
            action history, the screenshots captured so far and the per-step
            summaries.
        """
        # Collect all screenshots for visual analysis
        all_screenshots = []
        # Collect intermediate steps
        intermediate_steps = []
        
        for step in range(max_steps):
            step_number = step + 1
            logger.info(f"Planning step {step_number}/{max_steps}")
            
            # Step 1: Observe current state
            screenshot = await self.connector.get_screenshot()
            if not screenshot:
                # Return the same trace fields as every other exit path so
                # callers can inspect what happened before the failure.
                return ToolResult(
                    status=ToolStatus.ERROR,
                    error="Failed to get screenshot for planning",
                    metadata={
                        "step": step,
                        "action_history": self.action_history,
                        "screenshots": all_screenshots,
                        "intermediate_steps": intermediate_steps,
                    }
                )
            
            # Collect screenshot for visual analysis
            all_screenshots.append(screenshot)
            
            # Step 2: Plan next action using LLM
            planned_action = await self._plan_next_action(
                task_description=task_description,
                screenshot=screenshot,
                action_history=self.action_history,
            )
            action_type = planned_action["action_type"]
            
            # Check if task is complete
            if action_type == "DONE":
                logger.info("Task marked as complete by LLM")
                reasoning = planned_action.get("reasoning", "Task completed successfully")
                
                intermediate_steps.append({
                    "step_number": step_number,
                    "action": "DONE",
                    "reasoning": reasoning,
                    "status": "done",
                })
                
                return ToolResult(
                    status=ToolStatus.SUCCESS,
                    content=f"Task completed: {task_description}\n\nFinal state: {reasoning}",
                    metadata={
                        "steps_taken": step_number,
                        "action_history": self.action_history,
                        "screenshots": all_screenshots,
                        "intermediate_steps": intermediate_steps,
                        "final_reasoning": reasoning,
                    }
                )
            
            # Check if task failed
            if action_type == "FAIL":
                logger.warning("Task marked as failed by LLM")
                reason = planned_action.get("reason", "Task cannot be completed")
                
                intermediate_steps.append({
                    "step_number": step_number,
                    "action": "FAIL",
                    # FAIL actions carry their explanation under "reason";
                    # fall back to it so the step summary is not left empty.
                    "reasoning": planned_action.get("reasoning") or reason,
                    "status": "failed",
                })
                
                return ToolResult(
                    status=ToolStatus.ERROR,
                    error=reason,
                    metadata={
                        "steps_taken": step_number,
                        "action_history": self.action_history,
                        "screenshots": all_screenshots,
                        "intermediate_steps": intermediate_steps,
                    }
                )
            
            # Check if action is WAIT (screenshot observation, continue to next step)
            if action_type == "WAIT":
                logger.info("Screenshot observation step, continuing planning loop")
                intermediate_steps.append({
                    "step_number": step_number,
                    "action": "WAIT",
                    "reasoning": planned_action.get("reasoning", ""),
                    "status": "observation",
                })
                continue
            
            # Step 3: Execute the planned action
            execution_result = await self._execute_planned_action(planned_action)
            
            # Record action in history
            self.action_history.append({
                "step": step_number,
                "planned_action": planned_action,
                "execution_result": execution_result,
            })
            
            intermediate_steps.append({
                "step_number": step_number,
                "action": planned_action.get("action_type", "unknown"),
                "reasoning": planned_action.get("reasoning", ""),
                "status": execution_result.get("status", "unknown"),
            })
            
            # Check execution result
            if execution_result.get("status") != "success":
                logger.warning(f"Action execution failed: {execution_result.get('error')}")
                # Continue to next iteration for retry planning
        
        # Max steps reached
        return ToolResult(
            status=ToolStatus.ERROR,
            error=f"Task incomplete after {max_steps} steps",
            metadata={
                "task_description": task_description,
                "steps_taken": max_steps,
                "action_history": self.action_history,
                "screenshots": all_screenshots,
                "intermediate_steps": intermediate_steps,
            }
        )
    
    async def _plan_next_action(
        self,
        task_description: str,
        screenshot: bytes,
        action_history: list,
    ) -> Dict[str, Any]:
        """
        Use LLM/VLM to plan the next action.
        
        Two client styles are supported:
        - AnthropicGUIClient: receives the task, raw screenshot and history and
          returns ``(reasoning, commands)``. The sentinel command lists
          ["FAIL"], ["DONE"] and ["SCREENSHOT"] map to FAIL, DONE and WAIT
          actions; a text-only reply is treated as DONE; otherwise the first
          command is returned as a PYAUTOGUI_COMMAND action.
        - Any other client: receives a text prompt, the base64-encoded
          screenshot, ACTION_SPACE and KEYBOARD_KEYS, and its reply is parsed
          by _parse_llm_response.
        
        Planning errors never propagate; they are logged and converted into a
        FAIL action.
        
        Args:
            task_description: The task to accomplish
            screenshot: Current desktop screenshot (PNG/JPEG bytes)
            action_history: Previously executed actions
        
        Returns:
            Dict with action_type and parameters
        """
        if self.llm_client is None:
            # Without a client no plan can be produced; report failure.
            logger.warning("No LLM client configured, using fallback mode")
            return {
                "action_type": "FAIL",
                "reason": "LLM client not configured"
            }
        
        # Check if using Anthropic client
        try:
            from .anthropic_client import AnthropicGUIClient
            is_anthropic = isinstance(self.llm_client, AnthropicGUIClient)
        except ImportError:
            is_anthropic = False
        
        if is_anthropic:
            # Use Anthropic client
            try:
                reasoning, commands = await self.llm_client.plan_action(
                    task_description=task_description,
                    screenshot=screenshot,
                    action_history=action_history,
                )
                
                if commands == ["FAIL"]:
                    return {
                        "action_type": "FAIL",
                        "reason": "Anthropic planning failed",
                        # Keep whatever explanation the model gave so the
                        # failure can be diagnosed from the step trace.
                        "reasoning": reasoning or "",
                    }
                
                if commands == ["DONE"]:
                    return {
                        "action_type": "DONE",
                        "reasoning": reasoning
                    }
                
                if commands == ["SCREENSHOT"]:
                    # Screenshot is automatically handled by system
                    # Continue to next planning step
                    logger.info("LLM requested screenshot (observation step)")
                    return {
                        "action_type": "WAIT",
                        "reasoning": reasoning or "Observing screen state"
                    }
                
                # If no commands but has reasoning, task is complete
                # (Anthropic returns text-only when task is done)
                if not commands and reasoning:
                    logger.info("LLM returned text-only response, interpreting as task completion")
                    return {
                        "action_type": "DONE",
                        "reasoning": reasoning
                    }
                
                # No commands and no reasoning = error
                if not commands:
                    return {
                        "action_type": "FAIL",
                        "reason": "No commands generated and no completion message"
                    }
                
                # Only one command is executed per planning step; make the
                # discarded remainder visible instead of dropping it silently.
                if len(commands) > 1:
                    logger.warning(
                        f"Planner returned {len(commands)} commands; "
                        f"only the first is executed in this step"
                    )
                
                # Return first command (Anthropic returns pyautogui commands directly)
                return {
                    "action_type": "PYAUTOGUI_COMMAND",
                    "command": commands[0],
                    "reasoning": reasoning
                }
                
            except Exception as e:
                logger.error(f"Anthropic planning failed: {e}")
                return {
                    "action_type": "FAIL",
                    "reason": f"Planning error: {str(e)}"
                }
        
        # Generic LLM client (for future integration with other LLMs)
        # Encode screenshot to base64 for LLM
        screenshot_b64 = base64.b64encode(screenshot).decode('utf-8')
        
        # Prepare prompt for LLM
        prompt = self._build_planning_prompt(
            task_description=task_description,
            action_history=action_history,
        )
        
        # Call LLM with vision input
        try:
            response = await self.llm_client.plan_action(
                prompt=prompt,
                image_base64=screenshot_b64,
                action_space=ACTION_SPACE,
                keyboard_keys=KEYBOARD_KEYS,
            )
            
            # Parse LLM response to action dict
            action = self._parse_llm_response(response)
            
            logger.info(f"LLM planned action: {action['action_type']}")
            return action
        
        except Exception as e:
            logger.error(f"LLM planning failed: {e}")
            return {
                "action_type": "FAIL",
                "reason": f"Planning error: {str(e)}"
            }
    
    def _build_planning_prompt(
        self,
        task_description: str,
        action_history: list,
    ) -> str:
        """
        Build prompt for LLM planning.
        
        Args:
            task_description: The task to accomplish
            action_history: Previously executed actions
        
        Returns:
            Formatted prompt string
        """
        prompt = f"""You are a GUI automation agent. Your task is to complete the following:

Task: {task_description}

You can observe the current desktop state through the provided screenshot.
You must plan the next action to take from the available ACTION_SPACE.

Available actions:
- Mouse: MOVE_TO, CLICK, RIGHT_CLICK, DOUBLE_CLICK, DRAG_TO, SCROLL
- Keyboard: TYPING, PRESS, KEY_DOWN, KEY_UP, HOTKEY
- Control: WAIT, DONE, FAIL

"""
        
        if action_history:
            prompt += f"\nPrevious actions taken ({len(action_history)}):\n"
            for i, action in enumerate(action_history[-5:], 1):  # Last 5 actions
                prompt += f"{i}. {action['planned_action']['action_type']}"
                if 'parameters' in action['planned_action']:
                    prompt += f" - {action['planned_action']['parameters']}"
                prompt += "\n"
        
        prompt += """
Based on the screenshot and task, output the next action in JSON format:
{
    "action_type": "ACTION_TYPE",
    "parameters": {...},
    "reasoning": "Why this action is needed"
}

If the task is complete, output: {"action_type": "DONE"}
If the task cannot be completed, output: {"action_type": "FAIL", "reason": "explanation"}
"""
        
        return prompt
    
    def _parse_llm_response(self, response: str) -> Dict[str, Any]:
        """
        Parse LLM response to extract action.
        
        Args:
            response: LLM response (should be JSON)
        
        Returns:
            Action dict with action_type and parameters
        """
        import json
        
        try:
            # Try to parse as JSON
            action = json.loads(response)
            
            # Validate action
            if "action_type" not in action:
                raise ValueError("Missing action_type in LLM response")
            
            return action
        
        except json.JSONDecodeError:
            logger.error(f"Failed to parse LLM response as JSON: {response[:200]}")
            return {
                "action_type": "FAIL",
                "reason": "Invalid LLM response format"
            }
    
    async def _execute_planned_action(
        self,
        action: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Run one planned action through the connector.
        
        PYAUTOGUI_COMMAND actions send their "command" string to the
        connector's execute_python_command; every other action type is passed
        with its "parameters" to the connector's execute_action. Exceptions
        from the connector are logged and turned into an error result.
        
        Args:
            action: Action dict with action_type and parameters
        
        Returns:
            Execution result dict
        """
        kind = action["action_type"]
        
        if kind != "PYAUTOGUI_COMMAND":
            # Standard action space command
            params = action.get("parameters", {})
            logger.info(f"Executing action: {kind}")
            
            try:
                return await self.connector.execute_action(kind, params)
            except Exception as exc:
                logger.error(f"Action execution error: {exc}")
                return {
                    "status": "error",
                    "action_type": kind,
                    "error": str(exc)
                }
        
        # Direct pyautogui command produced by the Anthropic planner
        raw_command = action.get("command", "")
        logger.info(f"Executing pyautogui command: {raw_command}")
        
        try:
            outcome = await self.connector.execute_python_command(raw_command)
            # A falsy outcome is reported as an error
            outcome_status = "success" if outcome else "error"
            return {
                "status": outcome_status,
                "action_type": kind,
                "command": raw_command,
                "result": outcome
            }
        except Exception as exc:
            logger.error(f"Command execution error: {exc}")
            return {
                "status": "error",
                "action_type": kind,
                "error": str(exc)
            }
    
    # Helper methods for direct action execution
    
    async def execute_action(
        self,
        action_type: str,
        parameters: Dict[str, Any]
    ) -> ToolResult:
        """
        Execute a single action directly, without any LLM planning.
        
        Args:
            action_type: Action type from ACTION_SPACE
            parameters: Action parameters
        
        Returns:
            ToolResult with SUCCESS when the connector reports status
            "success", otherwise ERROR carrying the connector's "error" value
            (or "Unknown error"). The raw connector result is the metadata.
        """
        outcome = await self.connector.execute_action(action_type, parameters)
        
        if outcome.get("status") != "success":
            return ToolResult(
                status=ToolStatus.ERROR,
                error=outcome.get("error", "Unknown error"),
                metadata=outcome
            )
        
        return ToolResult(
            status=ToolStatus.SUCCESS,
            content=f"Executed {action_type}",
            metadata=outcome
        )
    
    async def get_screenshot(self) -> ToolResult:
        """
        Capture the current desktop screenshot via the connector.
        
        Returns:
            ToolResult with the screenshot as content and its length in the
            metadata, or an ERROR result when the connector returns nothing.
        """
        image = await self.connector.get_screenshot()
        if not image:
            return ToolResult(
                status=ToolStatus.ERROR,
                error="Failed to capture screenshot"
            )
        
        return ToolResult(
            status=ToolStatus.SUCCESS,
            content=image,
            metadata={"type": "screenshot", "size": len(image)}
        )
    
    async def _record_intermediate_step(
        self,
        step_number: int,
        planned_action: Dict[str, Any],
        execution_result: Dict[str, Any],
        screenshot: bytes,
        task_description: str,
    ) -> None:
        """
        Record an intermediate step of GUI agent execution.
        
        Forwards one planning-action cycle to the recording manager's recorder
        via ``record_step``. Recording is best-effort: when no recording
        manager can be found, recording is not active, the recorder is not
        initialized, or ``record_step`` raises, the method logs and returns
        without propagating an error.
        
        NOTE(review): no call to this method is visible inside GUIAgentTool;
        confirm whether it is invoked elsewhere or is currently unused.
        
        Args:
            step_number: Step number in the execution sequence
            planned_action: Action planned by LLM
            execution_result: Result of executing the action; its "status"
                value drives the success flag and the recorded output text
            screenshot: Screenshot passed on to the recorder (per the caller's
                contract, taken before executing the action)
            task_description: Overall task description
        """
        # Prefer the manager given at construction time; otherwise try to reach
        # one through the runtime info's grounding client, when present.
        recording_manager = self.recording_manager
        if not recording_manager and hasattr(self, '_runtime_info') and self._runtime_info:
            # Try to get from grounding_client
            grounding_client = self._runtime_info.grounding_client
            if grounding_client and hasattr(grounding_client, 'recording_manager'):
                recording_manager = grounding_client.recording_manager
                logger.debug(f"Step {step_number}: Dynamically retrieved recording_manager from grounding_client")
        
        if not recording_manager:
            logger.debug(f"Step {step_number}: No recording_manager available, skipping intermediate step recording")
            return
        
        # Check if recording is active. The import is local and any failure
        # (including the import itself) simply disables recording for this step.
        try:
            from anytool.recording.manager import RecordingManager
            if not RecordingManager.is_recording():
                logger.debug(f"Step {step_number}: RecordingManager not started")
                return
        except Exception as e:
            logger.debug(f"Step {step_number}: Failed to check recording status: {e}")
            return
        
        # Check if recorder is initialized (reaches into the manager's private
        # _recorder attribute).
        if not hasattr(recording_manager, '_recorder') or not recording_manager._recorder:
            logger.warning(f"Step {step_number}: recording_manager._recorder not initialized")
            return
        
        # Build command string for display
        action_type = planned_action.get("action_type", "unknown")
        command = self._format_action_command(planned_action)
        
        # Build result summary: "done" and "observation" count as success
        # alongside "success"; every other status is recorded as an error.
        status = execution_result.get("status", "unknown")
        is_success = status in ("success", "done", "observation")
        
        # Build result content
        if status == "done":
            result_content = f"Task completed at step {step_number}"
        elif status == "failed":
            result_content = execution_result.get("message", "Task failed")
        elif status == "observation":
            result_content = execution_result.get("message", "Screenshot observation")
        else:
            # Fall back from "result" to "message" to the whole dict as text.
            result_content = execution_result.get("result", execution_result.get("message", str(execution_result)))
        
        # Build parameters for recording
        parameters = {
            "task_description": task_description,
            "step_number": step_number,
            "action_type": action_type,
            "planned_action": planned_action,
        }
        
        # Record to trajectory recorder (the screenshot is handed over as-is)
        try:
            await recording_manager._recorder.record_step(
                backend="gui",
                tool="gui_agent_step",
                command=command,
                result={
                    "status": "success" if is_success else "error",
                    "output": str(result_content)[:200],  # Truncate long outputs
                },
                parameters=parameters,
                screenshot=screenshot,
                extra={
                    "gui_step_number": step_number,
                    "reasoning": planned_action.get("reasoning", ""),
                }
            )
            
            logger.info(f"✓ Recorded GUI intermediate step {step_number}: {command}")
        
        except Exception as e:
            # Recording problems must not break task execution; log with traceback.
            logger.error(f"✗ Failed to record intermediate step {step_number}: {e}", exc_info=True)
    
    def _format_action_command(self, planned_action: Dict[str, Any]) -> str:
        """
        Format planned action into a human-readable command string.
        
        Args:
            planned_action: Action dictionary from LLM planning
            
        Returns:
            Formatted command string
        """
        action_type = planned_action.get("action_type", "unknown")
        
        # Handle special action types
        if action_type == "DONE":
            return "DONE (task completed)"
        elif action_type == "FAIL":
            reason = planned_action.get("reason", "unknown")
            return f"FAIL ({reason})"
        elif action_type == "WAIT":
            return "WAIT (screenshot observation)"
        
        # Handle PyAutoGUI commands
        elif action_type == "PYAUTOGUI_COMMAND":
            command = planned_action.get("command", "")
            # Truncate long commands
            if len(command) > 100:
                return command[:100] + "..."
            return command
        
        # Handle standard action space commands
        else:
            parameters = planned_action.get("parameters", {})
            if parameters:
                # Format first 2 parameters
                param_items = list(parameters.items())[:2]
                param_str = ", ".join([f"{k}={v}" for k, v in param_items])
                return f"{action_type}({param_str})"
            else:
                return action_type

================================================
FILE: anytool/grounding/backends/gui/transport/actions.py
================================================
"""
GUI Action Space Definitions.
"""
from typing import Dict, Any

# Screen resolution constants.
# Used below as the upper bounds of the x / y parameter ranges in ACTION_SPACE.
# NOTE(review): hard-coded to 1920x1080 — confirm this matches the target desktop.
X_MAX = 1920
Y_MAX = 1080

# Keyboard keys constants.
# Key names accepted by the PRESS, KEY_DOWN, KEY_UP and HOTKEY actions (see the
# "range" entries in ACTION_SPACE): printable characters first, then named keys.
# NOTE(review): the names look like pyautogui's key names — confirm against the
# pyautogui version used on the desktop side.
KEYBOARD_KEYS = [
    '\t', '\n', '\r', ' ', '!', '"', '#', '$', '%', '&', "'", '(', ')', '*', '+', ',', '-', '.', '/', 
    '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?', '@', 
    '[', '\\', ']', '^', '_', '`', 
    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 
    '{', '|', '}', '~', 
    'accept', 'add', 'alt', 'altleft', 'altright', 'apps', 'backspace', 
    'browserback', 'browserfavorites', 'browserforward', 'browserhome', 'browserrefresh', 'browsersearch', 'browserstop', 
    'capslock', 'clear', 'convert', 'ctrl', 'ctrlleft', 'ctrlright', 'decimal', 'del', 'delete', 'divide', 
    'down', 'end', 'enter', 'esc', 'escape', 'execute', 
    'f1', 'f10', 'f11', 'f12', 'f13', 'f14', 'f15', 'f16', 'f17', 'f18', 'f19', 
    'f2', 'f20', 'f21', 'f22', 'f23', 'f24', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 
    'final', 'fn', 'hanguel', 'hangul', 'hanja', 'help', 'home', 'insert', 'junja', 'kana', 'kanji', 
    'launchapp1', 'launchapp2', 'launchmail', 'launchmediaselect', 'left', 'modechange', 'multiply', 
    'nexttrack', 'nonconvert', 'num0', 'num1', 'num2', 'num3', 'num4', 'num5', 'num6', 'num7', 'num8', 'num9', 
    'numlock', 'pagedown', 'pageup', 'pause', 'pgdn', 'pgup', 'playpause', 'prevtrack', 'print', 'printscreen', 
    'prntscrn', 'prtsc', 'prtscr', 'return', 'right', 'scrolllock', 'select', 'separator', 
    'shift', 'shiftleft', 'shiftright', 'sleep', 'stop', 'subtract', 'tab', 'up', 
    'volumedown', 'volumemute', 'volumeup', 'win', 'winleft', 'winright', 'yen', 
    'command', 'option', 'optionleft', 'optionright'
]

# Action Space Definition.
# A list of action descriptors. Each descriptor is a dict with:
#   "action_type": the action's name,
#   "note":        a short human-readable description,
#   "parameters":  (absent for WAIT / FAIL / DONE) a mapping of parameter name to
#                  {"type": <Python type>, "range": <allowed values or bounds, or
#                  None>, "optional": <bool>}.
# For numeric coordinates "range" is a [min, max] pair; for string parameters it
# is the list of allowed values.
# NOTE(review): "type" holds Python type objects (float, str, int, list), so this
# structure cannot be passed to json.dumps as-is — confirm how consumers serialize it.
ACTION_SPACE = [
    {
        "action_type": "MOVE_TO",
        "note": "move the cursor to the specified position",
        "parameters": {
            "x": {"type": float, "range": [0, X_MAX], "optional": False},
            "y": {"type": float, "range": [0, Y_MAX], "optional": False},
        }
    },
    {
        "action_type": "CLICK",
        "note": "click the left button if button not specified, otherwise click the specified button",
        "parameters": {
            "button": {"type": str, "range": ["left", "right", "middle"], "optional": True},
            "x": {"type": float, "range": [0, X_MAX], "optional": True},
            "y": {"type": float, "range": [0, Y_MAX], "optional": True},
            "num_clicks": {"type": int, "range": [1, 2, 3], "optional": True},
        }
    },
    {
        "action_type": "MOUSE_DOWN",
        "note": "press the mouse button",
        "parameters": {
            "button": {"type": str, "range": ["left", "right", "middle"], "optional": True}
        }
    },
    {
        "action_type": "MOUSE_UP",
        "note": "release the mouse button",
        "parameters": {
            "button": {"type": str, "range": ["left", "right", "middle"], "optional": True}
        }
    },
    {
        "action_type": "RIGHT_CLICK",
        "note": "right click at position",
        "parameters": {
            "x": {"type": float, "range": [0, X_MAX], "optional": True},
            "y": {"type": float, "range": [0, Y_MAX], "optional": True}
        }
    },
    {
        "action_type": "DOUBLE_CLICK",
        "note": "double click at position",
        "parameters": {
            "x": {"type": float, "range": [0, X_MAX], "optional": True},
            "y": {"type": float, "range": [0, Y_MAX], "optional": True}
        }
    },
    {
        "action_type": "DRAG_TO",
        "note": "drag the cursor to position",
        "parameters": {
            "x": {"type": float, "range": [0, X_MAX], "optional": False},
            "y": {"type": float, "range": [0, Y_MAX], "optional": False}
        }
    },
    {
        "action_type": "SCROLL",
        "note": "scroll the mouse wheel",
        "parameters": {
            "dx": {"type": int, "range": None, "optional": False},
            "dy": {"type": int, "range": None, "optional": False}
        }
    },
    {
        "action_type": "TYPING",
        "note": "type the specified text",
        "parameters": {
            "text": {"type": str, "range": None, "optional": False}
        }
    },
    {
        "action_type": "PRESS",
        "note": "press the specified key",
        "parameters": {
            "key": {"type": str, "range": KEYBOARD_KEYS, "optional": False}
        }
    },
    {
        "action_type": "KEY_DOWN",
        "note": "press down the specified key",
        "parameters": {
            "key": {"type": str, "range": KEYBOARD_KEYS, "optional": False}
        }
    },
    {
        "action_type": "KEY_UP",
        "note": "release the specified key",
        "parameters": {
            "key": {"type": str, "range": KEYBOARD_KEYS, "optional": False}
        }
    },
    {
        "action_type": "HOTKEY",
        "note": "press key combination",
        "parameters": {
            # Unlike the single-key actions above, the range here is KEYBOARD_KEYS
            # wrapped in an outer list — presumably meaning "a list whose items come
            # from KEYBOARD_KEYS"; confirm with whatever validates this schema.
            "keys": {"type": list, "range": [KEYBOARD_KEYS], "optional": False}
        }
    },
    # Control actions: no parameters.
    {
        "action_type": "WAIT",
        "note": "wait until next action",
    },
    {
        "action_type": "FAIL",
        "note": "mark task as failed",
    },
    {
        "action_type": "DONE",
        "note": "mark task as done",
    }
]


def build_pyautogui_command(action_type: str, parameters: Dict[str, Any]) -> str:
    """
    Build pyautogui command from action type and parameters.
    
    Args:
        action_type: Type of action (e.g., 'CLICK', 'TYPING')
        parameters: Action parameters
    
    Returns:
        Python command string
    """
    if action_type == "MOVE_TO":
        if "x" in parameters and "y" in parameters:
            x, y = parameters["x"], parameters["y"]
            return f"pyautogui.moveTo({x}, {y}, 0.5, pyautogui.easeInQuad)"
        else:
            return "pyautogui.moveTo()"
    
    elif action_type == "CLICK":
        button = parameters.get("button", "left")
        num_clicks = parameters.get("num_clicks", 1)
        
        if "x" in parameters and "y" in parameters:
            x, y = parameters["x"], parameters["y"]
            return f"pyautogui.click(button='{button}', x={x}, y={y}, clicks={num_clicks})"
        else:
            return f"pyautogui.click(button='{button}', clicks={num_clicks})"
    
    elif action_type == "MOUSE_DOWN":
        button = parameters.get("button", "left")
        return f"pyautogui.mouseDown(button='{button}')"
    
    elif action_type == "MOUSE_UP":
        button = parameters.get("button", "left")
        return f"pyautogui.mouseUp(button='{button}')"
    
    elif action_type == "RIGHT_CLICK":
        if "x" in parameters and "y" in parameters:
            x, y = parameters["x"], parameters["y"]
            return f"pyautogui.rightClick(x={x}, y={y})"
        else:
            return "pyautogui.rightClick()"
    
    elif action_type == "DOUBLE_CLICK":
        if "x" in parameters and "y" in parameters:
            x, y = parameters["x"], parameters["y"]
            return f"pyautogui.doubleClick(x={x}, y={y})"
        else:
            return "pyautogui.doubleClick()"
    
    elif action_type == "DRAG_TO":
        if "x" in parameters and "y" in parameters:
            x, y = parameters["x"], parameters["y"]
            return f"pyautogui.dragTo({x}, {y}, 1.0, button='left')"
    
    elif action_type == "SCROLL":
        dx = parameters.get("dx", 0)
        dy = parameters.get("dy", 0)
        return f"pyautogui.scroll({dy})"
    
    elif action_type == "TYPING":
        text = parameters.get("text", "")
        # Use repr() for proper string escaping
        return f"pyautogui.typewrite({repr(text)})"
    
    elif action_type == "PRESS":
        key = parameters.get("key", "")
        return f"pyautogui.press('{key}')"
    
    elif action_type == "KEY_DOWN":
        key = parameters.get("key", "")
        return f"pyautogui.keyDown('{key}')"
    
    elif action_type == "KEY_UP":
        key = parameters.get("key", "")
        return f"pyautogui.keyUp('{key}')"
    
    elif action_type == "HOTKEY":
        keys = parameters.get("keys", [])
        if keys:
            keys_str = ", ".join([f"'{k}'" for k in keys])
            return f"pyautogui.hotkey({keys_str})"
    
    return None

================================================
FILE: anytool/grounding/backends/gui/transport/connector.py
================================================
import asyncio
import re
from typing import Any, Dict, Optional
from anytool.grounding.core.transport.connectors import AioHttpConnector
from .actions import build_pyautogui_command, KEYBOARD_KEYS
from anytool.utils.logging import Logger

# Module-level logger, requested under this module's name.
logger = Logger.get_logger(__name__)


class GUIConnector(AioHttpConnector):
    """
    Connector for desktop_env HTTP API.
    Provides action execution and observation methods.
    """
    
    def __init__(
        self,
        vm_ip: str,
        server_port: int = 5000,
        timeout: int = 90,
        retry_times: int = 3,
        retry_interval: float = 5.0,
        pkgs_prefix: str = "import pyautogui; import time; pyautogui.FAILSAFE = False; {command}",
    ):
        """
        Create a connector bound to ``http://<vm_ip>:<server_port>``.

        Args:
            vm_ip: IP address of the VM running desktop_env
            server_port: Port of the desktop_env HTTP server
            timeout: Request timeout in seconds
            retry_times: Number of attempts made for a failing request
            retry_interval: Interval between attempts in seconds
            pkgs_prefix: Template wrapped around every Python command; must
                contain a ``{command}`` placeholder
        """
        # The base class receives the endpoint URL and the timeout.
        super().__init__(f"http://{vm_ip}:{server_port}", timeout=timeout)

        # Endpoint details
        self.vm_ip, self.server_port = vm_ip, server_port

        # Retry policy read by _retry_invoke
        self.retry_times, self.retry_interval = retry_times, retry_interval

        # Command template and timeout (assigned after the base initializer,
        # as before)
        self.pkgs_prefix = pkgs_prefix
        self.timeout = timeout
    
    async def _retry_invoke(
        self, 
        operation_name: str,
        operation_func,
        *args,
        **kwargs
    ):
        """
        Execute an async operation, repeating it when it raises.

        At least one attempt is always made, even when ``retry_times`` is
        zero or negative. A timeout is not retried: it is converted to a
        RuntimeError and raised immediately.

        Args:
            operation_name: Name of operation for logging
            operation_func: Async function to execute
            *args: Positional arguments for operation_func
            **kwargs: Keyword arguments for operation_func

        Returns:
            Operation result

        Raises:
            RuntimeError: The operation timed out
            Exception: Last exception after all attempts fail
        """
        # Previously retry_times <= 0 skipped the loop entirely, so the
        # operation was reported as failed without ever being called.
        max_attempts = max(1, self.retry_times)
        last_exc: Exception | None = None
        
        for attempt in range(1, max_attempts + 1):
            try:
                result = await operation_func(*args, **kwargs)
                logger.debug("%s executed successfully (attempt %d/%d)", operation_name, attempt, max_attempts)
                return result
            except asyncio.TimeoutError as exc:
                logger.error("%s timed out", operation_name)
                raise RuntimeError(f"{operation_name} timed out after {self.timeout} seconds") from exc
            except Exception as exc:
                last_exc = exc
                if attempt == max_attempts:
                    # Out of attempts: fall through to the final raise.
                    break
                logger.warning(
                    "%s failed (attempt %d/%d): %s, retrying in %.1f seconds...", 
                    operation_name, attempt, max_attempts, exc, self.retry_interval
                )
                await asyncio.sleep(self.retry_interval)
        
        error_msg = f"{operation_name} failed after {self.retry_times} retries"
        logger.error(error_msg)
        raise last_exc or RuntimeError(error_msg)
    
    @staticmethod
    def _is_valid_image_response(content_type: str, data: Optional[bytes]) -> bool:
        """Validate image response using magic bytes."""
        if not isinstance(data, (bytes, bytearray)) or not data:
            return False
        # PNG magic
        if len(data) >= 8 and data[:8] == b"\x89PNG\r\n\x1a\n":
            return True
        # JPEG magic
        if len(data) >= 3 and data[:3] == b"\xff\xd8\xff":
            return True
        # Fallback to content-type
        if content_type and ("image/png" in content_type or "image/jpeg" in content_type):
            return True
        return False
    
    @staticmethod
    def _fix_pyautogui_less_than_bug(command: str) -> str:
        """
        Fix PyAutoGUI '<' character bug by converting it to hotkey("shift", ',') calls.
        
        This fixes the known PyAutoGUI issue where typing '<' produces '>' instead.
        References:
        - https://github.com/asweigart/pyautogui/issues/198
        - https://github.com/xlang-ai/OSWorld/issues/257
        
        Args:
            command (str): The original pyautogui command
            
        Returns:
            str: The fixed command with '<' characters handled properly
        """
        # Pattern to match press('<') or press('\u003c') calls  
        press_pattern = r'pyautogui\.press\(["\'](?:<|\\u003c)["\']\)'

        # Handle press('<') calls
        def replace_press_less_than(match):
            return 'pyautogui.hotkey("shift", ",")'
        
        # First handle press('<') calls
        command = re.sub(press_pattern, replace_press_less_than, command)

        # Pattern to match typewrite calls with quoted strings
        typewrite_pattern = r'pyautogui\.typewrite\((["\'])(.*?)\1\)'
        
        # Then handle typewrite calls
        def process_typewrite_match(match):
            quote_char = match.group(1)
            content = match.group(2)
            
            # Preprocess: Try to decode Unicode escapes like \u003c to actual '<'
            # This handles cases where '<' is represented as escaped Unicode
            try:
                # Attempt to decode unicode escapes
                decoded_content = content.encode('utf-8').decode('unicode_escape')
                content = decoded_content
            except UnicodeDecodeError:
                # If decoding fails, proceed with original content to avoid breaking existing logic
                pass  # Graceful degradation - fall back to original content if decoding fails
            
            # Check if content contains '<'
            if '<' not in content:
                return match.group(0)
            
            # Split by '<' and rebuild
            parts = content.split('<')
            result_parts = []
            
            for i, part in enumerate(parts):
                if i == 0:
                    # First part
                    if part:
                        result_parts.append(f"pyautogui.typewrite({quote_char}{part}{quote_char})")
                else:
                    # Add hotkey for '<' and then typewrite for the rest
                    result_parts.append('pyautogui.hotkey("shift", ",")')
                    if part:
                        result_parts.append(f"pyautogui.typewrite({quote_char}{part}{quote_char})")
            
            return '; '.join(result_parts)
        
        command = re.sub(typewrite_pattern, process_typewrite_match, command)
        
        return command
    
    async def get_screen_size(self) -> Optional[tuple[int, int]]:
        """
        Ask the desktop environment for its screen size via ``pyautogui.size()``.

        The printed value (e.g. ``Size(width=2880, height=1800)``) is parsed
        out of the command output.

        Returns:
            (width, height) tuple, or None when the command fails or its
            output cannot be parsed
        """
        size_pattern = r'width=(\d+).*height=(\d+)'
        try:
            result = await self.execute_python_command("print(pyautogui.size())")

            found = None
            if result and result.get("status") == "success":
                found = re.search(size_pattern, result.get("output", ""))

            if found is None:
                logger.warning(f"Failed to detect screen size, output: {result}")
                return None

            width, height = int(found.group(1)), int(found.group(2))
            logger.info(f"Detected screen size: {width}x{height}")
            return (width, height)
        except Exception as e:
            logger.error(f"Failed to get screen size: {e}")
            return None
    
    async def get_screenshot(self) -> Optional[bytes]:
        """
        Fetch a screenshot from the ``/screenshot`` endpoint.

        A non-200 status or a body that fails image validation counts as a
        failed attempt and goes through the retry wrapper.

        Returns:
            Screenshot image bytes (PNG/JPEG), or None on failure
        """
        async def fetch_image():
            response = await self._request("GET", "/screenshot", timeout=10)
            if response.status != 200:
                raise RuntimeError(f"HTTP {response.status}")

            declared_type = response.headers.get("Content-Type", "")
            body = await response.read()
            if not self._is_valid_image_response(declared_type, body):
                raise ValueError("Invalid screenshot format")
            return body

        try:
            return await self._retry_invoke("get_screenshot", fetch_image)
        except Exception as e:
            logger.error(f"Failed to get screenshot: {e}")
            return None
    
    async def execute_python_command(self, command: str) -> Optional[Dict[str, Any]]:
        """
        Run a Python snippet on the desktop environment through ``/execute``.

        The snippet first goes through the '<' workaround, is then wrapped in
        ``pkgs_prefix`` and posted as a ``python -c`` argument list with
        ``shell`` set to False.

        Args:
            command: Python command to execute

        Returns:
            Response dict with execution result, or None on failure
        """
        try:
            patched = self._fix_pyautogui_less_than_bug(command)
            script = self.pkgs_prefix.format(command=patched)
            request_body = {"command": ["python", "-c", script], "shell": False}

            async def send():
                return await self.post_json("/execute", request_body)

            return await self._retry_invoke("execute_python_command", send)
        except Exception as e:
            logger.error(f"Failed to execute command: {e}")
            return None
    
    async def execute_action(self, action_type: str, parameters: Dict[str, Any] = None) -> Dict[str, Any]:
        """
        Execute a desktop action.
        This is the main method for action space execution.
        
        Args:
            action_type: Action type (e.g., 'CLICK', 'TYPING')
            parameters: Action parameters
        
        Returns:
            Result dict with execution status
        """
        parameters = parameters or {}
        
        # Handle control actions
        if action_type in ['WAIT', 'FAIL', 'DONE']:
            return {
                "status": "success",
                "action_type": action_type,
                "message": f"Control action {action_type} acknowledged"
            }
        
        # Validate keyboard keys
        if action_type in ['PRESS', 'KEY_DOWN', 'KEY_UP']:
            key = parameters.get('key')
            if key and key not in KEYBOARD_KEYS:
                return {
                    "status": "error",
                    "action_type": action_type,
                    "error": f"Invalid key: {key}. Must be in supported keyboard keys."
                }
        
        if action_type == 'HOTKEY':
            keys = parameters.get('keys', [])
            invalid_keys = [k for k in keys if k not in KEYBOARD_KEYS]
            if invalid_keys:
                return {
                    "status": "error",
                    "action_type": action_type,
                    "error": f"Invalid keys: {invalid_keys}"
                }
        
        # Build pyautogui command
        command = build_pyautogui_command(action_type, parameters)
        
        if command is None:
            return {
                "status": "error",
                "action_type": action_type,
                "error": f"Unsupported action type: {action_type}"
            }
        
        # Execute command
        result = await self.execute_python_command(command)
        
        if result:
            return {
                "status": "success",
                "action_type": action_type,
                "parameters": parameters,
                "result": result
            }
        else:
            return {
                "status": "error",
                "action_type": action_type,
                "parameters": parameters,
                "error": "Command execution failed"
            }
    
    async def get_accessibility_tree(self, max_depth: int = 5) -> Optional[Dict[str, Any]]:
        """
        Fetch the accessibility tree from the ``/accessibility`` endpoint.

        Args:
            max_depth: Maximum depth of accessibility tree traversal
                (accepted for API compatibility; this method does not
                forward it in the request)

        Returns:
            The "AT" entry of the JSON response, or None on failure
        """
        async def fetch_tree():
            response = await self._request("GET", "/accessibility", timeout=10)
            if response.status != 200:
                raise RuntimeError(f"HTTP {response.status}")
            payload = await response.json()
            return payload.get("AT")

        try:
            return await self._retry_invoke("get_accessibility_tree", fetch_tree)
        except Exception as e:
            logger.error(f"Failed to get accessibility tree: {e}")
            return None

    async def get_cursor_position(self) -> Optional[tuple[int, int]]:
        """
        Read the mouse cursor position from the ``/cursor_position`` endpoint.

        Returns:
            (x, y) tuple built from the "x" and "y" entries of the JSON
            response, or None on failure
        """
        async def fetch_position():
            payload = await self.get_json("/cursor_position")
            return payload.get("x"), payload.get("y")

        try:
            return await self._retry_invoke("get_cursor_position", fetch_position)
        except Exception as e:
            logger.error(f"Failed to get cursor position: {e}")
            return None
    
    async def invoke(self, name: str, params: dict[str, Any]) -> Any:
        """
        Unified RPC entry for operations.
        Required by BaseConnector.
        
        Args:
            name: Operation name (action_type or observation method)
            params: Operation parameters
        
        Returns:
            Operation result
        """
        # Handle observation methods
        if name == "screenshot":
            return await self.get_screenshot()
        elif name == "accessibility_tree":
            max_depth = params.get("max_depth", 5) if params else 5
            return await self.get_accessibility_tree(max_depth)
        elif name == "cursor_position":
            return await self.get_cursor_position()
        else:
            # Treat as action
            return await self.execute_action(name.upper(), params or {})

================================================
FILE: anytool/grounding/backends/gui/transport/local_connector.py
================================================
"""
Local GUI Connector — execute GUI operations directly in-process.

This connector has the **same public API** as GUIConnector (HTTP version)
but uses local pyautogui / ScreenshotHelper / AccessibilityHelper,
removing the need for a local_server.

Return format is kept identical so that GUISession / GUIAgentTool
work without any changes.
"""

import asyncio
import os
import platform
import re
import tempfile
import uuid
from typing import Any, Dict, Optional

from anytool.grounding.core.transport.connectors.base import BaseConnector
from anytool.grounding.core.transport.task_managers.noop import NoOpConnectionManager
from anytool.utils.logging import Logger

# Module-level logger, requested under this module's name.
logger = Logger.get_logger(__name__)

# OS name from platform.system(); read when choosing the Python executable name.
platform_name = platform.system()


class LocalGUIConnector(BaseConnector[Any]):
    """
    GUI connector that runs desktop automation **locally** using pyautogui /
    ScreenshotHelper / AccessibilityHelper, bypassing the Flask local_server.

    Public API is compatible with ``GUIConnector`` so that ``GUISession``
    works without modification.
    """

    def __init__(
        self,
        timeout: int = 90,
        retry_times: int = 3,
        retry_interval: float = 5.0,
        pkgs_prefix: str = "import pyautogui; import time; pyautogui.FAILSAFE = False; {command}",
    ):
        """
        Create a local connector backed by a no-op connection manager.

        Args:
            timeout: Seconds allowed for a spawned Python command
            retry_times: Number of attempts made for a failing operation
            retry_interval: Seconds to sleep between attempts
            pkgs_prefix: Template wrapped around every Python command; must
                contain a ``{command}`` placeholder
        """
        super().__init__(NoOpConnectionManager())

        # Helpers are created on first use (avoid import side effects at class load)
        self._screenshot_helper = None
        self._accessibility_helper = None

        # Execution and retry settings
        self.timeout = timeout
        self.pkgs_prefix = pkgs_prefix
        self.retry_times, self.retry_interval = retry_times, retry_interval

        # Compatibility attributes expected by GUISession
        self.vm_ip, self.server_port = "localhost", 0
        self.base_url = "local://localhost"

    def _get_screenshot_helper(self):
        if self._screenshot_helper is None:
            from anytool.local_server.utils import ScreenshotHelper
            self._screenshot_helper = ScreenshotHelper()
        return self._screenshot_helper

    def _get_accessibility_helper(self):
        if self._accessibility_helper is None:
            from anytool.local_server.utils import AccessibilityHelper
            self._accessibility_helper = AccessibilityHelper()
        return self._accessibility_helper

    # ------------------------------------------------------------------
    # connect / disconnect
    # ------------------------------------------------------------------

    async def connect(self) -> None:
        """Mark the connector ready; does nothing when already connected."""
        if not self._connected:
            await super().connect()
            logger.info("LocalGUIConnector: ready (local mode, no server required)")

    # ------------------------------------------------------------------
    # Retry wrapper (same interface as GUIConnector._retry_invoke)
    # ------------------------------------------------------------------

    async def _retry_invoke(
        self,
        operation_name: str,
        operation_func,
        *args,
        **kwargs,
    ):
        """
        Execute an async operation, repeating it when it raises.

        At least one attempt is always made, even when ``retry_times`` is
        zero or negative. A timeout is not retried: it is converted to a
        RuntimeError and raised immediately.

        Args:
            operation_name: Name of operation for logging
            operation_func: Async function to execute
            *args: Positional arguments for operation_func
            **kwargs: Keyword arguments for operation_func

        Returns:
            Operation result

        Raises:
            RuntimeError: The operation timed out
            Exception: Last exception after all attempts fail
        """
        # Previously retry_times <= 0 skipped the loop entirely, so the
        # operation was reported as failed without ever being called.
        max_attempts = max(1, self.retry_times)
        last_exc: Exception | None = None
        for attempt in range(1, max_attempts + 1):
            try:
                result = await operation_func(*args, **kwargs)
                logger.debug(
                    "%s executed successfully (attempt %d/%d)",
                    operation_name, attempt, max_attempts,
                )
                return result
            except asyncio.TimeoutError as exc:
                logger.error("%s timed out", operation_name)
                raise RuntimeError(
                    f"{operation_name} timed out after {self.timeout} seconds"
                ) from exc
            except Exception as exc:
                last_exc = exc
                if attempt == max_attempts:
                    # Out of attempts: fall through to the final raise.
                    break
                logger.warning(
                    "%s failed (attempt %d/%d): %s, retrying in %.1f seconds...",
                    operation_name, attempt, max_attempts, exc, self.retry_interval,
                )
                await asyncio.sleep(self.retry_interval)

        error_msg = f"{operation_name} failed after {self.retry_times} retries"
        logger.error(error_msg)
        raise last_exc or RuntimeError(error_msg)

    # ------------------------------------------------------------------
    # PyAutoGUI '<' bug fix (same as GUIConnector)
    # ------------------------------------------------------------------

    @staticmethod
    def _fix_pyautogui_less_than_bug(command: str) -> str:
        """Fix PyAutoGUI '<' character bug."""
        press_pattern = r'pyautogui\.press\(["\'](?:<|\\u003c)["\']\)'

        def replace_press_less_than(match):
            return 'pyautogui.hotkey("shift", ",")'

        command = re.sub(press_pattern, replace_press_less_than, command)

        typewrite_pattern = r'pyautogui\.typewrite\((["\'])(.*?)\1\)'

        def process_typewrite_match(match):
            quote_char = match.group(1)
            content = match.group(2)
            try:
                decoded_content = content.encode("utf-8").decode("unicode_escape")
                content = decoded_content
            except UnicodeDecodeError:
                pass
            if "<" not in content:
                return match.group(0)
            parts = content.split("<")
            result_parts = []
            for i, part in enumerate(parts):
                if i == 0:
                    if part:
                        result_parts.append(
                            f"pyautogui.typewrite({quote_char}{part}{quote_char})"
                        )
                else:
                    result_parts.append('pyautogui.hotkey("shift", ",")')
                    if part:
                        result_parts.append(
                            f"pyautogui.typewrite({quote_char}{part}{quote_char})"
                        )
            return "; ".join(result_parts)

        command = re.sub(typewrite_pattern, process_typewrite_match, command)
        return command

    # ------------------------------------------------------------------
    # Image response validation (same as GUIConnector)
    # ------------------------------------------------------------------

    @staticmethod
    def _is_valid_image_response(content_type: str, data: Optional[bytes]) -> bool:
        if not isinstance(data, (bytes, bytearray)) or not data:
            return False
        if len(data) >= 8 and data[:8] == b"\x89PNG\r\n\x1a\n":
            return True
        if len(data) >= 3 and data[:3] == b"\xff\xd8\xff":
            return True
        if content_type and ("image/png" in content_type or "image/jpeg" in content_type):
            return True
        return False

    # ------------------------------------------------------------------
    # Public API (same signatures as GUIConnector)
    # ------------------------------------------------------------------

    async def get_screen_size(self) -> Optional[tuple[int, int]]:
        """
        Get the screen size by running ``print(pyautogui.size())``.

        Returns:
            (width, height) tuple, or None when the command fails or its
            output cannot be parsed
        """
        try:
            result = await self.execute_python_command("print(pyautogui.size())")

            dimensions = None
            if result and result.get("status") == "success":
                dimensions = re.search(
                    r"width=(\d+).*height=(\d+)", result.get("output", "")
                )

            if dimensions is None:
                logger.warning("Failed to detect screen size, output: %s", result)
                return None

            width, height = int(dimensions.group(1)), int(dimensions.group(2))
            logger.info("Detected screen size: %dx%d", width, height)
            return (width, height)
        except Exception as e:
            logger.error("Failed to get screen size: %s", e)
            return None

    async def get_screenshot(self) -> Optional[bytes]:
        """
        Capture a screenshot locally using ScreenshotHelper.

        The helper writes to a uniquely named file in the system temp
        directory. That file is removed after every attempt, whether or not
        capturing and reading succeeded.

        Returns:
            Contents of the captured file, or None on failure
        """
        try:
            async def _get():
                helper = self._get_screenshot_helper()
                tmp_path = os.path.join(
                    tempfile.gettempdir(), f"screenshot_{uuid.uuid4().hex}.png"
                )
                try:
                    if not helper.capture(tmp_path, with_cursor=True):
                        raise RuntimeError("Screenshot capture failed")
                    with open(tmp_path, "rb") as f:
                        return f.read()
                finally:
                    # Best-effort cleanup: a failed capture or read must not
                    # leave files behind, and a failed removal must not turn
                    # a good screenshot into an error.
                    try:
                        os.remove(tmp_path)
                    except FileNotFoundError:
                        pass
                    except OSError as cleanup_err:
                        logger.debug(
                            "Could not remove temporary screenshot %s: %s",
                            tmp_path, cleanup_err,
                        )

            return await self._retry_invoke("get_screenshot", _get)
        except Exception as e:
            logger.error("Failed to get screenshot: %s", e)
            return None

    async def execute_python_command(self, command: str) -> Optional[Dict[str, Any]]:
        """
        Execute a pyautogui Python command locally via subprocess.

        The command goes through the '<' workaround, is wrapped in
        ``pkgs_prefix`` and run with ``python -c`` (``python3`` outside
        Windows). A child that exceeds ``timeout`` is killed and reaped
        before the timeout propagates.

        Args:
            command: Python command to execute

        Returns:
            Dict with "status", "output" (stdout followed by stderr),
            "error" and "returncode", or None on failure
        """
        try:
            fixed_command = self._fix_pyautogui_less_than_bug(command)
            full_command = self.pkgs_prefix.format(command=fixed_command)

            async def _execute():
                python_cmd = "python" if platform_name == "Windows" else "python3"
                proc = await asyncio.create_subprocess_exec(
                    python_cmd, "-c", full_command,
                    stdout=asyncio.subprocess.PIPE,
                    stderr=asyncio.subprocess.PIPE,
                )
                try:
                    stdout_b, stderr_b = await asyncio.wait_for(
                        proc.communicate(), timeout=self.timeout
                    )
                except asyncio.TimeoutError:
                    # wait_for only cancels communicate(); the child would
                    # keep running. Kill and reap it, then let the timeout
                    # propagate to the retry wrapper.
                    try:
                        proc.kill()
                    except ProcessLookupError:
                        # The child exited on its own in the meantime.
                        pass
                    await proc.wait()
                    raise
                stdout = stdout_b.decode("utf-8", errors="replace") if stdout_b else ""
                stderr = stderr_b.decode("utf-8", errors="replace") if stderr_b else ""
                returncode = proc.returncode or 0
                return {
                    "status": "success" if returncode == 0 else "error",
                    "output": stdout + stderr,
                    "error": stderr if returncode != 0 else "",
                    "returncode": returncode,
                }

            return await self._retry_invoke("execute_python_command", _execute)
        except Exception as e:
            logger.error("Failed to execute command: %s", e)
            return None

    async def execute_action(
        self, action_type: str, parameters: Dict[str, Any] | None = None
    ) -> Dict[str, Any]:
        """Execute a desktop action (same logic as GUIConnector)."""
        parameters = parameters or {}

        if action_type in ["WAIT", "FAIL", "DONE"]:
            return {
                "status": "success",
                "action_type": action_type,
                "message": f"Control action {action_type} acknowledged",
            }

        # Import action builder (same module used by GUIConnector)
        from anytool.grounding.backends.gui.transport.actions import (
            build_pyautogui_command,
            KEYBOARD_KEYS,
        )

        if action_type in ["PRESS", "KEY_DOWN", "KEY_UP"]:
            key = parameters.get("key")
            if key and key not in KEYBOARD_KEYS:
                return {
                    "status": "error",
                    "action_type": action_type,
                    "error": f"Invalid key: {key}. Must be in supported keyboard keys.",
                }
        if action_type == "HOTKEY":
            keys = parameters.get("keys", [])
            invalid_keys = [k for k in keys if k not in KEYBOARD_KEYS]
            if invalid_keys:
                return {
                    "status": "error",
                    "action_type": action_type,
                    "error": f"Invalid keys: {invalid_keys}",
                }

        command = build_pyautogui_command(action_type, parameters)
        if command is None:
            return {
                "status": "error",
                "action_type": action_type,
                "error": f"Unsupported action type: {action_type}",
            }

        result = await self.execute_python_command(command)
        if result:
            return {
                "status": "success",
                "action_type": action_type,
                "parameters": parameters,
                "result": result,
            }
        else:
            return {
                "status": "error",
                "action_type": action_type,
                "parameters": parameters,
                "error": "Command execution failed",
            }

    async def get_accessibility_tree(
        self, max_depth: int = 5
    ) -> Optional[Dict[str, Any]]:
        """
        Get the accessibility tree from the local AccessibilityHelper.

        Args:
            max_depth: Forwarded to the helper's ``get_tree`` call

        Returns:
            Whatever the helper returns, or None on failure
        """
        async def build_tree():
            return self._get_accessibility_helper().get_tree(max_depth=max_depth)

        try:
            return await self._retry_invoke("get_accessibility_tree", build_tree)
        except Exception as e:
            logger.error("Failed to get accessibility tree: %s", e)
            return None

    async def get_cursor_position(self) -> Optional[tuple[int, int]]:
        """
        Get the cursor position from the local ScreenshotHelper.

        Returns:
            Whatever the helper's ``get_cursor_position`` returns, or None
            on failure
        """
        async def read_position():
            return self._get_screenshot_helper().get_cursor_position()

        try:
            return await self._retry_invoke("get_cursor_position", read_position)
        except Exception as e:
            logger.error("Failed to get cursor position: %s", e)
            return None

    # ------------------------------------------------------------------
    # BaseConnector abstract methods
    # ------------------------------------------------------------------

    async def invoke(self, name: str, params: dict[str, Any]) -> Any:
        if name == "screenshot":
            return await self.get_screenshot()
        elif name == "accessibility_tree":
            max_depth = params.get("max_depth", 5) if params else 5
            return await self.get_accessibility_tree(max_depth)
        elif name == "cursor_position":
            return await self.get_cursor_position()
        else:
            return await self.execute_action(name.upper(), params or {})

    async def request(self, *args: Any, **kwargs: Any) -> Any:
        raise NotImplementedError(
            "LocalGUIConnector does not support raw HTTP requests"
        )


================================================
FILE: anytool/grounding/backends/mcp/__init__.py
================================================
"""
MCP Backend for AnyTool Grounding.

This module provides the MCP (Model Context Protocol) backend implementation
for the grounding framework. It includes:

- MCPProvider: Manages multiple MCP server sessions
- MCPSession: Handles individual MCP server connections
- MCPClient: High-level client for MCP server configuration
- MCPInstallerManager: Manages automatic installation of MCP dependencies
- MCPToolCache: Caches tool metadata to avoid starting servers on list_tools
"""

from .provider import MCPProvider
from .session import MCPSession
from .client import MCPClient
from .installer import (
    MCPInstallerManager,
    get_global_installer,
    set_global_installer,
    MCPDependencyError,
    MCPCommandNotFoundError,
    MCPInstallationCancelledError,
    MCPInstallationFailedError,
)
from .tool_cache import MCPToolCache, get_tool_cache

__all__ = [
    "MCPProvider",
    "MCPSession",
    "MCPClient",
    "MCPInstallerManager",
    "get_global_installer",
    "set_global_installer",
    "MCPDependencyError",
    "MCPCommandNotFoundError",
    "MCPInstallationCancelledError",
    "MCPInstallationFailedError",
    "MCPToolCache",
    "get_tool_cache",
]

================================================
FILE: anytool/grounding/backends/mcp/client.py
================================================
"""
Client for managing MCP servers and sessions.

This module provides a high-level client that manages MCP servers, connectors,
and sessions from configuration.
"""
import asyncio
import warnings
from typing import Any, Optional

from anytool.grounding.core.types import SandboxOptions
from anytool.config.utils import get_config_value, save_json_file, load_json_file
from .config import create_connector_from_config
from .session import MCPSession
from .installer import MCPInstallerManager, MCPDependencyError

from anytool.utils.logging import Logger

logger = Logger.get_logger(__name__)


class MCPClient:
    """Client for managing MCP servers and sessions.

    This class provides a unified interface for working with MCP servers,
    handling configuration, connector creation, and session management.
    """

    def __init__(
        self,
        config: str | dict[str, Any] | None = None,
        sandbox: bool = False,
        sandbox_options: SandboxOptions | None = None,
        timeout: float = 30.0,
        sse_read_timeout: float = 300.0,
        max_retries: int = 3,
        retry_interval: float = 2.0,
        installer: Optional[MCPInstallerManager] = None,
        check_dependencies: bool = True,
        tool_call_max_retries: int = 3,
        tool_call_retry_delay: float = 1.0,
    ) -> None:
        """Set up a client with its configuration and connection settings.

        Args:
            config: A configuration dict, or a string path to a JSON config
                file that is loaded immediately. ``None`` means an empty
                configuration.
            sandbox: Run MCP servers in sandboxed execution mode.
            sandbox_options: Optional settings for the sandbox.
            timeout: Operation timeout in seconds (default: 30.0).
            sse_read_timeout: SSE read timeout in seconds (default: 300.0).
            max_retries: Attempts allowed for failed operations (default: 3).
            retry_interval: Seconds to wait between retries (default: 2.0).
            installer: Optional installer manager for dependency installation.
            check_dependencies: Check and install dependencies (default: True).
            tool_call_max_retries: Retries allowed for tool calls (default: 3).
            tool_call_retry_delay: Initial delay in seconds between tool call
                retries (default: 1.0).
        """
        # Resolve the configuration: a string is a file path, a dict is used
        # as-is, and None yields an empty configuration.
        if config is None:
            resolved: dict[str, Any] = {}
        elif isinstance(config, str):
            resolved = load_json_file(config)
        else:
            resolved = config
        self.config: dict[str, Any] = resolved

        # Execution environment
        self.sandbox = sandbox
        self.sandbox_options = sandbox_options

        # Timeouts and session-creation retry policy
        self.timeout = timeout
        self.sse_read_timeout = sse_read_timeout
        self.max_retries = max_retries
        self.retry_interval = retry_interval

        # Dependency handling
        self.installer = installer
        self.check_dependencies = check_dependencies

        # Tool-call retry policy, forwarded to connectors
        self.tool_call_max_retries = tool_call_max_retries
        self.tool_call_retry_delay = tool_call_retry_delay

        # Session bookkeeping: sessions by server name, plus the active names
        self.sessions: dict[str, MCPSession] = {}
        self.active_sessions: list[str] = []
    
    def _get_mcp_servers(self) -> dict[str, Any]:
        """Look up the server section of the configuration.

        The ``mcpServers`` key takes precedence; ``servers`` is consulted only
        when ``mcpServers`` resolves to ``None``.

        Returns:
            The server configurations keyed by name, or an empty dict when
            neither key yields any.
        """
        primary = get_config_value(self.config, "mcpServers", None)
        if primary is not None:
            return primary or {}

        fallback = get_config_value(self.config, "servers", {})
        return fallback or {}

    @classmethod
    def from_dict(
        cls,
        config: dict[str, Any],
        sandbox: bool = False,
        sandbox_options: SandboxOptions | None = None,
        timeout: float = 30.0,
        sse_read_timeout: float = 300.0,
        max_retries: int = 3,
        retry_interval: float = 2.0,
        installer: Optional[MCPInstallerManager] = None,
        check_dependencies: bool = True,
        tool_call_max_retries: int = 3,
        tool_call_retry_delay: float = 1.0,
    ) -> "MCPClient":
        """Create a MCPClient from a dictionary.

        Every constructor option is accepted and forwarded, so a client built
        through this factory can be configured exactly like one built directly.

        Args:
            config: The configuration dictionary.
            sandbox: Whether to use sandboxed execution mode for running MCP servers.
            sandbox_options: Optional sandbox configuration options.
            timeout: Timeout for operations in seconds (default: 30.0)
            sse_read_timeout: SSE read timeout in seconds (default: 300.0)
            max_retries: Maximum number of retry attempts (default: 3)
            retry_interval: Wait time between retries in seconds (default: 2.0)
            installer: Optional installer manager for dependency installation
            check_dependencies: Whether to check and install dependencies (default: True)
            tool_call_max_retries: Maximum number of retries for tool calls (default: 3)
            tool_call_retry_delay: Initial delay between tool call retries in seconds (default: 1.0)

        Returns:
            A new client instance using ``config`` as its configuration.
        """
        return cls(
            config=config,
            sandbox=sandbox,
            sandbox_options=sandbox_options,
            timeout=timeout,
            sse_read_timeout=sse_read_timeout,
            max_retries=max_retries,
            retry_interval=retry_interval,
            installer=installer,
            check_dependencies=check_dependencies,
            tool_call_max_retries=tool_call_max_retries,
            tool_call_retry_delay=tool_call_retry_delay,
        )

    @classmethod
    def from_config_file(
        cls,
        filepath: str,
        sandbox: bool = False,
        sandbox_options: SandboxOptions | None = None,
        timeout: float = 30.0,
        sse_read_timeout: float = 300.0,
        max_retries: int = 3,
        retry_interval: float = 2.0,
        installer: Optional[MCPInstallerManager] = None,
        check_dependencies: bool = True,
        tool_call_max_retries: int = 3,
        tool_call_retry_delay: float = 1.0,
    ) -> "MCPClient":
        """Create a MCPClient from a configuration file.

        Every constructor option is accepted and forwarded, so a client built
        through this factory can be configured exactly like one built directly.

        Args:
            filepath: The path to the configuration file.
            sandbox: Whether to use sandboxed execution mode for running MCP servers.
            sandbox_options: Optional sandbox configuration options.
            timeout: Timeout for operations in seconds (default: 30.0)
            sse_read_timeout: SSE read timeout in seconds (default: 300.0)
            max_retries: Maximum number of retry attempts (default: 3)
            retry_interval: Wait time between retries in seconds (default: 2.0)
            installer: Optional installer manager for dependency installation
            check_dependencies: Whether to check and install dependencies (default: True)
            tool_call_max_retries: Maximum number of retries for tool calls (default: 3)
            tool_call_retry_delay: Initial delay between tool call retries in seconds (default: 1.0)

        Returns:
            A new client instance configured from the loaded file.
        """
        return cls(
            config=load_json_file(filepath),
            sandbox=sandbox,
            sandbox_options=sandbox_options,
            timeout=timeout,
            sse_read_timeout=sse_read_timeout,
            max_retries=max_retries,
            retry_interval=retry_interval,
            installer=installer,
            check_dependencies=check_dependencies,
            tool_call_max_retries=tool_call_max_retries,
            tool_call_retry_delay=tool_call_retry_delay,
        )

    def add_server(
        self,
        name: str,
        server_config: dict[str, Any],
    ) -> None:
        """Add a server configuration.

        The entry is written to the same section that lookups read from:
        ``mcpServers`` when it is set, otherwise the legacy ``servers`` section
        when that one is set. Writing to a freshly created ``mcpServers``
        section while the existing servers live under ``servers`` would hide
        all of them, because ``mcpServers`` takes precedence on lookup.

        Args:
            name: The name to identify this server.
            server_config: The server configuration.
        """
        section_key = "mcpServers"
        if self.config.get("mcpServers") is None and self.config.get("servers") is not None:
            section_key = "servers"

        section = self.config.get(section_key)
        if section is None:
            # Covers both a missing key and an explicit null value.
            section = {}
            self.config[section_key] = section

        section[name] = server_config
        logger.debug(f"Added MCP server configuration: {name}")

    def remove_server(self, name: str) -> None:
        """Remove a server configuration.

        The entry is removed from the section that lookups read from
        (``mcpServers`` when set, otherwise ``servers``). The name is also
        dropped from ``active_sessions``; an existing session object in
        ``sessions`` is left untouched. A warning is logged when the server is
        not configured.

        Args:
            name: The name of the server to remove.
        """
        mcp_servers = self._get_mcp_servers()
        if name not in mcp_servers:
            logger.warning(f"Server '{name}' not found in configuration")
            return

        # Use the first section that is actually set. A key that is present
        # but null must be skipped rather than dereferenced.
        for section_key in ("mcpServers", "servers"):
            section = self.config.get(section_key)
            if section is not None:
                section.pop(name, None)
                break

        # If we removed an active session, remove it from active_sessions
        if name in self.active_sessions:
            self.active_sessions.remove(name)

        logger.debug(f"Removed MCP server configuration: {name}")

    def get_server_names(self) -> list[str]:
        """List the names of all servers present in the configuration.

        Returns:
            The configured server names, in configuration order.
        """
        return [server_name for server_name in self._get_mcp_servers()]

    def save_config(self, filepath: str) -> None:
        """Write the client's current configuration to a JSON file.

        Args:
            filepath: Destination path for the configuration.
        """
        current_config = self.config
        save_json_file(current_config, filepath)

    async def create_session(self, server_name: str, auto_initialize: bool = True) -> Optional[MCPSession]:
        """Create a session for the specified server with retry logic.

        An existing session for ``server_name`` is returned unchanged. Otherwise
        up to ``max_retries`` attempts are made, waiting ``retry_interval``
        seconds between them. A session that fails part-way through an attempt
        is disconnected (best effort) before the next attempt so that its
        connector is not left behind.

        Args:
            server_name: The name of the server to create a session for.
            auto_initialize: Whether to automatically initialize the session.

        Returns:
            The created (or already existing) MCPSession, or ``None`` when no
            servers are configured at all (a ``UserWarning`` is emitted).

        Raises:
            ValueError: If servers are configured but ``server_name`` is not one of them.
            MCPDependencyError: If dependency installation fails; never retried.
            Exception: The last error if session creation fails after all retries.
        """
        # Check if session already exists
        if server_name in self.sessions:
            logger.debug(f"Session for server '{server_name}' already exists, returning existing session")
            return self.sessions[server_name]

        # Get server config
        servers = self._get_mcp_servers()

        if not servers:
            warnings.warn("No MCP servers defined in config", UserWarning, stacklevel=2)
            return None

        if server_name not in servers:
            raise ValueError(f"Server '{server_name}' not found in config. Available: {list(servers.keys())}")

        server_config = servers[server_name]

        # Retry logic for session creation
        last_exc: Exception | None = None

        for attempt in range(1, self.max_retries + 1):
            # Tracked per attempt so a failed attempt can clean up after itself.
            session: MCPSession | None = None
            try:
                # Create connector with options (now async)
                connector = await create_connector_from_config(
                    server_config,
                    server_name=server_name,
                    sandbox=self.sandbox,
                    sandbox_options=self.sandbox_options,
                    timeout=self.timeout,
                    sse_read_timeout=self.sse_read_timeout,
                    installer=self.installer,
                    check_dependencies=self.check_dependencies,
                    tool_call_max_retries=self.tool_call_max_retries,
                    tool_call_retry_delay=self.tool_call_retry_delay,
                )

                # Create the session with proper initialization parameters
                session = MCPSession(
                    connector=connector,
                    session_id=f"mcp-{server_name}",
                    auto_connect=True,
                    auto_initialize=False,  # We'll handle initialization explicitly below
                )

                # Initialize if requested
                if auto_initialize:
                    await session.initialize()
                    logger.debug(f"Initialized session for server '{server_name}'")

                # Store session
                self.sessions[server_name] = session

                # Add to active sessions
                if server_name not in self.active_sessions:
                    self.active_sessions.append(server_name)

                logger.info(f"Created session for MCP server '{server_name}' (attempt {attempt}/{self.max_retries})")
                return session

            except MCPDependencyError as e:
                # Don't retry dependency errors - they won't succeed on retry
                # Error already shown to user by installer, just re-raise
                logger.debug(f"Dependency error for server '{server_name}': {type(e).__name__}")
                raise
            except Exception as e:
                last_exc = e

                # Release whatever the failed attempt managed to open. Only
                # sessions that were never registered are torn down here.
                if session is not None and self.sessions.get(server_name) is not session:
                    try:
                        await session.disconnect()
                    except Exception as cleanup_exc:
                        logger.debug(
                            f"Ignoring error while cleaning up failed session for server "
                            f"'{server_name}': {cleanup_exc}"
                        )

                if attempt == self.max_retries:
                    break

                # Use info level for first attempt (common after fresh install), warning for subsequent
                log_level = logger.info if attempt == 1 else logger.warning
                log_level(
                    f"Failed to create session for server '{server_name}' (attempt {attempt}/{self.max_retries}): {e}, "
                    f"retrying in {self.retry_interval} seconds..."
                )
                await asyncio.sleep(self.retry_interval)

        # All retries failed
        error_msg = f"Failed to create session for server '{server_name}' after {self.max_retries} retries"
        logger.error(error_msg)
        raise last_exc or RuntimeError(error_msg)

    async def create_all_sessions(
        self,
        auto_initialize: bool = True,
    ) -> dict[str, MCPSession]:
        """Open a session for every server in the configuration.

        A failure for one server is logged and does not stop the others.

        Args:
            auto_initialize: Whether each session should be initialized.

        Returns:
            The client's session mapping (server name -> MCPSession), or an
            empty dict when no servers are configured.

        Warns:
            UserWarning: If no servers are configured.
        """
        configured = self._get_mcp_servers()

        if not configured:
            warnings.warn("No MCP servers defined in config", UserWarning, stacklevel=2)
            return {}

        logger.debug(f"Creating sessions for {len(configured)} servers")

        # create_session takes care of initialization and retries per server.
        for name in configured:
            try:
                await self.create_session(name, auto_initialize)
            except Exception as e:
                logger.error(f"Failed to create session for server '{name}': {e}")

        session_count = len(self.sessions)
        logger.info(f"Created {session_count} MCP sessions")
        return self.sessions

    def get_session(self, server_name: str) -> MCPSession:
        """Return the session that already exists for a server.

        Args:
            server_name: The name of the server whose session is wanted.

        Returns:
            The MCPSession stored for ``server_name``.

        Raises:
            ValueError: If no session exists for ``server_name``.
        """
        if server_name in self.sessions:
            return self.sessions[server_name]

        raise ValueError(f"No session exists for server '{server_name}'")

    def get_all_active_sessions(self) -> dict[str, MCPSession]:
        """Collect the sessions whose names are marked active.

        Returns:
            A new dict mapping each active server name to its MCPSession.
            Active names without a stored session are skipped.
        """
        active: dict[str, MCPSession] = {}
        for name in self.active_sessions:
            if name in self.sessions:
                active[name] = self.sessions[name]
        return active

    async def close_session(self, server_name: str) -> None:
        """Disconnect a server's session and stop tracking it.

        A missing session only produces a warning. Errors raised while
        disconnecting are logged rather than propagated, and the session is
        removed from tracking either way.

        Args:
            server_name: The name of the server whose session should be closed.
        """
        try:
            session = self.sessions[server_name]
        except KeyError:
            logger.warning(f"No session exists for server '{server_name}', nothing to close")
            return

        disconnect_failed = False

        try:
            logger.debug(f"Closing session for server '{server_name}'")
            await session.disconnect()
            logger.info(f"Successfully closed session for server '{server_name}'")
        except Exception as exc:
            disconnect_failed = True
            logger.error(f"Error closing session for server '{server_name}': {exc}")
        finally:
            # Tracking is cleared no matter how the disconnect went.
            self.sessions.pop(server_name, None)

            try:
                self.active_sessions.remove(server_name)
            except ValueError:
                # The name was not marked active; nothing to remove.
                pass

            if disconnect_failed:
                logger.warning(f"Session for '{server_name}' removed from tracking despite disconnect error")

    async def close_all_sessions(self) -> None:
        """Close every tracked session, continuing past individual failures."""
        failures = []

        # Iterate over a snapshot: close_session mutates self.sessions.
        for server_name in tuple(self.sessions):
            try:
                logger.debug(f"Closing session for server '{server_name}'")
                await self.close_session(server_name)
            except Exception as e:
                error_msg = f"Failed to close session for server '{server_name}': {e}"
                logger.error(error_msg)
                failures.append(error_msg)

        # Summarize the outcome.
        if not failures:
            logger.debug("All sessions closed successfully")
        else:
            logger.error(f"Encountered {len(failures)} errors while closing sessions")


================================================
FILE: anytool/grounding/backends/mcp/config.py
================================================
"""
Connector factory for MCP sessions.

This module builds the appropriate MCP connector (stdio, sandbox, HTTP, or
WebSocket) from a single server's configuration entry.
"""

from typing import Any, Optional

from anytool.grounding.core.types import SandboxOptions
from anytool.config.utils import get_config_value
from .transport.connectors import (
    MCPBaseConnector,
    HttpConnector,
    SandboxConnector,
    StdioConnector,
    WebSocketConnector,
)
from .transport.connectors.utils import is_stdio_server
from .installer import MCPInstallerManager

# Import E2BSandbox
try:
    from anytool.grounding.core.security import E2BSandbox
    E2B_AVAILABLE = True
except ImportError:
    E2BSandbox = None
    E2B_AVAILABLE = False

async def create_connector_from_config(
    server_config: dict[str, Any],
    server_name: str = "unknown",
    sandbox: bool = False,
    sandbox_options: SandboxOptions | None = None,
    timeout: float = 30.0,
    sse_read_timeout: float = 300.0,
    installer: Optional[MCPInstallerManager] = None,
    check_dependencies: bool = True,
    tool_call_max_retries: int = 3,
    tool_call_retry_delay: float = 1.0,
) -> MCPBaseConnector:
    """Create a connector based on server configuration.

    The connector type is chosen in this order: stdio (command-based) without
    sandbox, stdio inside an E2B sandbox, HTTP (``url`` key), WebSocket
    (``ws_url`` key).

    Args:
        server_config: The server configuration section
        server_name: Name of the MCP server (for display purposes)
        sandbox: Whether to use sandboxed execution mode for running MCP servers.
        sandbox_options: Optional sandbox configuration options.
        timeout: Timeout for operations in seconds (default: 30.0)
        sse_read_timeout: SSE read timeout in seconds (default: 300.0)
        installer: Optional installer manager for dependency installation
        check_dependencies: Whether to check and install dependencies (default: True)
        tool_call_max_retries: Maximum number of retries for tool calls (default: 3)
        tool_call_retry_delay: Initial delay between retries in seconds (default: 1.0)

    Returns:
        A configured connector instance

    Raises:
        ImportError: If sandbox mode is requested but E2B support is not installed.
        ValueError: If the configuration matches none of the known connector types.
        Exception: Whatever ``installer.ensure_dependencies`` raises when a
            stdio server's dependencies cannot be ensured.
    """
    # Evaluated once; every branch below depends on it.
    is_stdio = is_stdio_server(server_config)

    # Check and install dependencies if needed (only for stdio servers)
    if is_stdio and check_dependencies:
        # Get original command and args from config
        original_command = get_config_value(server_config, "command")
        original_args = get_config_value(server_config, "args", [])

        # Use provided installer or get global instance
        if installer is None:
            from .installer import get_global_installer
            installer = get_global_installer()

        # Ensure dependencies are installed (using original command/args)
        await installer.ensure_dependencies(server_name, original_command, original_args)

    # Stdio connector (command-based)
    if is_stdio and not sandbox:
        return StdioConnector(
            command=get_config_value(server_config, "command"),
            args=get_config_value(server_config, "args"),
            env=get_config_value(server_config, "env", None),
        )

    # Sandboxed connector
    if is_stdio and sandbox:
        if not E2B_AVAILABLE:
            raise ImportError(
                "E2B sandbox support not available. Please install e2b-code-interpreter: "
                "'pip install e2b-code-interpreter'"
            )

        # Create E2B sandbox instance
        _sandbox_options = sandbox_options or {}
        e2b_sandbox = E2BSandbox(_sandbox_options)

        # Sandbox options may override the timeouts passed to this function
        connector_timeout = _sandbox_options.get("timeout", timeout)
        connector_sse_timeout = _sandbox_options.get("sse_read_timeout", sse_read_timeout)

        # Create and return sandbox connector
        return SandboxConnector(
            sandbox=e2b_sandbox,
            command=get_config_value(server_config, "command"),
            args=get_config_value(server_config, "args"),
            env=get_config_value(server_config, "env", None),
            supergateway_command=_sandbox_options.get("supergateway_command", "npx -y supergateway"),
            port=_sandbox_options.get("port", 3000),
            timeout=connector_timeout,
            sse_read_timeout=connector_sse_timeout,
        )

    # HTTP connector
    if "url" in server_config:
        return HttpConnector(
            base_url=get_config_value(server_config, "url"),
            headers=get_config_value(server_config, "headers", None),
            auth_token=get_config_value(server_config, "auth_token", None),
            timeout=timeout,
            sse_read_timeout=sse_read_timeout,
            tool_call_max_retries=tool_call_max_retries,
            tool_call_retry_delay=tool_call_retry_delay,
        )

    # WebSocket connector
    if "ws_url" in server_config:
        return WebSocketConnector(
            url=get_config_value(server_config, "ws_url"),
            headers=get_config_value(server_config, "headers", None),
            auth_token=get_config_value(server_config, "auth_token", None),
        )

    # Name the server so the broken entry can be found in a multi-server config.
    raise ValueError(
        f"Cannot determine connector type from config for server '{server_name}' "
        "(expected a 'command', 'url', or 'ws_url' entry)"
    )

================================================
FILE: anytool/grounding/backends/mcp/installer.py
================================================
import asyncio
import sys
import shutil
from typing import Callable, Awaitable, Optional, Dict, List
from anytool.utils.logging import Logger

logger = Logger.get_logger(__name__)

# Type of a user-confirmation callback: an async callable that takes the
# prompt message and resolves to a bool.
PromptFunc = Callable[[str], Awaitable[bool]]

# Global lock to prevent concurrent user prompts
_prompt_lock = asyncio.Lock()


class MCPDependencyError(RuntimeError):
    """Root of the MCP dependency error hierarchy."""


class MCPCommandNotFoundError(MCPDependencyError):
    """A command required by an MCP server is not available."""


class MCPInstallationCancelledError(MCPDependencyError):
    """The user declined or cancelled the installation."""


class MCPInstallationFailedError(MCPDependencyError):
    """An installation was attempted and did not succeed."""


class Colors:
    """ANSI escape sequences used for colored terminal output."""

    # Text attributes
    RESET = "\033[0m"
    BOLD = "\033[1m"

    # Bright foreground colors, in ascending SGR code order
    GRAY = "\033[90m"
    RED = "\033[91m"
    GREEN = "\033[92m"
    YELLOW = "\033[93m"
    BLUE = "\033[94m"
    CYAN = "\033[96m"
    WHITE = "\033[97m"


class MCPInstallerManager:
    """
    MCP dependencies package installer manager.
    
    Responsible for detecting if the MCP server dependencies are installed, and if not, asking the user whether to install them.
    """
    
    def __init__(self, prompt: PromptFunc | None = None, auto_install: bool = False, verbose: bool = False):
        """Create an installer manager.

        Args:
            prompt: Async callback used to ask the user for confirmation. When
                not given, the built-in CLI prompt is used.
            auto_install: Install dependencies without asking when True.
            verbose: Show detailed installation logs when True; otherwise only
                a progress indicator.
        """
        # Behaviour switches
        self._verbose = verbose
        self._auto_install = auto_install

        # Confirmation callback, falling back to the interactive CLI prompt
        self._prompt: PromptFunc | None = prompt if prompt else self._default_cli_prompt

        # Results of earlier package checks, keyed by "command:arg:arg..."
        self._installed_cache: Dict[str, bool] = {}
        # Installations that already failed, kept to avoid retrying them
        self._failed_installations: Dict[str, str] = {}
        
    async def _default_cli_prompt(self, message: str) -> bool:
        """Default CLI prompt function (called within lock by ensure_dependencies).

        Prints a framed prompt containing ``message``, reads one line from
        stdin, and echoes the decision.

        Args:
            message: Text shown to the user inside the prompt frame.

        Returns:
            True only when the answer (trimmed, case-insensitive) is "y" or
            "yes"; any other input, including an empty line, counts as no.
        """
        # Imported here rather than at module level.
        from anytool.utils.display import print_separator, colorize
        
        # Header
        print()
        print_separator(70, 'c', 2)
        print(f"  {colorize('MCP dependencies installation prompt', color=Colors.BLUE, bold=True)}")
        print_separator(70, 'c', 2)
        # Body and the available choices
        print(f"  {message}")
        print_separator(70, 'gr', 2)
        print(f"  {colorize('[y/yes]', color=Colors.GREEN)} Install  |  {colorize('[n/no]', color=Colors.RED)} Cancel")
        print_separator(70, 'gr', 2)
        # No trailing newline, so the answer is typed on the same line.
        print(f"  {colorize('Your choice:', bold=True)} ", end="", flush=True)
        
        # readline blocks, so it runs in the default executor to keep the
        # event loop free while waiting for input.
        answer = await asyncio.get_running_loop().run_in_executor(None, sys.stdin.readline)
        response = answer.strip().lower() in {"y", "yes"}
        
        if response:
            print(f"{Colors.GREEN}✓ Installation confirmed{Colors.RESET}\n")
        else:
            print(f"{Colors.RED}✗ Installation cancelled{Colors.RESET}\n")
        
        return response
    
    async def _ask_user(self, message: str) -> bool:
        """Ask the user whether to install."""
        if self._auto_install:
            logger.info("Automatic installation mode enabled, will automatically install dependencies")
            return True
            
        if self._prompt:
            try:
                return await self._prompt(message)
            except Exception as e:
                logger.error(f"Error asking user: {e}")
                return False
        return False
    
    def _check_command_available(self, command: str) -> bool:
        """Check if the command is available.
        
        Args:
            command: The command to check (e.g. "npx", "uvx")
            
        Returns:
            bool: Whether the command is available
        """
        return shutil.which(command) is not None
    
    async def _check_package_installed(self, command: str, args: List[str]) -> bool:
        """Check if the package is installed.
        
        Args:
            command: The command to check (e.g. "npx", "uvx")
            args: The arguments list
            
        Returns:
            bool: Whether the package is installed
        """
        # Build cache key
        cache_key = f"{command}:{':'.join(args)}"
        
        # Check cache
        if cache_key in self._installed_cache:
            return self._installed_cache[cache_key]
        
        # For different types of commands, use different check methods
        try:
            if command == "npx":
                # For npx, check if the npm package exists
                package_name = self._extract_npm_package(args)
                if package_name:
                    result = await self._check_npm_package(package_name)
                    self._installed_cache[cache_key] = result
                    return result
            elif command == "uvx":
                # For uvx, check if the Python package exists
                package_name = self._extract_python_package(args)
                if package_name:
                    result = await self._check_python_package(package_name)
                    self._installed_cache[cache_key] = result
                    return result
            elif command == "uv":
                # For "uv run --with package ...", check if the Python package exists
                package_name = self._extract_uv_package(args)
                if package_name:
                    result = await self._check_uv_pip_package(package_name)
                    self._installed_cache[cache_key] = result
                    return result
        except Exception as e:
            logger.debug(f"Error checking package installation status: {e}")
        
        # Default to assuming not installed
        return False
    
    def _extract_npm_package(self, args: List[str]) -> Optional[str]:
        """Extract package name from npx arguments.
        
        Args:
            args: npx arguments list, e.g. ["-y", "mcp-excalidraw-server"] or ["bazi-mcp"]
            
        Returns:
            Package name (without version tag) or None
        """
        for i, arg in enumerate(args):
            # Skip option parameters
            if arg.startswith("-"):
                continue
            
            # Found package name, now strip version tag
            package_name = arg
            
            # Handle scoped packages: @scope/package@version -> @scope/package
            if package_name.startswith("@"):
                # Scoped package like @rtuin/mcp-mermaid-validator@latest
                parts = package_name.split("/", 1)
                if len(parts) == 2:
                    scope = parts[0]
                    name_with_version = parts[1]
                    # Remove version tag from name part (e.g., "pkg@latest" -> "pkg")
                    name = name_with_version.split("@")[0] if "@" in name_with_version else name_with_version
                    return f"{scope}/{name}"
                return package_name
            else:
                # Regular package like mcp-deepwiki@latest -> mcp-deepwiki
                return package_name.split("@")[0] if "@" in package_name else package_name
        
        return None
    
    def _extract_python_package(self, args: List[str]) -> Optional[str]:
        """Extract package name from uvx arguments.
        
        Args:
            args: uvx arguments list, e.g. ["--from", "office-powerpoint-mcp-server", "ppt_mcp_server"]
                  or ["--with", "mcp==1.9.0", "sitemap-mcp-server"]
                  or ["arxiv-mcp-server", "--storage-path", "./path"]
            
        Returns:
            Package name or None
        """
        # Find --from parameter (this is the package to install)
        for i, arg in enumerate(args):
            if arg == "--from" and i + 1 < len(args):
                return args[i + 1]
        
        # Skip option flags and their values, find the main package (FIRST positional arg)
        # Options that take a value: --with, --python, --from, --storage-path, etc.
        options_with_value = {"--with", "--from", "--python", "-p", "--storage-path"}
        skip_next = False
        
        for arg in args:
            if skip_next:
                skip_next = False
                continue
            if arg in options_with_value:
                skip_next = True
                continue
            if arg.startswith("-"):
                # Other flags without values (or unknown options with values)
                # Also skip the next arg if it looks like an option value (doesn't start with -)
                continue
            # First non-option argument is the package name
            return arg
        
        return None
    
    def _extract_uv_package(self, args: List[str]) -> Optional[str]:
        """Extract package name from uv run arguments.
        
        Args:
            args: uv arguments list, e.g. ["run", "--with", "biomcp-python", "biomcp", "run"]
            
        Returns:
            Package name or None
        """
        # Find --with parameter (this specifies the package to install)
        for i, arg in enumerate(args):
            if arg == "--with" and i + 1 < len(args):
                package_name = args[i + 1]
                # Remove version specifier if present (e.g., "mcp==1.9.0" -> "mcp")
                if "==" in package_name:
                    return package_name.split("==")[0]
                if ">=" in package_name:
                    return package_name.split(">=")[0]
                return package_name
        
        return None
    
    async def _check_npm_package(self, package_name: str) -> bool:
        """Report whether an npm package is installed globally.

        Runs ``npm list -g <package_name>`` and treats a zero exit status as
        "installed". Any failure to run the command is logged at debug level
        and reported as "not installed".

        Args:
            package_name: npm package name

        Returns:
            bool: True when ``npm list -g`` exits with status 0
        """
        npm_list = ("npm", "list", "-g", package_name)
        try:
            proc = await asyncio.create_subprocess_exec(
                *npm_list,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
            )
            # Output is not inspected; only the exit status matters
            await proc.communicate()

            return proc.returncode == 0
        except Exception as e:
            logger.debug(f"Error checking npm package {package_name}: {e}")
            return False
    
    async def _check_python_package(self, package_name: str) -> bool:
        """Check if the Python package is installed as a uvx tool.
        
        uvx tools are installed in ~/.local/share/uv/tools/ directory,
        not in the current pip environment.
        
        Args:
            package_name: Python package/tool name
            
        Returns:
            bool: Whether the uvx tool is installed
        """
        import os
        from pathlib import Path
        
        # Strip version specifier if present (e.g., "mcp==1.9.0" -> "mcp")
        clean_name = package_name.split("==")[0].split(">=")[0].split("<=")[0].split(">")[0].split("<")[0]
        
        # Check if uvx tool exists in the standard uv tools directory
        uv_tools_dir = Path.home() / ".local" / "share" / "uv" / "tools"
        tool_dir = uv_tools_dir / clean_name
        
        if tool_dir.exists():
            logger.debug(f"uvx tool '{clean_name}' found at {tool_dir}")
            return True
        
        # Fallback: try running uvx with --help to check if it's available
        try:
            process = await asyncio.create_subprocess_exec(
                "uvx", clean_name, "--help",
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE
            )
            # Just wait briefly, don't need the full output
            try:
                await asyncio.wait_for(process.communicate(), timeout=5.0)
            except asyncio.TimeoutError:
                process.kill()
                await process.wait()
            
            # If it didn't error immediately, the tool likely exists
            return process.returncode == 0
        except Exception as e:
            logger.debug(f"Error checking uvx tool {clean_name}: {e}")
        
        return False
    
    async def _check_uv_pip_package(self, package_name: str) -> bool:
        """Report whether a Python package is visible to ``uv pip`` or ``pip``.

        ``uv pip show`` is tried first. If it does not succeed (non-zero exit
        or failure to launch), plain ``pip show`` decides the result.

        Args:
            package_name: Python package name, optionally with a version specifier

        Returns:
            bool: Whether the package is installed
        """
        # Drop the version specifier by cutting at each supported operator in turn
        clean_name = package_name
        for operator in ("==", ">=", "<=", ">", "<"):
            clean_name = clean_name.split(operator)[0]

        # First probe: uv's pip interface
        try:
            uv_probe = await asyncio.create_subprocess_exec(
                "uv", "pip", "show", clean_name,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
            )
            await uv_probe.communicate()

            if uv_probe.returncode == 0:
                logger.debug(f"uv pip package '{clean_name}' found")
                return True
        except Exception as e:
            logger.debug(f"Error checking uv pip package {clean_name}: {e}")

        # Second probe: regular pip, whose exit status is final
        try:
            pip_probe = await asyncio.create_subprocess_exec(
                "pip", "show", clean_name,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
            )
            await pip_probe.communicate()

            return pip_probe.returncode == 0
        except Exception as e:
            logger.debug(f"Error checking pip package {clean_name}: {e}")

        return False
    
    async def _install_package(self, command: str, args: List[str], use_sudo: bool = False) -> bool:
        """Execute the install command.

        The install command is derived from ``command``/``args`` via
        ``_get_install_command``. In verbose mode, and always when ``use_sudo``
        is set, the installer output is streamed line by line. Otherwise a
        spinner is shown and the output is only printed on failure. When a
        non-sudo run fails with a permission error the user is asked whether
        to retry with sudo.

        Args:
            command: The command to execute (e.g. "npx", "uvx")
            args: The arguments list
            use_sudo: Whether to use sudo for installation

        Returns:
            bool: Whether the installation is successful
        """
        install_command = self._get_install_command(command, args)

        if not install_command:
            logger.error("Cannot determine install command")
            return False

        # Add sudo if requested
        if use_sudo:
            install_command = ["sudo"] + install_command

        logger.info(f"Executing install command: {' '.join(install_command)}")

        # For sudo commands, always stream output so the password prompt is visible
        stream_output = self._verbose or use_sudo

        try:
            if stream_output:
                # Verbose mode: show all installation logs
                from anytool.utils.display import print_separator, colorize

                print_separator(70, 'c', 2)
                if use_sudo:
                    print(f"  {colorize('Installing with administrator privileges...', color=Colors.BLUE)}")
                    print(f"  {colorize('>> You will be prompted for your password below <<', color=Colors.YELLOW)}")
                else:
                    print(f"  {colorize('Installing dependencies...', color=Colors.BLUE)}")
                print(f"  {colorize('Command: ' + ' '.join(install_command), color=Colors.GRAY)}")
                print_separator(70, 'c', 2)
                print()

                # stdin is left untouched (inherited) so sudo can read the
                # password from the terminal; stderr is merged into stdout.
                process = await asyncio.create_subprocess_exec(
                    *install_command,
                    stdout=asyncio.subprocess.PIPE,
                    stderr=asyncio.subprocess.STDOUT,
                    stdin=None
                )

                # Real-time output of installation logs
                output_lines = []
                while True:
                    line = await process.stdout.readline()
                    if not line:
                        break
                    # Installer output is not guaranteed to be valid UTF-8
                    line_str = line.decode(errors="replace").rstrip()
                    output_lines.append(line_str)
                    print(f"{Colors.GRAY}{line_str}{Colors.RESET}")

                await process.wait()
                full_output = '\n'.join(output_lines)
            else:
                # Quiet mode: only show progress indicator
                print(f"\n{Colors.BLUE}Installing dependencies...{Colors.RESET} ", end="", flush=True)

                process = await asyncio.create_subprocess_exec(
                    *install_command,
                    stdout=asyncio.subprocess.PIPE,
                    stderr=asyncio.subprocess.PIPE
                )

                # Drain both pipes in the background while the spinner runs.
                # Waiting on process.wait() with undrained pipes can deadlock
                # once the child fills the OS pipe buffer.
                communicate_task = asyncio.ensure_future(process.communicate())

                # Show spinner animation while installing
                spinner = ['⠋', '⠙', '⠹', '⠸', '⠼', '⠴', '⠦', '⠧', '⠇', '⠏']
                spinner_idx = 0

                try:
                    while True:
                        done, _pending = await asyncio.wait({communicate_task}, timeout=0.1)
                        if done:
                            break
                        print(f"\r{Colors.BLUE}Installing dependencies...{Colors.RESET} {Colors.CYAN}{spinner[spinner_idx]}{Colors.RESET}", end="", flush=True)
                        spinner_idx = (spinner_idx + 1) % len(spinner)
                finally:
                    # Do not leave the reader task dangling if we are interrupted
                    if not communicate_task.done():
                        communicate_task.cancel()

                # Clear the spinner line
                print(f"\r{' ' * 100}\r", end="", flush=True)

                # Keep both streams: errors such as EACCES usually arrive on
                # stderr even when stdout is non-empty.
                stdout, stderr = communicate_task.result()
                full_output = '\n'.join(
                    chunk.decode(errors="replace") for chunk in (stdout, stderr) if chunk
                )

            if process.returncode == 0:
                print(f"{Colors.GREEN}✓ Dependencies installed successfully{Colors.RESET}")
                if not use_sudo:
                    print(f"{Colors.GRAY}(Note: First connection may take a moment to initialize){Colors.RESET}")
                # Update cache
                cache_key = f"{command}:{':'.join(args)}"
                self._installed_cache[cache_key] = True
                return True
            else:
                # Check if it's a permission error
                is_permission_error = "EACCES" in full_output or "permission denied" in full_output.lower()

                if is_permission_error and not use_sudo:
                    print(f"\n{Colors.YELLOW}Permission denied{Colors.RESET}")
                    print(f"{Colors.GRAY}The installation requires administrator privileges.{Colors.RESET}\n")

                    # Ask user if they want to use sudo
                    message = (
                        f"\n{Colors.WHITE}Administrator privileges required{Colors.RESET}\n\n"
                        f"Command: {Colors.GRAY}{' '.join(install_command)}{Colors.RESET}\n\n"
                        f"{Colors.YELLOW}Do you want to retry with sudo (requires password)?{Colors.RESET}"
                    )

                    if await self._ask_user(message):
                        # No extra print needed, the sudo run streams its own instructions
                        return await self._install_package(command, args, use_sudo=True)
                    else:
                        print(f"\n{Colors.RED}✗ Installation cancelled{Colors.RESET}")
                        return False
                else:
                    print(f"{Colors.RED}✗ Dependencies installation failed (return code: {process.returncode}){Colors.RESET}")
                    # Show error output only if it was not already streamed above
                    if not stream_output and full_output:
                        # Limit error output to last 20 lines
                        error_lines = full_output.split('\n')
                        if len(error_lines) > 20:
                            error_lines = ['...(truncated)...'] + error_lines[-20:]
                        print(f"{Colors.GRAY}Error output:\n{chr(10).join(error_lines)}{Colors.RESET}")

                    # Add general guidance for manual installation
                    print(f"\n{Colors.YELLOW}Tip:{Colors.RESET} {Colors.GRAY}If automatic installation fails, please refer to the")
                    print(f"official documentation of the MCP server for manual installation instructions.{Colors.RESET}\n")

                    return False

        except Exception as e:
            logger.error(f"Error installing dependencies: {e}")
            print(f"{Colors.RED}✗ Error occurred during installation: {e}{Colors.RESET}")
            return False
    
    def _get_install_command(self, command: str, args: List[str]) -> Optional[List[str]]:
        """Generate install command based on command type.
        
        Args:
            command: The command to execute (e.g. "npx", "uvx", "uv")
            args: The original arguments list
            
        Returns:
            Install command list or None
        """
        if command == "npx":
            package_name = self._extract_npm_package(args)
            if package_name:
                return ["npm", "install", "-g", package_name]
        elif command == "uvx":
            package_name = self._extract_python_package(args)
            if package_name:
                return ["pip", "install", package_name]
        elif command == "uv":
            # Handle "uv run --with package_name ..." format
            package_name = self._extract_uv_package(args)
            if package_name:
                return ["uv", "pip", "install", package_name]
        
        return None
    
    async def ensure_dependencies(
        self,
        server_name: str,
        command: str,
        args: List[str]
    ) -> bool:
        """Make sure the dependencies of an MCP server are installed.

        Delegates to ``_ensure_dependencies_impl`` while holding the
        module-level prompt lock, so the check, the user prompt and the
        installation of one server are not interleaved with another's.

        Args:
            server_name: MCP server name (for display purposes)
            command: The command to execute (e.g. "npx", "uvx")
            args: The arguments list

        Returns:
            bool: Whether the dependencies are installed (installed or successfully installed)

        Raises:
            RuntimeError: When the command is not available or the user refuses to install
        """
        # Hold the lock for the whole check/prompt/install sequence
        async with _prompt_lock:
            installed = await self._ensure_dependencies_impl(server_name, command, args)
        return installed
    
    async def _ensure_dependencies_impl(
        self, 
        server_name: str,
        command: str, 
        args: List[str]
    ) -> bool:
        """Internal implementation of ensure_dependencies (called within lock).

        Steps, in order: skip commands that need no installation, re-raise a
        previously recorded failure, verify the launcher command exists, check
        whether the package is already installed, and otherwise ask the user
        and run the installation. Every failure path records its message in
        ``self._failed_installations`` before raising.

        Args:
            server_name: MCP server name (used for display and in the failure cache key)
            command: The command to execute (e.g. "npx", "uvx")
            args: The arguments list

        Returns:
            bool: True when nothing needs installing or the installation succeeded

        Raises:
            MCPDependencyError: When a failure was recorded earlier for this
                server/command/args, or no install command can be determined
            MCPCommandNotFoundError: When ``command`` (or ``uv`` for uvx) is not available
            MCPInstallationCancelledError: When the user declines the installation
            MCPInstallationFailedError: When the install command fails
        """
        # Skip dependency checking for direct script execution commands
        # These commands run scripts directly and don't need package installation
        SKIP_COMMANDS = {"node", "python", "python3", "bash", "sh", "deno", "bun"}
        
        if command.lower() in SKIP_COMMANDS:
            logger.debug(f"Skipping dependency check for direct script execution command: {command}")
            return True
        
        # Skip dependency checking for GitHub-based npx packages
        # These packages are handled directly by npx which downloads, builds, and runs them
        # npm install -g doesn't work properly for GitHub packages that require building
        if command == "npx":
            package_name = self._extract_npm_package(args)
            if package_name and package_name.startswith("github:"):
                logger.debug(f"Skipping dependency check for GitHub-based npx package: {package_name}")
                return True
        
        # Check if this server has already failed installation
        # Note: the cached message is always re-raised as MCPDependencyError,
        # whatever exception type was raised when the failure was first recorded.
        cache_key = f"{server_name}:{command}:{':'.join(args)}"
        if cache_key in self._failed_installations:
            error_msg = self._failed_installations[cache_key]
            logger.debug(f"Skipping installation for '{server_name}' - previously failed")
            raise MCPDependencyError(error_msg)
        
        # Special handling for uvx - check if uv is installed
        if command == "uvx":
            if not self._check_command_available("uv"):
                # Only show once to user, no verbose logging
                print(f"\n{Colors.RED}✗ Server '{server_name}' requires 'uv' to be installed{Colors.RESET}")
                print(f"{Colors.YELLOW}Please install uv first:")
                print(f"  • macOS/Linux: curl -LsSf https://astral.sh/uv/install.sh | sh")
                print(f"  • Or with pip: pip install uv")
                print(f"  • Or with brew: brew install uv{Colors.RESET}\n")
                
                error_msg = f"uvx requires 'uv' to be installed (server: {server_name})"
                self._failed_installations[cache_key] = error_msg
                raise MCPCommandNotFoundError(error_msg)
        
        # Check if the command is available
        if not self._check_command_available(command):
            error_msg = (
                f"Command '{command}' is not available.\n"
                f"Please install the necessary tools first."
            )
            logger.error(error_msg)
            self._failed_installations[cache_key] = error_msg
            raise MCPCommandNotFoundError(error_msg)
        
        # Check if the package is installed
        if await self._check_package_installed(command, args):
            logger.debug(f"The dependencies of the MCP server '{server_name}' are installed")
            return True
        
        # Extract package name for display
        # (package_name/package_type are only used in the confirmation prompt below)
        if command == "npx":
            package_name = self._extract_npm_package(args)
            package_type = "npm"
        elif command == "uvx":
            package_name = self._extract_python_package(args)
            package_type = "Python"
        elif command == "uv":
            package_name = self._extract_uv_package(args)
            package_type = "Python"
        else:
            package_name = f"{command} {' '.join(args)}"
            package_type = "package"
        
        # Build the message for displaying the install command
        install_cmd = self._get_install_command(command, args)
        
        # If we can't determine an install command, show helpful message
        if not install_cmd:
            print(f"\n{Colors.YELLOW}Cannot automatically install dependencies for '{server_name}'{Colors.RESET}")
            print(f"{Colors.GRAY}Command: {command} {' '.join(args)}{Colors.RESET}")
            print(f"\n{Colors.WHITE}This MCP server may require manual installation or configuration.{Colors.RESET}")
            print(f"{Colors.GRAY}Please refer to the MCP server's official documentation for installation instructions.{Colors.RESET}\n")
            
            error_msg = f"Manual installation required for '{server_name}' (command: {command})"
            self._failed_installations[cache_key] = error_msg
            raise MCPDependencyError(error_msg)
        
        install_cmd_str = ' '.join(install_cmd)
        
        # Build the message
        message = (
            f"\n{Colors.WHITE}The MCP server needs to install dependencies{Colors.RESET}\n\n"
            f"Server name: {Colors.CYAN}{server_name}{Colors.RESET}\n"
            f"Package type: {Colors.YELLOW}{package_type}{Colors.RESET}\n"
            f"Package name: {Colors.YELLOW}{package_name or 'Unknown'}{Colors.RESET}\n"
            f"Install command: {Colors.GRAY}{install_cmd_str}{Colors.RESET}\n\n"
            f"{Colors.YELLOW}Whether to install this dependency package?{Colors.RESET}"
        )
        
        # Ask the user
        # A refusal is cached too, so the same server is not prompted for again
        if not await self._ask_user(message):
            error_msg = f"User cancelled the dependency installation for '{server_name}'"
            logger.warning(error_msg)
            self._failed_installations[cache_key] = error_msg
            raise MCPInstallationCancelledError(error_msg)
        
        # Execute installation
        success = await self._install_package(command, args)
        
        if not success:
            error_msg = f"Dependency installation failed for '{server_name}'"
            logger.error(error_msg)
            self._failed_installations[cache_key] = error_msg
            raise MCPInstallationFailedError(error_msg)
        
        return True


# Process-wide installer instance; stays None until first requested or set
_global_installer: Optional[MCPInstallerManager] = None


def get_global_installer() -> MCPInstallerManager:
    """Return the shared installer manager, creating it on first use."""
    global _global_installer
    installer = _global_installer
    if installer is None:
        installer = MCPInstallerManager()
        _global_installer = installer
    return installer

def set_global_installer(installer: MCPInstallerManager) -> None:
    """Replace the shared installer manager with the given instance."""
    global _global_installer
    _global_installer = installer

================================================
FILE: anytool/grounding/backends/mcp/provider.py
================================================
"""
MCP Provider implementation.

This module provides a provider for managing MCP server sessions.
"""
import asyncio
from typing import Dict, List, Optional

from anytool.grounding.backends.mcp.session import MCPSession
from anytool.grounding.core.provider import Provider
from anytool.grounding.core.types import SessionConfig, BackendType, ToolSchema
from anytool.grounding.backends.mcp.client import MCPClient
from anytool.grounding.backends.mcp.installer import MCPInstallerManager, MCPDependencyError
from anytool.grounding.backends.mcp.tool_cache import get_tool_cache
from anytool.grounding.backends.mcp.tool_converter import _sanitize_mcp_schema
from anytool.grounding.core.tool import BaseTool, RemoteTool
from anytool.utils.logging import Logger
from anytool.config.utils import get_config_value

logger = Logger.get_logger(__name__)


class MCPProvider(Provider[MCPSession]):
    """
    MCP Provider manages multiple MCP server sessions.
    
    Each MCP server defined in config corresponds to one session.
    The provider handles lazy/eager session creation and tool aggregation.
    """
    
    def __init__(self, config: Dict | None = None, installer: Optional[MCPInstallerManager] = None):
        """Set up the provider and the MCPClient it delegates to.

        Args:
            config: Configuration dict with MCP server definitions.
                   Example: {"mcpServers": {"server1": {...}, "server2": {...}}}
            installer: Optional installer manager for dependency installation.
                   When omitted and config enables "auto_install", an
                   auto-installing manager is created.
        """
        super().__init__(BackendType.MCP, config)

        def _setting(key, default):
            # Read one provider-level option, falling back to its default
            return get_config_value(config, key, default)

        # Connection / sandbox options
        sandbox = _setting("sandbox", False)
        timeout = _setting("timeout", 30)
        sse_read_timeout = _setting("sse_read_timeout", 300.0)
        max_retries = _setting("max_retries", 3)
        retry_interval = _setting("retry_interval", 2.0)
        # Dependency handling options
        check_dependencies = _setting("check_dependencies", True)
        auto_install = _setting("auto_install", False)
        # Retry policy for individual tool calls (transient errors like 400, 500, etc.)
        tool_call_max_retries = _setting("tool_call_max_retries", 3)
        tool_call_retry_delay = _setting("tool_call_retry_delay", 1.0)

        # Sandbox options are only built when sandboxing is switched on
        sandbox_options = (
            {"timeout": timeout, "sse_read_timeout": sse_read_timeout}
            if sandbox
            else None
        )

        # An explicitly supplied installer always takes precedence
        if auto_install and installer is None:
            installer = MCPInstallerManager(auto_install=True)

        client_kwargs = dict(
            config=config or {},
            sandbox=sandbox,
            sandbox_options=sandbox_options,
            timeout=timeout,
            sse_read_timeout=sse_read_timeout,
            max_retries=max_retries,
            retry_interval=retry_interval,
            installer=installer,
            check_dependencies=check_dependencies,
            tool_call_max_retries=tool_call_max_retries,
            tool_call_retry_delay=tool_call_retry_delay,
        )
        self._client = MCPClient(**client_kwargs)

        # server name -> session, for quick lookup by server
        self._server_sessions: Dict[str, MCPSession] = {}

    async def initialize(self) -> None:
        """Initialize the provider; a no-op once already initialized.

        With config["eager_sessions"] set, a session is created up front for
        every configured server that does not have one yet. Otherwise
        sessions are left to be created on first access.
        """
        if self.is_initialized:
            return

        # config may be a dict or a Pydantic model, hence the utility accessor
        eager = get_config_value(self.config, "eager_sessions", False)
        if eager:
            servers = self.list_servers()
            logger.debug(f"Eagerly initializing {len(servers)} MCP server sessions")
            for srv in servers:
                if srv in self._server_sessions:
                    continue
                await self.create_session(
                    SessionConfig(
                        session_name=f"mcp-{srv}",
                        backend_type=BackendType.MCP,
                        connection_params={"server": srv},
                    )
                )

        self.is_initialized = True
        server_count = len(self.list_servers())
        logger.info(
            f"MCPProvider initialized with {server_count} servers (eager={eager})"
        )

    def list_servers(self) -> List[str]:
        """List the names of the MCP servers known to the underlying MCPClient.

        Returns:
            List of server names
        """
        server_names = self._client.get_server_names()
        return server_names

    async def create_session(self, session_config: SessionConfig) -> MCPSession:
        """Return the session for one MCP server, creating it if needed.

        An existing session for the same server is reused. A new one is
        created through the MCPClient and registered under both the server
        name and the session id ``<backend>-<server>``.

        Args:
            session_config: Must contain 'server' in connection_params

        Returns:
            MCPSession instance

        Raises:
            ValueError: If 'server' not in connection_params
            Exception: If session creation or initialization fails
        """
        server = get_config_value(session_config.connection_params, "server")
        if not server:
            raise ValueError("MCPProvider.create_session requires 'server' in connection_params")

        # Session ids follow the pattern <backend>-<server_name>
        session_id = f"{self.backend_type.value}-{server}"

        # Reuse the session if this server already has one
        if server in self._server_sessions:
            logger.debug(f"Session for server '{server}' already exists, returning existing session")
            return self._server_sessions[server]

        try:
            logger.debug(f"Creating new session for MCP server: {server}")
            new_session = await self._client.create_session(server, auto_initialize=True)
            new_session.session_id = session_id

            # Register under both lookup keys
            self._server_sessions[server] = new_session
            self._sessions[session_id] = new_session

            logger.info(f"Created MCP session '{session_id}' for server '{server}'")
            return new_session
        except MCPDependencyError as dep_err:
            # The user has already been told about dependency problems; keep the log quiet
            logger.debug(f"Dependency error for server '{server}': {type(dep_err).__name__}")
            raise
        except Exception as exc:
            logger.error(f"Failed to create session for server '{server}': {exc}")
            raise

    async def close_session(self, session_name: str) -> None:
        """Close the MCP session with the given name and stop tracking it.

        Malformed or unknown session names are logged as warnings and
        ignored. Errors raised while closing are logged, not propagated; the
        session is removed from tracking either way.

        Args:
            session_name: Session name in format 'mcp-<server_name>'
        """
        # Split "<backend>-<server_name>" and validate the backend prefix
        try:
            prefix, server_name = session_name.split("-", 1)
            if prefix != self.backend_type.value:
                raise ValueError(f"Invalid MCP session name format: {session_name}, expected 'mcp-<server_name>'")
        except ValueError as e:
            logger.warning(f"Invalid session_name format: {session_name} - {e}")
            return

        # Nothing to do unless at least one of the two maps knows the session
        is_tracked = session_name in self._sessions or server_name in self._server_sessions
        if not is_tracked:
            logger.warning(f"Session '{session_name}' not found, nothing to close")
            return

        close_failed = False
        try:
            logger.debug(f"Closing MCP session '{session_name}' (server: {server_name})")
            await self._client.close_session(server_name)
            logger.info(f"Successfully closed MCP session '{session_name}'")
        except Exception as e:
            close_failed = True
            logger.error(f"Error closing MCP session '{session_name}': {e}")
        finally:
            # Forget the session in both maps whether or not closing worked
            self._server_sessions.pop(server_name, None)
            self._sessions.pop(session_name, None)

            if close_failed:
                logger.warning(f"Session '{session_name}' removed from tracking despite close error")

    async def list_tools(self, session_name: str | None = None, use_cache: bool = True) -> List[BaseTool]:
        """List tools from one MCP session or from all configured servers.

        Args:
            session_name: If provided, only list tools from that session
                         (always queried live). If None, list tools from all
                         servers.
            use_cache: Only relevant when session_name is None. If True, tools
                      are served from the tool cache when it yields any; if
                      False, or the cache yields nothing, tools are listed live.

        Returns:
            List of BaseTool instances
        """
        await self.ensure_initialized()

        # Single-session request: always live, never cached
        if session_name:
            sess = self._sessions.get(session_name)
            if not sess:
                logger.warning(f"Session '{session_name}' not found")
                return []

            try:
                tools = await sess.list_tools()
                server_name = session_name.replace(f"{self.backend_type.value}-", "", 1)
                for tool in tools:
                    tool.bind_runtime_info(
                        backend=self.backend_type,
                        session_name=session_name,
                        server_name=server_name,
                    )
                return tools
            except Exception as e:
                logger.error(f"Error listing tools from session '{session_name}': {e}")
                return []

        # All-servers request: prefer the cache when allowed and populated
        if use_cache and get_tool_cache().has_cache():
            cached_tools = self._load_tools_from_cache()
            if cached_tools:
                logger.info(f"Loaded {len(cached_tools)} tools from cache (no server startup)")
                return cached_tools

        # Cache disabled, missing or empty: list tools live
        return await self._list_tools_live()
    
    def _load_tools_from_cache(self) -> List[BaseTool]:
        """Build the tool list from the tool cache instead of live sessions.

        Lookup order:
        1. The sanitized cache, if present, is used as is.
        2. Otherwise the raw cache is sanitized, the sanitized form is saved,
           and the tools are built from it.

        Returns:
            Tools for the configured servers, or an empty list when neither
            cache exists
        """
        cache = get_tool_cache()
        config_servers = self.list_servers()

        # Preferred source: already-sanitized metadata
        if cache.has_sanitized_cache():
            logger.debug("Loading from sanitized cache")
            sanitized = cache.get_all_sanitized_tools()
            return self._build_tools_from_cache(sanitized, config_servers)

        if not cache.has_cache():
            return []

        # Raw cache only: sanitize it now and persist the result
        logger.info("Sanitized cache not found, building from raw cache...")
        raw = cache.get_all_tools()
        sanitized = self._sanitize_and_save_cache(raw, cache)
        return self._build_tools_from_cache(sanitized, config_servers)
    
    def _sanitize_and_save_cache(
        self,
        raw_tools: Dict[str, List[Dict]],
        cache
    ) -> Dict[str, List[Dict]]:
        """Sanitize every cached tool schema and persist the result.

        Args:
            raw_tools: Mapping of server name to its list of raw tool dicts.
            cache: Cache object; its ``save_sanitized`` method receives the result.

        Returns:
            Mapping of server name to tool dicts that carry only ``name``,
            ``description`` and sanitized ``parameters``.
        """

        def _clean(entry: Dict) -> Dict:
            # The schema is sanitized before the mandatory "name" lookup.
            params = _sanitize_mcp_schema(entry.get("parameters", {}))
            return {
                "name": entry["name"],
                "description": entry.get("description", ""),
                "parameters": params,
            }

        cleaned: Dict[str, List[Dict]] = {}
        for server, entries in raw_tools.items():
            cleaned[server] = [_clean(entry) for entry in entries]

        # Persist so later loads can skip the sanitizing step.
        cache.save_sanitized(cleaned)
        logger.info(f"Created sanitized cache with {len(cleaned)} servers")

        return cleaned
    
    def _build_tools_from_cache(
        self,
        all_cached_tools: Dict[str, List[Dict]],
        config_servers: List[str]
    ) -> List[BaseTool]:
        """Turn cached tool metadata into bound RemoteTool objects.

        Only servers listed in ``config_servers`` are considered, in that
        order; servers with no (or empty) cache entry are skipped. The tools
        are created with ``connector=None``.
        """
        built: List[BaseTool] = []

        for server_name in config_servers:
            cached_entries = all_cached_tools.get(server_name)
            if cached_entries:
                bound_session = f"{self.backend_type.value}-{server_name}"
                for entry in cached_entries:
                    cached_tool = RemoteTool(
                        schema=ToolSchema(
                            name=entry["name"],
                            description=entry.get("description", ""),
                            parameters=entry.get("parameters", {}),
                            backend_type=BackendType.MCP,
                        ),
                        connector=None,
                    )
                    cached_tool.bind_runtime_info(
                        backend=self.backend_type,
                        session_name=bound_session,
                        server_name=server_name,
                    )
                    built.append(cached_tool)

        return built
    
    async def _list_tools_live(self) -> List[BaseTool]:
        """List tools by starting all servers.

        A session is created for every configured server that does not have
        one yet, then every known session is asked for its tools. The combined
        result is written to the tool cache before it is returned.

        Uses a semaphore to serialize session creation, avoiding TaskGroup race conditions
        that occur when multiple MCP connections are initialized concurrently.

        Returns:
            One tool per (server, tool name) pair; an empty list when no
            servers are configured.
        """
        servers = self.list_servers()

        if not servers:
            logger.warning("No MCP servers configured")
            return []

        # Find servers that don't have sessions yet
        to_create = [s for s in servers if s not in self._server_sessions]

        # Create missing sessions with serialized execution using semaphore
        if to_create:
            logger.info(f"Creating {len(to_create)} MCP sessions (serialized to avoid race conditions)")

            # Use semaphore with limit=1 to serialize session creation
            # This avoids TaskGroup race conditions in concurrent HTTP connection setup
            semaphore = asyncio.Semaphore(1)

            async def _create_with_semaphore(server: str):
                async with semaphore:
                    logger.debug(f"Creating session for '{server}'")
                    return await self._lazy_create(server)

            tasks = [_create_with_semaphore(s) for s in to_create]
            # return_exceptions=True: one failing server must not abort the rest.
            results = await asyncio.gather(*tasks, return_exceptions=True)

            # Log errors (gather preserves input order, so zip pairs them up)
            for failed_name, result in zip(to_create, results):
                if isinstance(result, MCPDependencyError):
                    logger.debug(f"Dependency error for '{failed_name}': {type(result).__name__}")
                elif isinstance(result, Exception):
                    logger.error(f"Failed to create session for '{failed_name}': {result}")

        # Aggregate tools from all sessions
        uniq: Dict[tuple[str, str], BaseTool] = {}
        failed_servers = []

        # Iterate over a snapshot: the loop body awaits, and another coroutine
        # may register a session meanwhile. Iterating the live dict would then
        # raise "dictionary changed size during iteration" from the for
        # statement, outside the per-server try/except below.
        sessions = list(self._server_sessions.items())

        logger.debug(f"Listing tools from {len(sessions)} sessions")
        for server, sess in sessions:
            try:
                tools = await sess.list_tools()
                session_name = f"{self.backend_type.value}-{server}"
                for tool in tools:
                    key = (server, tool.schema.name)
                    if key not in uniq:
                        tool.bind_runtime_info(
                            backend=self.backend_type,
                            session_name=session_name,
                            server_name=server,
                        )
                        uniq[key] = tool
            except Exception as e:
                failed_servers.append(server)
                logger.error(f"Error listing tools from server '{server}': {e}")

        if failed_servers:
            logger.warning(f"Failed to list tools from {len(failed_servers)} server(s): {failed_servers}")

        tools_list = list(uniq.values())
        logger.debug(f"Listed {len(tools_list)} unique tools from {len(self._server_sessions)} MCP servers")

        # Save to cache for next time
        await self._save_tools_to_cache(tools_list)

        return tools_list
    
    async def _save_tools_to_cache(self, tools: List[BaseTool]) -> None:
        """Group tool metadata by server and write it to the tool cache.

        Tools that are not bound to runtime info are filed under the server
        name "unknown".
        """
        cache = get_tool_cache()

        grouped: Dict[str, List[Dict]] = {}
        for item in tools:
            owner = item.runtime_info.server_name if item.is_bound else "unknown"
            grouped.setdefault(owner, []).append({
                "name": item.schema.name,
                "description": item.schema.description or "",
                "parameters": item.schema.parameters or {},
            })

        cache.save(grouped)
    
    async def ensure_server_session(self, server_name: str) -> Optional[MCPSession]:
        """Return the session for ``server_name``, starting the server if needed.

        This is used for on-demand server startup when executing tools.

        Returns:
            The existing or newly created session, or None when session
            creation raised (the error is logged).
        """
        if server_name not in self._server_sessions:
            # No session registered for this server yet: start one now.
            logger.info(f"Starting MCP server on-demand: {server_name}")
            session_cfg = SessionConfig(
                session_name=f"mcp-{server_name}",
                backend_type=BackendType.MCP,
                connection_params={"server": server_name},
            )

            try:
                return await self.create_session(session_cfg)
            except Exception as exc:
                logger.error(f"Failed to start server '{server_name}': {exc}")
                return None

        return self._server_sessions[server_name]

    async def _lazy_create(self, server: str) -> None:
        """Create the session for ``server`` unless one is already registered.

        Args:
            server: Server name to create session for

        Raises:
            Exception: Whatever session creation raised, re-raised after
                logging so the caller can track the failure.
        """
        if server not in self._server_sessions:
            session_cfg = SessionConfig(
                session_name=f"mcp-{server}",
                backend_type=BackendType.MCP,
                connection_params={"server": server},
            )

            try:
                await self.create_session(session_cfg)
                logger.debug(f"Lazily created session for server '{server}'")
            except MCPDependencyError as dep_err:
                # Dependency errors were already shown to the user; keep the
                # log quiet, but propagate so asyncio.gather can track it.
                logger.debug(f"Dependency error for server '{server}': {type(dep_err).__name__}")
                raise
            except Exception as exc:
                logger.error(f"Failed to lazily create session for server '{server}': {exc}")
                # Propagate so asyncio.gather can track the error.
                raise
        else:
            # Re-check guards against a session registered since the caller looked.
            logger.debug(f"Session for server '{server}' already exists, skipping lazy creation")

================================================
FILE: anytool/grounding/backends/mcp/session.py
================================================
"""
Session manager for MCP connections.

This module provides a session manager for MCP connections,
which handles authentication, initialization, and tool discovery.
"""

from typing import Any, Dict

from anytool.grounding.backends.mcp.transport.connectors import MCPBaseConnector
from anytool.grounding.backends.mcp.tool_converter import convert_mcp_tool_to_base_tool
from anytool.grounding.core.session import BaseSession
from anytool.grounding.core.types import BackendType
from anytool.utils.logging import Logger

logger = Logger.get_logger(__name__)


class MCPSession(BaseSession):
    """Session bound to a single MCP connector.

    On initialization the tools exposed by the connector are converted to
    BaseTool instances and stored on ``self.tools``.
    """

    def __init__(
        self,
        connector: MCPBaseConnector,
        *,
        session_id: str = "",
        auto_connect: bool = True,
        auto_initialize: bool = True,
    ) -> None:
        """Create a session around ``connector``.

        Args:
            connector: The connector to use for communicating with the MCP implementation.
            session_id: Unique identifier for this session
            auto_connect: Whether to automatically connect to the MCP implementation.
            auto_initialize: Whether to automatically initialize the session.
        """
        super().__init__(
            connector=connector,
            session_id=session_id,
            backend_type=BackendType.MCP,
            auto_connect=auto_connect,
            auto_initialize=auto_initialize,
        )

    async def initialize(self) -> Dict[str, Any]:
        """Initialize the connector and populate ``self.tools``.

        Connects first when the session is not connected and ``auto_connect``
        is set.

        Returns:
            The session information returned by the connector's ``initialize``.
        """
        needs_connect = not self.is_connected and self.auto_connect
        if needs_connect:
            await self.connect()

        logger.debug(f"Initializing MCP session {self.session_id}")
        session_info = await self.connector.initialize()

        # The connector exposes the server's tools via its `tools` attribute
        # once initialize() has returned.
        discovered = self.connector.tools
        logger.debug(f"Converting {len(discovered)} MCP tools to BaseTool")

        converted = []
        for raw_tool in discovered:
            converted.append(convert_mcp_tool_to_base_tool(raw_tool, self.connector))
        self.tools = converted

        logger.debug(f"MCP session {self.session_id} initialized with {len(self.tools)} tools")

        return session_info

================================================
FILE: anytool/grounding/backends/mcp/tool_cache.py
================================================
import json
from pathlib import Path
from datetime import datetime
from typing import Any, Dict, List, Optional

from anytool.utils.logging import Logger

logger = Logger.get_logger(__name__)

# Both cache files live in the project root directory (AnyTool/).
# __file__ = .../AnyTool/anytool/grounding/backends/mcp/tool_cache.py,
# so five parents up is .../AnyTool/
_PROJECT_ROOT = Path(__file__).parent.parent.parent.parent.parent
DEFAULT_CACHE_PATH = _PROJECT_ROOT / "mcp_tool_cache.json"
# Sanitized cache path (Claude API compatible JSON Schema)
DEFAULT_SANITIZED_CACHE_PATH = _PROJECT_ROOT / "mcp_tool_cache_sanitized.json"


class MCPToolCache:
    """Simple file-based cache for MCP tool metadata.

    Two JSON files are managed: the raw cache (``cache_path``) and the
    sanitized cache (``sanitized_cache_path``). Both hold a ``"servers"``
    mapping of server name -> list of tool metadata dicts. The raw cache may
    also hold a ``"failed_servers"`` mapping. Parsed file contents are kept
    in memory after the first load.
    """

    CACHE_VERSION = 1

    def __init__(self, cache_path: Optional[Path] = None, sanitized_cache_path: Optional[Path] = None):
        self.cache_path = cache_path or DEFAULT_CACHE_PATH
        self.sanitized_cache_path = sanitized_cache_path or DEFAULT_SANITIZED_CACHE_PATH
        self._cache: Optional[Dict] = None
        self._sanitized_cache: Optional[Dict] = None
        self._server_order: Optional[List[str]] = None

    def set_server_order(self, order: List[str]):
        """Set expected server order (from config). Used when saving to disk."""
        self._server_order = order

    def _reorder_servers(self, servers: Dict[str, List[Dict]]) -> Dict[str, List[Dict]]:
        """Reorder servers dict according to _server_order.

        Servers named in ``_server_order`` come first, in that order; any
        others follow in their existing order. Without a configured order the
        input is returned unchanged.
        """
        if not self._server_order:
            return servers

        ordered = {}
        # First add servers in config order
        for name in self._server_order:
            if name in servers:
                ordered[name] = servers[name]
        # Then add any remaining servers (not in config)
        for name in servers:
            if name not in ordered:
                ordered[name] = servers[name]
        return ordered

    def _ensure_dir(self, path: Optional[Path] = None):
        """Ensure the directory of ``path`` exists (defaults to the raw cache file)."""
        target = self.cache_path if path is None else path
        target.parent.mkdir(parents=True, exist_ok=True)

    def _empty_cache(self) -> Dict[str, Any]:
        """Return a fresh cache structure with no servers."""
        return {"version": self.CACHE_VERSION, "servers": {}}

    @staticmethod
    def _read_cache_file(path: Path) -> Dict[str, Any]:
        """Parse a cache file and check its overall shape.

        Raises:
            ValueError: If the file is valid JSON but not an object, or its
                "servers" entry is not a mapping.
            Exception: Anything ``open`` / ``json.load`` raises.
        """
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
        if not isinstance(data, dict):
            raise ValueError(f"expected a JSON object, got {type(data).__name__}")
        if not isinstance(data.get("servers", {}), dict):
            raise ValueError("'servers' must be a mapping of server name to tool list")
        return data

    @staticmethod
    def _write_cache_file(path: Path, data: Dict[str, Any]) -> None:
        """Serialize ``data`` and write it to ``path``.

        Serialization happens before the file is opened, so a value that
        cannot be encoded raises without truncating an existing cache file.
        """
        text = json.dumps(data, indent=2, ensure_ascii=False)
        with open(path, "w", encoding="utf-8") as f:
            f.write(text)

    def load(self) -> Dict[str, Any]:
        """Load cache from disk. Returns an empty cache structure if missing or unreadable."""
        if self._cache is not None:
            return self._cache

        if not self.cache_path.exists():
            self._cache = self._empty_cache()
            return self._cache

        try:
            self._cache = self._read_cache_file(self.cache_path)
            logger.info(f"Loaded MCP tool cache: {len(self._cache.get('servers', {}))} servers")
        except Exception as e:
            logger.warning(f"Failed to load cache: {e}")
            self._cache = self._empty_cache()
        return self._cache

    def save(self, servers: Dict[str, List[Dict]]):
        """
        Save tool metadata to disk (overwrites existing cache).

        Args:
            servers: Dict mapping server_name -> list of tool metadata dicts
                     Each tool dict should have: name, description, parameters
        """
        self._ensure_dir()

        cache_data = {
            "version": self.CACHE_VERSION,
            "updated_at": datetime.now().isoformat(),
            "servers": servers,
        }

        try:
            self._write_cache_file(self.cache_path, cache_data)
            self._cache = cache_data
            logger.info(f"Saved MCP tool cache: {len(servers)} servers")
        except Exception as e:
            logger.error(f"Failed to save cache: {e}")

    def save_server(self, server_name: str, tools: List[Dict]):
        """
        Save/update a single server's tools to cache (incremental append).

        Args:
            server_name: Name of the MCP server
            tools: List of tool metadata dicts for this server
        """
        self._ensure_dir()

        # Load existing cache
        cache = self.load()

        # Update server entry
        if "servers" not in cache:
            cache["servers"] = {}
        cache["servers"][server_name] = tools
        cache["servers"] = self._reorder_servers(cache["servers"])
        cache["updated_at"] = datetime.now().isoformat()

        # Save back
        try:
            self._write_cache_file(self.cache_path, cache)
            self._cache = cache
            logger.debug(f"Saved {len(tools)} tools for server '{server_name}'")
        except Exception as e:
            logger.error(f"Failed to save cache for server '{server_name}': {e}")

    def get_server_tools(self, server_name: str) -> Optional[List[Dict]]:
        """Get cached tools for a specific server (None if the server is not cached)."""
        cache = self.load()
        return cache.get("servers", {}).get(server_name)

    def get_all_tools(self) -> Dict[str, List[Dict]]:
        """Get all cached tools, grouped by server."""
        cache = self.load()
        return cache.get("servers", {})

    def has_cache(self) -> bool:
        """Check if cache exists and has data."""
        cache = self.load()
        return bool(cache.get("servers"))

    def clear(self):
        """Clear the cache (deletes the raw cache file and the in-memory copy)."""
        if self.cache_path.exists():
            self.cache_path.unlink()
        self._cache = None
        logger.info("MCP tool cache cleared")

    def save_failed_server(self, server_name: str, error: str):
        """
        Record a failed server to cache.

        Args:
            server_name: Name of the failed MCP server
            error: Error message
        """
        self._ensure_dir()

        # Load existing cache
        cache = self.load()

        # Add to failed_servers mapping
        if "failed_servers" not in cache:
            cache["failed_servers"] = {}
        cache["failed_servers"][server_name] = {
            "error": error,
            "failed_at": datetime.now().isoformat(),
        }
        cache["updated_at"] = datetime.now().isoformat()

        # Save back
        try:
            self._write_cache_file(self.cache_path, cache)
            self._cache = cache
        except Exception as e:
            logger.error(f"Failed to save failed server '{server_name}': {e}")

    def get_failed_servers(self) -> Dict[str, Dict]:
        """Get the failed servers recorded in the cache, keyed by server name."""
        cache = self.load()
        return cache.get("failed_servers", {})

    def load_sanitized(self) -> Dict[str, Any]:
        """Load sanitized cache from disk. Returns an empty cache structure if missing or unreadable."""
        if self._sanitized_cache is not None:
            return self._sanitized_cache

        if not self.sanitized_cache_path.exists():
            self._sanitized_cache = self._empty_cache()
            return self._sanitized_cache

        try:
            self._sanitized_cache = self._read_cache_file(self.sanitized_cache_path)
            logger.info(f"Loaded sanitized MCP tool cache: {len(self._sanitized_cache.get('servers', {}))} servers")
        except Exception as e:
            logger.warning(f"Failed to load sanitized cache: {e}")
            self._sanitized_cache = self._empty_cache()
        return self._sanitized_cache

    def save_sanitized(self, servers: Dict[str, List[Dict]]):
        """
        Save sanitized tool metadata to disk.

        Args:
            servers: Dict mapping server_name -> list of sanitized tool metadata dicts
        """
        # The sanitized file may live in a different directory than the raw one.
        self._ensure_dir(self.sanitized_cache_path)

        cache_data = {
            "version": self.CACHE_VERSION,
            "updated_at": datetime.now().isoformat(),
            "sanitized": True,
            "servers": servers,
        }

        try:
            self._write_cache_file(self.sanitized_cache_path, cache_data)
            self._sanitized_cache = cache_data
            logger.info(f"Saved sanitized MCP tool cache: {len(servers)} servers")
        except Exception as e:
            logger.error(f"Failed to save sanitized cache: {e}")

    def get_all_sanitized_tools(self) -> Dict[str, List[Dict]]:
        """Get all sanitized cached tools, grouped by server."""
        cache = self.load_sanitized()
        return cache.get("servers", {})

    def has_sanitized_cache(self) -> bool:
        """Check if sanitized cache exists and has data."""
        cache = self.load_sanitized()
        return bool(cache.get("servers"))

    def clear_sanitized(self):
        """Clear the sanitized cache (deletes the file and the in-memory copy)."""
        if self.sanitized_cache_path.exists():
            self.sanitized_cache_path.unlink()
        self._sanitized_cache = None
        logger.info("Sanitized MCP tool cache cleared")


# Global instance
_tool_cache: Optional[MCPToolCache] = None


def get_tool_cache() -> MCPToolCache:
    """Return the module-wide MCPToolCache, creating it on first use."""
    global _tool_cache
    if _tool_cache is not None:
        return _tool_cache
    _tool_cache = MCPToolCache()
    return _tool_cache


================================================
FILE: anytool/grounding/backends/mcp/tool_converter.py
================================================
"""
Tool converter for MCP.

This module provides utilities to convert MCP tools to BaseTool instances.
"""

import copy
from typing import Any, Dict
from mcp.types import Tool as MCPTool

from anytool.grounding.core.tool import BaseTool, RemoteTool
from anytool.grounding.core.types import BackendType, ToolSchema
from anytool.grounding.core.transport.connectors import BaseConnector
from anytool.utils.logging import Logger

logger = Logger.get_logger(__name__)


def _sanitize_mcp_schema(params: Dict[str, Any]) -> Dict[str, Any]:
    """
    Sanitize MCP tool schema to ensure Claude API compatibility (JSON Schema draft 2020-12).
    
    Fixes:
    - Empty schemas -> valid object schema
    - Missing required fields (type, properties, required)
    - Removes non-standard fields (title, examples, nullable, default, etc.)
    - Recursively cleans nested properties and items
    - Ensures every property has a valid type
    - Ensures top-level type is 'object' (Anthropic API requirement)
    """
    if not params:
        return {"type": "object", "properties": {}, "required": []}
    
    sanitized = copy.deepcopy(params)
    sanitized = _deep_sanitize(sanitized)
    
    # Anthropic API requires top-level type to be 'object'
    # If it's not an object, wrap the schema as a property of an object
    top_level_type = sanitized.get("type")
    if top_level_type and top_level_type != "object":
        logger.debug(f"[MCP_SCHEMA_SANITIZE] Wrapping non-object schema (type={top_level_type}) into object")
        wrapped = {
            "type": "object",
            "properties": {
                "value": sanitized  # The original schema becomes a property
            },
            "required": ["value"]  # Make it required
        }
        sanitized = wrapped
    
    return sanitized


def _deep_sanitize(schema: Dict[str, Any]) -> Dict[str, Any]:
    """
    Recursively sanitize a JSON schema to conform to JSON Schema draft 2020-12.
    Removes non-standard fields and ensures valid structure.
    """
    if not isinstance(schema, dict):
        return {"type": "string"}
    
    # Allowed top-level keys for Claude API compatibility
    allowed_keys = {
        "type", "properties", "required", "items", 
        "description", "enum", "const",
        "minimum", "maximum", "minLength", "maxLength",
        "minItems", "maxItems", "pattern",
        "additionalProperties", "anyOf", "oneOf", "allOf"
    }
    
    # Remove disallowed keys
    keys_to_remove = [k for k in schema if k not in allowed_keys]
    for k in keys_to_remove:
        schema.pop(k, None)
    
    # Ensure type exists
    if "type" not in schema:
        # Type is defined via anyOf/oneOf/allOf - don't add default type
        # These combination keywords define the type themselves
        if "anyOf" in schema or "oneOf" in schema or "allOf" in schema:
            pass  # Type is defined through combination keywords, do not add default type
        # Try to infer type
        elif "properties" in schema:
            schema["type"] = "object"
        elif "items" in schema:
            schema["type"] = "array"
        elif "enum" in schema:
            # For enum, try to infer from values
            enum_vals = schema.get("enum", [])
            if enum_vals and all(isinstance(v, str) for v in enum_vals):
                schema["type"] = "string"
            elif enum_vals and all(isinstance(v, (int, float)) for v in enum_vals):
                schema["type"] = "number"
            else:
                schema["type"] = "string"
        elif not schema:
            # Empty schema (e.g., only had $schema which was removed) -> no parameters needed
            schema["type"] = "object"
            schema["properties"] = {}
            schema["required"] = []
        else:
            schema["type"] = "object"
    
    # Handle object type
    if schema.get("type") == "object":
        if "properties" not in schema:
            schema["properties"] = {}
        if "required" not in schema:
            schema["required"] = []
        
        # Recursively sanitize properties
        if isinstance(schema.get("properties"), dict):
            for prop_name, prop_schema in list(schema["properties"].items()):
                if isinstance(prop_schema, dict):
                    schema["properties"][prop_name] = _deep_sanitize(prop_schema)
                else:
                    # Invalid property schema, replace with string
                    schema["properties"][prop_name] = {"type": "string"}
        
        # Sanitize additionalProperties if present
        if "additionalProperties" in schema and isinstance(schema["additionalProperties"], dict):
            schema["additionalProperties"] = _deep_sanitize(schema["additionalProperties"])
    
    # Handle array type
    elif schema.get("type") == "array":
        if "items" in schema:
            if isinstance(schema["items"], dict):
                schema["items"] = _deep_sanitize(schema["items"])
            elif isinstance(schema["items"], list):
                # Tuple validation - sanitize each item
                schema["items"] = [_deep_sanitize(item) if isinstance(item, dict) else {"type": "string"} for item in schema["items"]]
            else:
                schema["items"] = {"type": "string"}
        else:
            # Default items to string if not specified
            schema["items"] = {"type": "string"}
    
    # Handle anyOf/oneOf/allOf
    for combo_key in ["anyOf", "oneOf", "allOf"]:
        if combo_key in schema and isinstance(schema[combo_key], list):
            schema[combo_key] = [
                _deep_sanitize(sub) if isinstance(sub, dict) else {"type": "string"}
                for sub in schema[combo_key]
            ]
    
    return schema


def convert_mcp_tool_to_base_tool(
    mcp_tool: MCPTool,
    connector: BaseConnector
) -> BaseTool:
    """
    Wrap an MCP SDK tool in a RemoteTool.

    The tool's name and description are copied, and its ``inputSchema`` is
    sanitized into the parameter schema. A missing or empty ``inputSchema``
    becomes an empty object schema.

    Args:
        mcp_tool: MCP Tool object from the MCP SDK
        connector: Connector instance for communicating with the MCP server

    Returns:
        RemoteTool instance wrapping the MCP tool
    """
    name = mcp_tool.name
    description = getattr(mcp_tool, 'description', None) or ""

    # Sanitize the declared input schema, or fall back to "no parameters".
    declared_schema = getattr(mcp_tool, 'inputSchema', None)
    parameters: Dict[str, Any]
    if declared_schema:
        parameters = _sanitize_mcp_schema(declared_schema)
    else:
        parameters = {"type": "object", "properties": {}, "required": []}

    tool_schema = ToolSchema(
        name=name,
        description=description,
        parameters=parameters,
        backend_type=BackendType.MCP,
    )

    wrapped = RemoteTool(
        connector=connector,
        remote_name=name,
        schema=tool_schema,
        backend=BackendType.MCP,
    )

    logger.debug(f"Converted MCP tool '{name}' to RemoteTool")
    return wrapped

================================================
FILE: anytool/grounding/backends/mcp/transport/connectors/__init__.py
================================================
"""
Connectors for various MCP transports.

This module provides interfaces for connecting to MCP implementations
through different transport mechanisms.
"""

from .base import MCPBaseConnector  # noqa: F401
from .http import HttpConnector  # noqa: F401
from .sandbox import SandboxConnector  # noqa: F401
from .stdio import StdioConnector  # noqa: F401
from .websocket import WebSocketConnector  # noqa: F401

__all__ = [
    "MCPBaseConnector",
    "StdioConnector",
    "HttpConnector",
    "WebSocketConnector",
    "SandboxConnector",
]


================================================
FILE: anytool/grounding/backends/mcp/transport/connectors/base.py
================================================
"""
Base connector for MCP implementations.

This module provides the base connector interface that all MCP connectors must implement.
"""

import asyncio
from abc import abstractmethod
from typing import Any

from mcp import ClientSession
from mcp.shared.exceptions import McpError
from mcp.types import CallToolResult, GetPromptResult, Prompt, ReadResourceResult, Resource, Tool

from anytool.grounding.core.transport.task_managers import BaseConnectionManager
from anytool.grounding.core.transport.connectors import BaseConnector
from anytool.utils.logging import Logger

logger = Logger.get_logger(__name__)

# Default retry settings for tool calls
DEFAULT_TOOL_CALL_MAX_RETRIES = 3
DEFAULT_TOOL_CALL_RETRY_DELAY = 1.0


class MCPBaseConnector(BaseConnector[ClientSession]):
    """Base class for MCP connectors.

    This class defines the interface that all MCP connectors must implement.
    """

    def __init__(
        self,
        connection_manager: BaseConnectionManager[ClientSession],
        tool_call_max_retries: int = DEFAULT_TOOL_CALL_MAX_RETRIES,
        tool_call_retry_delay: float = DEFAULT_TOOL_CALL_RETRY_DELAY,
    ):
        """Set up the state shared by every MCP connector.

        Args:
            connection_manager: Connection manager handed to the base connector.
            tool_call_max_retries: Upper bound used by call_tool's attempt loop (default: 3).
            tool_call_retry_delay: Initial backoff delay in seconds between retries (default: 1.0).
        """
        super().__init__(connection_manager)

        # Retry policy consumed by call_tool().
        self.tool_call_max_retries = tool_call_max_retries
        self.tool_call_retry_delay = tool_call_retry_delay

        # Reconnect automatically when the connection is lost (not configurable for now).
        self.auto_reconnect = True

        # No session and no cached listings until connect()/initialize() have run.
        self.client_session: ClientSession | None = None
        self._prompts: list[Prompt] | None = None
        self._resources: list[Resource] | None = None
        self._tools: list[Tool] | None = None

    @property
    @abstractmethod
    def public_identifier(self) -> str:
        """Return the identifier for this connector; concrete connectors must provide it."""
        ...
    
    async def _get_streams_from_connection(self):
        """Return the stream pair used to build the ClientSession.

        The default simply hands back ``self._connection``, which for most
        MCP connectors (stdio, HTTP) should already be a
        ``(read_stream, write_stream)`` tuple. Subclasses may override.
        """
        streams = self._connection
        return streams
    
    async def _after_connect(self) -> None:
        """Open a ClientSession on top of the freshly established connection.

        Connectors that do not use ClientSession (like WebSocket) return
        ``None`` from ``_get_streams_from_connection`` and are expected to
        override this method to set up their own resources.

        Raises:
            RuntimeError: If the streams are neither ``None`` nor a 2-tuple.
        """
        streams = await self._get_streams_from_connection()

        # Nothing to wrap: the connector manages its own resources.
        if streams is None:
            logger.debug("No streams returned, ClientSession creation skipped")
            return

        is_stream_pair = isinstance(streams, tuple) and len(streams) == 2
        if not is_stream_pair:
            raise RuntimeError(f"Invalid streams format: expected tuple of 2 elements, got {type(streams)}")

        reader, writer = streams
        self.client_session = ClientSession(reader, writer, sampling_callback=None)
        await self.client_session.__aenter__()
        logger.debug("MCP ClientSession created successfully")

    async def _before_disconnect(self) -> None:
        """Release the client session and cached listings ahead of a disconnect.

        Failures while closing the session are logged as warnings and never
        propagated, so the disconnect sequence can continue.
        """
        failures: list[str] = []

        session = self.client_session
        if session:
            try:
                logger.debug("Closing MCP client session")
                await session.__aexit__(None, None, None)
            except Exception as exc:
                message = f"Error closing client session: {exc}"
                logger.warning(message)
                failures.append(message)
            finally:
                # Drop the reference whether or not the close succeeded.
                self.client_session = None

        # Forget cached tools, resources and prompts.
        self._tools = self._resources = self._prompts = None

        if failures:
            logger.warning(f"Encountered {len(failures)} errors during MCP resource cleanup")
    
    async def _cleanup_on_connect_failure(self) -> None:
        """Tear down a half-created session, then defer to the parent cleanup."""
        session = self.client_session
        if session:
            try:
                await session.__aexit__(None, None, None)
            except Exception:
                # Best effort: the connection attempt has already failed.
                pass
            finally:
                self.client_session = None

        await super()._cleanup_on_connect_failure()

    async def initialize(self) -> dict[str, Any]:
        """Run the MCP initialize handshake and cache the server's listings.

        Tools, resources and prompts are only listed when the server
        advertises the corresponding capability; otherwise the cache for that
        kind is set to an empty list.

        Returns:
            The result object returned by ``ClientSession.initialize()``.

        Raises:
            RuntimeError: If no client session exists yet.
        """
        session = self.client_session
        if not session:
            raise RuntimeError("MCP client is not connected")

        logger.debug("Initializing MCP session")
        init_result = await session.initialize()
        capabilities = init_result.capabilities

        # Each listing is fetched only when advertised; falsy results become [].
        self._tools = ((await self.list_tools()) or []) if capabilities.tools else []
        self._resources = ((await self.list_resources()) or []) if capabilities.resources else []
        self._prompts = ((await self.list_prompts()) or []) if capabilities.prompts else []

        logger.debug(
            f"MCP session initialized with {len(self._tools)} tools, "
            f"{len(self._resources)} resources, "
            f"and {len(self._prompts)} prompts"
        )

        return init_result

    @property
    def tools(self) -> list[Tool]:
        """Cached tool list; raises RuntimeError until the cache has been populated."""
        cached = self._tools
        if cached is None:
            raise RuntimeError("MCP client is not initialized")
        return cached

    @property
    def resources(self) -> list[Resource]:
        """Cached resource list; raises RuntimeError until the cache has been populated."""
        cached = self._resources
        if cached is None:
            raise RuntimeError("MCP client is not initialized")
        return cached

    @property
    def prompts(self) -> list[Prompt]:
        """Cached prompt list; raises RuntimeError until the cache has been populated."""
        cached = self._prompts
        if cached is None:
            raise RuntimeError("MCP client is not initialized")
        return cached

    @property
    def is_connected(self) -> bool:
        """Report whether the connector is connected and the connection looks alive.

        Beyond the ``_connected`` flag this also requires a client session
        and, when the connection manager exposes a ``_task``, that the task
        has not finished. When either check fails the ``_connected`` flag is
        cleared as a side effect.

        Returns:
            True if the connector is connected and the connection is alive, False otherwise.
        """
        if not self._connected:
            return False

        # A missing session means the flag is stale.
        stale = not self.client_session

        if not stale:
            manager = self._connection_manager
            task = getattr(manager, "_task", None) if manager else None
            if task and task.done():
                logger.debug("Connection manager task is done, marking as disconnected")
                stale = True

        if stale:
            self._connected = False
        return not stale

    async def _ensure_connected(self) -> None:
        """Verify the connection is usable, reconnecting when allowed.

        Raises:
            RuntimeError: If there is no client session, if the connection is
                lost while auto_reconnect is False, or if reconnecting fails.
        """
        if not self.client_session:
            raise RuntimeError("MCP client is not connected")

        if self.is_connected:
            return

        if not self.auto_reconnect:
            raise RuntimeError(
                "Connection to MCP server has been lost. Auto-reconnection is disabled. Please reconnect manually."
            )

        logger.debug("Connection lost, attempting to reconnect...")
        try:
            await self.connect()
            logger.debug("Reconnection successful")
        except Exception as exc:
            raise RuntimeError(f"Failed to reconnect to MCP server: {exc}") from exc

    async def call_tool(self, name: str, arguments: dict[str, Any]) -> CallToolResult:
        """Call an MCP tool with automatic reconnection handling and retry logic.

        ``tool_call_max_retries`` bounds the total number of attempts. At
        least one attempt is always made, even when it is configured as zero
        or a negative number. Failures that look like transient HTTP errors
        (400/500/502/503/504 or the matching reason phrases in the error
        text) are retried with exponential backoff starting at
        ``tool_call_retry_delay`` seconds. Failures that coincide with a lost
        connection are retried immediately; the next attempt reconnects via
        ``_ensure_connected()``.

        Args:
            name: The name of the tool to call.
            arguments: The arguments to pass to the tool.

        Returns:
            The result of the tool call.

        Raises:
            RuntimeError: If the connection is lost and cannot be reestablished,
                or if every attempt failed because of connection loss.
            Exception: The original error, for non-retryable failures or when
                the last attempt fails.
        """
        import re  # local import: only needed to classify retryable errors

        # Always try at least once; a non-positive setting must not turn the
        # call into an unconditional failure.
        max_attempts = max(1, self.tool_call_max_retries)

        # Status codes must not be embedded in a longer number, so that text
        # such as "timed out after 4000 ms" is not mistaken for an HTTP 400.
        retryable_status = re.compile(r"(?<!\d)(?:400|500|502|503|504)(?!\d)")
        retryable_phrases = (
            "bad request",
            "internal server error",
            "service unavailable",
            "gateway timeout",
        )

        last_error: Exception | None = None

        for attempt in range(max_attempts):
            # Ensure we're connected (reconnects if a previous attempt lost the link)
            await self._ensure_connected()

            logger.debug(f"Calling tool '{name}' with arguments: {arguments} (attempt {attempt + 1}/{max_attempts})")
            try:
                result = await self.client_session.call_tool(name, arguments)
                logger.debug(f"Tool '{name}' called successfully")
                return result
            except Exception as e:
                last_error = e

                # Check if the error might be due to connection loss
                if not self.is_connected:
                    logger.warning(f"Tool call '{name}' failed due to connection loss: {e}")
                    # Try to reconnect on next iteration
                    continue

                error_str = str(e).lower()
                is_retryable = bool(retryable_status.search(error_str)) or any(
                    phrase in error_str for phrase in retryable_phrases
                )

                if is_retryable and attempt < max_attempts - 1:
                    delay = self.tool_call_retry_delay * (2 ** attempt)  # Exponential backoff
                    logger.warning(
                        f"Tool call '{name}' failed with retryable error: {e}, "
                        f"retrying in {delay:.1f}s (attempt {attempt + 1}/{max_attempts})"
                    )
                    await asyncio.sleep(delay)
                    continue

                # Non-retryable error or max retries reached, re-raise
                raise

        # All attempts ended in connection loss
        error_msg = f"Tool call '{name}' failed after {max_attempts} retries"
        logger.error(error_msg)
        raise RuntimeError(error_msg) from last_error

    async def list_tools(self) -> list[Tool]:
        """Fetch the tools exposed by the MCP implementation.

        An ``McpError`` from the server is logged and reported as an empty list.
        """
        await self._ensure_connected()

        logger.debug("Listing tools")
        try:
            return (await self.client_session.list_tools()).tools
        except McpError as exc:
            logger.error(f"Error listing tools: {exc}")
        return []

    async def list_resources(self) -> list[Resource]:
        """Fetch the resources exposed by the MCP implementation.

        An ``McpError`` from the server is logged and reported as an empty list.
        """
        await self._ensure_connected()

        logger.debug("Listing resources")
        try:
            return (await self.client_session.list_resources()).resources
        except McpError as exc:
            logger.error(f"Error listing resources: {exc}")
        return []

    async def read_resource(self, uri: str) -> ReadResourceResult:
        """Read a resource by URI.

        Uses the same connection guard as the other session-backed methods,
        so a lost connection is re-established (when auto_reconnect is on)
        instead of failing on a dead session.

        Args:
            uri: URI of the resource to read.

        Returns:
            The result returned by ``ClientSession.read_resource``.

        Raises:
            RuntimeError: If there is no client session or the connection
                cannot be re-established.
        """
        # Ensure we're connected (raises "MCP client is not connected" when
        # no session exists, exactly as the previous inline check did).
        await self._ensure_connected()

        logger.debug(f"Reading resource: {uri}")
        result = await self.client_session.read_resource(uri)
        return result

    async def list_prompts(self) -> list[Prompt]:
        """Fetch the prompts exposed by the MCP implementation.

        An ``McpError`` from the server is logged and reported as an empty list.
        """
        await self._ensure_connected()

        logger.debug("Listing prompts")
        try:
            return (await self.client_session.list_prompts()).prompts
        except McpError as exc:
            logger.error(f"Error listing prompts: {exc}")
        return []

    async def get_prompt(self, name: str, arguments: dict[str, Any] | None = None) -> GetPromptResult:
        """Fetch a single prompt by name, optionally passing prompt arguments."""
        await self._ensure_connected()

        logger.debug(f"Getting prompt: {name}")
        return await self.client_session.get_prompt(name, arguments)

    async def request(self, method: str, params: dict[str, Any] | None = None) -> Any:
        """Forward a raw method/params request through the client session.

        A missing ``params`` is sent as an empty dict.
        """
        await self._ensure_connected()

        logger.debug(f"Sending request: {method} with params: {params}")
        message = {"method": method, "params": params or {}}
        return await self.client_session.request(message)

    async def invoke(self, name: str, params: dict[str, Any]) -> Any:
        """Dispatch an invocation by name.

        Names without a leading double underscore are treated as tool calls.
        The reserved names ``__read_resource__``, ``__list_prompts__`` and
        ``__get_prompt__`` map to the corresponding connector methods.

        Raises:
            ValueError: For any other double-underscore name.
        """
        await self._ensure_connected()

        # Reserved names all start with "__", so checking them first does not
        # change which branch a given name reaches.
        if name == "__read_resource__":
            return await self.read_resource(params["uri"])
        elif name == "__list_prompts__":
            return await self.list_prompts()
        elif name == "__get_prompt__":
            return await self.get_prompt(params["name"], params.get("args"))
        elif not name.startswith("__"):
            return await self.call_tool(name, params)

        raise ValueError(f"Unsupported MCP invoke name: {name}")

================================================
FILE: anytool/grounding/backends/mcp/transport/connectors/http.py
================================================
"""
HTTP connector for MCP implementations.

This module provides a connector for communicating with MCP implementations
through HTTP APIs with SSE, Streamable HTTP, or simple JSON-RPC for transport.
"""

import asyncio
import anyio
import httpx
from typing import Any, Dict, List
from mcp import ClientSession
from mcp.types import (
    CallToolResult,
    TextContent,
    ImageContent,
    EmbeddedResource,
    Tool,
    Resource,
    Prompt,
    GetPromptResult,
    ReadResourceResult,
)

from anytool.utils.logging import Logger
from anytool.grounding.core.transport.task_managers.base import BaseConnectionManager
from anytool.grounding.backends.mcp.transport.task_managers import SseConnectionManager, StreamableHttpConnectionManager
from anytool.grounding.backends.mcp.transport.connectors.base import MCPBaseConnector, DEFAULT_TOOL_CALL_MAX_RETRIES, DEFAULT_TOOL_CALL_RETRY_DELAY

# Module-level logger for the HTTP connector.
logger = Logger.get_logger(__name__)


class HttpConnector(MCPBaseConnector):
    """Connector for MCP implementations using HTTP transport.

    This connector uses HTTP/SSE or streamable HTTP to communicate with remote MCP implementations,
    using a connection manager to handle the proper lifecycle management.
    """

    def __init__(
        self,
        base_url: str,
        auth_token: str | None = None,
        headers: dict[str, str] | None = None,
        timeout: float = 5,
        sse_read_timeout: float = 60 * 5,
        tool_call_max_retries: int = DEFAULT_TOOL_CALL_MAX_RETRIES,
        tool_call_retry_delay: float = DEFAULT_TOOL_CALL_RETRY_DELAY,
    ):
        """Initialize a new HTTP connector.

        Args:
            base_url: The base URL of the MCP HTTP API (trailing slashes are stripped).
            auth_token: Optional authentication token, sent as a Bearer Authorization header.
            headers: Optional additional headers. The mapping is copied, so the
                caller's dict is never modified.
            timeout: Timeout for HTTP operations in seconds.
            sse_read_timeout: Timeout for SSE read operations in seconds.
            tool_call_max_retries: Maximum number of retries for tool calls (default: 3)
            tool_call_retry_delay: Initial delay between retries in seconds (default: 1.0)
        """
        self.base_url = base_url.rstrip("/")
        self.auth_token = auth_token
        # Copy the mapping: adding the Authorization header below must not
        # leak into a dict the caller may reuse for other connectors.
        self.headers = dict(headers) if headers else {}
        if auth_token:
            self.headers["Authorization"] = f"Bearer {auth_token}"
        self.timeout = timeout
        self.sse_read_timeout = sse_read_timeout

        # Negotiated in _before_connect(); defined up front so that
        # public_identifier and log messages work before the first connect.
        self.transport_type: str | None = None

        # JSON-RPC HTTP mode fields
        self._use_jsonrpc = False
        self._jsonrpc_client: httpx.AsyncClient | None = None
        self._jsonrpc_request_id = 0

        # Create a placeholder connection manager (will be set up later in connect())
        # We use a placeholder here because the actual transport type (SSE vs Streamable HTTP)
        # can only be determined at runtime through server negotiation as per MCP specification
        from anytool.grounding.core.transport.task_managers import PlaceholderConnectionManager
        connection_manager = PlaceholderConnectionManager()
        super().__init__(
            connection_manager,
            tool_call_max_retries=tool_call_max_retries,
            tool_call_retry_delay=tool_call_retry_delay,
        )

    async def connect(self) -> None:
        """Establish the session/connection unless already connected.

        ``_before_connect()`` negotiates the transport first. In JSON-RPC
        mode no connection manager is used. Otherwise the manager is started
        only if negotiation did not already leave a connection in place. Any
        failure triggers ``_cleanup_on_connect_failure()`` before re-raising.
        """
        if self._connected:
            return

        try:
            # Hook: before connection - this sets up transport type
            await self._before_connect()

            # Start the manager only for MCP transports that negotiation has
            # not already connected.
            needs_manager_start = not self._use_jsonrpc and self._connection is None
            if needs_manager_start:
                self._connection = await self._connection_manager.start()

            await self._after_connect()
            self._connected = True
        except Exception:
            await self._cleanup_on_connect_failure()
            raise

    async def disconnect(self) -> None:
        """Tear down the session/connection and mark the connector disconnected.

        Does nothing when not connected. The connection manager is stopped
        only for non-JSON-RPC transports.
        """
        if not self._connected:
            return

        # Hook: before disconnection
        await self._before_disconnect()

        uses_manager = not self._use_jsonrpc
        if uses_manager and self._connection_manager:
            await self._connection_manager.stop()
            self._connection = None

        # Hook: after disconnection
        await self._after_disconnect()

        self._connected = False

    async def _before_connect(self) -> None:
        """Negotiate transport type and set up the appropriate connection manager.

        Tries transports in order, stopping at the first one that works:
        1. Streamable HTTP (new MCP transport)
        2. SSE (legacy MCP transport)
        3. Simple JSON-RPC HTTP (for custom servers)

        This implements backwards compatibility per MCP specification.

        When an MCP transport succeeds, ``transport_type``,
        ``_connection_manager``, ``_connection`` and ``client_session`` are
        populated with the working connection. When only JSON-RPC succeeds,
        ``transport_type`` is set and ``_use_jsonrpc`` becomes True.

        Raises:
            Exception: If every transport fails, the streamable HTTP error is
                re-raised (it is the first and most relevant failure).
        """
        self.transport_type = None
        self._use_jsonrpc = False

        # First, try the new streamable HTTP transport
        streamable_error = await self._negotiate_mcp_transport(
            StreamableHttpConnectionManager,
            transport_type="streamable HTTP",
            attempt_message="Attempting streamable HTTP connection to",
            display_name="Streamable HTTP",
        )
        if streamable_error is None:
            return

        # Try SSE fallback
        sse_error = await self._negotiate_mcp_transport(
            SseConnectionManager,
            transport_type="SSE",
            attempt_message="Attempting SSE fallback connection to",
            display_name="SSE",
        )
        if sse_error is None:
            return

        # Both MCP transports failed, try simple JSON-RPC HTTP as last resort
        # This is useful for custom MCP servers that don't implement proper MCP transports
        logger.debug(f"Attempting JSON-RPC HTTP fallback to: {self.base_url}")
        try:
            await self._try_jsonrpc_connection()
        except Exception as jsonrpc_error:
            # All transports failed
            logger.error(
                f"All transport methods failed for {self.base_url}. "
                f"Streamable HTTP: {streamable_error}, SSE: {sse_error}, JSON-RPC: {jsonrpc_error}"
            )
            # Raise the most relevant error - prefer the original streamable error
            raise streamable_error or sse_error or jsonrpc_error

        self.transport_type = "JSON-RPC HTTP"
        self._use_jsonrpc = True
        logger.info(f"JSON-RPC HTTP transport selected for: {self.base_url}")

    async def _negotiate_mcp_transport(
        self,
        manager_cls,
        transport_type: str,
        attempt_message: str,
        display_name: str,
    ) -> Exception | None:
        """Try one MCP transport; return None on success or the exception on failure.

        Starts a connection manager of ``manager_cls``, opens a ClientSession
        on its streams and verifies it with ``initialize()`` and
        ``list_tools()``, each bounded by ``self.timeout``. On success the
        working manager, streams and session are stored on the connector. On
        failure the session (if entered) and the manager are closed on a
        best-effort basis.
        """
        # Local to this attempt, so a failure before construction can never
        # stop a manager that belongs to a previous attempt.
        connection_manager = None
        try:
            logger.debug(f"{attempt_message}: {self.base_url}")
            connection_manager = manager_cls(
                self.base_url, self.headers, self.timeout, self.sse_read_timeout
            )

            # Test the connection by starting it with built-in timeout
            read_stream, write_stream = await connection_manager.start(timeout=self.timeout)

            # Create and verify ClientSession
            test_client = ClientSession(read_stream, write_stream, sampling_callback=None)
            await self._with_connect_timeout(test_client.__aenter__(), "ClientSession enter")

            try:
                await self._with_connect_timeout(test_client.initialize(), "initialize()")
                await self._with_connect_timeout(test_client.list_tools(), "list_tools()")

                # Fetch the streams before touching connector state so a
                # failure here leaves no half-assigned attributes behind.
                streams = connection_manager.get_streams()

                # SUCCESS! Keep the client session (don't close it, closing destroys the streams)
                self.transport_type = transport_type
                self._connection_manager = connection_manager
                self._connection = streams
                self.client_session = test_client  # Reuse the working session
                logger.debug(f"{display_name} transport selected")
                return None
            except Exception:
                # Clean up the test client only on error
                try:
                    await asyncio.wait_for(test_client.__aexit__(None, None, None), timeout=2)
                except Exception:
                    pass
                raise

        except Exception as e:
            logger.debug(f"{display_name} failed: {e}")

            # Clean up the failed connection manager
            if connection_manager:
                try:
                    await asyncio.wait_for(connection_manager.stop(), timeout=2)
                except Exception:
                    pass
            return e

    async def _with_connect_timeout(self, awaitable, description: str):
        """Await ``awaitable`` within ``self.timeout`` seconds, naming the step on timeout.

        asyncio.wait_for is used instead of anyio.fail_after to avoid cancel
        scope conflicts with background tasks.
        """
        try:
            return await asyncio.wait_for(awaitable, timeout=self.timeout)
        except asyncio.TimeoutError:
            raise TimeoutError(f"{description} timed out after {self.timeout}s")

    async def _try_jsonrpc_connection(self) -> None:
        """Probe the server with a JSON-RPC ``initialize`` request.

        Uses a short-lived HTTP client. Returns normally when the server
        answers with a non-error JSON-RPC response.

        Raises:
            RuntimeError: If the response carries a JSON-RPC ``error`` member.
        """
        probe_headers = {**self.headers, "Content-Type": "application/json"}
        init_request = {
            "jsonrpc": "2.0",
            "id": 1,
            "method": "initialize",
            "params": {
                "protocolVersion": "2024-11-05",
                "capabilities": {},
                "clientInfo": {"name": "AnyTool", "version": "1.0.0"},
            },
        }

        async with httpx.AsyncClient(timeout=httpx.Timeout(self.timeout), headers=probe_headers) as client:
            response = await client.post(self.base_url, json=init_request)
            response.raise_for_status()
            data = response.json()

            # Check for JSON-RPC error
            if "error" in data:
                rpc_error = data["error"]
                raise RuntimeError(f"JSON-RPC error: {rpc_error.get('message', str(rpc_error))}")

            # Success - server supports JSON-RPC
            logger.debug(f"JSON-RPC test succeeded: {data.get('result', {})}")

    async def _after_connect(self) -> None:
        """Finish connection setup for the negotiated transport and log success.

        JSON-RPC mode creates the long-lived HTTP client. MCP transports
        reuse the session created during negotiation, or fall back to the
        parent implementation when none exists yet.
        """
        if self._use_jsonrpc:
            self._jsonrpc_client = httpx.AsyncClient(
                timeout=httpx.Timeout(self.timeout),
                headers={**self.headers, "Content-Type": "application/json"},
            )
            logger.debug(f"JSON-RPC HTTP client set up for: {self.base_url}")
        elif self.client_session is None:
            await super()._after_connect()
        else:
            logger.debug("Reusing ClientSession from _before_connect()")

        logger.debug(f"Successfully connected to MCP implementation via {self.transport_type}: {self.base_url}")

    async def _before_disconnect(self) -> None:
        """Close the JSON-RPC client (if any), then run the parent MCP cleanup."""
        rpc_client = self._jsonrpc_client
        if rpc_client:
            try:
                await rpc_client.aclose()
            except Exception as exc:
                logger.warning(f"Error closing JSON-RPC client: {exc}")
            finally:
                # Drop the reference even if closing failed.
                self._jsonrpc_client = None

        await super()._before_disconnect()

    @property
    def public_identifier(self) -> dict[str, Any]:
        """Get the identifier for the connector.

        Returns:
            A dict with the transport type under ``"type"`` and the server
            URL under ``"base_url"``.
        """
        # NOTE(review): transport_type is assigned in _before_connect(); confirm
        # it is also defined before the first connect, otherwise reading this
        # property on a never-connected instance raises AttributeError.
        return {"type": self.transport_type, "base_url": self.base_url}

    # =====================
    # JSON-RPC HTTP Methods
    # =====================

    def _next_jsonrpc_id(self) -> int:
        """Advance the request counter by one and return the new value."""
        next_id = self._jsonrpc_request_id + 1
        self._jsonrpc_request_id = next_id
        return next_id

    async def _send_jsonrpc_request(
        self,
        method: str,
        params: Dict[str, Any] | None = None,
        max_retries: int = 3,
        retry_delay: float = 1.0,
    ) -> Any:
        """Send a JSON-RPC request and return the result.

        Args:
            method: The JSON-RPC method name (e.g., "tools/list", "tools/call")
            params: The method parameters; ``None`` is sent as an empty object.
            max_retries: Maximum number of attempts for transient errors
                (400, 503, connection errors, ...). At least one attempt is
                always made, even for zero or negative values.
            retry_delay: Initial delay between retries (doubles each retry)

        Returns:
            The result field from the JSON-RPC response (``{}`` when absent).

        Raises:
            RuntimeError: If the client is not initialized, the HTTP request
                fails after all attempts, the response is not a JSON object,
                or the response carries a JSON-RPC error.
        """
        if not self._jsonrpc_client:
            raise RuntimeError("JSON-RPC client not initialized")

        # A non-positive max_retries must still send the request once.
        max_attempts = max(1, max_retries)
        last_error = None

        for attempt in range(max_attempts):
            request_id = self._next_jsonrpc_id()
            payload = {
                "jsonrpc": "2.0",
                "id": request_id,
                "method": method,
                "params": params or {},
            }

            logger.debug(f"Sending JSON-RPC request: {method} (id={request_id}, attempt {attempt + 1}/{max_attempts})")

            try:
                response = await self._jsonrpc_client.post(self.base_url, json=payload)
                response.raise_for_status()

                data = response.json()

                # Anything but a JSON object cannot be a JSON-RPC response;
                # fail with a clear message instead of an AttributeError below.
                if not isinstance(data, dict):
                    raise RuntimeError(
                        f"Invalid JSON-RPC response for {method}: expected an object, got {type(data).__name__}"
                    )

                # Some servers send "error": null on success; only a non-null
                # error member is a failure.
                error = data.get("error")
                if error is not None:
                    # The error member should be an object with a "message",
                    # but tolerate servers that send a bare string or number.
                    if isinstance(error, dict):
                        error_msg = error.get("message", str(error))
                    else:
                        error_msg = str(error)
                    raise RuntimeError(f"JSON-RPC error: {error_msg}")

                return data.get("result", {})

            except httpx.HTTPStatusError as e:
                last_error = e
                status_code = e.response.status_code

                # Retry on 400 (Bad Request) and 5xx errors
                # 400 can happen when MCP server is temporarily not ready
                if status_code in (400, 500, 502, 503, 504) and attempt < max_attempts - 1:
                    delay = retry_delay * (2 ** attempt)
                    logger.warning(
                        f"HTTP {status_code} error on {method}, retrying in {delay:.1f}s "
                        f"(attempt {attempt + 1}/{max_attempts})"
                    )
                    await asyncio.sleep(delay)
                    continue

                raise RuntimeError(f"HTTP error: {status_code}") from e

            except httpx.RequestError as e:
                last_error = e
                # Retry on connection errors
                if attempt < max_attempts - 1:
                    delay = retry_delay * (2 ** attempt)
                    logger.warning(
                        f"Request error on {method}: {e}, retrying in {delay:.1f}s "
                        f"(attempt {attempt + 1}/{max_attempts})"
                    )
                    await asyncio.sleep(delay)
                    continue

                raise RuntimeError(f"Request error: {e}") from e

        # Should not reach here, but just in case
        raise RuntimeError(f"Max retries exceeded for {method}") from last_error

    def _parse_tools_from_json(self, tools_data: List[Dict]) -> List[Tool]:
        """Convert raw tool dictionaries into ``Tool`` objects.

        Entries that cannot be turned into a ``Tool`` are logged at warning
        level and skipped, so one bad entry does not discard the rest.

        Args:
            tools_data: Tool dictionaries, e.g. the "tools" list of a
                tools/list result.

        Returns:
            The successfully parsed tools, in input order.
        """
        parsed: List[Tool] = []
        for entry in tools_data:
            try:
                fields = {
                    "name": entry.get("name", ""),
                    "description": entry.get("description", ""),
                    "inputSchema": entry.get("inputSchema", {}),
                }
                parsed.append(Tool(**fields))
            except Exception as exc:
                logger.warning(f"Failed to parse tool: {exc}")
        return parsed

    def _parse_resources_from_json(self, resources_data: List[Dict]) -> List[Resource]:
        """Convert raw resource dictionaries into ``Resource`` objects.

        Entries that cannot be turned into a ``Resource`` are logged at
        warning level and skipped.

        Args:
            resources_data: Resource dictionaries, e.g. the "resources" list
                of a resources/list result.

        Returns:
            The successfully parsed resources, in input order.
        """
        parsed: List[Resource] = []
        for entry in resources_data:
            try:
                fields = {
                    "uri": entry.get("uri", ""),
                    "name": entry.get("name", ""),
                    "description": entry.get("description"),
                    "mimeType": entry.get("mimeType"),
                }
                parsed.append(Resource(**fields))
            except Exception as exc:
                logger.warning(f"Failed to parse resource: {exc}")
        return parsed

    def _parse_prompts_from_json(self, prompts_data: List[Dict]) -> List[Prompt]:
        """Convert raw prompt dictionaries into ``Prompt`` objects.

        Entries that cannot be turned into a ``Prompt`` are logged at warning
        level and skipped.

        Args:
            prompts_data: Prompt dictionaries, e.g. the "prompts" list of a
                prompts/list result.

        Returns:
            The successfully parsed prompts, in input order.
        """
        parsed: List[Prompt] = []
        for entry in prompts_data:
            try:
                fields = {
                    "name": entry.get("name", ""),
                    "description": entry.get("description"),
                    "arguments": entry.get("arguments"),
                }
                parsed.append(Prompt(**fields))
            except Exception as exc:
                logger.warning(f"Failed to parse prompt: {exc}")
        return parsed

    # =====================
    # Override MCP Methods for JSON-RPC Support
    # =====================

    async def initialize(self) -> Dict[str, Any]:
        """Initialize the MCP session and cache the server's tools, resources and prompts.

        In non-JSON-RPC mode this delegates to the parent implementation.
        In JSON-RPC mode it sends an ``initialize`` request and then fills
        ``self._tools``, ``self._resources`` and ``self._prompts``. A failure
        while listing any of them is logged and leaves that cache empty; it
        does not fail initialization.

        Returns:
            The result of the ``initialize`` request.

        Raises:
            RuntimeError: Propagated from ``_send_jsonrpc_request`` if the
                ``initialize`` request itself fails.
        """
        if not self._use_jsonrpc:
            return await super().initialize()

        # JSON-RPC mode
        logger.debug("Initializing JSON-RPC HTTP MCP session")

        result = await self._send_jsonrpc_request("initialize", {
            "protocolVersion": "2024-11-05",
            "capabilities": {},
            "clientInfo": {"name": "AnyTool", "version": "1.0.0"},
        })

        # Tolerate an explicit null as well as a missing key.
        capabilities = result.get("capabilities") or {}

        def advertised(capability: str) -> bool:
            # A capability is declared by the key being present; its value is
            # frequently an empty object, so truthiness must not be used here.
            return capabilities.get(capability) is not None

        # List tools. This is attempted even when the capability is not
        # advertised, because some servers don't advertise capabilities correctly.
        try:
            tools_result = await self._send_jsonrpc_request("tools/list", {})
            self._tools = self._parse_tools_from_json(tools_result.get("tools", []))
        except Exception as e:
            # An unadvertised capability failing is expected; an advertised one is not.
            log = logger.warning if advertised("tools") else logger.debug
            log(f"Could not list tools during initialization: {e}")
            self._tools = []

        # List resources
        self._resources = []
        if advertised("resources"):
            try:
                resources_result = await self._send_jsonrpc_request("resources/list", {})
                self._resources = self._parse_resources_from_json(resources_result.get("resources", []))
            except Exception as e:
                logger.warning(f"Could not list resources during initialization: {e}")
                self._resources = []

        # List prompts
        self._prompts = []
        if advertised("prompts"):
            try:
                prompts_result = await self._send_jsonrpc_request("prompts/list", {})
                self._prompts = self._parse_prompts_from_json(prompts_result.get("prompts", []))
            except Exception as e:
                logger.warning(f"Could not list prompts during initialization: {e}")
                self._prompts = []

        logger.info(
            f"JSON-RPC HTTP MCP session initialized with {len(self._tools)} tools, "
            f"{len(self._resources)} resources, {len(self._prompts)} prompts"
        )

        return result

    @property
    def is_connected(self) -> bool:
        """Report whether the connector currently has a usable connection.

        In JSON-RPC mode this requires the connected flag to be set and a
        JSON-RPC client to exist; otherwise the parent's answer is used.
        """
        if not self._use_jsonrpc:
            return super().is_connected
        return self._connected and self._jsonrpc_client is not None

    async def _ensure_connected(self) -> None:
        """Raise if the connector is not connected.

        Raises:
            RuntimeError: In JSON-RPC mode, when the connected flag is unset
                or there is no JSON-RPC client. In other modes the parent's
                check decides.
        """
        if not self._use_jsonrpc:
            await super()._ensure_connected()
            return

        if not (self._connected and self._jsonrpc_client):
            raise RuntimeError("JSON-RPC HTTP connector is not connected")

    async def list_tools(self) -> List[Tool]:
        """Fetch the server's tools and refresh the cached tool list.

        Returns:
            The parsed tools, or an empty list if the request or parsing
            fails (the error is logged; the cache is left as it was).
        """
        if self._use_jsonrpc:
            await self._ensure_connected()
            try:
                payload = await self._send_jsonrpc_request("tools/list", {})
                raw_tools = payload.get("tools", [])
                self._tools = self._parse_tools_from_json(raw_tools)
            except Exception as exc:
                logger.error(f"Error listing tools: {exc}")
                return []
            else:
                return self._tools

        return await super().list_tools()

    async def call_tool(self, name: str, arguments: Dict[str, Any]) -> CallToolResult:
        """Call an MCP tool and wrap the response in a ``CallToolResult``.

        Args:
            name: Name of the tool to call.
            arguments: Arguments passed to the tool.

        Returns:
            A ``CallToolResult`` built from the response's "content" items.
            Items of type "text", "image" and "resource" are converted; items
            of any other type are ignored. If no item was converted but the
            response is non-empty, the whole response is stringified into a
            single text item.
        """
        if not self._use_jsonrpc:
            return await super().call_tool(name, arguments)

        await self._ensure_connected()
        logger.debug(f"Calling tool '{name}' with arguments: {arguments}")

        request_params = {"name": name, "arguments": arguments}
        response = await self._send_jsonrpc_request("tools/call", request_params)

        # Translate each raw content item into its typed counterpart.
        blocks = []
        for entry in response.get("content", []):
            kind = entry.get("type", "text")
            if kind == "text":
                blocks.append(TextContent(type="text", text=entry.get("text", "")))
                continue
            if kind == "image":
                image = ImageContent(
                    type="image",
                    data=entry.get("data", ""),
                    mimeType=entry.get("mimeType", "image/png"),
                )
                blocks.append(image)
                continue
            if kind == "resource":
                embedded = EmbeddedResource(
                    type="resource",
                    resource=entry.get("resource", {}),
                )
                blocks.append(embedded)

        # Nothing recognisable came back: surface the raw response as text.
        if response and not blocks:
            blocks.append(TextContent(type="text", text=str(response)))

        is_error = response.get("isError", False)
        return CallToolResult(content=blocks, isError=is_error)

    async def list_resources(self) -> List[Resource]:
        """Fetch the server's resources and refresh the cached resource list.

        Returns:
            The parsed resources, or an empty list if the request or parsing
            fails (the error is logged; the cache is left as it was).
        """
        if self._use_jsonrpc:
            await self._ensure_connected()
            try:
                payload = await self._send_jsonrpc_request("resources/list", {})
                raw_resources = payload.get("resources", [])
                self._resources = self._parse_resources_from_json(raw_resources)
            except Exception as exc:
                logger.error(f"Error listing resources: {exc}")
                return []
            else:
                return self._resources

        return await super().list_resources()

    async def read_resource(self, uri: str) -> ReadResourceResult:
        """Read a single resource.

        Args:
            uri: URI of the resource to read.

        Returns:
            A ``ReadResourceResult`` built from the fields of the
            ``resources/read`` response.
        """
        if self._use_jsonrpc:
            await self._ensure_connected()
            payload = await self._send_jsonrpc_request("resources/read", {"uri": uri})
            return ReadResourceResult(**payload)

        return await super().read_resource(uri)

    async def list_prompts(self) -> List[Prompt]:
        """Fetch the server's prompts and refresh the cached prompt list.

        Returns:
            The parsed prompts, or an empty list if the request or parsing
            fails (the error is logged; the cache is left as it was).
        """
        if self._use_jsonrpc:
            await self._ensure_connected()
            try:
                payload = await self._send_jsonrpc_request("prompts/list", {})
                raw_prompts = payload.get("prompts", [])
                self._prompts = self._parse_prompts_from_json(raw_prompts)
            except Exception as exc:
                logger.error(f"Error listing prompts: {exc}")
                return []
            else:
                return self._prompts

        return await super().list_prompts()

    async def get_prompt(self, name: str, arguments: Dict[str, Any] | None = None) -> GetPromptResult:
        """Fetch a prompt by name.

        Args:
            name: Name of the prompt.
            arguments: Optional prompt arguments; an empty dict is sent when
                none are given.

        Returns:
            A ``GetPromptResult`` built from the fields of the
            ``prompts/get`` response.
        """
        if self._use_jsonrpc:
            await self._ensure_connected()
            request_params = {"name": name, "arguments": arguments or {}}
            payload = await self._send_jsonrpc_request("prompts/get", request_params)
            return GetPromptResult(**payload)

        return await super().get_prompt(name, arguments)

    async def request(self, method: str, params: Dict[str, Any] | None = None) -> Any:
        """Send an arbitrary request to the MCP implementation.

        Args:
            method: The method name to invoke.
            params: Optional parameters; an empty dict is sent when omitted.

        Returns:
            Whatever the underlying transport returns for the request.
        """
        if self._use_jsonrpc:
            await self._ensure_connected()
            return await self._send_jsonrpc_request(method, params or {})

        return await super().request(method, params)

    async def invoke(self, name: str, params: Dict[str, Any]) -> Any:
        """Dispatch a tool call or one of the special double-underscore operations.

        Names that do not start with ``__`` are treated as tool names. The
        special names handled here are ``__read_resource__`` (needs
        ``params["uri"]``), ``__list_prompts__`` and ``__get_prompt__``
        (needs ``params["name"]``, optional ``params["args"]``).

        Raises:
            ValueError: If ``name`` starts with ``__`` but is not one of the
                supported special names.
        """
        if not self._use_jsonrpc:
            return await super().invoke(name, params)

        await self._ensure_connected()

        if name == "__read_resource__":
            return await self.read_resource(params["uri"])
        elif name == "__list_prompts__":
            return await self.list_prompts()
        elif name == "__get_prompt__":
            return await self.get_prompt(params["name"], params.get("args"))
        elif name.startswith("__"):
            raise ValueError(f"Unsupported MCP invoke name: {name}")

        # Anything that is not a dunder name is an ordinary tool call.
        return await self.call_tool(name, params)


================================================
FILE: anytool/grounding/backends/mcp/transport/connectors/sandbox.py
================================================
"""
Sandbox connector for MCP implementations.

This module provides a connector for communicating with MCP implementations
that are executed inside a sandbox environment (supports any BaseSandbox implementation).
"""

import asyncio
import sys
import time

import aiohttp
from mcp import ClientSession

from anytool.utils.logging import Logger
from anytool.grounding.backends.mcp.transport.task_managers import SseConnectionManager
from anytool.grounding.core.security import BaseSandbox
from anytool.grounding.backends.mcp.transport.connectors.base import MCPBaseConnector

logger = Logger.get_logger(__name__)


class SandboxConnector(MCPBaseConnector):
    """Connector for MCP implementations running in a sandbox environment.

    This connector runs a user-defined stdio command within a sandbox environment
    through a BaseSandbox implementation (e.g., E2BSandbox), wrapped by a
    gateway command (by default 'supergateway') that exposes the command's
    stdio over HTTP/SSE. The connector then talks to that SSE endpoint.
    """

    def __init__(
        self,
        sandbox: BaseSandbox,
        command: str,
        args: list[str],
        env: dict[str, str] | None = None,
        supergateway_command: str = "npx -y supergateway",
        port: int = 3000,
        timeout: float = 5,
        sse_read_timeout: float = 60 * 5,
    ):
        """Initialize a new sandbox connector.

        Args:
            sandbox: A BaseSandbox implementation (e.g., E2BSandbox) to run commands in.
            command: The user's MCP server command to execute in the sandbox.
            args: Command line arguments for the user's MCP server command.
            env: Environment variables for the user's MCP server command.
            supergateway_command: Command to run supergateway (default: "npx -y supergateway").
            port: Port number for the sandbox server (default: 3000).
            timeout: Timeout in seconds handed to the SSE connection manager.
            sse_read_timeout: Read timeout in seconds handed to the SSE connection manager.
        """
        # Store user command configuration
        self.user_command = command
        self.user_args = args or []
        self.user_env = env or {}
        self.port = port

        # Create a placeholder connection manager (will be set up in connect())
        # We need the sandbox to start first to get the base_url, so we can't create
        # the real SseConnectionManager until connect() is called
        from anytool.grounding.core.transport.task_managers import PlaceholderConnectionManager
        connection_manager = PlaceholderConnectionManager()
        super().__init__(connection_manager)

        # Sandbox configuration
        self._sandbox = sandbox
        self.supergateway_cmd_parts = supergateway_command

        # Runtime state
        self.process = None
        self.client_session: ClientSession | None = None
        self.errlog = sys.stderr
        self.base_url: str | None = None
        self._connected = False
        self._connection_manager: SseConnectionManager | None = None

        # SSE connection parameters
        self.headers = {}
        self.timeout = timeout
        self.sse_read_timeout = sse_read_timeout

        self.stdout_lines: list[str] = []
        self.stderr_lines: list[str] = []
        self._server_ready = asyncio.Event()

    def _handle_stdout(self, data: str) -> None:
        """Record a chunk of stdout from the sandbox process and log it."""
        self.stdout_lines.append(data)
        # print()-style keywords (end/flush/file) are not valid for a logging
        # call, so only the message is passed; the trailing newline is trimmed
        # because the logger adds its own.
        logger.debug(f"[SANDBOX STDOUT] {data.rstrip()}")

    def _handle_stderr(self, data: str) -> None:
        """Record a chunk of stderr from the sandbox process and log it."""
        self.stderr_lines.append(data)
        logger.debug(f"[SANDBOX STDERR] {data.rstrip()}")

    async def wait_for_server_response(self, base_url: str, timeout: int = 30) -> bool:
        """Wait for the server to respond to HTTP requests.

        The ``/sse`` endpoint is probed first; if that request raises, the base
        URL is probed instead. Probes are repeated every 0.5 seconds.

        Args:
            base_url: The base URL to check for server readiness
            timeout: Maximum time to wait in seconds

        Returns:
            True once the SSE endpoint answers 200 or the base URL answers
            with any status below 500.

        Raises:
            TimeoutError: If the server did not respond within ``timeout`` seconds.
        """
        logger.info(f"Waiting for server at {base_url} to respond...")
        sys.stdout.flush()

        # monotonic() is immune to wall-clock adjustments while we wait.
        start_time = time.monotonic()
        ping_url = f"{base_url}/sse"
        request_timeout = aiohttp.ClientTimeout(total=2)
        status_interval = 5.0
        next_status_log = status_interval

        # One session is enough for all probes.
        async with aiohttp.ClientSession() as session:
            while time.monotonic() - start_time < timeout:
                try:
                    try:
                        # First try the SSE endpoint
                        async with session.get(ping_url, timeout=request_timeout) as response:
                            if response.status == 200:
                                elapsed = time.monotonic() - start_time
                                logger.info(
                                    f"Server is ready! SSE endpoint responded with 200 after {elapsed:.1f}s"
                                )
                                return True
                    except Exception:
                        # If the SSE endpoint doesn't work, try the base URL
                        async with session.get(base_url, timeout=request_timeout) as response:
                            if response.status < 500:  # Accept any non-server error
                                elapsed = time.monotonic() - start_time
                                logger.info(
                                    f"Server is ready! Base URL responded with {response.status} after {elapsed:.1f}s"
                                )
                                return True
                except Exception as e:
                    # The server is not reachable yet; fall through and retry.
                    logger.debug(f"Server not ready yet: {e}")

                await asyncio.sleep(0.5)

                # Log status roughly every 5 seconds (never at t=0, never twice
                # for the same interval).
                elapsed = time.monotonic() - start_time
                if elapsed >= next_status_log:
                    logger.info(f"Still waiting for server to respond... ({elapsed:.1f}s elapsed)")
                    sys.stdout.flush()
                    next_status_log += status_interval

        # If we get here, we timed out
        raise TimeoutError(f"Timeout waiting for server to respond (waited {timeout} seconds)")

    async def _before_connect(self) -> None:
        """Start the sandbox, launch the gateway command and prepare the SSE connection manager."""
        # Local import: shlex is only needed while building the shell command.
        import shlex

        logger.debug("Connecting to MCP implementation in sandbox")

        # Start the sandbox if not already active
        if not self._sandbox.is_active:
            logger.debug("Starting sandbox...")
            await self._sandbox.start()

        # Get the host for the sandbox
        # Note: This assumes the sandbox implementation has a get_host method
        # For E2BSandbox, this is available
        host = self._sandbox.get_host(self.port)
        self.base_url = f"https://{host}".rstrip("/")

        # The command itself is left untouched (it may consist of several
        # words), but every argument is quoted so that spaces or shell
        # metacharacters inside an argument cannot break or alter the command.
        command = " ".join([self.user_command, *(shlex.quote(arg) for arg in self.user_args)])

        # Construct the full command with supergateway. The whole user command
        # is quoted once more because it is passed as a single --stdio value.
        full_command = " ".join([
            self.supergateway_cmd_parts,
            "--base-url", shlex.quote(self.base_url),
            "--port", str(self.port),
            "--cors",
            "--stdio", shlex.quote(command),
        ])

        logger.debug(f"Full command: {full_command}")

        # Execute the command in the sandbox
        self.process = await self._sandbox.execute_safe(
            full_command,
            envs=self.user_env,
            timeout=1000 * 60 * 10,  # 10 minutes timeout
            background=True,
            on_stdout=self._handle_stdout,
            on_stderr=self._handle_stderr,
        )

        # Wait for the server to be ready
        await self.wait_for_server_response(self.base_url, timeout=30)
        logger.debug("Initializing connection manager...")

        # Create the SSE connection URL
        sse_url = f"{self.base_url}/sse"

        # Create and set up the connection manager
        self._connection_manager = SseConnectionManager(sse_url, self.headers, self.timeout, self.sse_read_timeout)

    async def _after_connect(self) -> None:
        """Run the parent's post-connect step and log success."""
        await super()._after_connect()
        logger.debug(f"Successfully connected to MCP implementation via HTTP/SSE in sandbox: {self.base_url}")

    async def _before_disconnect(self) -> None:
        """Clean up sandbox-specific resources before disconnection."""
        logger.debug("Cleaning up sandbox resources")

        # Stop the sandbox (which will clean up processes)
        if self._sandbox and self._sandbox.is_active:
            try:
                logger.debug("Stopping sandbox instance")
                await self._sandbox.stop()
                logger.debug("Sandbox instance stopped successfully")
            except Exception as e:
                # Best effort: a failing stop must not prevent the rest of the cleanup.
                logger.warning(f"Error stopping sandbox: {e}")

        self.process = None

        # Call the parent method to clean up MCP resources
        await super()._before_disconnect()

        # Clear any collected output
        self.stdout_lines = []
        self.stderr_lines = []
        self.base_url = None

    async def _cleanup_on_connect_failure(self) -> None:
        """Clean up sandbox resources on connection failure."""
        # Stop the sandbox if it was started
        if self._sandbox and self._sandbox.is_active:
            try:
                await self._sandbox.stop()
            except Exception as e:
                logger.warning(f"Error stopping sandbox during cleanup: {e}")

        self.process = None
        self.stdout_lines = []
        self.stderr_lines = []
        self.base_url = None

        # Call parent cleanup
        await super()._cleanup_on_connect_failure()

    @property
    def sandbox(self) -> BaseSandbox:
        """Get the underlying sandbox instance."""
        return self._sandbox

    @property
    def public_identifier(self) -> dict[str, str | list[str]]:
        """Get the identifier for the connector (type, command and args)."""
        return {"type": "sandbox", "command": self.user_command, "args": self.user_args}


================================================
FILE: anytool/grounding/backends/mcp/transport/connectors/stdio.py
================================================
"""
StdIO connector for MCP implementations.

This module provides a connector for communicating with MCP implementations
through the standard input/output streams.
"""

import sys

from mcp import ClientSession, StdioServerParameters

from anytool.utils.logging import Logger
from ..task_managers import StdioConnectionManager
from .base import MCPBaseConnector

logger = Logger.get_logger(__name__)


class StdioConnector(MCPBaseConnector):
    """Connector for MCP implementations using stdio transport.

    This connector uses the stdio transport to communicate with MCP implementations
    that are executed as child processes. It uses a connection manager to handle
    the proper lifecycle management of the stdio client.
    """

    def __init__(
        self,
        command: str = "npx",
        args: list[str] | None = None,
        env: dict[str, str] | None = None,
        errlog=None,
    ):
        """Initialize a new stdio connector.

        Args:
            command: The command to execute.
            args: Optional command line arguments.
            env: Optional environment variables. The mapping is copied, so the
                defaults added below never leak into the caller's dict.
            errlog: Stream to write error output to (defaults to filtered stderr).
                   StdioConnectionManager will wrap this to filter harmless errors.
        """
        self.command = command
        self.args = args or []  # Ensure args is never None

        # Work on a private copy: the defaults added below must not modify a
        # dict that the caller may reuse (e.g. a shared configuration object).
        self.env = dict(env) if env else {}
        # Add environment variables to encourage MCP servers to suppress non-JSON output
        # Many Node.js-based servers respect NODE_ENV=production
        self.env.setdefault("NODE_ENV", "production")
        # Add flag to suppress informational messages (some servers respect this)
        self.env.setdefault("MCP_SILENT", "true")

        self.errlog = errlog

        # Create server parameters and connection manager
        # StdioConnectionManager will wrap errlog in FilteredStderrWrapper
        server_params = StdioServerParameters(command=self.command, args=self.args, env=self.env)
        connection_manager = StdioConnectionManager(server_params, self.errlog)
        super().__init__(connection_manager)

    async def _before_connect(self) -> None:
        """Log connection attempt."""
        logger.debug(f"Connecting to MCP implementation: {self.command}")

    async def _after_connect(self) -> None:
        """Create ClientSession and log success."""
        # Call parent's _after_connect to create the ClientSession
        await super()._after_connect()
        logger.debug(f"Successfully connected to MCP implementation: {self.command}")

    @property
    def public_identifier(self) -> dict[str, str]:
        """Get the identifier for the connector (type plus the command line)."""
        return {"type": "stdio", "command&args": f"{self.command} {' '.join(self.args)}"}

================================================
FILE: anytool/grounding/backends/mcp/transport/connectors/utils.py
================================================
from typing import Any


def is_stdio_server(server_config: dict[str, Any]) -> bool:
    """Tell whether a server configuration describes a stdio server.

    A configuration counts as stdio when it contains both a "command" and an
    "args" key; the values stored under those keys are not inspected.

    Args:
        server_config: The server configuration section

    Returns:
        True if both keys are present, False otherwise
    """
    required_keys = ("command", "args")
    return all(key in server_config for key in required_keys)

================================================
FILE: anytool/grounding/backends/mcp/transport/connectors/websocket.py
================================================
"""
WebSocket connector for MCP implementations.

This module provides a connector for communicating with MCP implementations
through WebSocket connections.
"""

import asyncio
import json
import uuid
from typing import Any

from mcp.types import Tool
from websockets import ClientConnection

from anytool.utils.logging import Logger
from anytool.grounding.core.transport.task_managers.base import BaseConnectionManager
from ..task_managers import WebSocketConnectionManager
from .base import MCPBaseConnector

logger = Logger.get_logger(__name__)


class WebSocketConnector(MCPBaseConnector):
    """Connector for MCP implementations using WebSocket transport.

    This connector uses WebSockets to communicate with remote MCP implementations,
    using a connection manager to handle the proper lifecycle management.

    Requests are matched to responses by id: each request registers a future
    in ``pending_requests`` and a background receiver task resolves it when
    the matching message arrives.
    """

    def __init__(
        self,
        url: str,
        auth_token: str | None = None,
        headers: dict[str, str] | None = None,
    ):
        """Initialize a new WebSocket connector.

        Args:
            url: The WebSocket URL to connect to.
            auth_token: Optional authentication token, sent as a Bearer
                ``Authorization`` header.
            headers: Optional additional headers. The mapping is copied, so
                the caller's dict is never modified.
        """
        self.url = url
        self.auth_token = auth_token
        # Copy so that adding the Authorization header below does not mutate
        # a dict owned by the caller.
        self.headers = dict(headers) if headers else {}
        if auth_token:
            self.headers["Authorization"] = f"Bearer {auth_token}"

        self.ws: ClientConnection | None = None
        self._receiver_task: asyncio.Task | None = None
        self.pending_requests: dict[str, asyncio.Future] = {}
        self._tools: list[Tool] | None = None

        # Create connection manager with actual parameters
        connection_manager = WebSocketConnectionManager(self.url, self.headers)
        super().__init__(connection_manager)
        self._connected = False

    async def _get_streams_from_connection(self):
        """WebSocket doesn't use streams, return None to skip ClientSession creation."""
        return None

    async def _after_connect(self) -> None:
        """Set up WebSocket-specific resources after connection.

        WebSocket doesn't use ClientSession, so we skip the parent's implementation
        and set up WebSocket-specific resources instead.
        """
        # Store the WebSocket connection
        self.ws = self._connection

        # Start the message receiver task
        self._receiver_task = asyncio.create_task(self._receive_messages(), name="websocket_receiver_task")

        logger.debug(f"Successfully connected to MCP implementation via WebSocket: {self.url}")

    def _fail_pending_requests(self, exc: BaseException) -> None:
        """Reject every outstanding request future with ``exc`` and forget them."""
        for future in self.pending_requests.values():
            if not future.done():
                future.set_exception(exc)
        self.pending_requests.clear()

    async def _receive_messages(self) -> None:
        """Continuously receive messages and resolve the matching pending requests.

        Raises:
            RuntimeError: If called while no WebSocket is connected.
        """
        if not self.ws:
            raise RuntimeError("WebSocket is not connected")

        try:
            async for message in self.ws:
                # A single undecodable frame must not take the receiver down,
                # otherwise every later request would wait forever.
                try:
                    data = json.loads(message)
                except (TypeError, ValueError) as e:
                    logger.warning(f"Ignoring non-JSON WebSocket message: {e}")
                    continue

                # Check if this is a response to a pending request
                request_id = data.get("id") if isinstance(data, dict) else None
                future = (
                    self.pending_requests.pop(request_id, None)
                    if isinstance(request_id, str)
                    else None
                )
                if future is None:
                    logger.debug(f"Received message: {data}")
                    continue

                if future.done():
                    # The waiter was cancelled or already rejected; resolving
                    # it again would raise InvalidStateError.
                    logger.debug(f"Discarding response for finished request {request_id}")
                    continue

                if "result" in data:
                    future.set_result(data["result"])
                elif "error" in data:
                    future.set_exception(Exception(data["error"]))
                else:
                    # Neither result nor error: fail the waiter instead of
                    # leaving it pending forever.
                    future.set_exception(
                        RuntimeError(f"Malformed response for request {request_id}: {data}")
                    )

                logger.debug(f"Received response for request {request_id}")
        except Exception as e:
            logger.error(f"Error in WebSocket message receiver: {e}")
            # If the websocket connection was closed or errored,
            # reject all pending requests
            self._fail_pending_requests(e)
        else:
            # The message iterator finished without raising, i.e. the
            # connection ended; nothing will answer the remaining requests.
            self._fail_pending_requests(ConnectionError("WebSocket connection closed"))

    async def _before_disconnect(self) -> None:
        """Clean up WebSocket-specific resources before disconnection."""
        errors = []

        # First cancel the receiver task
        if self._receiver_task and not self._receiver_task.done():
            try:
                logger.debug("Cancelling WebSocket receiver task")
                self._receiver_task.cancel()
                try:
                    await self._receiver_task
                except asyncio.CancelledError:
                    logger.debug("WebSocket receiver task cancelled successfully")
                except Exception as e:
                    logger.warning(f"Error during WebSocket receiver task cancellation: {e}")
            except Exception as e:
                error_msg = f"Error cancelling WebSocket receiver task: {e}"
                logger.warning(error_msg)
                errors.append(error_msg)
            finally:
                self._receiver_task = None

        # Reject any pending requests
        if self.pending_requests:
            logger.debug(f"Rejecting {len(self.pending_requests)} pending requests")
            self._fail_pending_requests(ConnectionError("WebSocket disconnected"))

        # Reset WebSocket and tools
        self.ws = None
        self._tools = None

        if errors:
            logger.warning(f"Encountered {len(errors)} errors during WebSocket resource cleanup")

    async def _cleanup_on_connect_failure(self) -> None:
        """Clean up WebSocket resources on connection failure."""
        # Cancel receiver task if it was started
        if self._receiver_task and not self._receiver_task.done():
            try:
                self._receiver_task.cancel()
                await self._receiver_task
            except asyncio.CancelledError:
                pass
            except Exception as e:
                # Best effort: the task is being discarded anyway.
                logger.debug(f"Ignoring error from WebSocket receiver task during cleanup: {e}")
            finally:
                self._receiver_task = None

        # Reject pending requests
        self._fail_pending_requests(ConnectionError("Connection failed"))

        # Call parent cleanup
        await super()._cleanup_on_connect_failure()
        self.ws = None

    async def _send_request(self, method: str, params: dict[str, Any] | None = None) -> Any:
        """Send a request and wait for its response.

        Args:
            method: The method name to send.
            params: Optional parameters; an empty dict is sent when omitted.

        Returns:
            The ``result`` field of the matching response.

        Raises:
            RuntimeError: If no WebSocket is connected.
            Exception: Whatever the receiver set on the request's future, or
                the error raised while sending.
        """
        if not self.ws:
            raise RuntimeError("WebSocket is not connected")

        # Create a request ID
        request_id = str(uuid.uuid4())

        # Create a future to receive the response
        future = asyncio.get_running_loop().create_future()
        self.pending_requests[request_id] = future

        try:
            # Send the request
            await self.ws.send(json.dumps({"id": request_id, "method": method, "params": params or {}}))
            logger.debug(f"Sent request {request_id} method: {method}")

            # Wait for the response
            return await future
        except Exception as e:
            logger.error(f"Error waiting for response to request {request_id}: {e}")
            raise
        finally:
            # Covers send failures and cancellation as well; on success the
            # receiver has already removed the entry.
            self.pending_requests.pop(request_id, None)

    async def initialize(self) -> dict[str, Any]:
        """Initialize the MCP session and return session information."""
        logger.debug("Initializing MCP session")
        result = await self._send_request("initialize")

        # Get available tools
        tools_result = await self.list_tools()
        self._tools = [Tool(**tool) for tool in tools_result]

        logger.debug(f"MCP session initialized with {len(self._tools)} tools")
        return result

    async def list_tools(self) -> list[dict[str, Any]]:
        """List all available tools from the MCP implementation."""
        logger.debug("Listing tools")
        result = await self._send_request("tools/list")
        return result.get("tools", [])

    @property
    def tools(self) -> list[Tool]:
        """Get the list of available tools.

        Raises:
            RuntimeError: If ``initialize()`` has not populated the list yet.
        """
        # Compare against None: an initialized server with zero tools yields
        # an empty list, which is a valid answer and not "uninitialized".
        if self._tools is None:
            raise RuntimeError("MCP client is not initialized")
        return self._tools

    async def call_tool(self, name: str, arguments: dict[str, Any]) -> Any:
        """Call an MCP tool with the given arguments."""
        logger.debug(f"Calling tool '{name}' with arguments: {arguments}")
        return await self._send_request("tools/call", {"name": name, "arguments": arguments})

    async def list_resources(self) -> list[dict[str, Any]]:
        """List all available resources from the MCP implementation."""
        logger.debug("Listing resources")
        # NOTE(review): unlike list_tools(), the whole result is returned
        # rather than a "resources" entry - confirm what callers expect.
        result = await self._send_request("resources/list")
        return result

    async def read_resource(self, uri: str) -> tuple[bytes, str]:
        """Read a resource by URI and return its (content, mimeType) pair."""
        logger.debug(f"Reading resource: {uri}")
        result = await self._send_request("resources/read", {"uri": uri})
        return result.get("content", b""), result.get("mimeType", "")

    async def request(self, method: str, params: dict[str, Any] | None = None) -> Any:
        """Send a raw request to the MCP implementation."""
        logger.debug(f"Sending request: {method} with params: {params}")
        return await self._send_request(method, params)

    @property
    def public_identifier(self) -> dict[str, str]:
        """Get the identifier for the connector (type and URL)."""
        return {"type": "websocket", "url": self.url}


================================================
FILE: anytool/grounding/backends/mcp/transport/task_managers/__init__.py
================================================
"""
Connection managers for various MCP transports.

This module exposes the connection managers used to connect to MCP
implementations through different transport mechanisms.
"""

from .sse import SseConnectionManager
from .stdio import StdioConnectionManager
from .streamable_http import StreamableHttpConnectionManager
from .websocket import WebSocketConnectionManager

__all__ = [
    "StdioConnectionManager",
    "WebSocketConnectionManager",
    "SseConnectionManager",
    "StreamableHttpConnectionManager",
]

================================================
FILE: anytool/grounding/backends/mcp/transport/task_managers/sse.py
================================================
"""
SSE connection management for MCP implementations.

This module provides a connection manager for SSE-based MCP connections
that ensures proper task isolation and resource cleanup.
"""

from typing import Any, Tuple
from mcp.client.sse import sse_client
from anytool.utils.logging import Logger
from anytool.grounding.core.transport.task_managers import (
    AsyncContextConnectionManager,
)

logger = Logger.get_logger(__name__)


class SseConnectionManager(AsyncContextConnectionManager[Tuple[Any, Any], ...]):
    """Connection manager for MCP servers reached over SSE.

    The manager hands ``sse_client`` and its keyword arguments to
    ``AsyncContextConnectionManager``.  Per the original description, the
    client is run in a dedicated task to avoid the "cancel scope in different
    task" error; that mechanism lives in the base class, which is not defined
    in this module.
    """

    def __init__(
        self,
        url: str,
        headers: dict[str, str] | None = None,
        timeout: float = 5,
        sse_read_timeout: float = 60 * 5,
    ):
        """Store the endpoint details and configure the base manager.

        Args:
            url: SSE endpoint URL.
            headers: Optional HTTP headers; an empty dict is used when omitted.
            timeout: Timeout in seconds passed through as ``timeout``.
            sse_read_timeout: Timeout in seconds passed through as
                ``sse_read_timeout``.
        """
        client_kwargs = {
            "url": url,
            "headers": headers or {},
            "timeout": timeout,
            "sse_read_timeout": sse_read_timeout,
        }
        super().__init__(sse_client, **client_kwargs)
        self.url = url
        self.headers = headers or {}
        logger.debug("SseConnectionManager init url=%s", url)


================================================
FILE: anytool/grounding/backends/mcp/transport/task_managers/stdio.py
================================================
"""
StdIO connection management for MCP implementations.

This module provides a connection manager for stdio-based MCP connections
that ensures proper task isolation and resource cleanup.
"""

import asyncio
import io
import logging
import sys
from typing import Any, TextIO, Tuple

from mcp import StdioServerParameters
from mcp.client.stdio import stdio_client

from anytool.utils.logging import Logger
from anytool.grounding.core.transport.task_managers import (
    AsyncContextConnectionManager,
)

logger = Logger.get_logger(__name__)


class FilteredStderrWrapper(io.TextIOBase):
    """Text stream proxy that hides benign MCP server shutdown noise.

    Incoming text is split into lines.  Lines that belong to a traceback are
    held back until the traceback looks complete and are then either dropped
    (when the collected text matches a known shutdown-time pattern) or
    forwarded in one piece to the wrapped stream.  Every other line is
    forwarded immediately unless it matches a known harmless log message.
    """

    # Prefixes (after stripping whitespace) that open a plain-text traceback.
    _PLAIN_TRACEBACK_PREFIXES = (
        'Traceback (most recent call last)',
        'Exception Group Traceback (most recent call last)',
        'BaseExceptionGroup:',
        'ExceptionGroup:',
    )
    # Box-drawing characters that open a rich-formatted traceback.
    _RICH_OPENERS = ("╭", "┏")
    # Lower-cased fragments identifying an error that may be suppressed.
    _HARMLESS_ERROR_PATTERNS = (
        'valueerror: i/o operation on closed file',
        'oserror: [errno 9] bad file descriptor',
        'brokenpipeerror',
        'runtimeerror: attempted to exit cancel scope in a different task',
        'baseexceptiongroup: unhandled errors in a taskgroup',
        'generatorexit',
        'an error occurred during closing of asynchronous generator',
    )
    # Lower-cased fragments that tie an error to shutdown/cleanup.
    _SHUTDOWN_KEYWORDS = (
        'finally:', 'stopped', 'cleanup', '__exit__', '__aexit__',
        'stdio_client', 'service stopped',
    )
    # Lower-cased fragments of single log lines that are dropped outright.
    _HARMLESS_LOG_PATTERNS = (
        'an error occurred during closing of asynchronous generator',
        'asyncgen:',
        'service stopped.',
    )

    def __init__(self, wrapped_stream: TextIO):
        """Wrap ``wrapped_stream`` and start with empty filter state.

        Args:
            wrapped_stream: Stream that receives everything not filtered out.
        """
        self._stream = wrapped_stream
        self._buffer = ""  # text received after the last newline
        self._in_traceback = False
        self._traceback_lines = []
        self._in_rich_traceback = False  # collecting a box-drawn traceback
        self._rich_traceback_needs_error_line = False  # box closed, error line pending

    def write(self, s: str) -> int:
        """Accept text, process every completed line, and keep the remainder.

        Args:
            s: Text to write.

        Returns:
            ``len(s)``, regardless of how much was forwarded.
        """
        self._buffer += s
        while True:
            head, newline, tail = self._buffer.partition('\n')
            if not newline:
                break
            # Update the buffer before processing, as the line handler may
            # trigger further writes.
            self._buffer = tail
            self._process_line(head + newline)
        return len(s)

    def _start_collecting(self, first_line: str, rich: bool) -> None:
        """Begin a new traceback, discarding any lines collected so far."""
        self._in_traceback = True
        self._in_rich_traceback = rich
        self._rich_traceback_needs_error_line = False
        self._traceback_lines = [first_line]

    def _stop_collecting(self) -> None:
        """Leave traceback mode and forget the collected lines."""
        self._in_traceback = False
        self._in_rich_traceback = False
        self._rich_traceback_needs_error_line = False
        self._traceback_lines = []

    def _write_collected(self) -> None:
        """Forward every collected traceback line (without flushing)."""
        for collected_line in self._traceback_lines:
            self._stream.write(collected_line)

    def _forward_plain_line(self, line: str) -> None:
        """Forward a non-traceback line unless it is a known harmless log."""
        lowered = line.lower()
        if any(fragment in lowered for fragment in self._HARMLESS_LOG_PATTERNS):
            logger.debug(f"Suppressed harmless log line: {line.strip()}")
        else:
            self._stream.write(line)
            self._stream.flush()

    def _process_line(self, line: str):
        """Route one line through the traceback state machine."""
        # A line that opens a traceback (rich box or plain header) always
        # starts a fresh collection.
        if line.lstrip().startswith(self._RICH_OPENERS):
            self._start_collecting(line, rich=True)
            return
        if line.strip().startswith(self._PLAIN_TRACEBACK_PREFIXES):
            self._start_collecting(line, rich=False)
            return

        if not self._in_traceback:
            self._forward_plain_line(line)
            return

        self._traceback_lines.append(line)

        # A box character appearing mid-line switches a plain collection to
        # rich mode.
        if not self._in_rich_traceback and any(ch in line for ch in self._RICH_OPENERS):
            self._in_rich_traceback = True

        # Bottom edge of the rich box: the error line is expected next.
        if self._in_rich_traceback and '╰' in line:
            self._rich_traceback_needs_error_line = True
            return

        # The line right after the box completes a rich traceback.
        if self._rich_traceback_needs_error_line:
            if self._is_harmless_error():
                logger.debug("Suppressed harmless rich-formatted MCP server error")
            else:
                self._write_collected()
                self._stream.flush()
            self._stop_collecting()
            return

        collected = len(self._traceback_lines)

        # Plain tracebacks (e.g. exception groups) can be long; once a few
        # lines are in, drop the collection as soon as it looks harmless.
        if collected > 5 and not self._in_rich_traceback and self._is_harmless_error():
            logger.debug("Suppressed harmless MCP server shutdown error")
            self._stop_collecting()
            return

        # An unindented line containing ':' is taken as the final
        # "ErrorType: message" line of a plain traceback.
        if not self._in_rich_traceback and line and not line[0].isspace() and ':' in line:
            if self._is_harmless_error():
                logger.debug("Suppressed harmless MCP server shutdown error")
            else:
                self._write_collected()
                self._stream.flush()
            self._stop_collecting()
            return

        # Safety valve: give up on overly long collections and emit them.
        if collected > 100:
            self._write_collected()
            self._stream.flush()
            self._stop_collecting()

    def _is_harmless_error(self) -> bool:
        """Tell whether the collected text is a suppressible shutdown error.

        The lower-cased text must contain at least one harmless error pattern
        and at least one shutdown/cleanup keyword.
        """
        text = ''.join(self._traceback_lines).lower()
        if not any(pattern in text for pattern in self._HARMLESS_ERROR_PATTERNS):
            return False
        return any(keyword in text for keyword in self._SHUTDOWN_KEYWORDS)

    def flush(self):
        """Process any partial line, emit pending traceback lines, flush the stream."""
        if self._buffer:
            self._process_line(self._buffer)
            self._buffer = ""

        if self._traceback_lines:
            # An incomplete traceback is written out rather than held back.
            self._write_collected()
            self._traceback_lines = []

        self._stream.flush()

    def fileno(self) -> int:
        """Return the wrapped stream's file descriptor, or -1 if it has none."""
        try:
            stream_fileno = self._stream.fileno
        except AttributeError:
            return -1
        return stream_fileno()

    @property
    def closed(self) -> bool:
        """Mirror the ``closed`` state of the wrapped stream."""
        return self._stream.closed


class StdioConnectionManager(AsyncContextConnectionManager[Tuple[Any, Any], ...]):
    """Connection manager for stdio-based MCP connections.

    ``stdio_client`` is entered and exited inside one dedicated background
    task, so its context manager is never left from a different task than the
    one that entered it (the "cancel scope in different task" error).  The
    error log stream is always wrapped in a :class:`FilteredStderrWrapper`.

    NOTE(review): this class overrides ``_close_connection`` without calling
    the parent implementation.  An exception raised by the runner task during
    shutdown propagates out of ``_close_connection`` (only the timeout is
    caught there) -- confirm the base class's caller handles it.
    """

    def __init__(
        self,
        server_params: StdioServerParameters,
        errlog: TextIO | None = None,
    ):
        """Initialize a new stdio connection manager.

        Args:
            server_params: The parameters for the stdio server.
            errlog: The error log stream.  Defaults to ``sys.stderr``; any
                stream that is not already a ``FilteredStderrWrapper`` is
                wrapped in one.
        """
        if errlog is None:
            errlog = FilteredStderrWrapper(sys.stderr)
        elif not isinstance(errlog, FilteredStderrWrapper):
            errlog = FilteredStderrWrapper(errlog)

        super().__init__(stdio_client, server_params, errlog)
        self.server_params = server_params
        self.errlog = errlog
        self._mcp_logger_filter = None
        self._stop_event: asyncio.Event | None = None  # Signal for background task
        self._runner_task: asyncio.Task | None = None  # Background runner task
        self._conn_future: asyncio.Future | None = None  # Future for the established connection
        logger.debug("StdioConnectionManager init with params=%s", server_params)

    async def _establish_connection(self) -> Tuple[Any, Any]:
        """Establish the connection in a dedicated task and return its streams.

        Returns:
            The value yielded by ``stdio_client`` (a pair of streams).

        Raises:
            RuntimeError: ``"Connection failed"`` when the runner task ends
                before a connection was produced; the underlying exception,
                if any, is attached as ``__cause__``.
        """
        # Suppress MCP SDK's noisy JSON parse errors *before* starting the runner
        self._suppress_mcp_json_errors()

        # A fresh event/future pair is created only together with a fresh
        # runner.  Replacing a completed future while the runner is still
        # alive would leave this coroutine awaiting a future nobody resolves.
        if self._runner_task is None or self._runner_task.done():
            stop_event = asyncio.Event()
            conn_future: asyncio.Future = asyncio.get_running_loop().create_future()
            self._stop_event = stop_event
            self._conn_future = conn_future

            async def _runner():  # Runs in its *own* task (same task for enter/exit)
                # The closure uses the local event/future rather than the
                # attributes, which _close_connection() resets to None.
                failure: BaseException | None = None
                try:
                    async with stdio_client(self.server_params, self.errlog) as conn:
                        # Pass connection back to the caller
                        if not conn_future.done():
                            conn_future.set_result(conn)
                        # Wait until close is requested
                        await stop_event.wait()
                except BaseException as exc:
                    failure = exc
                    raise
                finally:
                    # Make sure the future is set even on error so awaiters don't hang
                    if not conn_future.done():
                        error = RuntimeError("Connection failed")
                        error.__cause__ = failure
                        conn_future.set_exception(error)

            self._runner_task = asyncio.create_task(_runner(), name="stdio_client_runner")

        # Wait for the connection tuple from the future
        conn: Tuple[Any, Any] = await self._conn_future  # type: ignore
        return conn

    async def _close_connection(self) -> None:
        """Request the background task to exit its context and wait for it."""
        try:
            # Restore original logging configuration *before* shutdown
            self._restore_mcp_logging()

            # Signal the runner to exit its context manager
            if self._stop_event and not self._stop_event.is_set():
                self._stop_event.set()

            # Await the runner task so that __aexit__ executes in *its* task
            if self._runner_task:
                try:
                    await asyncio.wait_for(self._runner_task, timeout=2.0)
                except asyncio.TimeoutError:
                    logger.warning("Timeout while waiting for stdio_client to shut down")
        finally:
            # Clean up helpers so next connect() creates new ones
            self._runner_task = None
            self._stop_event = None
            self._conn_future = None

    def _suppress_mcp_json_errors(self):
        """Install a filter that hides the MCP SDK's JSON parsing error logs.

        Records on the ``mcp.client.stdio`` logger whose message contains
        "Failed to parse JSONRPC message" are dropped.  Calling this again
        while a filter is already installed is a no-op, so no filter is left
        behind on the logger without a reference to remove it.
        """
        if self._mcp_logger_filter is not None:
            return

        mcp_logger = logging.getLogger("mcp.client.stdio")

        class JSONErrorFilter(logging.Filter):
            """Filter out JSON parsing errors from MCP SDK."""
            def filter(self, record):
                # Suppress "Failed to parse JSONRPC message" errors
                if "Failed to parse JSONRPC message" in str(record.msg):
                    return False
                return True

        self._mcp_logger_filter = JSONErrorFilter()
        mcp_logger.addFilter(self._mcp_logger_filter)

    def _restore_mcp_logging(self):
        """Remove the filter installed by ``_suppress_mcp_json_errors``, if any."""
        if self._mcp_logger_filter:
            mcp_logger = logging.getLogger("mcp.client.stdio")
            mcp_logger.removeFilter(self._mcp_logger_filter)
            self._mcp_logger_filter = None

# Import-time side effect: replace the process-wide sys.stderr with a filtering
# wrapper, unless it is already one, so text written through sys.stderr goes
# through FilteredStderrWrapper as well.
if not isinstance(sys.stderr, FilteredStderrWrapper):
    sys.stderr = FilteredStderrWrapper(sys.stderr)
    logger.debug("Applied global FilteredStderrWrapper to sys.stderr")

================================================
FILE: anytool/grounding/backends/mcp/transport/task_managers/streamable_http.py
================================================
"""
Streamable HTTP connection management for MCP implementations.

This module provides a connection manager for streamable HTTP-based MCP connections
that ensures proper task isolation and resource cleanup.
"""

from datetime import timedelta
from typing import Any, Tuple
from contextlib import asynccontextmanager

from mcp.client.streamable_http import streamablehttp_client
from anytool.utils.logging import Logger
from anytool.grounding.core.transport.task_managers import (
    AsyncContextConnectionManager,
)

logger = Logger.get_logger(__name__)


def _make_shim():
    """Build an async context manager wrapping ``streamablehttp_client``.

    The returned shim accepts the same keyword arguments as
    ``streamablehttp_client``, yields only the ``(read, write)`` stream pair
    (the third element returned by the SDK is dropped), logs connection
    errors before re-raising them, and logs and suppresses errors raised
    while leaving the SDK context.
    """
    @asynccontextmanager
    async def _shim(**kw):
        ctx_manager = streamablehttp_client(**kw)
        # __aexit__ is only legal after a successful __aenter__; track that
        # explicitly so a failed connection never triggers an exit call.
        entered = False

        try:
            try:
                # Entering may raise an ExceptionGroup during concurrent init
                r, w, _sid_cb = await ctx_manager.__aenter__()
            except Exception as conn_error:
                if "unhandled errors in a taskgroup" in str(conn_error).lower():
                    # Re-raised so the caller can retry
                    logger.debug(f"TaskGroup race condition during connection: {type(conn_error).__name__}")
                else:
                    logger.warning(f"Connection error: {conn_error}")
                raise
            entered = True

            # Yield to caller
            yield (r, w)

        except GeneratorExit:
            # Normal generator exit - this happens during cleanup
            logger.debug("StreamableHTTP generator exit (normal cleanup)")

        finally:
            if entered:
                try:
                    await ctx_manager.__aexit__(None, None, None)
                except (GeneratorExit, Exception) as e:
                    # Cleanup errors are expected during concurrent shutdown;
                    # log at debug level and suppress.
                    error_type = type(e).__name__
                    if "ExceptionGroup" in error_type or "TaskGroup" in str(e):
                        logger.debug(f"Benign TaskGroup cleanup error: {error_type}")
                    else:
                        logger.debug(f"Benign cleanup error: {error_type}")

    return _shim


class StreamableHttpConnectionManager(AsyncContextConnectionManager[Tuple[Any, Any], ...]):
    """
    Connection manager for MCP servers reached over streamable HTTP.

    Built on the generic AsyncContextConnectionManager.  The context manager
    it drives comes from ``_make_shim()``, which drops the extra session-id
    callback returned by the SDK.
    """

    def __init__(
        self,
        url: str,
        headers: dict[str, str] | None = None,
        timeout: float = 5,
        read_timeout: float = 60 * 5,
    ):
        """Store the endpoint details and configure the base manager.

        Args:
            url: Endpoint URL.
            headers: Optional HTTP headers; an empty dict is used when omitted.
            timeout: Seconds, converted to a ``timedelta`` and passed as
                ``timeout``.
            read_timeout: Seconds, converted to a ``timedelta`` and passed as
                ``sse_read_timeout``.
        """
        client_kwargs = {
            "url": url,
            "headers": headers or {},
            "timeout": timedelta(seconds=timeout),
            "sse_read_timeout": timedelta(seconds=read_timeout),
        }
        super().__init__(_make_shim(), **client_kwargs)
        self.url = url
        self.headers = headers or {}
        logger.debug("StreamableHttpConnectionManager init url=%s", url)

================================================
FILE: anytool/grounding/backends/mcp/transport/task_managers/websocket.py
================================================
"""
WebSocket connection management for MCP implementations.

This module provides a connection manager for WebSocket-based MCP connections.
"""

from typing import Any, Tuple
from mcp.client.websocket import websocket_client
from anytool.utils.logging import Logger
from anytool.grounding.core.transport.task_managers import (
    AsyncContextConnectionManager,
)

logger = Logger.get_logger(__name__)

class WebSocketConnectionManager(AsyncContextConnectionManager[Tuple[Any, Any], ...]):
    """Connection manager for MCP servers reached over WebSocket."""

    def __init__(self, url: str, headers: dict[str, str] | None = None):
        """Configure the base manager with ``websocket_client`` and ``url``.

        Args:
            url: WebSocket endpoint URL.
            headers: Stored on the instance only.  They are not passed to
                ``websocket_client``; the original note says the MCP client
                does not support headers.
        """
        super().__init__(websocket_client, url)
        self.url = url
        self.headers = headers if headers else {}
        logger.debug("WebSocketConnectionManager init url=%s", url)

================================================
FILE: anytool/grounding/backends/shell/__init__.py
================================================
from .provider import ShellProvider
from .session import ShellSession
from .transport.connector import ShellConnector
from .transport.local_connector import LocalShellConnector

# Names exported by ``from ... import *``: the provider, the session, and the
# two connector classes imported above.
__all__ = [
    "ShellProvider",
    "ShellSession",
    "ShellConnector",
    "LocalShellConnector",
]

================================================
FILE: anytool/grounding/backends/shell/provider.py
================================================
from anytool.grounding.core.provider import Provider
from anytool.grounding.core.types import BackendType, SessionConfig
from .session import ShellSession
from .transport.connector import ShellConnector
from .transport.local_connector import LocalShellConnector
from anytool.config import get_config
from anytool.config.utils import get_config_value
from anytool.platform.config import get_local_server_config
from anytool.utils.logging import Logger

logger = Logger.get_logger(__name__)


class ShellProvider(Provider[ShellSession]):
    """Provider for the shell backend; it manages one session keyed by ``DEFAULT_SID``."""

    DEFAULT_SID = BackendType.SHELL.value

    def __init__(self, config: dict | None = None):
        """Delegate to the base provider with the SHELL backend type."""
        # Per the original note, the parent __init__ already invokes
        # _setup_security_policy(), so it is not called again here.
        super().__init__(BackendType.SHELL, config)

    def _setup_security_policy(self, config: dict | None = None):
        """Load the backend's security policy, apply overrides, and register it.

        Args:
            config: Optional provider config.  Its ``"security"`` mapping
                overrides attributes the policy already has, and a non-None
                ``"sandbox_enabled"`` value overrides ``sandbox_enabled``.
        """
        policy = get_config().get_security_policy(self.backend_type.value)

        if config:
            overrides = get_config_value(config, "security", None)
            if overrides:
                for attr_name, attr_value in overrides.items():
                    # Unknown keys are ignored rather than added to the policy.
                    if hasattr(policy, attr_name):
                        setattr(policy, attr_name, attr_value)

            sandbox_flag = get_config_value(config, "sandbox_enabled", None)
            if sandbox_flag is not None:
                policy.sandbox_enabled = sandbox_flag

        logger.info(f"Shell security policy: allow_shell_commands={policy.allow_shell_commands}, "
                    f"blocked_commands={policy.blocked_commands}")

        self.security_manager.set_backend_policy(BackendType.SHELL, policy)

    async def initialize(self) -> None:
        """Create the default session once and mark the provider initialized."""
        if self.is_initialized:
            return
        default_config = SessionConfig(
            session_name=self.DEFAULT_SID,
            backend_type=BackendType.SHELL,
            connection_params={}
        )
        await self.create_session(default_config)
        self.is_initialized = True

    def _build_connector(self, shell_config, session_config: SessionConfig):
        """Pick the connector for the configured mode (``"local"`` or server)."""
        mode = getattr(shell_config, "mode", "local")

        if mode != "local":
            # Server mode: talk to a running local_server over HTTP.
            logger.info("Shell backend using SERVER mode (connecting to local_server)")
            server_cfg = get_local_server_config()
            fallback_port = server_cfg.get('port', shell_config.default_port)
            params = session_config.connection_params
            return ShellConnector(
                vm_ip=get_config_value(params, "vm_ip", server_cfg['host']),
                port=get_config_value(params, "port", fallback_port),
                retry_times=shell_config.max_retries,
                retry_interval=shell_config.retry_interval,
                security_manager=self.security_manager,
            )

        # Local mode: scripts run directly via subprocess, no server needed.
        logger.info("Shell backend using LOCAL mode (no server required)")
        return LocalShellConnector(
            retry_times=shell_config.max_retries,
            retry_interval=shell_config.retry_interval,
            security_manager=self.security_manager,
        )

    async def create_session(self, session_config: SessionConfig) -> ShellSession:
        """Return the existing default session or build and register a new one.

        The session id is always ``DEFAULT_SID``; ``session_config`` only
        supplies connection parameters for server mode.
        """
        sid = self.DEFAULT_SID
        if sid in self._sessions:
            return self._sessions[sid]

        shell_config = get_config().get_backend_config("shell")
        connector = self._build_connector(shell_config, session_config)

        new_session = ShellSession(
            connector=connector,
            session_id=sid,
            security_manager=self.security_manager,
            default_working_dir=shell_config.working_dir,
            default_env=shell_config.env,
            default_conda_env=shell_config.conda_env
        )

        await new_session.initialize()
        self._sessions[sid] = new_session
        return new_session

    async def close_session(self, session_id: str) -> None:
        """Remove the session with ``session_id`` (if any) and disconnect it."""
        removed = self._sessions.pop(session_id, None)
        if not removed:
            return
        await removed.disconnect()

================================================
FILE: anytool/grounding/backends/shell/session.py
================================================
import re
from typing import Union
from anytool.grounding.core.types import BackendType
from anytool.grounding.core.session import BaseSession
from anytool.grounding.backends.shell.transport.connector import ShellConnector
from anytool.grounding.backends.shell.transport.local_connector import LocalShellConnector
from anytool.grounding.core.tool import BaseTool
from anytool.grounding.core.security.policies import SecurityPolicyManager
from anytool.llm import LLMClient
from anytool.utils.logging import Logger

logger = Logger.get_logger(__name__)


class ShellSession(BaseSession):
    """Shell backend session exposing a single ``ShellAgentTool``."""

    backend_type = BackendType.SHELL

    def __init__(
        self,
        connector: Union[ShellConnector, LocalShellConnector],
        *,
        session_id: str,
        security_manager: SecurityPolicyManager = None,
        default_working_dir: str = None,
        default_env: dict = None,
        default_conda_env: str = None
    ):
        """Store the connector and the defaults handed to the session's tool.

        Args:
            connector: Connector used to run scripts.
            session_id: Identifier passed to the base session.
            security_manager: Optional security policy manager.
            default_working_dir: Default working directory for executions.
            default_env: Default environment variables; ``{}`` when omitted.
            default_conda_env: Default conda environment name.
        """
        super().__init__(
            connector=connector,
            session_id=session_id,
            backend_type=BackendType.SHELL,
        )
        self.security_manager = security_manager
        self.default_working_dir = default_working_dir
        self.default_env = default_env or {}
        self.default_conda_env = default_conda_env

    async def initialize(self):
        """Create the session's tool list and return the tool names.

        Returns:
            ``{"tools": [...]}`` listing the name of each tool in ``self.tools``.
        """
        agent_tool = ShellAgentTool(
            self,
            security_manager=self.security_manager,
            default_working_dir=self.default_working_dir,
            default_env=self.default_env,
            default_conda_env=self.default_conda_env
        )
        self.tools = [agent_tool]
        return {"tools": [tool.name for tool in self.tools]}

class PythonScriptTool(BaseTool):
    """Internal tool that runs Python code through the session's connector."""

    _name = "_python_exec"
    _description = "Internal helper: run python code."

    def __init__(self, session: "ShellSession", default_working_dir: str = None, default_env: dict = None, default_conda_env: str = None):
        """Remember the session and the per-call fallback values."""
        self._session = session
        self._default_working_dir = default_working_dir
        self._default_env = default_env or {}
        self._default_conda_env = default_conda_env
        super().__init__()

    async def _arun(self, code: str, timeout: int = 90, working_dir: str | None = None, env: dict | None = None, conda_env: str | None = None):
        """Run ``code`` via ``connector.run_python_script``.

        Falsy ``working_dir``/``conda_env`` fall back to the stored defaults.
        ``env`` is layered on top of the default environment, and ``None`` is
        passed when the merged environment is empty.
        """
        merged_env = dict(self._default_env)
        merged_env.update(env or {})  # call-specific variables win over defaults

        run_dir = working_dir if working_dir else self._default_working_dir
        run_conda_env = conda_env if conda_env else self._default_conda_env

        return await self._session.connector.run_python_script(
            code,
            timeout=timeout,
            working_dir=run_dir,
            env=merged_env or None,
            conda_env=run_conda_env
        )

class BashScriptTool(BaseTool):
    """Internal tool that runs a Bash script through the session's connector."""

    _name = "_bash_exec"
    _description = "Internal helper: run bash script."

    def __init__(self, session: "ShellSession", default_working_dir: str = None, default_env: dict = None, default_conda_env: str = None):
        """Remember the session and the per-call fallback values."""
        self._session = session
        self._default_working_dir = default_working_dir
        self._default_env = default_env or {}
        self._default_conda_env = default_conda_env
        super().__init__()

    async def _arun(self, script: str, timeout: int = 30, working_dir: str | None = None, env: dict | None = None, conda_env: str | None = None):
        """Run ``script`` via ``connector.run_bash_script``.

        Falsy ``working_dir``/``conda_env`` fall back to the stored defaults.
        ``env`` is layered on top of the default environment, and ``None`` is
        passed when the merged environment is empty.
        """
        merged_env = dict(self._default_env)
        merged_env.update(env or {})  # call-specific variables win over defaults

        run_dir = working_dir if working_dir else self._default_working_dir
        run_conda_env = conda_env if conda_env else self._default_conda_env

        return await self._session.connector.run_bash_script(
            script,
            timeout=timeout,
            working_dir=run_dir,
            env=merged_env or None,
            conda_env=run_conda_env
        )

class ShellAgentTool(BaseTool):
    _name = "shell_agent"
    _description = """Execute commands or scripts directly in the computer's terminal. 
This tool uses an internal agent that will write and run Python or Bash code to accomplish tasks or inspect the current system state. The internal agent will automatically retry and fix errors when possible.

Use this tool when you need to:
- Execute any terminal-based task that requires code
- Check the current environment (files, processes, system info)
- Run calculations or data processing
- Install packages or modify system settings

The tool will keep trying until the task succeeds or determines it cannot be completed."""
    
    backend_type = BackendType.SHELL
    _CODE_RGX = re.compile(
        r"```(?P<lang>python|py|bash|shell|sh)[^\n]*\n(?P<code>.*?)```",
        re.S | re.I,
    )

    def __init__(
        self, 
        session: "ShellSession", 
        client_password: str = "", 
        max_steps: int = 5,
        security_manager: SecurityPolicyManager = None,
        default_working_dir: str = None,
        default_env: dict = None,
        default_conda_env: str = None
    ):
        """Set up the agent: LLM client, step budget and the two script helpers.

        The working-directory / env / conda defaults are stored on the agent
        and also handed to the Python and Bash helper tools, so every script
        the agent runs inherits them.
        """
        self._session = session
        self._llm = LLMClient()
        self.client_password = client_password
        self.max_steps = max_steps
        # Filled in on first use by _get_system_info().
        self._system_info = None
        self.security_manager = security_manager

        self._default_working_dir = default_working_dir
        self._default_env = default_env or {}
        self._default_conda_env = default_conda_env

        # Both helpers receive exactly the same default execution context.
        helper_defaults = {
            "default_working_dir": default_working_dir,
            "default_env": default_env,
            "default_conda_env": default_conda_env,
        }
        self._py_tool = PythonScriptTool(session, **helper_defaults)
        self._bash_tool = BashScriptTool(session, **helper_defaults)
        super().__init__()

    async def _get_system_info(self):
        """
        Get system information for shell agent (cached after the first call).

        First tries to get comprehensive info over HTTP via ``SystemInfoClient``;
        this step is skipped when the connector exposes no ``base_url``.
        If that step is unavailable, raises, or returns nothing, falls back to
        simple bash commands (``uname -s`` / ``whoami``). If the bash fallback
        fails as well, hard-coded defaults are used.

        Returns:
            Dict with at least 'platform' and 'username' keys
        """
        if self._system_info is not None:
            return self._system_info

        try:
            info = None

            # Connectors without an HTTP endpoint have base_url missing or None;
            # in that case go straight to the bash commands.
            base_url = getattr(self._session.connector, "base_url", None)
            if base_url:
                try:
                    from anytool.platform import SystemInfoClient

                    # Create temporary client
                    async with SystemInfoClient(base_url=base_url, timeout=5) as client:
                        info = await client.get_system_info(use_cache=False)
                except ImportError:
                    logger.debug("SystemInfoClient not available, using bash commands")
                except Exception as e:
                    # An HTTP failure must not skip the bash fallback below.
                    logger.debug(f"System info request failed: {e}, using bash commands")
                    info = None

            if info:
                # Use comprehensive info from server
                self._system_info = {
                    "platform": info.get("system", "Linux"),
                    "username": info.get("username", "user"),
                    "machine": info.get("machine"),
                    "release": info.get("release"),
                    "full_info": info  # Keep full info for reference
                }
                logger.debug(f"Got system info from server: {info.get('system')}")
                return self._system_info

            # Fallback: use simple bash commands (original method)
            platform_result = await self._session.connector.run_bash_script("uname -s", timeout=5)
            username_result = await self._session.connector.run_bash_script("whoami", timeout=5)

            platform = self._extract_output(platform_result).strip()
            username = self._extract_output(username_result).strip()

            self._system_info = {
                "platform": platform,
                "username": username
            }
            logger.debug(f"Got system info from bash: {platform}")

        except Exception as e:
            logger.warning(f"Failed to get system info: {e}, using defaults")
            self._system_info = {"platform": "Linux", "username": "user"}

        return self._system_info

    async def _arun(self, task: str, timeout: int = 300):
        from anytool.grounding.core.types import ToolResult, ToolStatus
        
        sys_info = await self._get_system_info()
        conversation_history = []
        iteration = 0
        last_error = None
        
        # record the code history
        code_history = []
        
        # Build environment context
        env_context = []
        if self._default_working_dir:
            env_context.append(f"Working Directory: {self._default_working_dir}")
        if self._default_conda_env:
            env_context.append(f"Conda Environment: {self._default_conda_env}")
        if self._default_env:
            env_vars = ", ".join([f"{k}={v}" for k, v in list(self._default_env.items())[:3]])
            if len(self._default_env) > 3:
                env_vars += f", ... (+{len(self._default_env)-3} more)"
            env_context.append(f"Custom Environment Variables: {env_vars}")
        
        env_section = "\n".join([f"# {ctx}" for ctx in env_context]) if env_context else ""
        
        SHELL_AGENT_SYSTEM_PROMPT = f"""You are an expert system administrator and programmer focused on executing tasks efficiently.

# System: {sys_info["platform"]}, User: {sys_info["username"]}
{env_section}

# Your task: {task}

# IMPORTANT: You MUST provide exactly ONE code block in EVERY response
# Either ```bash or ```python - never respond without code

# Available actions:
1. Execute bash commands: ```bash <commands>```
2. Write Python code: ```python <code>```

# Rules:
- ALWAYS include a code block in your response
- Write EXACTLY ONE code block per response
- If you need to understand the current environment, start with bash commands like: pwd, ls, ps, df, etc.
- If you get errors, analyze and fix them in the next iteration
- For sudo: use 'echo {self.client_password} | sudo -S <command>'
- The environment (working directory, conda env) is managed automatically

# CRITICAL: Avoid quote escaping errors in bash:
- For complex string operations (JSON, multi-line text, special chars): ALWAYS use Python with heredoc
- Good: ```python <your code>```
- Bad: bash commands with nested quotes like: echo "$(cat 'file' | grep "pattern")"
- When reading/writing files with complex content: prefer Python over bash
- When processing JSON: ALWAYS use Python's json module, never bash string manipulation

# Before executing, check if task output already exists:
- Use 'ls -la <directory>' to check for existing files
- If files exist, read and verify them first before recreating
- Avoid redundant work - reuse existing valid outputs

# Task completion marking:
When you believe the task is COMPLETED, end your response with:
[TASK_COMPLETED: brief explanation of what was accomplished]

When you encounter an UNRECOVERABLE error that you cannot fix, end your response with:
[TASK_FAILED: brief explanation of why it cannot be completed]"""

        conversation_history.append({"role": "system", "content": SHELL_AGENT_SYSTEM_PROMPT})
        
        no_code_counter = 0
        final_message = ""
        
        while iteration < self.max_steps:
            iteration += 1
            
            logger.info(f"[ShellAgent] Step {iteration}/{self.max_steps}: Processing task")
            
            try:           
                messages_text = LLMClient.format_messages_to_text(conversation_history)
                response = await self._llm.complete(messages_text)

                assistant_content = response["message"]["content"]
                logger.debug(f"[ShellAgent] Step {iteration} LLM response: {assistant_content[:200]}...")

                # extract and execute the code, and track the code block
                code_info, execution_result = await self._execute_code_from_response(assistant_content)
                if code_info:
                    code_history.append(code_info)
                
                logger.info(f"[ShellAgent] Step {iteration} execution result: {execution_result[:100]}...")
                if execution_result == "ERROR: No valid code block found":
                    no_code_counter += 1
                    if no_code_counter >= 3:
                        final_message = f"Task failed after {iteration} steps: LLM failed to provide code blocks repeatedly"
                        return ToolResult(
                            status=ToolStatus.ERROR,
                            content=final_message,
                            metadata={"tool": self._name, "code_history": code_history}
                        )
                else:
                    no_code_counter = 0
                
                completion_status = self._check_task_status(assistant_content, execution_result, last_error)
                
                if completion_status["completed"]:
                    content_parts = [f"Task completed successfully after {iteration} steps"]
                    content_parts.append(f"\n{'='*60}")
                    content_parts.append(f"\nFinal Result:")
                    content_parts.append(execution_result)
                    
                    if len(code_history) > 1:
                        content_parts.append(f"\n{'='*60}")
                        content_parts.append(f"\nExecution Summary ({len(code_history)} steps):")
                        for i, code_info in enumerate(code_history, 1):
                            lang = code_info.get("language", "unknown")
                            output = code_info.get("output", "")
                            output_preview = output[:200].replace('\n', ' ')
                            if len(output) > 200:
                                output_preview += "..."
                            content_parts.append(f"\n  Step {i} [{lang}]: {output_preview}")
                    
                    content_parts.append(f"\n{'='*60}")
                    content_parts.append(f"\nSummary: {completion_status['reason']}")
                    
                    final_message = "\n".join(content_parts)
                    return ToolResult(
                        status=ToolStatus.SUCCESS,
                        content=final_message,
                        metadata={"tool": self._name, "code_history": code_history}
                    )
                elif completion_status["failed"]:
                    final_message = f"Task failed after {iteration} steps: {completion_status['reason']}\nLast result: {execution_result}"
                    return ToolResult(
                        status=ToolStatus.ERROR,
                        content=final_message,
                        metadata={"tool": self._name, "code_history": code_history}
                    )

                feedback = self._generate_feedback(execution_result, iteration, last_error)
                
                conversation_history.extend([
                    {"role": "assistant", "content": assistant_content},
                    {"role": "user", "content": feedback}
                ])
                
                last_error = execution_result if "ERROR" in execution_result else None
                
            except Exception as e:
                final_message = f"Tool execution failed at step {iteration}: {str(e)}"
                return ToolResult(
                    status=ToolStatus.ERROR,
                    content=final_message,
                    metadata={"tool": self._name, "code_history": code_history}
                )
        
        final_message = f"Reached maximum steps ({self.max_steps}). Task may be too complex or impossible."
        return ToolResult(
            status=ToolStatus.ERROR,
            content=final_message,
            metadata={"tool": self._name, "code_history": code_history}
        )

    async def _execute_code_from_response(self, response: str):
        """
        execute the code and track the code block
        
        Returns:
            Tuple[Optional[Dict], str]: (code_info, execution_result)
            - code_info: {"lang": "python/bash", "code": "...", "status": "success/error"}
            - execution_result: the execution result string
        """
        matches = list(self._CODE_RGX.finditer(response))
        if not matches:
            return None, "ERROR: No valid code block found"
        
        lang, code = matches[0]["lang"].lower(), matches[0]["code"].strip()
        
        # standardize the language name
        lang_normalized = "python" if lang in ["python", "py"] else "bash"
        
        code_info = {
            "lang": lang_normalized,
            "code": code,
        }

        # Security check is only done at the Connector layer to avoid duplicate prompts
        
        try:
            if lang in ["python", "py"]:
                helper = self._py_tool
                result = await helper._arun(code)
            elif lang in ["bash", "shell", "sh"]:
                helper = self._bash_tool
                result = await helper._arun(code)
            else:
                execution_result = f"ERROR: Unsupported language: {lang}"
                code_info["status"] = "error"
                return code_info, execution_result
            
            execution_result = self._extract_output(result)
            code_info["status"] = "success" if "ERROR" not in execution_result else "error"
            return code_info, execution_result
            
        except Exception as e:
            execution_result = f"EXECUTION ERROR: {str(e)}"
            code_info["status"] = "error"
            return code_info, execution_result

    def _generate_feedback(self, result: str, iteration: int, last_error: str) -> str:
        feedback = f"Step {iteration} result:\n{result}\n\n"
        
        if "ERROR" in result:
            if last_error and last_error == result:
                feedback += "Same error as previous step. Try a different approach.\n"
            else:
                feedback += "Error occurred. Analyze the error and fix it.\n"
        else:
            feedback += "Execution successful. Continue to next step if needed.\n"
        
        feedback += "\nWhat's your next action? (Remember: provide exactly ONE code block)"
        return feedback

    def _extract_output(self, result):
        if isinstance(result, dict):
            # Check for execution errors
            stderr = result.get("error") or result.get("stderr") or ""
            returncode = result.get("returncode", 0)
            stdout = result.get("content") or result.get("output") or result.get("stdout") or ""
            
            # If there's a non-zero return code or stderr with actual errors, report it
            if returncode != 0 or (stderr and len(stderr.strip()) > 0):
                error_msg = f"EXECUTION ERROR (exit code {returncode}):\n"
                if stderr:
                    error_msg += f"stderr: {stderr}\n"
                if stdout:
                    error_msg += f"stdout: {stdout}"
                return error_msg
            
            return stdout or str(result)
        return str(result)

    def _check_task_status(self, response: str, execution_result: str, last_error: str) -> dict:
        if "[TASK_COMPLETED:" in response:
            reason = response.split("[TASK_COMPLETED:")[1].split("]")[0].strip()
            return {"completed": True, "failed": False, "reason": reason}

        if "[TASK_FAILED:" in response:
            reason = response.split("[TASK_FAILED:")[1].split("]")[0].strip()
            return {"completed": False, "failed": True, "reason": reason}

        # Extended error pattern detection
        error_patterns = [
            "ERROR:",
            "EXECUTION ERROR:",
            "CommandNotFoundError",
            "Traceback (most recent call last)",
            "Exception:",
            "PermissionError",
            "FileNotFoundError",
            "SyntaxError:",
            "ImportError:",
            "ModuleNotFoundError",
            "No such file or directory",
            "command not found",
        ]
        
        has_error = any(pattern in execution_result for pattern in error_patterns)
        
        if has_error:
            if last_error and last_error == execution_result:
                return {"completed": False, "failed": True, "reason": "Same error repeated - unable to resolve"}
            return {"completed": False, "failed": False, "reason": "Execution error occurred"}

        return {"completed": False, "failed": False, "reason": "Task in progress"}

================================================
FILE: anytool/grounding/backends/shell/transport/connector.py
================================================
import asyncio
from typing import Any, Optional, Dict

from anytool.grounding.core.transport.connectors import AioHttpConnector
from anytool.grounding.core.security import SecurityPolicyManager
from anytool.utils.logging import Logger

logger = Logger.get_logger(__name__)


class ShellConnector(AioHttpConnector):
    """
    Shell backend HTTP connector
    Basic routes:
      POST /run_python      {"code": str}
      POST /run_bash_script {"script": str, "timeout": int, "working_dir": str | None}
    """

    def __init__(
        self,
        vm_ip: str,
        port: int = 5000,
        *,
        retry_times: int = 3,
        retry_interval: float = 5,
        security_manager: "SecurityPolicyManager | None" = None,
    ) -> None:
        """Point the connector at ``http://<vm_ip>:<port>`` and store retry settings.

        Args:
            vm_ip: Host name or IP address of the shell server.
            port: TCP port of the shell server.
            retry_times: Maximum number of attempts made by ``_retry_invoke``.
            retry_interval: Seconds to sleep between attempts.
            security_manager: Optional policy manager consulted before a
                script is sent for execution.
        """
        super().__init__(f"http://{vm_ip}:{port}")
        self._security_manager = security_manager
        self.retry_interval = retry_interval
        self.retry_times = retry_times

    async def _retry_invoke(
        self, 
        name: str, 
        payload: Dict[str, Any], 
        script_timeout: int,
        *,
        break_on_timeout: bool = False
    ):
        """
        Call ``self.invoke`` up to ``self.retry_times`` times.

        Args:
            name: RPC method name
            payload: Request payload; ``{"timeout": script_timeout}`` is merged
                into it before every attempt
            script_timeout: Script execution timeout
            break_on_timeout: When True, the first asyncio.TimeoutError aborts
                immediately instead of being retried (default False)

        Returns:
            Server response result

        Raises:
            RuntimeError: On timeout when ``break_on_timeout`` is set, or when
                no attempt was made and no exception was recorded
            Exception: Last exception thrown after all retries fail
        """
        last_exc: Exception | None = None
        # HTTP request timeout should be longer than script execution timeout, leaving buffer time
        # NOTE(review): this value is computed but not passed to invoke() here - confirm intent.
        http_timeout = script_timeout + 60

        max_attempts = self.retry_times
        for attempt in range(1, max_attempts + 1):
            try:
                # Pass timeout parameter to server
                result = await self.invoke(name, payload | {"timeout": script_timeout})
                logger.info("%s executed successfully (attempt %d/%d)", name, attempt, self.retry_times)
                return result
            except Exception as exc:
                timed_out = isinstance(exc, asyncio.TimeoutError)

                # Timeout exception usually does not need to be retried (script execution time too long)
                if timed_out and break_on_timeout:
                    logger.error("%s timed out after %d seconds, aborting retry", name, script_timeout)
                    raise RuntimeError(
                        f"Script execution timed out after {script_timeout} seconds"
                    ) from exc

                last_exc = exc
                if attempt == self.retry_times:
                    # Out of attempts: skip the warning and the sleep.
                    break

                if timed_out:
                    logger.warning(
                        "%s timed out (attempt %d/%d), retrying in %.1f seconds...", 
                        name, attempt, self.retry_times, self.retry_interval
                    )
                else:
                    logger.warning(
                        "%s failed (attempt %d/%d): %s, retrying in %.1f seconds...", 
                        name, attempt, self.retry_times, exc, self.retry_interval
                    )
                await asyncio.sleep(self.retry_interval)

        error_msg = f"{name} failed after {self.retry_times} retries"
        logger.error(error_msg)
        raise last_exc or RuntimeError(error_msg)

    async def run_python_script(
        self, 
        code: str, 
        *, 
        timeout: int = 90,
        working_dir: Optional[str] = None,
        env: Optional[Dict[str, str]] = None,
        conda_env: Optional[str] = None
    ) -> Any:
        """
        Send Python code to the server's ``POST /run_python`` route.

        Args:
            code: Python code string
            timeout: Execution timeout in seconds (default 90 seconds)
            working_dir: Working directory for script execution (optional)
            env: Environment variables for script execution (optional)
            conda_env: Conda environment name to activate (optional)

        Returns:
            Server response result

        Raises:
            PermissionError: Security policy blocked execution
            RuntimeError: The request timed out (timeouts are not retried)
            Exception: Last error raised once all retry attempts have failed
        """
        policy = self._security_manager
        if policy:
            from anytool.grounding.core.types import BackendType
            if not await policy.check_command_allowed(BackendType.SHELL, code):
                logger.error("SecurityPolicy blocked python code execution")
                raise PermissionError("SecurityPolicy: python code execution blocked")

        request_body = {"code": code, "working_dir": working_dir, "env": env, "conda_env": conda_env}

        # Optional log fragments; only the env variable names are logged, not values.
        cwd_note = f", working_dir={working_dir}" if working_dir else ""
        env_note = f", env={list(env.keys())}" if env else ""
        conda_note = f", conda_env={conda_env}" if conda_env else ""
        logger.info(
            "Executing python script with timeout=%d seconds%s%s%s",
            timeout,
            cwd_note,
            env_note,
            conda_note,
        )

        # Python script timed out, exit immediately without retry (timeout usually means script logic problem)
        return await self._retry_invoke("POST /run_python", request_body, timeout, break_on_timeout=True)

    async def run_bash_script(
        self,
        script: str,
        *,
        timeout: int = 90,
        working_dir: Optional[str] = None,
        env: Optional[Dict[str, str]] = None,
        conda_env: Optional[str] = None
    ) -> Any:
        """
        Send a Bash script to the server's ``POST /run_bash_script`` route.

        Args:
            script: Bash script content (can be multi-line)
            timeout: Execution timeout in seconds (default 90 seconds)
            working_dir: Working directory for script execution (optional)
            env: Environment variables for script execution (optional)
            conda_env: Conda environment name to activate (optional)

        Returns:
            Server response result; when it is a dict containing
            ``returncode``, that code is logged.

        Raises:
            PermissionError: Security policy blocked execution
            RuntimeError: The request timed out (timeouts are not retried)
            Exception: Last error raised once all retry attempts have failed
        """
        policy = self._security_manager
        if policy:
            from anytool.grounding.core.types import BackendType
            if not await policy.check_command_allowed(BackendType.SHELL, script):
                logger.error("SecurityPolicy blocked bash script execution")
                raise PermissionError("SecurityPolicy: bash script execution blocked")

        request_body = {"script": script, "working_dir": working_dir, "env": env, "conda_env": conda_env}

        # Optional log fragments; only the env variable names are logged, not values.
        cwd_note = f", working_dir={working_dir}" if working_dir else ""
        env_note = f", env={list(env.keys())}" if env else ""
        conda_note = f", conda_env={conda_env}" if conda_env else ""
        logger.info(
            "Executing bash script with timeout=%d seconds%s%s%s", 
            timeout,
            cwd_note,
            env_note,
            conda_note,
        )

        # Bash script timed out, exit immediately without retry (timeout usually means script logic problem)
        response = await self._retry_invoke("POST /run_bash_script", request_body, timeout, break_on_timeout=True)

        # Record execution result
        if isinstance(response, dict) and "returncode" in response:
            logger.info("Bash script executed with return code: %d", response.get("returncode", -1))

        return response

================================================
FILE: anytool/grounding/backends/shell/transport/local_connector.py
================================================
"""
Local Shell Connector — execute Python / Bash scripts directly via subprocess.

This connector has the **same public API** as ShellConnector (HTTP version)
but runs everything in-process, removing the need for a local_server.

Return format is kept identical so that ShellSession / ShellAgentTool
work without any changes.
"""

import asyncio
import os
import platform
import tempfile
import uuid
from typing import Any, Optional, Dict

from anytool.grounding.core.transport.connectors.base import BaseConnector
from anytool.grounding.core.transport.task_managers.noop import NoOpConnectionManager
from anytool.grounding.core.security import SecurityPolicyManager
from anytool.utils.logging import Logger

logger = Logger.get_logger(__name__)

platform_name = platform.system()


# ---------------------------------------------------------------------------
# Conda helpers (mirrored from local_server/main.py)
# ---------------------------------------------------------------------------

def _get_conda_activation_prefix(conda_env: str | None) -> str:
    """Generate platform-specific conda activation prefix."""
    if not conda_env:
        return ""
    if platform_name == "Windows":
        conda_paths = [
            os.path.expandvars(r"%USERPROFILE%\miniconda3\Scripts\activate.bat"),
            os.path.expandvars(r"%USERPROFILE%\anaconda3\Scripts\activate.bat"),
            r"C:\ProgramData\Miniconda3\Scripts\activate.bat",
            r"C:\ProgramData\Anaconda3\Scripts\activate.bat",
        ]
        for p in conda_paths:
            if os.path.exists(p):
                return f'call "{p}" {conda_env} && '
        return f"conda activate {conda_env} && "
    else:
        conda_paths = [
            os.path.expanduser("~/miniconda3/etc/profile.d/conda.sh"),
            os.path.expanduser("~/anaconda3/etc/profile.d/conda.sh"),
            "/opt/conda/etc/profile.d/conda.sh",
            "/usr/local/miniconda3/etc/profile.d/conda.sh",
            "/usr/local/anaconda3/etc/profile.d/conda.sh",
        ]
        for p in conda_paths:
            if os.path.exists(p):
                return f'source "{p}" && conda activate {conda_env} && '
        return f"conda activate {conda_env} && "


def _wrap_script_with_conda(script: str, conda_env: str | None) -> str:
    """Wrap bash script with conda activation if needed."""
    if not conda_env:
        return script
    if platform_name == "Windows":
        prefix = _get_conda_activation_prefix(conda_env)
        return f"{prefix}{script}"
    else:
        conda_paths = [
            os.path.expanduser("~/miniconda3/etc/profile.d/conda.sh"),
            os.path.expanduser("~/anaconda3/etc/profile.d/conda.sh"),
            os.path.expanduser("~/opt/anaconda3/etc/profile.d/conda.sh"),
            "/opt/conda/etc/profile.d/conda.sh",
        ]
        conda_sh = None
        for p in conda_paths:
            if os.path.exists(p):
                conda_sh = p
                break
        if conda_sh:
            return (
                f'#!/bin/bash\n'
                f'if [ -f "{conda_sh}" ]; then\n'
                f'    . "{conda_sh}"\n'
                f'    conda activate {conda_env} 2>/dev/null || true\n'
                f'fi\n\n'
                f'{script}\n'
            )
        else:
            logger.warning(
                "Conda environment '%s' requested but conda not found. "
                "Executing with system Python.", conda_env
            )
            return script


class LocalShellConnector(BaseConnector[Any]):
    """
    Shell connector that runs scripts **locally** using asyncio subprocesses,
    bypassing the Flask local_server entirely.
    
    Public API is compatible with ``ShellConnector`` so that ``ShellSession``
    works without modification.
    """

    def __init__(
        self,
        *,
        retry_times: int = 3,
        retry_interval: float = 5,
        security_manager: "SecurityPolicyManager | None" = None,
    ) -> None:
        """Create a connector that executes scripts on the local machine.

        Args:
            retry_times: Stored as ``self.retry_times``.
            retry_interval: Stored as ``self.retry_interval``.
            security_manager: Optional policy manager consulted before any
                script is executed; ``None`` disables the check.
        """
        # Local execution has no transport, so a no-op connection manager
        # is handed to the base connector.
        connection_manager = NoOpConnectionManager()
        super().__init__(connection_manager)

        self._security_manager = security_manager
        self.retry_interval = retry_interval
        self.retry_times = retry_times

        # base_url stays None so that ShellSession._get_system_info uses its
        # bash-based detection instead of issuing HTTP requests.
        self.base_url: str | None = None

    # ------------------------------------------------------------------
    # connect / disconnect (mostly no-ops for local execution)
    # ------------------------------------------------------------------

    async def connect(self) -> None:
        """Mark the connector as ready; local mode has no server to reach.

        Calling this on an already-connected connector does nothing.
        """
        if not self._connected:
            await super().connect()
            logger.info("LocalShellConnector: ready (local mode, no server required)")

    # ------------------------------------------------------------------
    # Core execution helpers
    # ------------------------------------------------------------------

    async def _run_subprocess(
        self,
        cmd: list[str],
        *,
        timeout: int = 90,
        working_dir: str | None = None,
        env: dict[str, str] | None = None,
    ) -> Dict[str, Any]:
        """Run a command via asyncio subprocess and return a result dict
        matching the format returned by the local_server endpoints."""
        exec_env = os.environ.copy()
        if env:
            exec_env.update(env)

        cwd = working_dir or os.getcwd()

        try:
            proc = await asyncio.create_subprocess_exec(
                *cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
                cwd=cwd,
                env=exec_env,
            )
            stdout_b, stderr_b = await asyncio.wait_for(
                proc.communicate(), timeout=timeout
            )
            stdout = stdout_b.decode("utf-8", errors="replace") if stdout_b else ""
            stderr = stderr_b.decode("utf-8", errors="replace") if stderr_b else ""
            returncode = proc.returncode or 0

            return {
                "status": "success" if returncode == 0 else "error",
                "output": stdout,
                "content": stdout or "Code executed successfully (no output)",
                "error": stderr,
                "returncode": returncode,
            }
        except asyncio.TimeoutError:
            return {
                "status": "error",
                "output": f"Execution timed out after {timeout} seconds",
                "content": f"Execution timed out after {timeout} seconds",
                "error": "",
                "returncode": -1,
            }
        except Exception as e:
            return {
                "status": "error",
                "output": "",
                "content": "",
                "error": str(e),
                "returncode": -1,
            }

    async def _run_shell_command(
        self,
        shell_cmd: str,
        *,
        timeout: int = 90,
        working_dir: str | None = None,
        env: dict[str, str] | None = None,
    ) -> Dict[str, Any]:
        """Run a shell command string (used for conda-wrapped scripts)."""
        exec_env = os.environ.copy()
        if env:
            exec_env.update(env)

        cwd = working_dir or os.getcwd()

        try:
            proc = await asyncio.create_subprocess_shell(
                shell_cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.STDOUT,
                cwd=cwd,
                env=exec_env,
            )
            stdout_b, _ = await asyncio.wait_for(
                proc.communicate(), timeout=timeout
            )
            stdout = stdout_b.decode("utf-8", errors="replace") if stdout_b else ""
            returncode = proc.returncode or 0

            return {
                "status": "success" if returncode == 0 else "error",
                "output": stdout,
                "content": stdout or "Code executed successfully (no output)",
                "error": "",
                "returncode": returncode,
            }
        except asyncio.TimeoutError:
            return {
                "status": "error",
                "output": f"Script execution timed out after {timeout} seconds",
                "content": f"Script execution timed out after {timeout} seconds",
                "error": "",
                "returncode": -1,
            }
        except Exception as e:
            return {
                "status": "error",
                "output": "",
                "content": "",
                "error": str(e),
                "returncode": -1,
            }

    # ------------------------------------------------------------------
    # Public API (same signatures as ShellConnector)
    # ------------------------------------------------------------------

    async def run_python_script(
        self,
        code: str,
        *,
        timeout: int = 90,
        working_dir: Optional[str] = None,
        env: Optional[Dict[str, str]] = None,
        conda_env: Optional[str] = None,
    ) -> Any:
        """Execute a Python script locally.

        The code is written to a uniquely named temporary file, run with the
        platform's Python launcher (optionally after conda activation), and
        the temporary file is removed afterwards.

        Return format matches the server's ``/run_python`` endpoint.

        Args:
            code: Python source to execute.
            timeout: Seconds to wait for the script to finish.
            working_dir: Directory to run in.
            env: Extra environment variables for the child process.
            conda_env: Conda environment to activate before running.

        Raises:
            PermissionError: If the security manager rejects ``code``.
        """
        # Security check
        if self._security_manager:
            from anytool.grounding.core.types import BackendType
            allowed = await self._security_manager.check_command_allowed(
                BackendType.SHELL, code
            )
            if not allowed:
                logger.error("SecurityPolicy blocked python code execution")
                raise PermissionError("SecurityPolicy: python code execution blocked")

        # Write code to temp file (same as local_server)
        suffix = uuid.uuid4().hex
        if platform_name == "Windows":
            temp_filename = os.path.join(tempfile.gettempdir(), f"python_exec_{suffix}.py")
        else:
            temp_filename = f"/tmp/python_exec_{suffix}.py"

        try:
            # Python reads source files as UTF-8 by default, so write UTF-8
            # explicitly instead of relying on the locale encoding (which
            # would corrupt non-ASCII code on e.g. cp1252 systems).
            with open(temp_filename, "w", encoding="utf-8") as f:
                f.write(code)

            logger.info(
                "Executing python script locally with timeout=%d seconds%s%s%s",
                timeout,
                f", working_dir={working_dir}" if working_dir else "",
                f", env={list(env.keys())}" if env else "",
                f", conda_env={conda_env}" if conda_env else "",
            )

            python_cmd = "python" if platform_name == "Windows" else "python3"
            activation = _get_conda_activation_prefix(conda_env) if conda_env else ""

            if activation:
                # Activation is a shell snippet, so the command must go
                # through the shell.
                full_cmd = f'{activation}{python_cmd} "{temp_filename}"'
                return await self._run_shell_command(
                    full_cmd, timeout=timeout, working_dir=working_dir, env=env
                )

            # No conda (or no usable activation prefix): run directly.
            return await self._run_subprocess(
                [python_cmd, temp_filename],
                timeout=timeout,
                working_dir=working_dir,
                env=env,
            )

        finally:
            if os.path.exists(temp_filename):
                os.remove(temp_filename)

    async def run_bash_script(
        self,
        script: str,
        *,
        timeout: int = 90,
        working_dir: Optional[str] = None,
        env: Optional[Dict[str, str]] = None,
        conda_env: Optional[str] = None,
    ) -> Any:
        """Execute a Bash script locally.

        The script (optionally wrapped with conda activation) is written to a
        uniquely named temporary file, made executable, run with bash, and
        the temporary file is removed afterwards.

        Return format matches the server's ``/run_bash_script`` endpoint.

        Args:
            script: Bash source to execute.
            timeout: Seconds to wait for the script to finish.
            working_dir: Directory to run in.
            env: Extra environment variables for the child process.
            conda_env: Conda environment to activate before running.

        Raises:
            PermissionError: If the security manager rejects ``script``.
        """
        # Security check
        if self._security_manager:
            from anytool.grounding.core.types import BackendType
            allowed = await self._security_manager.check_command_allowed(
                BackendType.SHELL, script
            )
            if not allowed:
                logger.error("SecurityPolicy blocked bash script execution")
                raise PermissionError("SecurityPolicy: bash script execution blocked")

        # Wrap with conda if needed
        final_script = _wrap_script_with_conda(script, conda_env)

        # Write to temp file (same as local_server)
        suffix = uuid.uuid4().hex
        if platform_name == "Windows":
            temp_filename = os.path.join(tempfile.gettempdir(), f"bash_exec_{suffix}.sh")
        else:
            temp_filename = f"/tmp/bash_exec_{suffix}.sh"

        try:
            # newline="\n" stops Windows text mode from turning LF into CRLF,
            # which bash rejects; UTF-8 keeps non-ASCII script text intact
            # regardless of the locale encoding.
            with open(temp_filename, "w", encoding="utf-8", newline="\n") as f:
                f.write(final_script)
            os.chmod(temp_filename, 0o755)

            logger.info(
                "Executing bash script locally with timeout=%d seconds%s%s%s",
                timeout,
                f", working_dir={working_dir}" if working_dir else "",
                f", env={list(env.keys())}" if env else "",
                f", conda_env={conda_env}" if conda_env else "",
            )

            # On Windows rely on whichever bash is on PATH.
            bash_exe = "bash" if platform_name == "Windows" else "/bin/bash"
            return await self._run_subprocess(
                [bash_exe, temp_filename],
                timeout=timeout,
                working_dir=working_dir,
                env=env,
            )

        finally:
            if os.path.exists(temp_filename):
                os.unlink(temp_filename)

    # ------------------------------------------------------------------
    # BaseConnector abstract methods
    # ------------------------------------------------------------------

    async def invoke(self, name: str, params: dict[str, Any]) -> Any:
        """Route an endpoint-style name to the matching local runner.

        Names containing ``/RUN_PYTHON`` (case-insensitive) execute
        ``params["code"]`` as Python; names containing ``/RUN_BASH_SCRIPT``
        execute ``params["script"]`` with bash. ``timeout``, ``working_dir``,
        ``env`` and ``conda_env`` are forwarded from ``params``.

        Raises:
            NotImplementedError: For any other name.
        """
        route = name.strip().upper()

        # Pick the runner and the params key that carries its payload.
        if "/RUN_PYTHON" in route:
            runner, payload_key = self.run_python_script, "code"
        elif "/RUN_BASH_SCRIPT" in route:
            runner, payload_key = self.run_bash_script, "script"
        else:
            raise NotImplementedError(f"LocalShellConnector does not support: {name}")

        return await runner(
            params.get(payload_key, ""),
            timeout=params.get("timeout", 90),
            working_dir=params.get("working_dir"),
            env=params.get("env"),
            conda_env=params.get("conda_env"),
        )

    async def request(self, *args: Any, **kwargs: Any) -> Any:
        """Always raises: local mode has no HTTP endpoint to send requests to."""
        message = "LocalShellConnector does not support raw HTTP requests"
        raise NotImplementedError(message)


================================================
FILE: anytool/grounding/backends/web/__init__.py
================================================
from .provider import WebProvider
from .session import WebSession

__all__ = [
    "WebProvider",
    "WebSession"
]

================================================
FILE: anytool/grounding/backends/web/provider.py
================================================
from typing import Dict, Any
from anytool.grounding.core.types import BackendType, SessionConfig
from anytool.grounding.core.provider import Provider
from .session import WebSession
from anytool.utils.logging import Logger

logger = Logger.get_logger(__name__)


class WebProvider(Provider[WebSession]):
    """Provider for the Web backend, which hosts the knowledge-research session."""

    # Name of the session created automatically by initialize().
    DEFAULT_SID = BackendType.WEB.value

    def __init__(self, config: Dict[str, Any] = None):
        super().__init__(BackendType.WEB, config)

    async def initialize(self) -> None:
        """Create the default Web session once; later calls do nothing."""
        if self.is_initialized:
            return

        logger.info("Initializing Web provider (Knowledge Research)")
        default_config = SessionConfig(
            session_name=self.DEFAULT_SID,
            backend_type=BackendType.WEB,
            connection_params={}
        )
        await self.create_session(default_config)
        self.is_initialized = True

    async def create_session(self, session_config: SessionConfig) -> WebSession:
        """Return the session named in ``session_config``, creating it if needed.

        An existing session with the same name is reused (with a warning)
        rather than replaced.
        """
        session_name = session_config.session_name

        if session_name not in self._sessions:
            # New sessions connect and initialize themselves automatically.
            new_session = WebSession(
                session_id=session_name,
                config=session_config,
                auto_connect=True,
                auto_initialize=True
            )
            self._sessions[session_name] = new_session
            logger.info(f"Created Web session (Knowledge Research): {session_name}")
            return new_session

        logger.warning(f"Session {session_name} already exists, returning existing session")
        return self._sessions[session_name]

    async def close_session(self, session_name: str) -> None:
        """Disconnect and forget the named session; unknown names are ignored."""
        closing = self._sessions.pop(session_name, None)
        if not closing:
            return
        await closing.disconnect()
        logger.info(f"Closed Web session: {session_name}")

================================================
FILE: anytool/grounding/backends/web/session.py
================================================
import os
from typing import Dict, Any, Optional
from anytool.grounding.core.session import BaseSession
from anytool.grounding.core.types import BackendType, SessionConfig
from anytool.grounding.core.tool import BaseTool
from anytool.grounding.core.transport.connectors import BaseConnector
from anytool.llm import LLMClient
from anytool.utils.logging import Logger
from dotenv import load_dotenv

load_dotenv()
logger = Logger.get_logger(__name__)


try:
    from openai import AsyncOpenAI
    OPENAI_AVAILABLE = True
except ImportError:
    OPENAI_AVAILABLE = False


class WebConnector(BaseConnector):
    """Connector that talks to an OpenAI-compatible chat-completions endpoint.

    The ``AsyncOpenAI`` client is created in :meth:`connect` and closed in
    :meth:`disconnect`.
    """

    def __init__(self, api_key: str, base_url: str):
        """Store credentials; no client is created until :meth:`connect`.

        Args:
            api_key: API key for the endpoint. An empty value makes
                :meth:`connect` raise.
            base_url: Base URL of the OpenAI-compatible API.
        """
        # NOTE(review): BaseConnector.__init__ is not invoked here, as in the
        # original code; confirm the base class needs no initialization.
        self.api_key = api_key
        self.base_url = base_url
        self.client: Optional[AsyncOpenAI] = None
        self._connected = False

    async def connect(self) -> None:
        """Create the API client.

        Raises:
            RuntimeError: If the ``openai`` package is missing or no API key
                was provided.
        """
        if self._connected:
            return

        if not OPENAI_AVAILABLE:
            raise RuntimeError(
                "OpenAI library not available. Install with: pip install openai"
            )

        if not self.api_key:
            raise RuntimeError(
                "API key not provided. Set OPENROUTER_API_KEY environment variable "
                "or provide deep_research_api_key in config."
            )

        self.client = AsyncOpenAI(
            base_url=self.base_url,
            api_key=self.api_key
        )
        self._connected = True
        logger.info(f"Web connector connected to {self.base_url}")

    async def disconnect(self) -> None:
        """Close the API client and mark the connector as disconnected."""
        if not self._connected:
            return

        # Detach first so the connector is consistently "disconnected" even
        # if closing the client fails.
        client, self.client = self.client, None
        self._connected = False

        if client is not None:
            try:
                # Releases the client's underlying HTTP connection pool,
                # which would otherwise stay open until garbage collection.
                await client.close()
            except Exception as e:
                logger.warning(f"Error while closing Web connector client: {e}")

        logger.info("Web connector disconnected")

    @property
    def is_connected(self) -> bool:
        return self._connected

    async def invoke(self, name: str, params: dict) -> Any:
        """Run a named operation; only ``"chat_completion"`` is supported.

        Args:
            name: Operation name.
            params: Keyword arguments passed to ``chat.completions.create``.

        Raises:
            RuntimeError: If the client has not been connected.
            NotImplementedError: For any other operation name.
        """
        if name == "chat_completion":
            if not self.client:
                raise RuntimeError("Client not connected")
            return await self.client.chat.completions.create(**params)
        raise NotImplementedError(f"Unknown method: {name}")

    async def request(self, *args: Any, **kwargs: Any) -> Any:
        """Always raises: the Web backend is driven through :meth:`invoke`."""
        raise NotImplementedError("Web backend uses invoke() instead of request()")


class WebSession(BaseSession):
    """Session for the Web backend, exposing the deep-research tool."""

    backend_type = BackendType.WEB

    def __init__(
        self,
        *,
        session_id: str,
        config: SessionConfig,
        deep_research_api_key: Optional[str] = None,
        deep_research_base_url: str = "https://openrouter.ai/api/v1",
        auto_connect: bool = True,
        auto_initialize: bool = True
    ):
        """Build the session around a :class:`WebConnector`.

        The API key is taken from ``deep_research_api_key`` or, failing that,
        the ``OPENROUTER_API_KEY`` environment variable. When neither is set
        an empty key is passed on, which makes the connector raise on connect.
        """
        resolved_key = deep_research_api_key or os.getenv("OPENROUTER_API_KEY") or ""
        web_connector = WebConnector(
            api_key=resolved_key,
            base_url=deep_research_base_url
        )

        super().__init__(
            connector=web_connector,
            session_id=session_id,
            backend_type=BackendType.WEB,
            auto_connect=auto_connect,
            auto_initialize=auto_initialize
        )
        self.config = config

    @property
    def web_connector(self) -> WebConnector:
        """The session's connector, typed as :class:`WebConnector`."""
        return self.connector

    async def initialize(self) -> Dict[str, Any]:
        """Ensure the connector is connected and register the research tool.

        A session instantiated directly (as provider.create_session does) does
        not go through ``__aenter__``, so the auto-connect logic there never
        runs. Connecting explicitly here avoids DeepResearchTool finding
        ``self.web_connector.client`` still set to None.

        Returns:
            A dict with the registered tool names and the backend value.
        """
        if not self.is_connected:
            try:
                await self.connect()
            except Exception as e:
                logger.error(f"Failed to connect WebSession {self.session_id}: {e}")
                raise

        if self.tools:
            # Tools were registered by an earlier call; keep them as they are.
            logger.debug(f"Web session {self.session_id} already initialized, skipping")
        else:
            self.tools = [DeepResearchTool(session=self)]
            logger.info(f"Initialized Web session {self.session_id} with AI Deep Research tool")

        tool_names = [t.name for t in self.tools]
        return {
            "tools": tool_names,
            "backend": BackendType.WEB.value
        }


class DeepResearchTool(BaseTool):
    
    backend_type = BackendType.WEB
    _name = "deep_research_agent"
    _description = """Knowledge Research Tool - Primary tool for acquiring external knowledge

PURPOSE:
Acquires comprehensive knowledge from the web through deep research and analysis.
Powered by Perplexity AI's sonar-deep-research model, then post-processed to extract
actionable insights and concise summaries. The main tool for gathering information
beyond existing knowledge base.

WHEN TO USE:
- Information needed on professional/technical topics
- Research on technical problems, concepts, or implementations  
- Understanding of latest developments, trends, or news
- Comparison of different approaches, tools, or solutions
- Factual information, definitions, or explanations required
- Synthesis from multiple authoritative sources needed

HOW IT WORKS:
1. Conducts deep web search using Perplexity's sonar-deep-research
2. Analyzes and synthesizes information from multiple sources
3. Post-processes to distill knowledge-dense summary retaining critical details
4. Returns comprehensive summary ready for immediate use

RETURNS:
Knowledge-dense comprehensive summary (400-600 words) that:
- Retains important details and technical specifics
- Focuses on substantive knowledge without losing critical information
- Organized and structured for clarity
- Directly usable by agents for decision-making and task execution

NOT DESIGNED FOR:
- Tasks requiring browser interaction or UI manipulation
- Direct file downloads or web scraping operations
- Real-time system operations or executions

USAGE GUIDELINES:
- Frame clear, specific questions (e.g., "Explain the architecture of Transformer models")
- Specify context when needed (e.g., "Compare PostgreSQL vs MySQL for high-concurrency scenarios")
- Suitable for any knowledge or information acquisition needs
"""
    
    def __init__(
        self,
        session: WebSession
    ):
        """Bind the tool to its session and create the summarizing LLM client.

        Args:
            session: Web session whose connector performs the research call.
        """
        super().__init__()
        self._llm = LLMClient()
        self._session = session
        
    async def _arun(self, query: str) -> str:
        if not query:
            return "ERROR: Missing required parameter: query"
        
        try:
            # Step 1: Deep research
            logger.info(f"Start deep research: {query}")
            
            completion = await self._session.web_connector.client.chat.completions.create(
                model="perplexity/sonar-deep-research",
                messages=[{"role": "user", "content": query}]
            )
            
            full_answer = completion.choices[0].message.content
            logger.info(f"Research completed, length: {len(full_answer)} characters")
            
            # Step 2: Use LLMClient to generate summary and distill key points
            logger.info(f"Begin to distill key points...")
            
            SUMMARY_AGENT_PROMPT = f"""Please distill the following deep research results into a knowledge-dense summary. Requirements:

Provide a comprehensive yet concise summary (400-600 words):
- Focus on SUBSTANTIVE knowledge and key information
- Retain important details, technical specifics, and concrete facts
- Do NOT sacrifice critical information for brevity
- Organize information clearly and logically with proper structure
- Remove only redundancy and verbose explanations
- Include actionable insights and decision-relevant information
- Make it directly usable for task execution and decision-making

Output ONLY the summary text, no additional formatting or JSON structure needed.

Deep Research Results:
{full_answer}
"""
            
            summary_response = await self._llm.complete(SUMMARY_AGENT_PROMPT)
            summary = summary_response["message"]["content"].strip()
            
            logger.info(f"Summary generation completed")
            
            return summary
            
        except Exception as e:
            logger.error(f"Deep research failed: {e}")
            return f"ERROR: AI research failed: {e}"

================================================
FILE: anytool/grounding/core/exceptions.py
================================================
"""
Unified exception & error-code definitions for the grounding framework
"""
from enum import Enum, auto
from typing import Any, Dict


class ErrorCode(str, Enum):
    """Error codes carried by the framework's exceptions.

    Values are generated by ``auto()`` in declaration order, so inserting,
    removing or reordering members changes the value of every member that
    follows. Append new codes at the end of their group only if persisted or
    serialized values do not need to stay stable.

    NOTE(review): because of the ``str`` mix-in the generated values are
    presumably the strings ``"1"``, ``"2"``, ... rather than names; confirm
    before relying on ``.value`` in logs or JSON output.
    """

    # generic
    UNKNOWN = auto()
    CONFIG_INVALID = auto()

    # provider / session / connector
    PROVIDER_ERROR = auto()
    SESSION_NOT_FOUND = auto()

    # connection
    CONNECTION_FAILED = auto()
    CONNECTION_TIMEOUT = auto()

    # tool
    TOOL_NOT_FOUND = auto()
    TOOL_EXECUTION_FAIL = auto()
    AMBIGUOUS_TOOL = auto()


class GroundingError(Exception):
    """
    Base exception for the whole grounding framework.

    Parameters
    ----------
    message : str
        Human readable description of what went wrong.
    code : ErrorCode
        Error code classifying the failure; defaults to ``ErrorCode.UNKNOWN``.
    retryable : bool
        True when the caller may safely retry the operation automatically.
    context : Dict[str, Any]
        Arbitrary extra keyword arguments (e.g. tool_name, session_id) kept
        for logging / metrics.
    """

    __slots__ = ("message", "code", "retryable", "context")

    def __init__(
        self,
        message: str,
        *,
        code: ErrorCode = ErrorCode.UNKNOWN,
        retryable: bool = False,
        **context: Any,
    ):
        # The exception's args carry the same "[code] message" text that
        # __str__ produces.
        rendered = f"[{code}] {message}"
        super().__init__(rendered)

        self.message = message
        self.code = code
        self.retryable = retryable
        self.context = context

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict form suitable for structured logs or JSON."""
        payload: Dict[str, Any] = {}
        payload["code"] = self.code.value
        payload["message"] = self.message
        payload["retryable"] = self.retryable
        payload["context"] = self.context
        return payload

    def __str__(self) -> str:
        code, message = self.code, self.message
        return f"[{code}] {message}"

    def __repr__(self) -> str:
        code, message = self.code, self.message
        return f"GroundingError(code={code}, msg={message!r})"

================================================
FILE: anytool/grounding/core/grounding_client.py
================================================
import asyncio
import time
from collections import OrderedDict
from datetime import datetime
from typing import Any, Dict, List, Optional

from .types import BackendType, SessionConfig, SessionInfo, SessionStatus, ToolResult
from .exceptions import ErrorCode, GroundingError
from .tool import BaseTool
from .provider import Provider, ProviderRegistry
from .session import BaseSession
from .search_tools import SearchCoordinator
from anytool.config import GroundingConfig, get_config
from anytool.config.utils import get_config_value
from anytool.utils.logging import Logger
import importlib


class GroundingClient:
    """
    Global Entry, Facing Agent/Application, only concerned with Provider & Session
    """
    def __init__(self, config: Optional[GroundingConfig] = None, recording_manager=None) -> None:
        """
        Build the client: load config, register providers, and set up session/cache state.

        The statement order matters: the logger, config and registry must exist
        before providers are registered, and the SystemProvider is registered
        last because it receives this (by then fully populated) instance.

        Args:
            config: Grounding configuration; when falsy, ``get_config()`` supplies it.
            recording_manager: Optional object stored as-is; ``create_session``
                passes it to GUI sessions through ``connection_params``.
        """
        # Initialize logger first (needed by other initialization steps)
        self._logger = Logger.get_logger(__name__)
        
        self._config: GroundingConfig = config or get_config()
        self._registry: ProviderRegistry = ProviderRegistry()
        
        # Register providers from config (instantiation only; uses _config, _registry and _logger)
        self._register_providers_from_config()

        # Session bookkeeping: live session objects and their monitoring info, both keyed by session name
        self._sessions: Dict[str, BaseSession] = {}
        self._session_info: Dict[str, SessionInfo] = {}
        self._server_session_map: dict[tuple[BackendType, str], str] = {}             # (backend, server) -> session_name

        # Tool cache: cache scope -> (tools, time.time() timestamp), kept in LRU order.
        # TTL and max size fall back to 300 when the config does not provide them.
        self._tool_cache: "OrderedDict[str, tuple[List[BaseTool], float]]" = OrderedDict()
        self._tool_cache_ttl: int = get_config_value(self._config, "tool_cache_ttl", 300)
        self._tool_cache_maxsize: int = get_config_value(self._config, "tool_cache_maxsize", 300)

        # Concurrency control: _lock guards session state, _cache_lock guards the tool cache
        self._lock = asyncio.Lock()
        self._cache_lock = asyncio.Lock()

        # Tool search coordinator (created lazily by search_tools)
        self._search_coordinator: Optional[SearchCoordinator] = None
        
        # Recording manager (optional, for GUI intermediate step recording)
        self._recording_manager = recording_manager
        
        # Tool quality manager (None when disabled or when its setup failed)
        self._quality_manager = self._init_quality_manager()
        
        # Register SystemProvider last: it is constructed with this GroundingClient instance
        self._register_system_provider()
        
    def _register_providers_from_config(self) -> None:
            """
            Based on GroundingConfig.enabled_backends, register Provider instances to
            self._registry. Here only do *instantiation*, not await initialize(),
            to avoid blocking the event loop in the import stage; Provider will be lazily initialized when it is first used.
            
            Note: SystemProvider is skipped here and registered separately in _register_system_provider()
            because it requires a GroundingClient instance.
            """
            if not self._config.enabled_backends:
                self._logger.warning("No enabled_backends defined in config")
                return

            for item in self._config.enabled_backends:
                be_name: str | None = item.get("name")
                cls_path: str | None = item.get("provider_cls")
                if not (be_name and cls_path):
                    self._logger.warning("Invalid backend entry: %s", item)
                    continue

                backend = BackendType(be_name.lower())
                
                # Skip system backend - it will be registered separately
                if backend == BackendType.SYSTEM:
                    self._logger.debug("Skipping system backend in config registration (will be registered separately)")
                    continue
                
                if backend in self._registry.list():
                    continue        # Already registered

                # Dynamically import Provider class
                try:
                    module_path, _, cls_name = cls_path.rpartition(".")
                    module = importlib.import_module(module_path)
                    prov_cls = getattr(module, cls_name)
                except (ModuleNotFoundError, AttributeError) as e:
                    self._logger.error("Import provider failed: %s (%s)", cls_path, e)
                    continue

                backend_cfg = self._config.get_backend_config(be_name)
                provider: Provider = prov_cls(backend_cfg)
                self._registry.register(provider)
    
    def _register_system_provider(self) -> None:
        """
        Register SystemProvider separately because it requires GroundingClient instance.
        SystemProvider provides meta-level tools for querying system state (list providers, tools, etc.)
        and is always available regardless of configuration.
        """
        try:
            from .system import SystemProvider
            system_provider = SystemProvider(self)
            self._registry.register(system_provider)
            self._logger.debug("SystemProvider registered successfully")
        except Exception as e:
            self._logger.warning(f"Failed to register SystemProvider: {e}")
    
    def _init_quality_manager(self):
        """Initialize tool quality manager based on config."""
        try:
            # Check if quality tracking is enabled in config
            quality_config = getattr(self._config, 'tool_quality', None)
            if not quality_config or not getattr(quality_config, 'enabled', True):
                self._logger.debug("Tool quality tracking disabled")
                return None
            
            from .quality import ToolQualityManager, set_quality_manager
            from pathlib import Path
            
            cache_dir = getattr(quality_config, 'cache_dir', None)
            if cache_dir:
                cache_dir = Path(cache_dir)
            
            manager = ToolQualityManager(
                cache_dir=cache_dir,
                enable_persistence=getattr(quality_config, 'enable_persistence', True),
                auto_save=True,
                evolve_interval=getattr(quality_config, 'evolve_interval', 5),
            )
            
            # Set as global manager for BaseTool access
            set_quality_manager(manager)
            
            self._logger.info(
                f"ToolQualityManager initialized "
                f"(records={len(manager._records)})"
            )
            return manager
            
        except Exception as e:
            self._logger.warning(f"Failed to initialize ToolQualityManager: {e}")
            return None
    
    @property
    def quality_manager(self):
        """Return the tool quality manager created at construction (``None`` if none was created)."""
        return self._quality_manager
    
    # Quality API for Upper Layer
    def get_quality_report(self) -> Dict[str, Any]:
        """
        Get comprehensive tool quality report.
        """
        if not self._quality_manager:
            return {"status": "disabled", "message": "Quality tracking not enabled"}
        return self._quality_manager.get_quality_report()
    
    async def evolve_quality(self) -> Dict[str, Any]:
        """
        Run quality self-evolution cycle.
        
        This triggers:
        - Tool change detection
        - Description re-evaluation for updated tools
        - Adaptive quality weight computation
        
        Call this periodically or after tool set changes.
        """
        if not self._quality_manager:
            return {"status": "disabled"}
        
        # Get all tools
        all_tools = await self.list_tools()
        return await self._quality_manager.evolve(all_tools)
    
    def get_tool_insights(self, tool: BaseTool) -> Dict[str, Any]:
        """
        Get detailed quality insights for a specific tool.
        """
        if not self._quality_manager:
            return {"status": "disabled"}
        return self._quality_manager.get_tool_insights(tool)

    def register_provider(self, provider: Provider) -> None:
        """Add ``provider`` to this client's provider registry."""
        self._registry.register(provider)
    
    def get_provider(self, backend: BackendType) -> Provider:
        """Look up the provider registered for ``backend`` (lookup semantics are the registry's)."""
        return self._registry.get(backend)

    def list_providers(self) -> Dict[BackendType, Provider]:
        """Return the registry's mapping of backend type to registered provider."""
        return self._registry.list()
    
    @property
    def recording_manager(self):
        """Return the recording manager currently held by the client (may be ``None``)."""
        return self._recording_manager
    
    @recording_manager.setter
    def recording_manager(self, manager):
        """
        Set or replace the recording manager and log the update.

        This allows a coordinator to inject the recording manager after the
        GroundingClient has been created. Only sessions created afterwards see
        the new value, since it is read when a GUI session is created.
        """
        self._recording_manager = manager
        self._logger.info("GroundingClient: RecordingManager updated")
    
    async def initialize_all_providers(self) -> None:
        await asyncio.gather(*[provider.initialize() for provider in self._registry.list().values() if not provider.is_initialized])


    async def create_session(
        self,
        *,
        backend: BackendType,
        name: str | None = None,
        connection_params: Dict[str, Any] | None = None,
        server: str | None = None,
        **options,
    ) -> str:
        """
        Create and initialize Session, return "session_name" (external visible)
        name is auto generated when it's None: <backend>-<index>
        MCP backend needs to provide server
        """
        async with self._lock:
            # Check concurrent sessions limit
            max_sessions = get_config_value(self._config, "max_concurrent_sessions", 100)
            if len(self._sessions) >= max_sessions:
                raise GroundingError(f"Reached maximum session limit: {max_sessions}")

            # Session naming strategy
            if server:                                       # Only MCP will pass in server
                name = name or f"{backend.value}-{server}"
            else:
                name = name or backend.value                 # Other backends have a fixed 1 session
                
            if name in self._sessions:
                # Reuse existing session
                self._logger.warning("Session '%s' exists, reusing.", name)
                return name

        # Get Provider (initialize if first time)
        provider = self._registry.get(backend)
        if not provider.is_initialized:
            await provider.initialize()
            
        if backend == BackendType.MCP:
            if server is None:
                raise GroundingError("Must specify 'server' when creating MCP session")

        # Construct SessionConfig, pass to Provider to create
        connection_params = connection_params or {}
        if server:
            connection_params.setdefault("server", server)
        
        # Inject recording_manager for GUI backend (for intermediate step recording)
        if backend == BackendType.GUI and self._recording_manager is not None:
            connection_params.setdefault("recording_manager", self._recording_manager)

        sess_cfg = SessionConfig(
            session_name=name, # Use external visible name
            backend_type=backend,
            connection_params=connection_params,
            **options,
        )
        session_obj = await provider.create_session(sess_cfg)

        # Store session and monitoring info
        async with self._lock:
            self._sessions[name] = session_obj
            now = datetime.utcnow()
            self._session_info[name] = SessionInfo(
                session_name=name,
                backend_type=backend,
                status=SessionStatus.CONNECTED,
                created_at=now,
                last_activity=now,
            )
            if server:
                self._server_session_map[(backend, server)] = name

        self._logger.info("Session created: %s", name)
        return name
    
    def list_sessions(self) -> List[str]:
        return list(self._sessions.keys())

    async def close_session(self, name: str) -> None:
        async with self._lock:
            session = self._sessions.pop(name, None)
            info = self._session_info.pop(name, None)
            self._tool_cache.pop(name, None)

            for k, v in list(self._server_session_map.items()):
                if v == name:
                    self._server_session_map.pop(k)

        if not session:
            self._logger.warning("Session '%s' not found", name)
            return

        try:
            provider = self._registry.get(info.backend_type) if info else None
            if provider:
                await provider.close_session(name)
            else:
                # Fallback: if no provider, disconnect directly
                await session.disconnect()
        finally:
            self._logger.info("Session closed: %s", name)

    async def close_all_sessions(self) -> None:
        for sid in list(self._sessions.keys()):
            await self.close_session(sid)
            
    async def ensure_session(self, backend: BackendType, server: str | None = None) -> str:
        sid = backend.value if server is None else f"{backend.value}-{server}"
        if sid not in self._sessions:
            await self.create_session(backend=backend, name=sid, server=server)
        return sid
            
    def get_session_info(self, name: str) -> SessionInfo:
        """Get session monitoring info"""
        if name not in self._session_info:
            raise ErrorCode.SESSION_NOT_FOUND(name)
        return self._session_info[name]
    
    def get_session(self, name: str) -> BaseSession:
        """Get session"""
        if name not in self._sessions:
            raise ErrorCode.SESSION_NOT_FOUND(name)
        return self._sessions[name]
    
    
    async def _fetch_tools(
        self,
        backend: BackendType,
        *,
        session_name: str | None = None,
        use_cache: bool = False,
        bind_runtime_info: bool = True,  
    ) -> List[BaseTool]:
        """
        Fetch tools from provider.
        
        Args:
            backend: Backend type
            session_name: 
                - None: fetch all tools from all sessions of this backend
                - str: fetch tools from specific session
            use_cache: Whether to use cache
            bind_runtime_info: Whether to bind runtime info to tool instances
        """
        now = time.time()
        
        # Auto-generate cache_scope from parameters
        if session_name:
            cache_scope = session_name
        else:
            cache_scope = f"backend-{backend.value}"

        # Check cache
        if use_cache:
            async with self._cache_lock:
                if cache_scope in self._tool_cache:
                    tools, ts = self._tool_cache[cache_scope]
                    if now - ts < self._tool_cache_ttl:
                        self._tool_cache.move_to_end(cache_scope)
                        return tools

        provider = self._registry.get(backend)
        if not provider.is_initialized:
            await provider.initialize()

        tools = await provider.list_tools(session_name=session_name)

        if bind_runtime_info:
            # If session_name is specified, bind all tools to that session
            if session_name:
                server_name = None
                if backend == BackendType.MCP:
                    server_name = session_name.replace(f"{backend.value}-", "", 1)
                
                for tool in tools:
                    tool.bind_runtime_info(
                        backend=backend,
                        session_name=session_name,
                        server_name=server_name,
                        grounding_client=self,
                    )
            else:
                # No session_name specified - get tools from all sessions
                # For each backend, find the default/primary session
                # For Shell/Web/GUI: use the default session (backend.value)
                # For MCP: tools should already be bound by the provider
                default_session_name = None
                
                # Try to find an existing session for this backend
                for sid, info in self._session_info.items():
                    if info.backend_type == backend:
                        default_session_name = sid
                        break
                
                # Fallback: use backend default naming
                if not default_session_name:
                    default_session_name = backend.value
                
                server_name = None
                if backend == BackendType.MCP and default_session_name:
                    server_name = default_session_name.replace(f"{backend.value}-", "", 1)
                
                for tool in tools:
                    # Only bind if tool doesn't have runtime info already
                    # (some providers like MCP bind runtime info during list_tools)
                    if not tool.is_bound:
                        tool.bind_runtime_info(
                            backend=backend,
                            session_name=default_session_name,
                            server_name=server_name,
                            grounding_client=self,
                        )
                    elif not tool.runtime_info.grounding_client:
                        # Tool has runtime info but no grounding_client, add it
                        tool.bind_runtime_info(
                            backend=tool.runtime_info.backend,
                            session_name=tool.runtime_info.session_name,
                            server_name=tool.runtime_info.server_name,
                            grounding_client=self,
                        )

        # Save to cache
        if use_cache:
            async with self._cache_lock:
                self._tool_cache[cache_scope] = (tools, now)
                self._tool_cache.move_to_end(cache_scope)
                while len(self._tool_cache) > self._tool_cache_maxsize:
                    self._tool_cache.popitem(last=False)

        return tools

    async def list_tools(
        self,
        backend: BackendType | list[BackendType] | None = None,
        session_name: str | None = None,
        *,
        use_cache: bool = False,
    ) -> List[BaseTool]:
        """
        List tools from backend(s) or session.
        
        1. session_name is provided → return tools from that session
        2. backend is list → return tools from multiple backends
        3. backend is single → return tools from that backend
        4. backend is None → return tools from all backends
        
        Args:
            backend: Single backend, list of backends, or None for all
            session_name: Specific session name (overrides backend parameter)
            use_cache: Whether to use cache
            
        Returns:
            List of tools
        """
        # Session-level
        if session_name:                  
            if session_name not in self._sessions:
                raise ErrorCode.SESSION_NOT_FOUND(session_name)
            backend_type = self._session_info[session_name].backend_type
            return await self._fetch_tools(
                backend_type,
                session_name=session_name,
                use_cache=use_cache,
            )
        
        # Multiple backends
        if isinstance(backend, list):
            tools: List[BaseTool] = []
            for be in backend:
                backend_tools = await self._fetch_tools(
                    be,
                    session_name=None,  # Provider aggregates all sessions
                    use_cache=use_cache,
                )
                tools.extend(backend_tools)
            return tools
        
        # Single backend
        if backend is not None:
            return await self._fetch_tools(
                backend,
                session_name=None,
                use_cache=use_cache,
            )

        # All backends
        tools: List[BaseTool] = []
        for backend_type in self._registry.list().keys():
            backend_tools = await self._fetch_tools(
                backend_type,
                session_name=None,
                use_cache=use_cache,
            )
            tools.extend(backend_tools)
        return tools

    async def list_backend_tools(
        self, 
        backend: BackendType | list[BackendType] | None = None,
        use_cache: bool = False
    ) -> list[BaseTool]:
        """
        Backend-scoped convenience wrapper around ``list_tools``.

        Calls ``list_tools`` with ``session_name=None``, so ``backend`` may be a
        single backend, a list of backends, or ``None`` for all backends.
        """
        return await self.list_tools(backend=backend, session_name=None, use_cache=use_cache)

    async def list_session_tools(
        self, 
        session_name: str, 
        use_cache: bool = False
    ) -> list[BaseTool]:
        if session_name not in self._session_info:
            raise ErrorCode.SESSION_NOT_FOUND(session_name)
        backend = self._session_info[session_name].backend_type
        return await self.list_tools(backend, session_name, use_cache)

    async def list_all_backend_tools(
        self,
        use_cache: bool = False
    ) -> Dict[BackendType, list[BaseTool]]:
        """List static tools for every registered backend."""
        result = {}
        for backend_type in self.list_providers().keys():
            tools = await self.list_backend_tools(backend=backend_type, use_cache=use_cache)
            result[backend_type] = tools
        return result

    async def search_tools(
        self,
        task_description: str,
        *,
        backend: BackendType | list[BackendType] | None = None,
        session_name: str | None = None,
        max_tools: int | None = None,
        search_mode: str | None = None,
        use_cache: bool = True,
        llm_callable = None,
        enable_llm_filter: bool | None = None,
        llm_filter_threshold: int | None = None,
        enable_cache_persistence: bool | None = None,
        cache_dir: str | None = None,
    ) -> list[BaseTool]:
        """
        Search tools from backend(s) or session.
        
        Args:
            task_description: Task description for searching relevant tools
            backend: Backend type(s) to search
            session_name: Specific session to search
            max_tools: Maximum number of tools to return
            search_mode: Search mode ("semantic", "keyword", "hybrid")
            use_cache: Whether to use cached tool list
            llm_callable: LLM client for intelligent filtering
            enable_llm_filter: Whether to use LLM pre-filtering
            llm_filter_threshold: Threshold for applying LLM filter
            enable_cache_persistence: Whether to persist embeddings to disk. If None, uses config value.
            cache_dir: Directory for persistent cache. If None, uses config value or default.
        """
        candidate_tools = await self.list_tools(
            backend=backend,
            session_name=session_name,
            use_cache=use_cache,
        )
        
        if not candidate_tools:
            self._logger.warning("No candidate tools found for search")
            return []
        
        # lazy initialize SearchCoordinator (or recreate if parameters changed)
        if self._search_coordinator is None:
            # Get quality ranking settings from config
            quality_config = getattr(self._config, 'tool_quality', None)
            enable_quality_ranking = getattr(quality_config, 'enable_quality_ranking', True) if quality_config else True
            
            self._search_coordinator = SearchCoordinator(
                max_tools=max_tools,
                llm=llm_callable,
                enable_llm_filter=enable_llm_filter,
                llm_filter_threshold=llm_filter_threshold,
                enable_cache_persistence=enable_cache_persistence,
                cache_dir=cache_dir,
                quality_manager=self._quality_manager,
                enable_quality_ranking=enable_quality_ranking,
            )
        
        # execute search and sort
        try:
            filtered_tools = await self._search_coordinator._arun(
                task_prompt=task_description,
                candidate_tools=candidate_tools,
                max_tools=max_tools,
                mode=search_mode,
            )
            return filtered_tools
        except Exception as exc:
            self._logger.error(f"Tool search failed: {exc}")
            # fallback: return top N tools
            fallback_max = max_tools or self._config.tool_search.max_tools
            return candidate_tools[:fallback_max]
    
    def get_last_search_debug_info(self) -> Optional[Dict[str, Any]]:
        """Get debug info from the last tool search operation.
        
        Returns:
            Dict containing search debug info, or None if no search has been performed.
        """
        if self._search_coordinator is None:
            return None
        return self._search_coordinator.get_last_search_debug_info()
    
    async def get_tools_with_auto_search(
        self,
        *,
        task_description: str | None = None,
        backend: BackendType | list[BackendType] | None = None,
        session_name: str | None = None,
        max_tools: int | None = None,
        search_mode: str | None = None,
        use_cache: bool = True,
        llm_callable = None,
        enable_llm_filter: bool | None = None,
        llm_filter_threshold: int | None = None,
        enable_cache_persistence: bool | None = None,
        cache_dir: str | None = None,
    ) -> list[BaseTool]:
        """
        Intelligent tool retrieval: automatically decides whether to return all tools or trigger search.
        
        Logic:
        - If tool_count <= max_tools: return all tools directly
        - If tool_count > max_tools: trigger search and return top max_tools
        
        Args:
            task_description: Task description (required for search if triggered). 
                If None, search will not be triggered even if tool count exceeds max_tools.
            backend: Backend type(s) to query
            session_name: Specific session name
            max_tools: Maximum number of tools to return. Also acts as the threshold for triggering search.
                - None: Use value from config (default: 30)
            search_mode: Search mode ("semantic", "keyword", "hybrid")
            use_cache: Whether to use cache
            llm_callable: LLM client (for intelligent filtering)
            enable_llm_filter: Whether to use LLM for backend/server pre-filtering.
                - None: Use config default
                - False: Disable LLM filter, use tool-level search only
                - True: Enable LLM filter
            llm_filter_threshold: Only apply LLM filter when tool count > this threshold.
                - None: Use default (50)
                - N: Only apply LLM filter when > N tools
            enable_cache_persistence: Whether to persist embeddings to disk. If None, uses config value.
            cache_dir: Directory for persistent cache. If None, uses config value or default.
            
        Returns:
            List of tools (at most max_tools)
            
        Examples:
            # Scenario 1: Auto-detect whether search is needed
            tools = await gc.get_tools_with_auto_search(
                task_description="Create a flowchart",
                backend=BackendType.MCP
            )
            
            # Scenario 2: Custom max_tools
            tools = await gc.get_tools_with_auto_search(
                task_description="Edit file",
                backend=BackendType.SHELL,
                max_tools=30  # Return at most 30 tools
            )
            
            # Scenario 3: Disable search (return all tools regardless of count)
            tools = await gc.get_tools_with_auto_search(
                backend=BackendType.MCP  # No task_description = no search
            )
        """
        # Fetch all candidate tools
        all_tools = await self.list_tools(
            backend=backend,
            session_name=session_name,
            use_cache=use_cache,
        )
        
        if not all_tools:
            self._logger.warning("No tools found")
            return []
        
        # Determine max_tools from config if not provided
        if max_tools is None:
            max_tools = self._config.tool_search.max_tools
        
        # Decide whether search is needed
        tools_count = len(all_tools)
        need_search = tools_count > max_tools and task_description is not None
        
        if need_search:
            self._logger.info(
                f"Tool count ({tools_count}) > max_tools ({max_tools}), "
                f"triggering search to filter relevant tools..."
            )
            return await self.search_tools(
                task_description=task_description,
                backend=backend,
                session_name=session_name,
                max_tools=max_tools,
                search_mode=search_mode,
                use_cache=use_cache,
                llm_callable=llm_callable,
                enable_llm_filter=enable_llm_filter,
                llm_filter_threshold=llm_filter_threshold,
                enable_cache_persistence=enable_cache_persistence,
                cache_dir=cache_dir,
            )
        else:
            if task_description is None:
                self._logger.debug(
                    f"No task description provided, returning all {tools_count} tools"
                )
            else:
                self._logger.debug(
                    f"Tool count ({tools_count}) ≤ max_tools ({max_tools}), "
                    f"returning all tools without search"
                )
            return all_tools

    async def invoke_tool(
        self,
        tool: BaseTool | str,
        parameters: Dict[str, Any] | None = None,
        *,
        backend: BackendType | None = None,
        session_name: str | None = None,
        server: str | None = None,
        keep_session: bool = False,
        **kwargs
    ) -> ToolResult:
        """
        Universal tool invocation method.
        Supports multiple calling patterns:
        
        1. Using BaseTool instance with bound runtime info
        2. Using BaseTool instance with explicit backend/session
        3. Using tool name with automatic lookup
        4. Using tool name with explicit backend/session/server
        
        Args:
            tool: BaseTool instance or tool name string
            parameters: Tool parameters as dict
            backend: Backend type (optional for BaseTool with runtime_info)
            session_name: Session name (optional for BaseTool with runtime_info)
            server: Server name (for MCP, optional for BaseTool with runtime_info)
            keep_session: Whether to keep session alive after invocation
            **kwargs: Alternative parameter passing
        
        Returns:
            ToolResult
        
        Examples:
            # Pattern 1: Tool instance with runtime info (from list_tools)
            tools = await gc.list_tools()
            tool = next(t for t in tools if t.name == "read_file")
            result = await gc.invoke_tool(tool, {"path": "/tmp/a.txt"})
            
            # Pattern 2: Tool instance with explicit backend/session
            my_tool = MyTool()
            result = await gc.invoke_tool(
                my_tool, 
                {"arg": "value"}, 
                backend=BackendType.SHELL
            )
            
            # Pattern 3: Tool name with automatic lookup
            result = await gc.invoke_tool("read_file", {"path": "/tmp/a.txt"})
            
            # Pattern 4: Tool name with explicit backend/server
            result = await gc.invoke_tool(
                "read_file",
                {"path": "/tmp/a.txt"},
                backend=BackendType.MCP,
                server="filesystem"
            )
        """
        params = parameters or kwargs
        
        # BaseTool instance
        if isinstance(tool, BaseTool):
            tool_name = tool.schema.name
            
            # Try to use bound runtime info first
            if tool.is_bound and not (backend or session_name or server):
                # Use runtime info
                runtime_backend = tool.runtime_info.backend
                runtime_session = tool.runtime_info.session_name
                runtime_server = tool.runtime_info.server_name
            else:
                # Use provided or tool's default backend
                runtime_backend = backend or tool.backend_type
                runtime_session = session_name
                runtime_server = server
                
                if runtime_backend == BackendType.NOT_SET:
                    raise GroundingError(
                        f"Cannot invoke tool '{tool_name}': no backend specified. "
                        f"Either bind runtime info or provide backend parameter.",
                        code=ErrorCode.TOOL_EXECUTION_FAIL
                    )
    
        # Tool name string
        elif isinstance(tool, str):
            tool_name = tool
            
            # If explicit backend/session provided, use them
            if backend or session_name:
                runtime_session = session_name
                runtime_server = server
                
                # Infer backend: prefer explicit backend; otherwise get from session
                if backend is not None:
                    runtime_backend = backend
                else:
                    if runtime_session not in self._session_info:
                        raise ErrorCode.SESSION_NOT_FOUND(runtime_session)
                    runtime_backend = self._session_info[
                        runtime_session
                    ].backend_type
            else:
                # Auto-lookup: search for the tool
                all_tools = await self.list_tools(use_cache=True)
                matching = [t for t in all_tools if t.name == tool_name]
                
                if not matching:
                    raise GroundingError(
                        f"Tool '{tool_name}' not found",
                        code=ErrorCode.TOOL_NOT_FOUND
                    )
                
                if len(matching) > 1:
                    sources = [
                        f"{t.runtime_info.backend.value}/{t.runtime_info.session_name}" 
                        for t in matching if t.is_bound
                    ]
                    raise GroundingError(
                        f"Multiple tools named '{tool_name}' found in: {sources}. "
                        f"Please specify 'backend' or 'session_name' parameter.",
                        code=ErrorCode.AMBIGUOUS_TOOL
                    )
                
                # Use the found tool's runtime info
                found_tool = matching[0]
                runtime_backend = found_tool.runtime_info.backend
                runtime_session = found_tool.runtime_info.session_name
                runtime_server = found_tool.runtime_info.server_name
        
        # Execute the tool
        # Ensure session exists (except for SYSTEM backend which doesn't use sessions)
        # Check if session really exists - cached tools have session_name but session may not be running
        if runtime_backend != BackendType.SYSTEM:
            if not runtime_session or runtime_session not in self._sessions:
                runtime_session = await self.ensure_session(runtime_backend, runtime_server)
        
        try:
            provider = self._registry.get(runtime_backend)
            # SystemProvider doesn't use sessions, pass a dummy value
            session_param = runtime_session if runtime_session else "system"
            result = await provider.call_tool(session_param, tool_name, params)
            
            # Update last_activity in session_info (skip for SYSTEM backend)
            if runtime_backend != BackendType.SYSTEM and runtime_session and runtime_session in self._session_info:
                async with self._lock:
                    old_info = self._session_info[runtime_session]
                    self._session_info[runtime_session] = old_info.model_copy(
                        update={"last_activity": datetime.utcnow()}
                    )
            
            return result
        finally:
            # Auto-close session if requested (skip for SYSTEM backend)
            if runtime_backend != BackendType.SYSTEM and not keep_session and runtime_session:
                if runtime_server or runtime_session.startswith(runtime_backend.value):
                    await self.close_session(runtime_session)

================================================
FILE: anytool/grounding/core/provider.py
================================================
"""
A provider manages the sessions of one backend. If the backend is MCP, the provider manages its sessions through servers.
"""
from abc import ABC, abstractmethod
from typing import List, Dict, Any, Optional, Generic, TypeVar

from .tool import BaseTool
from .types import BackendType, SessionConfig, ToolResult, ToolStatus
from .session import BaseSession
from .security.policies import SecurityPolicyManager
from anytool.config import get_config
from anytool.utils.logging import Logger

logger = Logger.get_logger(__name__)
TSession = TypeVar('TSession', bound=BaseSession)


class Provider(ABC, Generic[TSession]):
    """Abstract base class for a backend provider.

    A provider owns the sessions of a single backend (``backend_type``) and
    offers shared helpers for listing and calling the tools those sessions
    expose. Concrete providers implement ``initialize``, ``create_session``
    and ``close_session``.
    """

    def __init__(self, backend_type: BackendType, config: Optional[Dict[str, Any]] = None):
        """Store the backend type and config, then load the backend's security policy.

        Args:
            backend_type: Backend this provider serves.
            config: Provider-specific settings; ``None`` is treated as an empty dict.
        """
        self.backend_type = backend_type
        self.config = config or {}
        self.is_initialized = False
        self._sessions: Dict[str, TSession] = {}  # session name -> session object
        self._session_counter: int = 0
        self.security_manager = SecurityPolicyManager()

        self._setup_security_policy(config)

    def _setup_security_policy(self, config: dict | None = None):
        """Fetch this backend's security policy from the global config and register it.

        ``config`` is accepted for interface compatibility and is not read here.
        """
        security_policy = get_config().get_security_policy(self.backend_type.value)
        # Register under this provider's own backend type. The policy is looked up
        # by ``self.backend_type`` above, so storing it under a fixed SHELL key
        # would file every non-shell provider's policy under the wrong backend.
        self.security_manager.set_backend_policy(self.backend_type, security_policy)

    async def ensure_initialized(self) -> None:
        """Internal helper: run ``initialize()`` if ``is_initialized`` is still false."""
        if not self.is_initialized:
            await self.initialize()

    @abstractmethod
    async def initialize(self) -> None:
        """Initialize the provider, calling ``create_session`` for sessions that do not exist yet.

        Subclasses should set ``self.is_initialized = True`` after successful
        initialization; ``ensure_initialized`` relies on that flag.
        """
        pass

    @abstractmethod
    async def create_session(self, session_config: SessionConfig) -> TSession:
        """Create a session and record it in ``_sessions``."""
        pass

    @abstractmethod
    async def close_session(self, session_name: str) -> None:
        """Close the session registered under ``session_name``."""
        pass

    def list_sessions(self) -> List[str]:
        """Return the names of all sessions currently held in ``_sessions``."""
        return list(self._sessions.keys())

    def get_session(self, session_name: str) -> Optional[TSession]:
        """Return the session registered under ``session_name``, or ``None`` if absent."""
        return self._sessions.get(session_name)

    async def close_all_sessions(self) -> None:
        """Provider shutdown cleanup: close every session and reset the provider.

        A failure while closing one session is logged and does not stop the
        remaining sessions from being closed. Afterwards ``_sessions`` is
        emptied and ``is_initialized`` is reset to ``False``.
        """
        # Iterate over a snapshot of the keys: close_session() is expected to
        # update ``_sessions`` while we loop.
        for session_name in list(self._sessions.keys()):
            try:
                await self.close_session(session_name)
            except Exception as e:
                # Best-effort cleanup; report through the module logger, matching
                # the error reporting used in call_tool().
                logger.error("Error closing session %s: %s", session_name, e)

        self._sessions.clear()
        self.is_initialized = False

    def __repr__(self) -> str:
        return (f"Provider(backend={self.backend_type.value}, "
                f"initialized={self.is_initialized}, "
                f"sessions={len(self._sessions)}, "
                f"config_items={len(self.config)})")

    async def list_tools(self, session_name: Optional[str] = None) -> List[BaseTool]:
        """Return the tools exposed by one session or by all sessions.

        Args:
            session_name: When given, only that session's tools are returned;
                an unknown name yields an empty list. When omitted, the tools
                of every session are concatenated.
        """
        await self.ensure_initialized()

        if session_name:
            session = self._sessions.get(session_name)
            return await session.list_tools() if session else []

        tools: list[BaseTool] = []
        for sess in self._sessions.values():
            tools.extend(await sess.list_tools())
        return tools

    async def call_tool(
        self,
        session_name: str,
        tool_name: str,
        parameters: Dict[str, Any] | None = None,
    ) -> ToolResult:
        """Call ``tool_name`` on the named session and return its result.

        Errors are reported in-band: an unknown session or an exception raised
        by the session's ``call_tool`` produces a ``ToolResult`` with
        ``ToolStatus.ERROR`` instead of propagating.

        Args:
            session_name: Name of the session that should run the tool.
            tool_name: Name of the tool to call.
            parameters: Tool arguments; ``None`` is treated as an empty dict.
        """
        await self.ensure_initialized()
        parameters = parameters or {}

        session = self._sessions.get(session_name)
        if session is None:
            return ToolResult(
                status=ToolStatus.ERROR,
                content="",
                error=f"Session '{session_name}' not found",
                metadata={"session_name": session_name, "tool_name": tool_name},
            )

        try:
            return await session.call_tool(tool_name, parameters)
        except Exception as e:
            logger.error("Execute tool error: %s @%s - %s", tool_name, session_name, e)
            return ToolResult(
                status=ToolStatus.ERROR,
                content="",
                error=str(e),
                metadata={"session_name": session_name, "tool_name": tool_name},
            )


class ProviderRegistry:
    """
    Registry that maps each BackendType to its Provider and supports
    registering and retrieving providers at runtime.
    """
    def __init__(self) -> None:
        self._providers: dict[BackendType, Provider] = {}

    def register(self, provider: "Provider") -> None:
        """Store ``provider`` under its own ``backend_type``, replacing any earlier entry."""
        backend = provider.backend_type
        self._providers[backend] = provider
        logger.debug("Provider for %s registered", backend)

    def get(self, backend: BackendType) -> "Provider":
        """Return the provider registered for ``backend``.

        Raises:
            KeyError: If no provider has been registered for ``backend``.
        """
        if backend in self._providers:
            return self._providers[backend]
        raise KeyError(f"Provider for '{backend.value}' not registered")

    def list(self) -> dict[BackendType, "Provider"]:
        """Return a shallow copy of the backend -> provider mapping."""
        return self._providers.copy()

================================================
FILE: anytool/grounding/core/quality/__init__.py
================================================
from .types import ToolQualityRecord, ExecutionRecord, DescriptionQuality
from .manager import ToolQualityManager
from .store import QualityStore

# Global manager instance
_global_manager: "ToolQualityManager | None" = None


def get_quality_manager() -> "ToolQualityManager | None":
    """Return the module-level quality manager, or ``None`` when none has been set."""
    current = _global_manager
    return current


def set_quality_manager(manager: "ToolQualityManager") -> None:
    """Install ``manager`` as the module-level quality manager, replacing any previous one."""
    globals()["_global_manager"] = manager


__all__ = [
    "ToolQualityRecord",
    "ExecutionRecord",
    "DescriptionQuality",
    "ToolQualityManager",
    "QualityStore",
    "get_quality_manager",
    "set_quality_manager",
]


================================================
FILE: anytool/grounding/core/quality/manager.py
================================================
"""
Tool Quality Manager

Core API (called by main flow):
- record_execution(): Called by BaseTool after execution
- adjust_ranking(): Called by SearchCoordinator for quality-aware sorting
- evolve(): Called periodically by ToolLayer for self-evolution

Query API (for inspection/debugging):
- get_quality_report(), get_tool_insights()
"""

import hashlib
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Tuple, TYPE_CHECKING

from .types import ToolQualityRecord, ExecutionRecord, DescriptionQuality
from .store import QualityStore
from anytool.utils.logging import Logger
from anytool.config.constants import PROJECT_ROOT

if TYPE_CHECKING:
    from anytool.grounding.core.tool import BaseTool
    from anytool.grounding.core.types import ToolResult
    from anytool.llm import LLMClient

logger = Logger.get_logger(__name__)


class ToolQualityManager:
    """
    Manages tool quality tracking and quality-aware ranking.
    
    Features:
    - Track execution success rate and latency
    - LLM-based description quality evaluation (optional, requires llm_client)
    - Persistent memory across sessions
    - Quality-integrated tool ranking
    - Incremental update detection
    """
    
    def __init__(
        self,
        *,
        cache_dir: Optional[Path] = None,
        llm_client: Optional["LLMClient"] = None,
        enable_persistence: bool = True,
        auto_save: bool = True,
        evolve_interval: int = 5,
    ):
        self._cache_dir = cache_dir or PROJECT_ROOT / ".anytool" / "tool_quality"
        self._llm_client = llm_client
        self._enable_persistence = enable_persistence
        self._auto_save = auto_save
        self._evolve_interval = evolve_interval
        
        # In-memory cache
        self._records: Dict[str, ToolQualityRecord] = {}
        self._global_execution_count: int = 0
        self._last_evolve_count: int = 0
        
        # Persistent store
        self._store = QualityStore(self._cache_dir) if enable_persistence else None
        
        # Load from disk
        if self._store:
            self._records, self._global_execution_count = self._store.load_all()
            self._last_evolve_count = (self._global_execution_count // self._evolve_interval) * self._evolve_interval
        
        logger.info(
            f"ToolQualityManager initialized "
            f"(persistence={enable_persistence}, records={len(self._records)}, "
            f"global_count={self._global_execution_count}, evolve_interval={self._evolve_interval})"
        )

    def get_tool_key(self, tool: "BaseTool") -> str:
        """Build the ``"<backend>:<server>:<tool name>"`` key used to index quality records.

        Bound tools take backend and server from their runtime info (a missing
        server name becomes ``"default"``). Unbound tools use their declared
        backend type, or ``"unknown"`` when it is ``NOT_SET``, with server
        ``"default"``.
        """
        from anytool.grounding.core.types import BackendType

        if not tool.is_bound:
            server = "default"
            if tool.backend_type != BackendType.NOT_SET:
                backend = tool.backend_type.value
            else:
                backend = "unknown"
        else:
            backend = tool.runtime_info.backend.value
            server = tool.runtime_info.server_name or "default"

        return f"{backend}:{server}:{tool.name}"
    
    def _compute_description_hash(self, tool: "BaseTool") -> str:
        """Compute hash of tool description for change detection."""
        content = f"{tool.name}|{tool.description or ''}|{tool.schema.parameters}"
        return hashlib.md5(content.encode()).hexdigest()[:16]

    def get_record(self, tool: "BaseTool") -> ToolQualityRecord:
        """Get or create quality record for a tool."""
        key = self.get_tool_key(tool)
        
        if key not in self._records:
            backend, server, name = key.split(":", 2)
            self._records[key] = ToolQualityRecord(
                tool_key=key,
                backend=backend,
                server=server,
                tool_name=name,
                description_hash=self._compute_description_hash(tool),
            )
        
        return self._records[key]
    
    def get_quality_score(self, tool: "BaseTool") -> float:
        """Get quality score for a tool (0-1)."""
        return self.get_record(tool).quality_score
    
    # Execution Tracking
    async def record_execution(
        self,
        tool: "BaseTool",
        result: "ToolResult",
        execution_time_ms: float,
    ) -> None:
        """Record tool execution result and increment global counter."""
        record = self.get_record(tool)
        
        # Extract error message if failed
        error_message = None
        if result.is_error and result.error:
            error_message = str(result.error)[:500]
        
        # Add execution record
        record.add_execution(ExecutionRecord(
            timestamp=datetime.now(),
            success=result.is_success,
            execution_time_ms=execution_time_ms,
            error_message=error_message,
        ))
        
        # Increment global execution count
        self._global_execution_count += 1
        
        # Auto-save
        if self._auto_save and self._store:
            await self._store.save_record(record, self._records, self._global_execution_count)
        
        logger.debug(
            f"Recorded execution: {record.tool_key} "
            f"success={result.is_success} time={execution_time_ms:.0f}ms "
            f"(global_count={self._global_execution_count})"
        )
    
    async def evaluate_description(
        self,
        tool: "BaseTool",
        force: bool = False,
    ) -> Optional[DescriptionQuality]:
        """
        Evaluate tool description quality using LLM.

        The LLM is asked to rate the tool's documentation for clarity and
        completeness; the scores are stored on the tool's quality record.

        Args:
            tool: Tool whose name, description and parameter schema are rated.
            force: When false, an existing evaluation is returned as-is if the
                description hash has not changed. When true, the LLM is always
                queried.

        Returns:
            The new (or cached) DescriptionQuality, or None when no LLM client
            is configured, no JSON object is found in the reply, or the LLM
            call / reply parsing raises.
        """
        if not self._llm_client:
            logger.debug("LLM client not available for description evaluation")
            return None
        
        record = self.get_record(tool)
        
        # Skip if already evaluated and not forced
        if record.description_quality and not force:
            # Check if description changed
            current_hash = self._compute_description_hash(tool)
            if current_hash == record.description_hash:
                return record.description_quality
        
        # Build evaluation prompt
        desc = tool.description or "No description provided"
        # Cap the description at 4000 characters to bound prompt size
        if len(desc) > 4000:
            desc = desc[:4000] + "\n... (truncated for length)"
        
        params = tool.schema.parameters or {}
        if params:
            param_lines = []
            # Extract parameter names and types from JSON schema
            # NOTE(review): assumes each "properties" value is a dict; a
            # non-dict entry would raise here, outside the try block below.
            if "properties" in params:
                for param_name, param_info in params.get("properties", {}).items():
                    param_type = param_info.get("type", "unknown")
                    param_desc = param_info.get("description", "")
                    param_lines.append(f"- {param_name} ({param_type}): {param_desc}" if param_desc else f"- {param_name} ({param_type})")
            param_text = "\n".join(param_lines) if param_lines else "No parameter descriptions available"
        else:
            param_text = "No parameters"
        
        # Doubled braces in the JSON example are f-string escapes for literal { and }
        prompt = f"""# Task: Evaluate this tool's documentation quality

## Tool Information

Name: {tool.name}

Description:
{desc}

Parameters:
{param_text}

## Evaluation Task

Rate the documentation on two dimensions (0.0 to 1.0 scale):

### 1. Clarity
How clear is the tool's purpose and usage?

- 0.0-0.3: No description or completely unclear
- 0.4-0.6: Basic purpose but vague
- 0.7-0.8: Clear purpose and functionality
- 0.9-1.0: Very clear with usage examples or context

### 2. Completeness
Are inputs/outputs properly documented?

- 0.0-0.3: Missing critical information
- 0.4-0.6: Basic info but lacks details
- 0.7-0.8: Well documented with types
- 0.9-1.0: Comprehensive with constraints and examples

## Scoring Guidelines

- Short descriptions can score high if clear and accurate
- If parameters exist but aren't explained in description, reduce completeness score
- Missing description means clarity = 0.0

## Output

Respond with JSON only:

```json
{{
  "reasoning": "Brief 1-2 sentence analysis",
  "clarity": 0.8,
  "completeness": 0.7
}}
```"""

        try:
            response = await self._llm_client.complete(prompt)
            # The reply text is read from response["message"]["content"]
            content = response["message"]["content"]
            
            # Parse JSON response
            import json
            
            # Extract complete JSON object
            def extract_json_object(text: str) -> str | None:
                """Extract first complete JSON object from text by counting braces."""
                start = text.find('{')
                if start == -1:
                    return None
                
                count = 0            # current brace nesting depth
                in_string = False    # True while between double quotes
                escape_next = False  # True right after a backslash
                
                for i, char in enumerate(text[start:], start):
                    # The character after a backslash is consumed without inspection
                    if escape_next:
                        escape_next = False
                        continue
                    
                    if char == '\\':
                        escape_next = True
                        continue
                    
                    # escape_next is always False here (the branch above continues),
                    # so this toggles on every unescaped double quote
                    if char == '"' and not escape_next:
                        in_string = not in_string
                        continue
                    
                    # Braces only count outside string literals
                    if not in_string:
                        if char == '{':
                            count += 1
                        elif char == '}':
                            count -= 1
                            if count == 0:
                                return text[start:i+1]
                # Braces never balanced: no complete object in the text
                return None
            
            json_str = extract_json_object(content)
            if not json_str:
                logger.warning(f"Could not find JSON in LLM response for {tool.name}")
                return None
            
            data = json.loads(json_str)
            
            # Extract and validate scores with robust error handling
            def safe_float(value, default=0.5, min_val=0.0, max_val=1.0):
                """Safely convert to float and clamp to valid range."""
                try:
                    if value is None:
                        return default
                    f = float(value)
                    return max(min_val, min(max_val, f))
                except (ValueError, TypeError):
                    logger.warning(f"Invalid score value: {value}, using default {default}")
                    return default
            
            # Missing or unparsable scores fall back to 0.5
            clarity = safe_float(data.get("clarity"), default=0.5)
            completeness = safe_float(data.get("completeness"), default=0.5)
            reasoning = str(data.get("reasoning", ""))[:500]  # Limit reasoning length
            
            quality = DescriptionQuality(
                clarity=clarity,
                completeness=completeness,
                evaluated_at=datetime.now(),
                reasoning=reasoning,
            )
            
            # Update record
            record.description_quality = quality
            # Refresh the stored hash so the cached evaluation matches this description
            record.description_hash = self._compute_description_hash(tool)
            record.last_updated = datetime.now()
            
            # Save
            if self._auto_save and self._store:
                await self._store.save_record(record, self._records, self._global_execution_count)
            
            logger.info(f"Evaluated description: {tool.name} score={quality.overall_score:.2f}")
            return quality
            
        except Exception as e:
            # Any failure in the LLM call or reply parsing is logged and reported as None
            logger.error(f"Description evaluation failed for {tool.name}: {e}")
            return None
    
    # Quality-Aware Ranking
    def adjust_ranking(
        self,
        tools_with_scores: List[Tuple["BaseTool", float]],
    ) -> List[Tuple["BaseTool", float]]:
        """
        Adjust tool ranking using penalty-based approach.
           
        Args:
            tools_with_scores: List of (tool, semantic_score) tuples
        """
        adjusted = []
        for tool, semantic_score in tools_with_scores:
            penalty = self.get_penalty(tool)
            
            adjusted_score = semantic_score * penalty
            
            adjusted.append((tool, adjusted_score))
        
        # Sort by adjusted score (descending)
        adjusted.sort(key=lambda x: x[1], reverse=True)
        
        return adjusted
    
    def get_penalty(self, tool: "BaseTool") -> float:
        """Get penalty factor for a tool (0.2-1.0)."""
        return self.get_record(tool).penalty
    
    # Change Detection
    def check_changes(self, tools: List["BaseTool"]) -> Dict[str, str]:
        """
        Check for tool changes (new/updated/unchanged).
        
        Returns dict: {tool_key: "new"|"updated"|"unchanged"}
        """
        changes = {}
        
        for tool in tools:
            key = self.get_tool_key(tool)
            current_hash = self._compute_description_hash(tool)
            
            if key not in self._records:
                changes[key] = "new"
            elif self._records[key].description_hash != current_hash:
                changes[key] = "updated"
                # Clear old evaluation on description change
                self._records[key].description_quality = None
                self._records[key].description_hash = current_hash
            else:
                changes[key] = "unchanged"
        
        new_count = sum(1 for v in changes.values() if v == "new")
        updated_count = sum(1 for v in changes.values() if v == "updated")
        
        if new_count or updated_count:
            logger.info(f"Tool changes: {new_count} new, {updated_count} updated")
        
        return changes
    
    async def save(self) -> None:
        """
        Manually save all records to disk.
        
        Note: Usually not needed - auto_save handles persistence in
        record_execution(), evaluate_description(), and evolve().
        Provided as public API for explicit save when needed.
        """
        if self._store:
            await self._store.save_all(self._records)
    
    def clear_cache(self) -> None:
        """Clear all cached data."""
        self._records.clear()
        if self._store:
            self._store.clear()
    
    def get_stats(self) -> Dict:
        """
        Get quality tracking statistics.
        
        Note: Query API for inspection, may not be called in main flow.
        """
        if not self._records:
            return {"total_tools": 0}
        
        records = list(self._records.values())
        
        return {
            "total_tools": len(records),
            "total_executions": sum(r.total_calls for r in records),
            "avg_success_rate": (
                sum(r.success_rate for r in records) / len(records)
                if records else 0
            ),
            "avg_quality_score": (
                sum(r.quality_score for r in records) / len(records)
                if records else 0
            ),
            "tools_with_description_eval": sum(
                1 for r in records if r.description_quality
            ),
        }

    def get_top_tools(
        self,
        n: int = 10,
        backend: Optional[str] = None,
        min_calls: int = 3,
    ) -> List[ToolQualityRecord]:
        """
        Get top N tools by quality score.
        
        Args:
            n: Number of tools to return
            backend: Filter by backend type (optional)
            min_calls: Minimum calls required (to filter untested tools)
        """
        records = [
            r for r in self._records.values()
            if r.total_calls >= min_calls
            and (backend is None or r.backend == backend)
        ]
        
        records.sort(key=lambda r: r.quality_score, reverse=True)
        return records[:n]
    
    def get_problematic_tools(
        self,
        success_rate_threshold: float = 0.5,
        min_calls: int = 5,
    ) -> List[ToolQualityRecord]:
        """
        Get tools with low success rate (candidates for review/removal).
        
        Args:
            success_rate_threshold: Tools below this rate are flagged
            min_calls: Minimum calls required (avoid flagging new tools)
        """
        return [
            r for r in self._records.values()
            if r.total_calls >= min_calls
            and r.recent_success_rate < success_rate_threshold
        ]
    
    def get_quality_report(self) -> Dict:
        """
        Generate comprehensive quality report for upper layer.
        
        Returns structured report with:
        - Overall stats
        - Per-backend breakdown
        - Top/problematic tools
        - Improvement suggestions
        """
        if not self._records:
            return {"status": "no_data", "message": "No quality data collected yet"}
        
        records = list(self._records.values())
        tested_records = [r for r in records if r.total_calls >= 3]
        
        # Per-backend stats
        backends = {}
        for r in records:
            if r.backend not in backends:
                backends[r.backend] = {
                    "tools": 0,
                    "total_calls": 0,
                    "success_count": 0,
                    "servers": set()
                }
            backends[r.backend]["tools"] += 1
            backends[r.backend]["total_calls"] += r.total_calls
            backends[r.backend]["success_count"] += r.success_count
            backends[r.backend]["servers"].add(r.server)
        
        # Convert sets to counts
        for b in backends:
            backends[b]["servers"] = len(backends[b]["servers"])
            backends[b]["success_rate"] = (
                backends[b]["success_count"] / backends[b]["total_calls"]
                if backends[b]["total_calls"] > 0 else 0
            )
        
        # Top and problematic tools
        top_tools = self.get_top_tools(5)
        problematic = self.get_problematic_tools()
        
        return {
            "summary": {
                "total_tools": len(records),
                "tested_tools": len(tested_records),
                "total_executions": sum(r.total_calls for r in records),
                "overall_success_rate": (
                    sum(r.success_count for r in records) /
                    max(1, sum(r.total_calls for r in records))
                ),
                "avg_quality_score": (
                    sum(r.quality_score for r in tested_records) / len(tested_records)
                    if tested_records else 0
                ),
            },
            "by_backend": backends,
            "top_tools": [
                {"key": r.tool_key, "score": r.quality_score, "success_rate": r.success_rate}
                for r in top_tools
            ],
            "problematic_tools": [
                {"key": r.tool_key, "success_rate": r.success_rate, "calls": r.total_calls}
                for r in problematic
            ],
            "recommendations": self._generate_recommendations(records, problematic),
        }
    
    def _generate_recommendations(
        self,
        records: List[ToolQualityRecord],
        problematic: List[ToolQualityRecord],
    ) -> List[str]:
        """Generate actionable recommendations based on quality data."""
        recommendations = []
        
        # Check for problematic tools
        if problematic:
            tool_names = [r.tool_name for r in problematic[:3]]
            recommendations.append(
                f"Review low-success tools: {', '.join(tool_names)}"
            )
        
        # Check for tools needing description evaluation
        unevaluated = [r for r in records if not r.description_quality and r.total_calls >= 3]
        if unevaluated:
            recommendations.append(
                f"{len(unevaluated)} tools need description quality evaluation"
            )
        
        # Check for low description quality
        poor_docs = [
            r for r in records
            if r.description_quality and r.description_quality.overall_score < 0.5
        ]
        if poor_docs:
            recommendations.append(
                f"{len(poor_docs)} tools have poor documentation quality"
            )
        
        return recommendations

    def compute_adaptive_quality_weight(self) -> float:
        """
        Derive how much weight quality data deserves from how much of it exists.

        Two signals are blended equally:
        - coverage: share of tracked tools that have at least 3 recorded calls
        - richness: mean call count over all tracked tools, saturating at 20

        Returns:
            A weight in [0.1, 0.5], rounded to two decimals. The floor of 0.1
            is returned when nothing is tracked or no tool has 3+ calls.
        """
        tracked = list(self._records.values()) if self._records else []
        tested_count = len([rec for rec in tracked if rec.total_calls >= 3])

        # Nothing tracked, or nothing exercised often enough to be trusted
        if tested_count == 0:
            return 0.1

        tracked_count = len(tracked)

        # Fraction of tools with enough calls to be considered tested
        coverage = tested_count / tracked_count

        # Mean calls across every tracked tool, capped once it reaches 20
        mean_calls = sum(rec.total_calls for rec in tracked) / tracked_count
        richness = min(1.0, mean_calls / 20)

        confidence = (coverage * 0.5 + richness * 0.5)

        # Scale confidence (0..1) onto the weight range [0.1, 0.5]
        return round(0.1 + confidence * 0.4, 2)
    
    def should_reevaluate_description(self, tool: "BaseTool") -> bool:
        """
        Decide whether the description of ``tool`` is due for a fresh evaluation.

        Returns True when any of these hold:
        - the tool has no quality record yet
        - the stored description hash differs from the current one
        - no evaluation exists and the tool has 5 or more calls
        - with 10 or more calls, the recent success rate is below 0.5 although
          the description scored above 0.7 (docs look fine, behaviour does not)
        """
        record = self._records.get(self.get_tool_key(tool))
        if not record:
            return True

        # A changed hash means the stored evaluation describes outdated text
        if self._compute_description_hash(tool) != record.description_hash:
            return True

        evaluation = record.description_quality
        if not evaluation:
            # Never evaluated: wait until there is enough usage data
            return record.total_calls >= 5

        # Evaluated: flag a mismatch between good docs and poor recent results
        return (
            record.total_calls >= 10
            and record.recent_success_rate < 0.5
            and evaluation.overall_score > 0.7
        )
    
    async def evolve(self, tools: List["BaseTool"]) -> Dict:
        """
        Execute one self-evolution pass over ``tools`` and report what happened.

        Steps, in order:
        1. Detect tool changes via ``check_changes``.
        2. Select tools whose descriptions are due for re-evaluation.
        3. Re-evaluate at most 5 of them, and only when an LLM client is set.
        4. Recompute the adaptive quality weight.
        5. Build recommendations from all records and the problematic tools.
        6. Remember the execution count of this cycle and persist if a store exists.

        Returns:
            Dict with the keys "changes_detected", "descriptions_evaluated",
            "adaptive_weight" and "recommendations".
        """
        max_evaluations = 5  # bounds the number of LLM calls per cycle

        changes = self.check_changes(tools)

        pending = [
            candidate for candidate in tools
            if self.should_reevaluate_description(candidate)
        ]

        evaluated = 0
        if pending and self._llm_client:
            for candidate in pending[:max_evaluations]:
                # A falsy result is not counted as a completed evaluation
                if await self.evaluate_description(candidate, force=True):
                    evaluated += 1

        weight = self.compute_adaptive_quality_weight()

        problematic = self.get_problematic_tools()
        recommendations = self._generate_recommendations(
            list(self._records.values()), problematic
        )

        report = {
            "changes_detected": changes,
            "descriptions_evaluated": evaluated,
            "adaptive_weight": weight,
            "recommendations": recommendations,
        }

        # The next cycle is measured from the current execution count
        self._last_evolve_count = self._global_execution_count

        if self._store:
            await self._store.save_all(self._records, self._global_execution_count)

        logger.info(
            f"Evolution cycle complete: "
            f"changes={len([v for v in report['changes_detected'].values() if v != 'unchanged'])}, "
            f"evaluated={report['descriptions_evaluated']}, "
            f"weight={report['adaptive_weight']}, "
            f"global_count={self._global_execution_count}"
        )

        return report
    
    def should_evolve(self) -> bool:
        """Tell whether enough executions have accumulated since the last evolution cycle."""
        next_due = self._last_evolve_count + self._evolve_interval
        return self._global_execution_count >= next_due
    
    def get_tool_insights(self, tool: "BaseTool") -> Dict:
        """
        Summarise the quality history of one tool (for debugging/analysis).

        Returns:
            ``{"status": "not_tracked", "tool": <name>}`` when the tool has no
            record; otherwise a dict with call statistics, the description
            quality breakdown (or None), the number of failures among the
            20 most recent executions, and ISO-formatted timestamps.
        """
        record = self._records.get(self.get_tool_key(tool))
        if not record:
            return {"status": "not_tracked", "tool": tool.name}

        # Only the 20 latest executions are inspected for failures
        latest = record.recent_executions[-20:]
        recent_failures_count = len([entry for entry in latest if not entry.success])

        doc_quality = record.description_quality
        if doc_quality:
            quality_view = {
                "overall_score": doc_quality.overall_score,
                "clarity": doc_quality.clarity,
                "completeness": doc_quality.completeness,
                "reasoning": doc_quality.reasoning,
            }
        else:
            quality_view = None

        insights = {
            "tool_key": record.tool_key,
            "total_calls": record.total_calls,
            "success_rate": record.success_rate,
            "recent_success_rate": record.recent_success_rate,
            "avg_execution_time_ms": record.avg_execution_time_ms,
            "quality_score": record.quality_score,
            "description_quality": quality_view,
            "recent_failures_count": recent_failures_count,
            "first_seen": record.first_seen.isoformat(),
            "last_updated": record.last_updated.isoformat(),
        }
        return insights


================================================
FILE: anytool/grounding/core/quality/store.py
================================================
"""
Persistent storage for tool quality data.
"""

import json
import asyncio
from pathlib import Path
from typing import Dict, List, Optional

from .types import ToolQualityRecord
from anytool.utils.logging import Logger
from anytool.config.constants import PROJECT_ROOT

logger = Logger.get_logger(__name__)


class QualityStore:
    """
    Persistent storage for tool quality records.
    
    Storage structure:
    <project_root>/.anytool/tool_quality/
    ├── records.json          # All quality records
    ├── records.json.tmp      # Staging file, present only while a save is in flight
    └── records_backup.json   # Copy of the previous records.json, refreshed on save
    """
    
    # Format version written to, and required from, records.json
    VERSION = 1
    
    def __init__(self, cache_dir: Optional[Path] = None):
        """Create the store and make sure its directory exists.
        
        Args:
            cache_dir: Directory holding the record files. Defaults to
                ``PROJECT_ROOT / ".anytool" / "tool_quality"``.
        """
        if cache_dir is None:
            cache_dir = PROJECT_ROOT / ".anytool" / "tool_quality"
        
        self._cache_dir = Path(cache_dir)
        self._cache_dir.mkdir(parents=True, exist_ok=True)
        
        self._records_file = self._cache_dir / "records.json"
        self._backup_file = self._cache_dir / "records_backup.json"
        # Staging file: new content is written here and then renamed over
        # records.json, so records.json is never observed half-written.
        self._tmp_file = self._cache_dir / "records.json.tmp"
        
        # Serializes concurrent save_all() calls within one event loop
        self._write_lock = asyncio.Lock()
        
        logger.debug(f"QualityStore initialized at {self._cache_dir}")
    
    def load_all(self) -> tuple[Dict[str, ToolQualityRecord], int]:
        """Load all quality records and global execution count from disk.
        
        A missing file, a version mismatch, or any read/parse error yields an
        empty result instead of raising. Individual records that fail to
        deserialize are skipped with a warning.
        
        Returns:
            Tuple of (records_dict, global_execution_count)
        """
        if not self._records_file.exists():
            return {}, 0
        
        try:
            with open(self._records_file, "r", encoding="utf-8") as f:
                data = json.load(f)
            
            # Version check: data of another format version is ignored
            if data.get("version") != self.VERSION:
                logger.warning("Cache version mismatch, clearing cache")
                return {}, 0
            
            records = {}
            for key, record_data in data.get("records", {}).items():
                try:
                    records[key] = ToolQualityRecord.from_dict(record_data)
                except Exception as e:
                    # One broken record must not discard the rest
                    logger.warning(f"Failed to load record {key}: {e}")
            
            global_count = data.get("global_execution_count", 0)
            logger.info(f"Loaded {len(records)} quality records from cache (global_count={global_count})")
            return records, global_count
            
        except Exception as e:
            logger.error(f"Failed to load quality cache: {e}")
            return {}, 0
    
    async def save_all(self, records: Dict[str, ToolQualityRecord], global_execution_count: int = 0) -> None:
        """Save all quality records and global execution count to disk.
        
        The payload is serialized fully in memory, written to a staging file,
        and only then renamed over ``records.json``. A serialization error or
        an interrupted write therefore leaves the previous ``records.json``
        intact. The previous file is copied to the backup path just before it
        is replaced.
        
        Failures are logged, not raised (saving is best-effort).
        """
        async with self._write_lock:
            try:
                # Serialize before touching any file so that a non-serializable
                # record cannot leave a truncated file behind.
                payload = json.dumps(
                    {
                        "version": self.VERSION,
                        "global_execution_count": global_execution_count,
                        "records": {
                            key: record.to_dict()
                            for key, record in records.items()
                        }
                    },
                    indent=2,
                    ensure_ascii=False,
                )
                
                with open(self._tmp_file, "w", encoding="utf-8") as f:
                    f.write(payload)
                
                # Backup existing file
                if self._records_file.exists():
                    import shutil
                    shutil.copy(self._records_file, self._backup_file)
                
                # Swap the fully written staging file into place
                self._tmp_file.replace(self._records_file)
                
                logger.debug(f"Saved {len(records)} quality records to cache (global_count={global_execution_count})")
                
            except Exception as e:
                logger.error(f"Failed to save quality cache: {e}")
                # Do not leave a stale staging file behind after a failed save
                try:
                    self._tmp_file.unlink(missing_ok=True)
                except OSError:
                    pass
    
    async def save_record(self, record: ToolQualityRecord, all_records: Dict[str, ToolQualityRecord], global_execution_count: int = 0) -> None:
        """Save a single record (saves all for simplicity).
        
        Note: ``all_records`` is updated in place with ``record`` before the
        full save is performed.
        """
        all_records[record.tool_key] = record
        await self.save_all(all_records, global_execution_count)
    
    def clear(self) -> None:
        """Clear all cached data (records, backup, and any leftover staging file)."""
        for path in (self._records_file, self._backup_file, self._tmp_file):
            if path.exists():
                path.unlink()
        logger.info("Quality cache cleared")


================================================
FILE: anytool/grounding/core/quality/types.py
================================================
"""
Data types for tool quality tracking.
"""

from dataclasses import dataclass, field
from datetime import datetime
from typing import ClassVar, Dict, List, Optional, Any


@dataclass
class ExecutionRecord:
    """Outcome of one tool execution, as kept in a tool's recent history."""
    # Time associated with this execution (serialized via isoformat()).
    timestamp: datetime
    # True when the execution counted as a success.
    success: bool
    # Duration of the execution in milliseconds.
    execution_time_ms: float
    # Optional error text; presumably set only for failed executions -- confirm with callers.
    error_message: Optional[str] = None


@dataclass
class DescriptionQuality:
    """Scores an LLM assigned to a tool's description."""
    clarity: float  # 0-1: how clear the purpose and usage are
    completeness: float  # 0-1: how well inputs/outputs are documented
    evaluated_at: datetime
    reasoning: str = ""  # explanation the LLM gave for its scores
    
    @property
    def overall_score(self) -> float:
        """Arithmetic mean of the clarity and completeness scores."""
        combined = self.clarity + self.completeness
        return combined / 2


@dataclass
class ToolQualityRecord:
    """
    Aggregated quality information tracked for a single tool.
    
    Key: "{backend}:{server}:{tool_name}"
    """
    tool_key: str
    backend: str
    server: str
    tool_name: str
    
    # Lifetime counters
    total_calls: int = 0
    success_count: int = 0
    total_execution_time_ms: float = 0.0
    
    # Rolling window of the latest executions, oldest first
    recent_executions: List[ExecutionRecord] = field(default_factory=list)
    
    # LLM evaluation of the description; None until one has been stored
    description_quality: Optional[DescriptionQuality] = None
    
    # Bookkeeping
    description_hash: Optional[str] = None
    first_seen: datetime = field(default_factory=datetime.now)
    last_updated: datetime = field(default_factory=datetime.now)
    
    # Upper bound on the length of recent_executions
    MAX_RECENT_EXECUTIONS: ClassVar[int] = 100
    
    # Tools whose recent success rate is at or above this value are not penalized
    PENALTY_THRESHOLD: ClassVar[float] = 0.4
    
    @property
    def success_rate(self) -> float:
        """Lifetime share of successful calls; 0.0 when there are no calls."""
        return self.success_count / self.total_calls if self.total_calls else 0.0
    
    @property
    def avg_execution_time_ms(self) -> float:
        """Mean execution time over all calls; 0.0 when there are no calls."""
        return self.total_execution_time_ms / self.total_calls if self.total_calls else 0.0
    
    @property
    def recent_success_rate(self) -> float:
        """Share of successes in the rolling window; lifetime rate if the window is empty."""
        window = self.recent_executions
        if not window:
            return self.success_rate
        return len([entry for entry in window if entry.success]) / len(window)
    
    @property
    def consecutive_failures(self) -> int:
        """Length of the failure streak ending at the most recent execution."""
        streak = 0
        for entry in self.recent_executions[::-1]:
            if entry.success:
                break
            streak += 1
        return streak
    
    @property
    def penalty(self) -> float:
        """
        Multiplicative penalty factor derived from the recent failure rate.
        
        Rules:
        - Fewer than 3 calls: no penalty, so new tools get a fair chance.
        - Recent success rate >= PENALTY_THRESHOLD: no penalty.
        - Otherwise the factor grows linearly from 0.3 (rate 0) towards 1.0
          (rate at the threshold), minus up to 0.3 for a streak of 3 or more
          consecutive failures.
        
        Returns:
            A value in [0.2, 1.0]; 1.0 means no penalty, 0.2 is the maximum
            penalty.
        """
        if self.total_calls < 3:
            return 1.0
        
        rate = self.recent_success_rate
        if rate >= self.PENALTY_THRESHOLD:
            return 1.0
        
        factor = 0.3 + (rate / self.PENALTY_THRESHOLD) * 0.7
        
        # A failure streak hints at a systematic problem:
        # 3 in a row costs 0.1, 4 costs 0.2, 5 or more costs 0.3
        streak = self.consecutive_failures
        if streak >= 3:
            factor -= min(0.3, (streak - 2) * 0.1)
        
        return max(0.2, min(1.0, factor))
    
    @property
    def quality_score(self) -> float:
        """Backward-compatible alias for ``penalty``."""
        return self.penalty
    
    def add_execution(self, record: ExecutionRecord) -> None:
        """Fold one execution into the counters and the rolling window."""
        self.total_calls += 1
        self.total_execution_time_ms += record.execution_time_ms
        
        if record.success:
            self.success_count += 1
        
        self.recent_executions.append(record)
        
        # Keep only the newest MAX_RECENT_EXECUTIONS entries
        limit = self.MAX_RECENT_EXECUTIONS
        if len(self.recent_executions) > limit:
            self.recent_executions = self.recent_executions[-limit:]
        
        self.last_updated = datetime.now()
    
    def to_dict(self) -> Dict[str, Any]:
        """Convert the record into a JSON-friendly dict (datetimes as ISO strings)."""
        executions = []
        for entry in self.recent_executions:
            executions.append({
                "timestamp": entry.timestamp.isoformat(),
                "success": entry.success,
                "execution_time_ms": entry.execution_time_ms,
                "error_message": entry.error_message,
            })
        
        quality = self.description_quality
        if quality:
            quality_payload = {
                "clarity": quality.clarity,
                "completeness": quality.completeness,
                "evaluated_at": quality.evaluated_at.isoformat(),
                "reasoning": quality.reasoning,
            }
        else:
            quality_payload = None
        
        return {
            "tool_key": self.tool_key,
            "backend": self.backend,
            "server": self.server,
            "tool_name": self.tool_name,
            "total_calls": self.total_calls,
            "success_count": self.success_count,
            "total_execution_time_ms": self.total_execution_time_ms,
            "recent_executions": executions,
            "description_quality": quality_payload,
            "description_hash": self.description_hash,
            "first_seen": self.first_seen.isoformat(),
            "last_updated": self.last_updated.isoformat(),
        }
    
    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "ToolQualityRecord":
        """Rebuild a record from the dict layout produced by ``to_dict``.
        
        Identity fields and both timestamps are required; counters, the
        execution window and the description quality are optional.
        """
        record = cls(
            tool_key=data["tool_key"],
            backend=data["backend"],
            server=data["server"],
            tool_name=data["tool_name"],
            total_calls=data.get("total_calls", 0),
            success_count=data.get("success_count", 0),
            total_execution_time_ms=data.get("total_execution_time_ms", 0.0),
            description_hash=data.get("description_hash"),
            first_seen=datetime.fromisoformat(data["first_seen"]),
            last_updated=datetime.fromisoformat(data["last_updated"]),
        )
        
        # Restore the rolling window in its stored order
        record.recent_executions.extend(
            ExecutionRecord(
                timestamp=datetime.fromisoformat(item["timestamp"]),
                success=item["success"],
                execution_time_ms=item["execution_time_ms"],
                error_message=item.get("error_message"),
            )
            for item in data.get("recent_executions", [])
        )
        
        # Restore the description evaluation, tolerating older payloads
        dq = data.get("description_quality")
        if dq:
            record.description_quality = DescriptionQuality(
                clarity=dq.get("clarity", 0.5),  # neutral fallback for old data
                completeness=dq.get("completeness", 0.5),
                evaluated_at=datetime.fromisoformat(dq["evaluated_at"]),
                reasoning=dq.get("reasoning", ""),  # optional field
            )
        
        return record


================================================
FILE: anytool/grounding/core/search_tools.py
================================================
from anytool.grounding.core.tool.base import BaseTool
import re
import os
import numpy as np
import httpx
from typing import Iterable, List, Tuple, Dict, Optional, Any, TYPE_CHECKING
from enum import Enum
import json
import pickle
from pathlib import Path
from datetime import datetime

from .tool import BaseTool
from .types import BackendType
from anytool.llm import LLMClient
from anytool.utils.logging import Logger
from anytool.config.constants import PROJECT_ROOT

if TYPE_CHECKING:
    from .quality import ToolQualityManager

logger = Logger.get_logger(__name__)


class SearchMode(str, Enum):
    """Ranking strategy accepted by ``ToolRanker.rank``; members are also ``str`` values."""
    # Dispatched to ToolRanker._semantic_search (embedding cosine similarity).
    SEMANTIC = "semantic"
    # Dispatched to ToolRanker._keyword_search (BM25, or term overlap without rank_bm25).
    KEYWORD = "keyword"
    # Dispatched to ToolRanker._hybrid_search.
    HYBRID = "hybrid"


class ToolRanker:
    """
    ToolRanker: rank tools by keyword, semantic or hybrid
    """
    # Cache version for persistent storage - increment when cache format changes
    CACHE_VERSION = 1
    
    def __init__(
        self, 
        model_name: Optional[str] = None,
        cache_dir: Optional[str | Path] = None,
        enable_cache_persistence: bool = False
    ):
        """Set up the ranker; the embedding model itself is loaded lazily.
        
        Args:
            model_name: Embedding model name. When None, the EMBEDDING_MODEL
                environment variable is used, then the configured
                ``tool_search.embedding_model``, then a built-in default.
            cache_dir: Directory for the persistent embedding cache. Defaults
                to ``PROJECT_ROOT / ".anytool" / "embedding_cache"``.
            enable_cache_persistence: Whether embeddings are loaded from and
                saved to disk.
        """
        # Remote embedding is used only when both the URL and the key are set
        base_url = os.getenv("EMBEDDING_BASE_URL")
        api_key = os.getenv("EMBEDDING_API_KEY")
        self._api_base_url = base_url
        self._api_key = api_key
        self._use_remote_api = bool(api_key and base_url)
        
        # Model name precedence: argument > env > config > default
        if model_name is None:
            model_name = os.getenv("EMBEDDING_MODEL")
        
        if model_name is None:
            try:
                from anytool.config import get_config
                model_name = get_config().tool_search.embedding_model
            except Exception as exc:
                logger.warning(f"Failed to load config, using default model: {exc}")
                model_name = "BAAI/bge-small-en-v1.5"
        
        self._model_name = model_name
        self._embed_model = None  # created on first use
        self._embedding_fn = None
        
        if self._use_remote_api:
            logger.info(f"Using remote embedding API: {self._api_base_url}, model: {model_name}")
        
        # Where (and whether) embeddings are persisted
        self._enable_cache_persistence = enable_cache_persistence
        self._cache_dir = Path(
            cache_dir if cache_dir is not None
            else PROJECT_ROOT / ".anytool" / "embedding_cache"
        )
        
        logger.info(
            f"ToolRanker initialized: enable_cache_persistence={enable_cache_persistence}, "
            f"cache_dir={self._cache_dir}"
        )
        
        # In-memory cache, nested as
        # {backend: {server: {tool_name: {"embedding": np.ndarray, "description": str, "cached_at": str}}}}
        self._structured_cache: Dict[str, Dict[str, Dict[str, Dict[str, Any]]]] = {}
        
        # Reverse lookup kept for backward compatibility: {text -> (backend, server, tool_name)}
        self._text_to_key: Dict[str, Tuple[str, str, str]] = {}
        
        # Both caches must exist before the loader runs
        if self._enable_cache_persistence:
            logger.info(f"Loading persistent cache from {self._cache_dir}")
            self._load_persistent_cache()
    
    def _get_cache_key(self, tool: BaseTool) -> Tuple[str, str, str]:
        """Build the (backend, server, tool_name) key under which ``tool`` is cached.
        
        Bound tools take backend and server from their runtime info (server
        falls back to "default"). Unbound tools use their declared backend
        type, or "UNKNOWN" when it is missing or NOT_SET, with server "default".
        """
        if tool.is_bound:
            info = tool.runtime_info
            return (info.backend.value, info.server_name or "default", tool.name)
        
        declared = tool.backend_type
        if not declared or declared == BackendType.NOT_SET:
            backend = "UNKNOWN"
        else:
            backend = declared.value
        return (backend, "default", tool.name)
    
    def _get_cache_file_path(self) -> Path:
        """Get the cache file path for the current model."""
        # Use model name in filename to support multiple models
        safe_model_name = self._model_name.replace("/", "_").replace("\\", "_")
        return self._cache_dir / f"embeddings_{safe_model_name}_v{self.CACHE_VERSION}.pkl"
    
    def _load_persistent_cache(self) -> None:
        """Load embeddings from disk cache.
        
        The file for the current model is read and accepted only when it is a
        dict whose "version" equals ``CACHE_VERSION``. On a missing file
        nothing changes. On a version/format mismatch or any error, both the
        structured cache and the text index are reset to empty so that they
        never disagree with each other.
        """
        cache_file = self._get_cache_file_path()
        
        if not cache_file.exists():
            logger.debug(f"No persistent cache found at {cache_file}")
            return
        
        try:
            # NOTE(security): pickle.load can execute arbitrary code from the
            # file. The cache directory must only contain files written by
            # this application; never point it at untrusted data.
            with open(cache_file, 'rb') as f:
                data = pickle.load(f)
            
            # Validate cache version
            if isinstance(data, dict) and data.get("version") == self.CACHE_VERSION:
                self._structured_cache = data.get("embeddings", {})
                self._rebuild_text_index()
                
                # Count total embeddings
                total = sum(
                    len(tools) 
                    for backend in self._structured_cache.values() 
                    for tools in backend.values()
                )
                logger.info(f"Loaded {total} embeddings from cache: {cache_file}")
            else:
                logger.warning(f"Cache version mismatch or invalid format, starting fresh")
                self._structured_cache = {}
                self._text_to_key.clear()
        except Exception as exc:
            logger.warning(f"Failed to load persistent cache: {exc}")
            self._structured_cache = {}
            # A malformed payload can fail midway through the index rebuild;
            # drop the partial index together with the cache.
            self._text_to_key.clear()
    
    def _rebuild_text_index(self) -> None:
        """Rebuild text-to-key mapping for quick lookup."""
        self._text_to_key.clear()
        for backend, servers in self._structured_cache.items():
            for server, tools in servers.items():
                for tool_name, tool_data in tools.items():
                    desc = tool_data.get("description", "")
                    text = f"{tool_name}: {desc}"
                    self._text_to_key[text] = (backend, server, tool_name)
    
    def _save_persistent_cache(self) -> None:
        """Save embeddings to disk cache."""
        if not self._enable_cache_persistence or not self._structured_cache:
            return
        
        cache_file = self._get_cache_file_path()
        
        try:
            # Create directory if it doesn't exist
            cache_file.parent.mkdir(parents=True, exist_ok=True)
            
            # Build cache data with metadata
            cache_data = {
                "version": self.CACHE_VERSION,
                "model_name": self._model_name,
                "last_updated": datetime.now().isoformat(),
                "embeddings": self._structured_cache
            }
            
            # Save cache
            with open(cache_file, 'wb') as f:
                pickle.dump(cache_data, f, protocol=pickle.HIGHEST_PROTOCOL)
            
            # Count total embeddings
            total = sum(
                len(tools) 
                for backend in self._structured_cache.values() 
                for tools in backend.values()
            )
            logger.debug(f"Saved {total} embeddings to cache: {cache_file}")
        except Exception as exc:
            logger.warning(f"Failed to save persistent cache: {exc}")

    def rank(
        self,
        query: str,
        tools: List[BaseTool],
        *,
        top_k: int = 50,
        mode: SearchMode = SearchMode.SEMANTIC,
    ) -> List[Tuple[BaseTool, float]]:
        """Rank ``tools`` against ``query`` with the strategy selected by ``mode``.
        
        KEYWORD and SEMANTIC select their dedicated searches; any other mode
        is handled by the hybrid search. Returns (tool, score) pairs.
        """
        if mode == SearchMode.KEYWORD:
            search = self._keyword_search
        elif mode == SearchMode.SEMANTIC:
            search = self._semantic_search
        else:
            # hybrid
            search = self._hybrid_search
        return search(query, tools, top_k)

    @staticmethod
    def _tokenize(text: str) -> list[str]:
        tokens = re.split(r"[^\w]+", text.lower())
        tokens = [tok for tok in tokens if tok]
        return tokens

    def _keyword_search(
        self, query: str, tools: Iterable[BaseTool], top_k: int
    ) -> List[Tuple[BaseTool, float]]:
        """Score tools by keyword relevance of their name and description.
        
        BM25 (``rank_bm25``) is used when installed; otherwise the score is
        the fraction of distinct query tokens found in the tool text. The
        best ``top_k`` pairs are returned, except when nothing scores above
        or below zero: then every tool is returned with score 0.0.
        """
        try:
            from rank_bm25 import BM25Okapi  # type: ignore
        except ImportError:
            BM25Okapi = None  # term-overlap fallback is used instead

        tool_list = list(tools)
        if not tool_list:
            return []
        
        corpus_tokens: list[list[str]] = [self._tokenize(f"{t.name} {t.description}") for t in tool_list]
        query_tokens = self._tokenize(query)

        if BM25Okapi and corpus_tokens:
            raw_scores = BM25Okapi(corpus_tokens).get_scores(query_tokens)
            scored = [(t, float(s)) for t, s in zip(tool_list, raw_scores, strict=True)]
        else:
            # fallback: share of distinct query terms present in the tool text
            q_set = set(query_tokens)
            q_size = len(q_set)
            scored = [
                (t, len(q_set.intersection(toks)) / q_size if toks and q_size > 0 else 0.0)
                for t, toks in zip(tool_list, corpus_tokens, strict=True)
            ]

        ranked = sorted(scored, key=lambda pair: pair[1], reverse=True)
        result = ranked[:top_k]
        
        # No informative score at all: hand back every tool unranked
        if not any(score != 0.0 for _, score in result):
            logger.debug(f"Keyword search found no matches, returning all {len(tool_list)} tools")
            return [(t, 0.0) for t in tool_list]
        
        return result

    def _ensure_model(self) -> bool:
        """Ensure embedding model is ready (local or remote)."""
        if self._embedding_fn is not None:
            return True
        
        if self._use_remote_api:
            return self._init_remote_embedding()
        return self._init_local_embedding()

    def _init_remote_embedding(self) -> bool:
        """Initialize remote embedding API (OpenRouter/OpenAI compatible).
        
        Installs an embedding function that POSTs the texts to
        ``<base_url>/embeddings`` and returns one ``np.ndarray`` per input
        text. No request is made here; HTTP errors surface when the function
        is called.
        
        Returns:
            True once the embedding function is installed, False on failure.
        """
        try:
            def embed_texts(texts: List[str]) -> List[np.ndarray]:
                # Tolerate a base URL configured with a trailing slash
                endpoint = f"{self._api_base_url.rstrip('/')}/embeddings"
                with httpx.Client(timeout=60.0) as client:
                    response = client.post(
                        endpoint,
                        headers={
                            "Authorization": f"Bearer {self._api_key}",
                            "Content-Type": "application/json"
                        },
                        json={"model": self._model_name, "input": texts}
                    )
                    response.raise_for_status()
                    items = response.json()["data"]
                
                # Each item carries the position of its input in "index".
                # Honour it when every item has one, so that embeddings stay
                # aligned with ``texts`` even if the server reorders them.
                if all(isinstance(item.get("index"), int) for item in items):
                    items = sorted(items, key=lambda item: item["index"])
                return [np.array(item["embedding"]) for item in items]
            
            self._embedding_fn = embed_texts
            logger.info(f"Remote embedding API initialized: {self._model_name}")
            return True
        except Exception as exc:
            logger.error(f"Failed to initialize remote embedding API: {exc}")
            return False

    def _init_local_embedding(self) -> bool:
        """Load the local fastembed model and install it as the embedding function.
        
        Returns:
            True when the model is ready; False when fastembed is not
            installed or the model fails to load (both cases are logged).
        """
        # Step 1: the optional dependency must be importable
        try:
            from fastembed import TextEmbedding 
            logger.debug(f"fastembed imported successfully, loading model: {self._model_name}")
        except ImportError as e:
            logger.warning(
                f"fastembed not installed (ImportError: {e}), semantic search unavailable. "
                f"Install with: pip install fastembed"
            )
            return False
        
        # Step 2: instantiate the model and wrap it
        try:
            logger.info(f"Loading embedding model: {self._model_name}...")
            self._embed_model = TextEmbedding(model_name=self._model_name)

            def embed_locally(txts):
                # The model's embed() output is materialised into a list
                return list(self._embed_model.embed(txts))

            self._embedding_fn = embed_locally
            logger.info(f"Embedding model '{self._model_name}' loaded successfully")
            return True
        except Exception as exc:
            logger.error(f"Embedding model '{self._model_name}' loading failed: {exc}")
            return False

    def _get_embedding(self, tool: BaseTool) -> Optional[np.ndarray]:
        """Look up the cached embedding for ``tool``; None when nothing is cached."""
        backend, server, tool_name = self._get_cache_key(tool)
        
        try:
            entry = self._structured_cache[backend][server][tool_name]
        except KeyError:
            # Some level of the backend/server/tool nesting is absent
            return None
        
        return entry.get("embedding")
    
    def _set_embedding(self, tool: BaseTool, embedding: np.ndarray) -> None:
        """Cache ``embedding`` for ``tool`` and register the tool in the text index."""
        backend, server, tool_name = self._get_cache_key(tool)
        
        # Create the backend and server levels on first use
        server_bucket = self._structured_cache.setdefault(backend, {}).setdefault(server, {})
        
        # The entry keeps the description and a timestamp next to the vector
        server_bucket[tool_name] = {
            "embedding": embedding,
            "description": tool.description or "",
            "cached_at": datetime.now().isoformat()
        }
        
        # Keep the backward-compatible text lookup in sync
        text = f"{tool.name}: {tool.description}"
        self._text_to_key[text] = (backend, server, tool_name)
    
    def _semantic_search(
        self, query: str, tools: Iterable[BaseTool], top_k: int
    ) -> List[Tuple[BaseTool, float]]:
        """Rank tools by cosine similarity between query and tool embeddings.
        
        Embeddings of tools not yet cached are computed in one batch, cached,
        and persisted (when persistence is enabled). The query is embedded on
        every call.
        
        Returns:
            Up to ``top_k`` (tool, similarity) pairs, best first. An empty
            list is returned when no embedding backend is available or when
            embedding the tools or the query fails.
        """
        if not self._ensure_model():
            logger.debug("Semantic search unavailable, returning empty list")
            return []
        
        tools_list = list(tools)
        
        # Only tools without a cached embedding need to be embedded
        missing_tools = [t for t in tools_list if self._get_embedding(t) is None]
        
        if missing_tools:
            try:
                missing_texts = [f"{t.name}: {t.description}" for t in missing_tools]
                new_embs = self._embedding_fn(missing_texts)
                
                for tool, emb in zip(missing_tools, new_embs, strict=True):
                    self._set_embedding(tool, emb)
                
                logger.debug(f"Computed embeddings for {len(missing_tools)} new tools")
            except Exception as exc:
                logger.error("Failed to generate embeddings: %s", exc)
                return []
            
            # New embeddings were added: save to persistent cache
            self._save_persistent_cache()

        try:
            q_emb = self._embedding_fn([query])[0]
        except Exception as exc:
            logger.error("Failed to embed query: %s", exc)
            return []

        # The query norm is the same for every tool, so compute it once
        # instead of once per loop iteration.
        q_norm = np.linalg.norm(q_emb) if tools_list else 0.0

        scored: list[tuple[BaseTool, float]] = []
        for t in tools_list:
            emb = self._get_embedding(t)
            if emb is None:
                # Should not happen, but handle gracefully
                logger.warning(f"No embedding found for tool: {t.name}")
                scored.append((t, 0.0))
                continue
            
            # Cosine similarity with zero-division protection
            emb_norm = np.linalg.norm(emb)
            if q_norm == 0 or emb_norm == 0:
                sim = 0.0
            else:
                sim = float(np.dot(q_emb, emb) / (q_norm * emb_norm))
            scored.append((t, sim))
        
        scored.sort(key=lambda x: x[1], reverse=True)
        return scored[:top_k]

    def _hybrid_search(
        self, query: str, tools: Iterable[BaseTool], top_k: int
    ) -> List[Tuple[BaseTool, float]]:
        # keyword filter
        kw_top = self._keyword_search(query, tools, top_k * 3)
        if not kw_top:
            # No keyword matches, try semantic search
            semantic_results = self._semantic_search(query, tools, top_k)
            if semantic_results:
                return semantic_results
            # Both failed, return top N tools
            logger.warning("Both keyword and semantic search failed, returning top N tools")
            return [(t, 0.0) for t in list(tools)[:top_k]]
        
        # semantic ranking on keyword results
        semantic_results = self._semantic_search(query, [t for t, _ in kw_top], top_k)
        if semantic_results:
            return semantic_results
        
        # Semantic unavailable, return keyword results
        logger.debug("Semantic search unavailable, using keyword results only")
        return kw_top[:top_k]
    
    def get_cache_stats(self) -> Dict[str, Any]:
        """Summarise how many embeddings the structured cache holds.

        Returns:
            A dict shaped as::

                {
                    "total_embeddings": int,
                    "backends": {
                        "<backend>": {
                            "total": int,
                            "servers": {"<server>": int}  # tools per server
                        }
                    }
                }
        """
        per_backend: Dict[str, Any] = {}
        grand_total = 0

        for backend_name, server_map in self._structured_cache.items():
            # Number of cached tools under each server of this backend
            counts = {srv: len(entries) for srv, entries in server_map.items()}
            subtotal = sum(counts.values())

            per_backend[backend_name] = {"total": subtotal, "servers": counts}
            grand_total += subtotal

        return {"total_embeddings": grand_total, "backends": per_backend}
    
    def clear_cache(self, backend: Optional[str] = None, server: Optional[str] = None) -> int:
        """Remove cached embeddings, optionally scoped to a backend or server.

        Args:
            backend: Backend to clear. When None the whole cache is cleared.
            server: Server to clear inside ``backend``. Only consulted when
                ``backend`` is given.

        Returns:
            How many embeddings were removed.
        """
        cache = self._structured_cache
        removed = 0

        if backend is None:
            # Whole cache
            removed = sum(
                len(entries)
                for servers in cache.values()
                for entries in servers.values()
            )
            cache.clear()
            self._text_to_key.clear()
        elif backend in cache:
            if server is None:
                # One backend with all of its servers
                removed = sum(len(entries) for entries in cache[backend].values())
                del cache[backend]
                self._rebuild_text_index()
            elif server in cache[backend]:
                # A single server of one backend
                removed = len(cache[backend][server])
                del cache[backend][server]
                # Drop the backend level once it has no servers left
                if not cache[backend]:
                    del cache[backend]
                self._rebuild_text_index()

        # Write the change through only when something was actually removed
        if removed > 0 and self._enable_cache_persistence:
            self._save_persistent_cache()
            logger.info(f"Cleared {removed} embeddings from cache")

        return removed


class SearchDebugInfo:
    """Mutable record of what happened during one tool search."""

    def __init__(self):
        # Search setup and candidate counts
        self.search_mode: str = ""
        self.total_candidates: int = 0
        self.mcp_count: int = 0
        self.non_mcp_count: int = 0

        # Outcome of the LLM pre-filter
        self.llm_filter_used: bool = False
        self.llm_brief_plan: str = ""
        # server -> tool names
        self.llm_utility_tools: Dict[str, List[str]] = {}
        self.llm_domain_servers: List[str] = []
        self.llm_utility_count: int = 0
        self.llm_domain_count: int = 0

        # Ranking scores: [{name, server, score, selected}]
        self.tool_scores: List[Dict[str, Any]] = []

        # Final selection: [{name, server, backend}]
        self.selected_tools: List[Dict[str, Any]] = []

    def to_dict(self) -> Dict[str, Any]:
        """Return the record as a plain dict, with LLM fields under "llm_filter"."""
        llm_section = {
            "used": self.llm_filter_used,
            "brief_plan": self.llm_brief_plan,
            "utility_tools": self.llm_utility_tools,
            "domain_servers": self.llm_domain_servers,
            "utility_count": self.llm_utility_count,
            "domain_count": self.llm_domain_count,
        }
        payload = {
            "search_mode": self.search_mode,
            "total_candidates": self.total_candidates,
            "mcp_count": self.mcp_count,
            "non_mcp_count": self.non_mcp_count,
            "llm_filter": llm_section,
            "tool_scores": self.tool_scores,
            "selected_tools": self.selected_tools,
        }
        return payload


class SearchCoordinator(BaseTool):
    _name = "_filter_tools"
    _description = "Internal helper: filter & rank tools from a given list."
    
    # Fallback defaults when config loading fails
    DEFAULT_MAX_TOOLS: int = 20
    DEFAULT_LLM_FILTER: bool = True
    DEFAULT_LLM_THRESHOLD: int = 50
    DEFAULT_CACHE_PERSISTENCE: bool = False
    DEFAULT_SEARCH_MODE: str = "hybrid"

    @classmethod
    def get_parameters_schema(cls) -> Dict[str, Any]:
        """Return an empty parameter schema for this internal tool.

        ``_arun`` accepts ``candidate_tools: list[BaseTool]``. BaseTool is an
        ABC class rather than a Pydantic model, so that parameter cannot be
        turned into JSON Schema; the override sidesteps schema generation.
        """
        empty_schema: Dict[str, Any] = {}
        return empty_schema

    def __init__(
        self,
        *,
        max_tools: Optional[int] = None,
        llm: Optional[LLMClient] = None,
        enable_llm_filter: Optional[bool] = None,
        llm_filter_threshold: Optional[int] = None,
        enable_cache_persistence: Optional[bool] = None,
        cache_dir: Optional[str | Path] = None,
        quality_manager: Optional["ToolQualityManager"] = None,
        enable_quality_ranking: bool = True,
    ):
        """Create a SearchCoordinator.

        Settings left as None are resolved with the priority
        explicit argument -> ``tool_search`` config section -> class default.

        Args:
            max_tools: max number of tools to return. If None, uses the config
                value, else ``DEFAULT_MAX_TOOLS``.
            llm: async LLM client used to pre-filter servers and to enrich the
                search query. If None, a new ``LLMClient()`` is created for
                this coordinator.
            enable_llm_filter: whether to use the LLM to pre-filter by server.
                If None, uses the config value, else ``DEFAULT_LLM_FILTER``.
            llm_filter_threshold: the LLM filter is applied only when the MCP
                tool count is greater than this value. If None, uses the
                config value, else ``DEFAULT_LLM_THRESHOLD``.
            enable_cache_persistence: whether to persist embeddings to disk.
                If None, uses the config value, else ``DEFAULT_CACHE_PERSISTENCE``.
            cache_dir: directory for the persistent embedding cache. If None,
                uses the config value; if that is unset too, None is passed on
                to the ranker.
            quality_manager: optional manager whose ``adjust_ranking`` is
                applied to the selected MCP tools.
            enable_quality_ranking: whether ``quality_manager`` is applied.
        """
        super().__init__()

        # Load config (may be None if loading fails)
        tool_search_config = None
        try:
            from anytool.config import get_config
            tool_search_config = getattr(get_config(), 'tool_search', None)
        except Exception as exc:
            logger.warning(f"Failed to load config: {exc}")

        def resolve(user_value, config_attr: str, default):
            """Priority: user_value → config → default"""
            if user_value is not None:
                return user_value
            if tool_search_config is not None:
                config_value = getattr(tool_search_config, config_attr, None)
                if config_value is not None:
                    return config_value
            return default

        # Resolve each setting with priority: user → config → default
        self.max_tools = resolve(max_tools, 'max_tools', self.DEFAULT_MAX_TOOLS)
        enable_llm_filter = resolve(enable_llm_filter, 'enable_llm_filter', self.DEFAULT_LLM_FILTER)
        llm_filter_threshold = resolve(llm_filter_threshold, 'llm_filter_threshold', self.DEFAULT_LLM_THRESHOLD)
        enable_cache_persistence = resolve(enable_cache_persistence, 'enable_cache_persistence', self.DEFAULT_CACHE_PERSISTENCE)
        cache_dir = resolve(cache_dir, 'cache_dir', None)
        # The search mode has no constructor argument: config or default only
        self._default_mode = resolve(None, 'search_mode', self.DEFAULT_SEARCH_MODE)

        # Log cache settings for debugging
        logger.info(
            f"SearchCoordinator initialized with cache settings: "
            f"enable_cache_persistence={enable_cache_persistence}, cache_dir={cache_dir}"
        )

        self._ranker = ToolRanker(
            enable_cache_persistence=enable_cache_persistence,
            cache_dir=cache_dir
        )
        # Build the client here rather than in the signature: a default of
        # ``LLMClient()`` is evaluated once when the class is defined and would
        # then be shared by every coordinator.
        self._llm: LLMClient | None = llm if llm is not None else LLMClient()

        # LLM filter settings
        self._enable_llm_filter = enable_llm_filter
        self._llm_filter_threshold = llm_filter_threshold

        # Quality-aware ranking settings
        self._quality_manager = quality_manager
        self._enable_quality_ranking = enable_quality_ranking

        # Debug info from last search
        self._last_search_debug_info: Optional[SearchDebugInfo] = None

    async def _arun(
        self,
        task_prompt: str,
        candidate_tools: list[BaseTool],
        *,
        max_tools: int | None = None,
        mode: str | None = None, # "semantic" | "keyword" | "hybrid"
    ) -> list[BaseTool]:
        """Select the tools relevant to ``task_prompt`` from ``candidate_tools``.

        Only tools whose backend is "mcp" are filtered; all other tools are
        appended to the result untouched. MCP tools are returned as-is when
        there are at most ``max_tools`` of them. Otherwise they go through the
        LLM server pre-filter (when enabled and their count exceeds the
        configured threshold) or through ranking with an LLM-enriched query.
        Results are memoised per (tools, prompt, mode, max_tools), and debug
        details are kept for ``get_last_search_debug_info``.

        Args:
            task_prompt: Task description used as the search query.
            candidate_tools: Tools to choose from.
            max_tools: Cap on the number of MCP tools returned; defaults to
                ``self.max_tools``.
            mode: Search mode name; defaults to the configured mode.

        Returns:
            Selected MCP tools followed by every non-MCP tool.
        """
        max_tools = self.max_tools if max_tools is None else max_tools
        mode = self._default_mode if mode is None else mode

        # Initialize debug info
        debug_info = SearchDebugInfo()
        debug_info.search_mode = mode
        debug_info.total_candidates = len(candidate_tools)
        self._last_search_debug_info = debug_info

        # Cache check. The key is built from the identities of the tools, not
        # from id(candidate_tools): the list's id stays the same when the list
        # is mutated in place and may be reused by a later, different list.
        cache_key = (tuple(id(t) for t in candidate_tools), task_prompt, mode, max_tools)
        if not hasattr(self, "_query_cache"):
            self._query_cache: Dict[tuple, list[BaseTool]] = {}
        cached = self._query_cache.get(cache_key)
        if cached is not None:
            # Keep the debug record meaningful on a cache hit as well
            self._populate_selected_tools(debug_info, cached)
            return cached

        # Split MCP tools and non-MCP tools
        # Non-MCP tools (shell, gui, web, etc.) are always included, skip all filtering
        mcp_tools = []
        non_mcp_tools = []

        for t in candidate_tools:
            # Same runtime_info guard as _populate_selected_tools
            if t.is_bound and t.runtime_info:
                backend = t.runtime_info.backend.value
            else:
                backend = t.backend_type.value if t.backend_type else "UNKNOWN"

            if backend.lower() == "mcp":
                mcp_tools.append(t)
            else:
                non_mcp_tools.append(t)

        debug_info.mcp_count = len(mcp_tools)
        debug_info.non_mcp_count = len(non_mcp_tools)
        logger.info(f"Tool split: {len(mcp_tools)} MCP, {len(non_mcp_tools)} non-MCP (always included)")

        # If MCP tools within limit, return all
        if len(mcp_tools) <= max_tools:
            result = mcp_tools + non_mcp_tools
            self._query_cache[cache_key] = result
            self._populate_selected_tools(debug_info, result)
            return result

        mcp_count = len(mcp_tools)
        should_use_llm_filter = (
            self._llm and
            self._enable_llm_filter and
            mcp_count > self._llm_filter_threshold
        )

        # Path 1: LLM pre-filter (large MCP tool set)
        if should_use_llm_filter:
            logger.info(f"Path 1: MCP count ({mcp_count}) > threshold, using LLM filter...")
            debug_info.llm_filter_used = True

            try:
                utility_tools, domain_tools, llm_filter_info = await self._llm_filter_with_planning(
                    task_prompt, mcp_tools
                )

                # Record LLM filter results
                debug_info.llm_brief_plan = llm_filter_info.get("brief_plan", "")
                debug_info.llm_utility_tools = llm_filter_info.get("utility_tools", {})
                debug_info.llm_domain_servers = llm_filter_info.get("domain_servers", [])

                utility_count = len(utility_tools)
                domain_count = len(domain_tools)
                debug_info.llm_utility_count = utility_count
                debug_info.llm_domain_count = domain_count
                total_count = utility_count + domain_count

                if total_count <= max_tools:
                    mcp_result = utility_tools + domain_tools
                else:
                    # Exceeds limit: keep utility, search domain
                    domain_quota = max(max_tools - utility_count, 5)
                    logger.info(
                        f"Total ({total_count}) > max_tools ({max_tools}), "
                        f"keeping {utility_count} utility, searching {domain_count} domain (quota: {domain_quota})"
                    )

                    # Compute scores for utility tools (marked as LLM-selected)
                    if utility_tools:
                        utility_ranked = self._ranker.rank(
                            task_prompt, utility_tools,
                            top_k=len(utility_tools), mode=SearchMode(mode)
                        )
                        self._record_tool_scores(debug_info, utility_ranked, is_selected=True)

                    if domain_tools:
                        # Rank all domain tools to see all scores for debugging
                        all_domain_ranked = self._ranker.rank(
                            task_prompt, domain_tools,
                            top_k=len(domain_tools), mode=SearchMode(mode)
                        )
                        # Save scores for all domain tools (mark which ones are selected)
                        for i, (tool, score) in enumerate(all_domain_ranked):
                            server_name = None
                            if tool.is_bound and tool.runtime_info:
                                server_name = tool.runtime_info.server_name
                            debug_info.tool_scores.append({
                                "name": tool.name,
                                "server": server_name,
                                "score": round(score, 4),
                                "selected": i < domain_quota,
                            })
                        searched_domain = [t for t, _ in all_domain_ranked[:domain_quota]]
                    else:
                        searched_domain = []

                    mcp_result = utility_tools + searched_domain

            except Exception as exc:
                logger.warning(f"LLM filter failed ({exc}), fallback to direct ranking")
                ranked = self._ranker.rank(task_prompt, mcp_tools, top_k=max_tools, mode=SearchMode(mode))
                self._record_tool_scores(debug_info, ranked, is_selected=True)
                mcp_result = [t for t, _ in ranked]

        # Path 2: Plan-enhanced search (small MCP tool set)
        else:
            logger.info(f"Path 2: MCP count ({mcp_count}) <= threshold, using enhanced search...")
            debug_info.llm_filter_used = False

            if self._llm:
                try:
                    enhanced_query = await self._generate_search_query(task_prompt)
                except Exception as exc:
                    # Best effort: search with the raw prompt instead
                    logger.debug(f"Search query enhancement failed ({exc}), using task prompt")
                    enhanced_query = task_prompt
            else:
                enhanced_query = task_prompt

            try:
                ranked = self._ranker.rank(
                    enhanced_query, mcp_tools,
                    top_k=max_tools, mode=SearchMode(mode)
                )
                # Record all scores from semantic search
                self._record_tool_scores(debug_info, ranked, is_selected=True)
                mcp_result = [t for t, _ in ranked]
            except Exception as exc:
                logger.warning(f"Ranking failed ({exc}), fallback to keyword search")
                ranked = self._ranker._keyword_search(
                    enhanced_query, mcp_tools, max_tools
                )
                self._record_tool_scores(debug_info, ranked, is_selected=True)
                mcp_result = [t for t, _ in ranked]

        # Apply quality ranking on MCP results
        if self._enable_quality_ranking and self._quality_manager and mcp_result:
            try:
                ranked_with_scores = [(t, 1.0) for t in mcp_result]
                ranked_with_scores = self._quality_manager.adjust_ranking(ranked_with_scores)
                mcp_result = [t for t, _ in ranked_with_scores]
            except Exception as exc:
                # Best effort: keep the unadjusted order, but leave a trace
                logger.warning(f"Quality ranking failed ({exc}), keeping original order")

        # Limit MCP tools, then combine with non-MCP tools
        mcp_result = mcp_result[:max_tools]
        result = mcp_result + non_mcp_tools

        # Populate final selected tools in debug info
        self._populate_selected_tools(debug_info, result)

        self._log_search_results(candidate_tools, result, mode)
        self._query_cache[cache_key] = result
        return result
    
    def _record_tool_scores(
        self,
        debug_info: SearchDebugInfo,
        ranked: List[Tuple[BaseTool, float]],
        is_selected: bool = False
    ) -> None:
        """Append one score entry per ranked tool to ``debug_info.tool_scores``.

        Each entry holds the tool name, its server (None unless the tool is
        bound and has runtime info), the score rounded to four decimals, and
        the ``is_selected`` flag.
        """
        for tool, score in ranked:
            has_runtime = tool.is_bound and tool.runtime_info
            entry = {
                "name": tool.name,
                "server": tool.runtime_info.server_name if has_runtime else None,
                "score": round(score, 4),
                "selected": is_selected,
            }
            debug_info.tool_scores.append(entry)
    
    def _populate_selected_tools(
        self,
        debug_info: SearchDebugInfo,
        tools: List[BaseTool]
    ) -> None:
        """Append a name/server/backend entry per tool to ``debug_info.selected_tools``.

        Bound tools with runtime info report their runtime backend and server;
        other tools report their declared backend type, or "UNKNOWN" when none
        is set, with a server of None.
        """
        for tool in tools:
            server_name = None

            if tool.is_bound and tool.runtime_info:
                runtime = tool.runtime_info
                backend = runtime.backend.value
                server_name = runtime.server_name
            elif tool.backend_type:
                backend = tool.backend_type.value
            else:
                backend = "UNKNOWN"

            entry = {
                "name": tool.name,
                "server": server_name,
                "backend": backend,
            }
            debug_info.selected_tools.append(entry)

    async def _llm_filter_with_planning(
        self, 
        task_prompt: str, 
        tools: list[BaseTool]
    ) -> tuple[list[BaseTool], list[BaseTool], Dict[str, Any]]:
        """
        LLM pre-filter for MCP servers.
        Returns (utility_tools, domain_tools, llm_filter_info).
        """
        from collections import defaultdict
        
        # Group tools by server name
        server_tools: Dict[str, list[BaseTool]] = defaultdict(list)
        for t in tools:
            if t.is_bound and t.runtime_info:
                server = t.runtime_info.server_name or "default"
            else:
                server = "unknown"
            server_tools[server].append(t)

        # Build tool name -> tool object mapping
        tool_name_map: Dict[str, BaseTool] = {t.name: t for t in tools}

        # Build server description with tool names
        lines: list[str] = ["Available MCP servers:"]
        lines.append("")
        
        for server, tool_list in server_tools.items():
            lines.append(f"### Server: {server} ({len(tool_list)} tools)")
            tool_names = [t.name for t in tool_list]
            lines.append(f"  All tools: {', '.join(tool_names)}")
            if tool_list:
                lines.append(f"  Example capabilities:")
                for tool in tool_list[:5]:
                    tool_desc = tool.description or "No description"
                    if len(tool_desc) > 100:
                        tool_desc = tool_desc[:97] + "..."
                    lines.append(f"    - {tool.name}: {tool_desc}")
            lines.append("")

        servers_block = "\n".join(lines)

        TOOL_FILTER_SYSTEM_PROMPT = f"""You are an expert tool selection assistant.

# Your task
Analyze the given task and determine which MCP servers and tools are needed.
Think about how you would accomplish this task step by step, then classify needed servers and tools.

# Important guidelines
- **Focus on tool names and capabilities**: Carefully examine the tool names to understand what each server can do
- **Be inclusive for domain servers**: If a server has tools that might be relevant to the core task, include it
- **Be precise for utility tools**: Only select the specific auxiliary tools needed (e.g., file save, time query)
- **When in doubt, include in domain_servers**: It's better to include a server than miss relevant tools

{servers_block}

# Output format
Return ONLY a JSON object (no markdown, no explanation):
{{
  "brief_plan": "1-2 sentence execution plan",
  "utility_tools": {{
    "server1": ["tool1", "tool2"]
  }},
  "domain_servers": ["server2", "server3"]
}}

- **utility_tools**: Dict mapping server name to list of specific tool names.
  These are auxiliary tools for supporting operations (e.g., filesystem: ["write_file"], time-server: ["get_time"]).
  Only include the specific tools needed, NOT the entire server.
- **domain_servers**: Server names that directly provide the main capabilities for the task.
  All tools from these servers will be considered. Be inclusive here."""

        user_query = f"Task: {task_prompt}\n\nClassify the needed servers and tools."

        messages_text = LLMClient.format_messages_to_text([
            {"role": "system", "content": TOOL_FILTER_SYSTEM_PROMPT},
            {"role": "user", "content": user_query}
        ])
        resp = await self._llm.complete(messages_text)
        content = resp["message"]["content"].strip()
        
        # Extract JSON
        code_block_pattern = r'```(?:json)?\s*\n?(.*?)\n?```'
        match = re.search(code_block_pattern, content, re.DOTALL)
        if match:
            content = match.group(1).strip()
        else:
            json_match = re.search(r'\{.*\}', content, re.DOTALL)
            if json_match:
                content = json_match.group()
        
        try:
            result = json.loads(content)
        except json.JSONDecodeError as e:
            logger.warning(f"Failed to parse LLM response: {e}")
            return [], tools
        
        # Parse utility_tools: {server: [tool_names]}
        utility_tools_config = result.get("utility_tools", {})
        domain_servers = set(result.get("domain_servers", []))
        brief_plan = result.get("brief_plan", "N/A")
        
        logger.info(f"LLM Planning: {brief_plan}")
        logger.info(f"Utility tools: {utility_tools_config}")
        logger.info(f"Domain servers: {domain_servers}")
        
        # Collect utility tools (specific tools only)
        utility_tools = []
        for server_name, tool_names in utility_tools_config.items():
            if server_name in server_tools:
                server_tool_names = {t.name for t in server_tools[server_name]}
                for tool_name in tool_names:
                    if tool_name in server_tool_names and tool_name in tool_name_map:
                        utility_tools.append(tool_name_map[tool_name])
        
        # Collect domain tools (entire servers)
        domain_tools = []
        for server, tool_list in server_tools.items():
            if server in domain_servers:
                domain_tools.extend(tool_list)
        
        logger.info(f"LLM filter result: {len(utility_tools)} utility tools, {len(domain_tools)} domain tools")
        
        # Build LLM filter info for debugging
        llm_filter_info = {
            "brief_plan": brief_plan,
            "utility_tools": utility_tools_config,
            "domain_servers": list(domain_servers),
        }
        
        # Fallback if no match
        if not utility_tools and not domain_tools:
            logger.warning(f"LLM filter matched 0 tools, returning all as domain")
            return [], tools, llm_filter_info
        
        return utility_tools, domain_tools, llm_filter_info

    async def _generate_search_query(self, task_prompt: str) -> str:
        """Return ``task_prompt`` followed by capability keywords from the LLM.

        Newlines in the LLM reply are replaced by spaces before it is appended.
        """
        keyword_prompt = (
            f"Task: {task_prompt}\n"
            "\n"
            "List keywords for the capabilities needed (comma-separated, brief):"
        )

        resp = await self._llm.complete(keyword_prompt)
        reply_text = resp["message"]["content"]
        capabilities = reply_text.strip().replace("\n", " ")

        enhanced_query = f"{task_prompt} {capabilities}"
        logger.debug(f"Enhanced search query: {enhanced_query[:150]}...")

        return enhanced_query

    def _log_search_results(self, all_tools: list[BaseTool], filtered_tools: list[BaseTool], mode: str) -> None:
        """
        Log search results in a concise, grouped format.
        Shows backend/server breakdown and tool names (truncated if too many).

        Args:
            all_tools: Every candidate considered (only its length is used).
            filtered_tools: Tools that were selected.
            mode: Search mode name shown in the header.
        """
        from collections import defaultdict
        
        # Group filtered tools by backend and server
        grouped: Dict[str, Dict[str | None, list[str]]] = defaultdict(lambda: defaultdict(list))
        
        for t in filtered_tools:
            # Get backend and server info (same runtime_info guard as
            # _populate_selected_tools)
            if t.is_bound and t.runtime_info:
                backend = t.runtime_info.backend.value
                server = t.runtime_info.server_name if backend.lower() == "mcp" else None
            elif not t.backend_type or t.backend_type == BackendType.NOT_SET:
                backend = "UNKNOWN"
                server = None
            else:
                backend = t.backend_type.value
                server = None
            
            grouped[backend][server].append(t.name)
        
        # Build concise summary
        lines = [f"\n{'='*60}"]
        lines.append(f"🔍 Tool Search Results (mode: {mode})")
        lines.append(f"   {len(all_tools)} candidates → {len(filtered_tools)} selected tools")
        lines.append(f"{'='*60}")
        
        for backend, srv_map in sorted(grouped.items()):
            backend_total = sum(len(tools) for tools in srv_map.values())
            lines.append(f"\n📦 {backend} ({backend_total} tools)")
            
            # Server keys may mix None and str, which cannot be compared with
            # each other; sort None as the empty string so logging never raises.
            for server, tool_names in sorted(srv_map.items(), key=lambda item: item[0] or ""):
                if backend.lower() == "mcp" and server:
                    prefix = f"   └─ {server}: "
                else:
                    prefix = "   └─ "
                
                # Limit display to avoid overwhelming output
                if len(tool_names) <= 8:
                    tools_display = ", ".join(tool_names)
                else:
                    tools_display = ", ".join(tool_names[:8]) + f" ... (+{len(tool_names)-8} more)"
                
                lines.append(f"{prefix}{tools_display}")
        
        lines.append(f"{'='*60}\n")
        
        # Use info level so users can see it
        logger.info("\n".join(lines))

    @staticmethod
    def _format_tool_list(tools: list[BaseTool]) -> str:
        """Render ``tools`` as a numbered list (from 1) under a total-count header."""
        header = f"Total {len(tools)} tools, list out directly:\n\n"
        numbered = "\n".join(
            f"{position}. **{tool.name}**: {tool.description}"
            for position, tool in enumerate(tools, start=1)
        )
        return header + numbered

    @staticmethod
    def _format_ranked(results: list[tuple[BaseTool, float]], mode: SearchMode) -> str:
        """Render ranked ``(tool, score)`` pairs as numbered text with 3-decimal scores."""
        header = f"Search results (mode={mode}) total {len(results)}:\n"
        entries = [
            f"{rank}. {tool.name}  (score: {score:.3f})\n    {tool.description}"
            for rank, (tool, score) in enumerate(results, start=1)
        ]
        return "\n".join([header, *entries])

    def _run(self, *args, **kwargs):
        """Synchronous entry point; not supported, always raises NotImplementedError."""
        message = "SearchCoordinator only supports asynchronous calls. Use _arun instead."
        raise NotImplementedError(message)
    
    def get_embedding_cache_stats(self) -> Dict[str, Any]:
        """Report embedding cache statistics from the underlying ranker.

        Returns:
            Whatever the ranker's ``get_cache_stats`` returns: total embeddings
            plus a breakdown by backend/server.
        """
        ranker_stats = self._ranker.get_cache_stats()
        return ranker_stats
    
    def clear_embedding_cache(self, backend: Optional[str] = None, server: Optional[str] = None) -> int:
        """Drop cached embeddings by delegating to the ranker's ``clear_cache``.

        Args:
            backend: Restrict clearing to this backend; None clears everything.
            server: Restrict clearing to this server (used together with ``backend``).

        Returns:
            The number of embeddings cleared, as reported by the ranker.
        """
        cleared = self._ranker.clear_cache(backend=backend, server=server)
        return cleared
    
    def get_last_search_debug_info(self) -> Optional[Dict[str, Any]]:
        """Return the debug record of the most recent search as a dict.

        Returns:
            None when no search has run yet; otherwise a dict with:
                - search_mode: the search mode used
                - total_candidates: number of candidate tools
                - mcp_count/non_mcp_count: tool counts by type
                - llm_filter: LLM filter details
                - tool_scores: recorded scores per tool
                - selected_tools: the final selection
        """
        last_info = self._last_search_debug_info
        return None if last_info is None else last_info.to_dict()

================================================
FILE: anytool/grounding/core/security/__init__.py
================================================
from .sandbox import BaseSandbox, SandboxManager
from .policies import SecurityPolicyManager, SecurityPolicy

# E2B support is optional. The e2b_sandbox module catches a missing
# e2b_code_interpreter package itself and records the outcome in its own
# E2B_AVAILABLE flag, so importing the module succeeds even without the SDK.
# Re-export that flag instead of inferring availability from the import.
try:
    from .e2b_sandbox import E2BSandbox, E2B_AVAILABLE
except ImportError:
    # Defensive fallback: the module itself could not be imported
    E2BSandbox = None
    E2B_AVAILABLE = False

__all__ = [
    "BaseSandbox",
    "SandboxManager",
    "SecurityPolicyManager",
    "SecurityPolicy"
]

# Only advertise E2BSandbox when the SDK is actually usable
if E2B_AVAILABLE:
    __all__.append("E2BSandbox")

================================================
FILE: anytool/grounding/core/security/e2b_sandbox.py
================================================
"""
E2B Sandbox implementation.

This module provides a concrete implementation of BaseSandbox using E2B.
"""

import os
from typing import Any, Dict, Optional, TYPE_CHECKING

from anytool.utils.logging import Logger
from .sandbox import BaseSandbox
from ..types import SandboxOptions

logger = Logger.get_logger(__name__)

# Import E2B SDK components (optional dependency)
if TYPE_CHECKING:
    # For type checking purposes only
    try:
        from e2b_code_interpreter import CommandHandle, Sandbox
    except ImportError:
        CommandHandle = None  # type: ignore
        Sandbox = None  # type: ignore

try:
    logger.debug("Attempting to import e2b_code_interpreter...")
    from e2b_code_interpreter import (  # type: ignore
        CommandHandle,
        Sandbox,
    )
    logger.debug("Successfully imported e2b_code_interpreter")
    E2B_AVAILABLE = True
except ImportError as e:
    logger.debug(f"Failed to import e2b_code_interpreter: {e}")
    CommandHandle = None  # type: ignore
    Sandbox = None  # type: ignore
    E2B_AVAILABLE = False


class E2BSandbox(BaseSandbox):
    """BaseSandbox implementation that runs commands inside an E2B sandbox."""

    def __init__(self, options: SandboxOptions):
        """Validate prerequisites and read configuration; no sandbox is created here.

        Args:
            options: Sandbox configuration options. Keys read:
                - api_key: E2B API key (falls back to the E2B_API_KEY env var)
                - sandbox_template_id: Template ID for the sandbox (default: "base")
                - timeout: Command execution timeout in seconds (default: 600)

        Raises:
            ImportError: If the e2b-code-interpreter package could not be imported.
            ValueError: If no API key is found in ``options`` or the environment.
        """
        super().__init__(options)

        if not E2B_AVAILABLE:
            raise ImportError(
                "E2B SDK (e2b-code-interpreter) not found. Please install it with "
                "'pip install e2b-code-interpreter'."
            )

        # An explicit option takes precedence over the environment variable
        self.api_key = options.get("api_key") or os.environ.get("E2B_API_KEY")
        if not self.api_key:
            raise ValueError(
                "E2B API key is required. Provide it via 'options.api_key'"
                " or the E2B_API_KEY environment variable."
            )

        self.sandbox_template_id = options.get("sandbox_template_id", "base")
        self.timeout = options.get("timeout", 600)  # seconds; 10 minutes by default

        # Typed as Any because the SDK classes are an optional dependency
        self._sandbox: Any = None
        self._process: Any = None

    async def start(self) -> bool:
        """Create the E2B sandbox if it is not already running.

        Returns:
            True if the sandbox is (or already was) active, False if creation failed.
        """
        if self._active:
            logger.debug("E2B sandbox already active")
            return True

        try:
            logger.debug(f"Creating E2B sandbox with template: {self.sandbox_template_id}")
            self._sandbox = Sandbox(
                template=self.sandbox_template_id,
                api_key=self.api_key,
            )
            self._active = True
            logger.info(f"E2B sandbox started successfully (template: {self.sandbox_template_id})")
        except Exception as exc:
            # Failures are reported through the return value, not raised
            logger.error(f"Failed to start E2B sandbox: {exc}")
            self._active = False
            return False
        return True

    async def stop(self) -> None:
        """Kill the tracked process (if any) and the sandbox, then mark inactive.

        Errors from either ``kill()`` call are logged as warnings and do not
        prevent the remaining cleanup.
        """
        if not self._active:
            logger.debug("E2B sandbox not active")
            return

        try:
            # Step 1: the most recently started command
            if self._process:
                try:
                    logger.debug("Terminating sandbox process")
                    self._process.kill()
                except Exception as exc:
                    logger.warning(f"Error terminating sandbox process: {exc}")
                finally:
                    self._process = None

            # Step 2: the sandbox itself
            if self._sandbox:
                try:
                    logger.debug("Closing E2B sandbox instance")
                    self._sandbox.kill()
                    logger.info("E2B sandbox stopped successfully")
                except Exception as exc:
                    logger.warning(f"Error closing E2B sandbox: {exc}")
                finally:
                    self._sandbox = None

            self._active = False

        except Exception as exc:
            logger.error(f"Error stopping E2B sandbox: {exc}")
            raise

    async def execute_safe(self, command: str, **kwargs) -> Any:
        """Run a command through the sandbox's ``commands.run``.

        Args:
            command: The command to execute
            **kwargs: Additional options:
                - envs: Environment variables (dict, default ``{}``)
                - timeout: Command timeout; defaults to ``self.timeout * 1000``.
                  NOTE(review): documented here as milliseconds -- confirm the
                  unit that the E2B SDK's ``commands.run`` actually expects.
                - background: Run in background (bool, default False)
                - on_stdout: Stdout callback function
                - on_stderr: Stderr callback function

        Returns:
            Whatever ``commands.run`` returns; it is also stored as the current process.

        Raises:
            RuntimeError: If the sandbox has not been started.
            Exception: Any error from the SDK call is logged and re-raised.
        """
        if not (self._active and self._sandbox):
            raise RuntimeError("E2B sandbox is not active. Call start() first.")

        try:
            run_options = {
                "envs": kwargs.get("envs", {}),
                "timeout": kwargs.get("timeout", self.timeout * 1000),
                "background": kwargs.get("background", False),
                "on_stdout": kwargs.get("on_stdout"),
                "on_stderr": kwargs.get("on_stderr"),
            }

            logger.debug(f"Executing command in E2B sandbox: {command}")

            self._process = self._sandbox.commands.run(command, **run_options)
            return self._process

        except Exception as exc:
            logger.error(f"Failed to execute command in E2B sandbox: {exc}")
            raise

    def get_connector(self) -> Any:
        """Return the underlying E2B Sandbox instance, or None if none is held."""
        return self._sandbox

    def get_host(self, port: int) -> str:
        """Ask the sandbox for the host string of a given port.

        Args:
            port: The port number to get the host for

        Returns:
            The value returned by the sandbox's ``get_host(port)``.

        Raises:
            RuntimeError: If sandbox is not active
        """
        if not (self._active and self._sandbox):
            raise RuntimeError("E2B sandbox is not active. Call start() first.")

        return self._sandbox.get_host(port)

    @property
    def sandbox(self) -> Any:
        """The underlying E2B Sandbox instance (None when not held)."""
        return self._sandbox

    @property
    def process(self) -> Any:
        """Handle of the most recently started command (None when not held)."""
        return self._process


================================================
FILE: anytool/grounding/core/security/policies.py
================================================
import asyncio
import sys
from typing import Callable, Awaitable, Dict, Optional
from ..types import SecurityPolicy, BackendType

PromptFunc = Callable[[str], Awaitable[bool]] 


# ANSI color codes
class Colors:
    """ANSI escape sequences used to style terminal output in this module."""
    RESET = "\033[0m"  # clears any styling applied so far
    BOLD = "\033[1m"
    # Codes 90-97 select the bright foreground colours
    RED = "\033[91m"
    YELLOW = "\033[93m"
    GREEN = "\033[92m"
    CYAN = "\033[96m"
    GRAY = "\033[90m"
    WHITE = "\033[97m"


class SecurityPolicyManager:
    """Resolve security policies per backend and confirm rejected actions with the user.

    Policy lookup order: backend-specific policy, then the global policy, then
    a default ``SecurityPolicy()``. When the resolved policy rejects a command
    or domain, the prompt callback decides; any exception raised while
    prompting is treated as a denial.
    """

    def __init__(self, prompt: PromptFunc | None = None):
        """
        Args:
            prompt: Async callback receiving the warning text and returning
                True to allow. Defaults to the interactive CLI prompt.
        """
        self._policies: Dict[BackendType, SecurityPolicy] = {}
        self._global_policy: Optional[SecurityPolicy] = None
        self._prompt: PromptFunc | None = prompt or self._default_cli_prompt

    async def _default_cli_prompt(self, message: str) -> bool:
        """Print the warning to stdout and read a y/yes answer from stdin.

        Returns:
            True only if the stripped, lower-cased answer is "y" or "yes".
        """
        from anytool.utils.display import colorize, print_separator

        print()
        print_separator(70, 'y', 2)
        print(f"  {colorize('⚠️  Security Policy Warning', color=Colors.RED, bold=True)}")
        print_separator(70, 'y', 2)
        print(f"  {message}")
        print_separator(70, 'gr', 2)
        print(f"  {colorize('[y/yes]', color=Colors.GREEN)} Allow  |  {colorize('[n/no]', color=Colors.RED)} Deny")
        print_separator(70, 'gr', 2)
        print(f"  {colorize('Your choice:', bold=True)} ", end="", flush=True)

        # readline blocks, so run it in the default executor instead of the event loop
        answer = await asyncio.get_running_loop().run_in_executor(None, sys.stdin.readline)
        response = answer.strip().lower() in {"y", "yes"}

        if response:
            print(f"  {colorize('✓ Allowed', color=Colors.GREEN)}\n")
        else:
            print(f"  {colorize('✗ Denied', color=Colors.RED)}\n")

        return response

    def set_global_policy(self, policy: SecurityPolicy) -> None:
        """Set the fallback policy used when a backend has no policy of its own."""
        self._global_policy = policy

    def set_backend_policy(self, backend_type: BackendType, policy: SecurityPolicy) -> None:
        """Register (or replace) the policy for one backend."""
        self._policies[backend_type] = policy

    def get_policy(self, backend_type: BackendType) -> SecurityPolicy:
        """Return the backend policy, else the global policy, else a default ``SecurityPolicy()``."""
        policy = self._policies.get(backend_type)
        if policy:
            return policy

        if self._global_policy:
            return self._global_policy

        return SecurityPolicy()

    async def _ask_user(self, message: str) -> bool:
        """If prompt is provided, ask user for confirmation, otherwise default to deny"""
        if self._prompt:
            try:
                return await self._prompt(message)
            except Exception:
                # Fail closed: a broken prompt must never grant permission
                return False
        return False

    @staticmethod
    def _collect_flagged_lines(command: str, dangerous_tokens) -> list[tuple[int, str]]:
        """Return (1-based line number, stripped text) for lines containing a dangerous token."""
        lines = command.split('\n')
        flagged = []
        for number, text in enumerate(lines, start=1):
            lowered = text.lower()
            if any(token in lowered for token in dangerous_tokens):
                flagged.append((number, text.strip()))

        # The policy failed but no token matched a line: show the first few lines instead
        if not flagged:
            flagged = [(number, text.strip()) for number, text in enumerate(lines[:5], start=1)]
        return flagged

    @staticmethod
    def _format_flagged_lines(flagged, max_lines: int = 10, max_width: int = 80) -> list[str]:
        """Render flagged lines as "  L<n>: <text>", capping both line count and line length."""
        formatted = []
        for number, text in flagged[:max_lines]:
            if len(text) > max_width:
                text = text[:max_width - 3] + "..."
            formatted.append(f"  L{number}: {text}")

        if len(flagged) > max_lines:
            formatted.append("  ... (more lines)")
        return formatted

    async def check_command_allowed(self, backend_type: BackendType, command: str) -> bool:
        """Check a command against the backend's policy, prompting if it is rejected.

        Returns:
            True if the policy accepts the command or the user approves it.
        """
        policy = self.get_policy(backend_type)

        if policy.check(command=command):
            return True

        dangerous_tokens = policy.find_dangerous_tokens(command)
        formatted_cmd_lines = self._format_flagged_lines(
            self._collect_flagged_lines(command, dangerous_tokens)
        )

        # Only the first five detected tokens are listed in the prompt
        dangerous_list = ', '.join([f"{Colors.RED}{tok}{Colors.RESET}" for tok in dangerous_tokens[:5]])

        from anytool.utils.display import Box, BoxStyle, colorize

        # Frame the affected lines in a box
        box = Box(width=66, style=BoxStyle.SQUARE, color='gr')
        cmd_box = [
            box.top_line(2),
            box.empty_line(2),
        ]
        cmd_box.extend(box.text_line(line, indent=2) for line in formatted_cmd_lines)
        cmd_box.extend([
            box.empty_line(2),
            box.bottom_line(2)
        ])

        message = (
            f"\n{colorize('Potentially dangerous command detected', color=Colors.WHITE)}\n\n"
            f"Backend:  {colorize(backend_type.value, color=Colors.CYAN)}\n"
            f"Dangerous commands: {dangerous_list}\n\n"
            f"Affected lines:\n"
            + "\n".join(cmd_box) + "\n\n"
            f"{colorize('This command may contain risky operations. Continue?', color=Colors.YELLOW)}"
        )

        return await self._ask_user(message)

    async def check_domain_allowed(self, backend_type: BackendType, domain: str) -> bool:
        """Check a domain against the backend's policy, prompting if it is rejected.

        Returns:
            True if the policy accepts the domain or the user approves it.
        """
        policy = self.get_policy(backend_type)

        if policy.check(domain=domain):
            return True

        message = (
            f"\n{Colors.WHITE}Unauthorized domain access detected{Colors.RESET}\n\n"
            f"Backend: {Colors.CYAN}{backend_type.value}{Colors.RESET}\n"
            f"Domain:  {Colors.YELLOW}{domain}{Colors.RESET}\n\n"
            f"{Colors.YELLOW}This domain is not in the allowed list. Continue?{Colors.RESET}"
        )

        return await self._ask_user(message)

================================================
FILE: anytool/grounding/core/security/sandbox.py
================================================
from typing import Any, Dict, Optional
from abc import ABC, abstractmethod

from ..types import SandboxOptions, BackendType


class BaseSandbox(ABC):
    """Abstract interface that every sandbox implementation provides.

    Stores the options it was given and tracks activity in ``self._active``,
    which starts out False.
    """

    def __init__(self, options: SandboxOptions):
        self._active = False
        self.options = options

    @abstractmethod
    async def start(self) -> bool:
        """Start the sandbox; implementations should set ``self._active`` to True."""
        ...

    @abstractmethod
    async def stop(self) -> None:
        """Stop the sandbox; implementations should set ``self._active`` to False."""
        ...

    @abstractmethod
    async def execute_safe(self, command: str, **kwargs) -> Any:
        """Execute ``command`` inside the sandbox."""
        ...

    @abstractmethod
    def get_connector(self) -> Any:
        """Return the implementation-specific connector object."""
        ...

    @property
    def is_active(self) -> bool:
        """Current value of the ``_active`` flag."""
        return self._active


class SandboxManager:
    """Registry holding at most one sandbox per backend type."""

    def __init__(self):
        self._sandboxes: Dict[BackendType, BaseSandbox] = {}

    def register_sandbox(self, backend_type: BackendType, sandbox: BaseSandbox) -> None:
        """Register ``sandbox`` for ``backend_type``, replacing any previous entry."""
        self._sandboxes[backend_type] = sandbox

    def get_sandbox(self, backend_type: BackendType) -> Optional[BaseSandbox]:
        """Return the sandbox registered for ``backend_type``, or None."""
        return self._sandboxes.get(backend_type)

    async def start_all(self) -> None:
        """Start every registered sandbox in registration order."""
        # Iterate over a snapshot so a registration made while awaiting cannot break the loop
        for sandbox in list(self._sandboxes.values()):
            await sandbox.start()

    async def stop_all(self) -> None:
        """Stop every registered sandbox, even if some of them fail to stop.

        Raises:
            Exception: The first error raised by any ``stop()``, re-raised only
                after all sandboxes have been given the chance to stop.
        """
        first_error: Optional[Exception] = None
        for sandbox in list(self._sandboxes.values()):
            try:
                await sandbox.stop()
            except Exception as exc:
                # Keep going so one failing sandbox does not leave the others running
                if first_error is None:
                    first_error = exc
        if first_error is not None:
            raise first_error

================================================
FILE: anytool/grounding/core/session.py
================================================
from abc import ABC, abstractmethod
from typing import Any, Dict, List
from datetime import datetime

from .tool import BaseTool
from .transport.connectors import BaseConnector
from .types import SessionInfo, SessionStatus, BackendType, ToolResult
from anytool.utils.logging import Logger

logger = Logger.get_logger(__name__)


class BaseSession(ABC):
    """
    Session manager for all backends.

    Wraps a connector, tracks connection status and activity timestamps, and
    exposes the tools discovered by the subclass's ``initialize()``.
    """
    def __init__(
        self,
        connector: BaseConnector,
        *,
        session_id: str,
        backend_type: BackendType | None = None,
        auto_connect: bool = True,
        auto_initialize: bool = True,
    ) -> None:
        """
        Args:
            connector: Transport used to reach the backend.
            session_id: Identifier of this session.
            backend_type: Backend this session belongs to (NOT_SET when omitted).
            auto_connect: Connect automatically on ``async with`` entry.
            auto_initialize: Call ``initialize()`` automatically on ``async with`` entry.
        """
        self.connector = connector
        self.session_id = session_id
        self.backend_type = backend_type or BackendType.NOT_SET
        self.auto_connect = auto_connect
        self.auto_initialize = auto_initialize

        self.status: SessionStatus = SessionStatus.DISCONNECTED
        self.session_info: Dict[str, Any] | None = None
        # NOTE(review): datetime.utcnow() is deprecated since Python 3.12; kept
        # because switching to aware datetimes would change SessionInfo values.
        self._created_at = datetime.utcnow()
        self._last_activity = self._created_at
        self.tools: List[BaseTool] = []

    async def __aenter__(self) -> "BaseSession":
        """Connect and initialize according to the auto_* flags.

        If initialization fails the session is disconnected before the error
        propagates, because ``__aexit__`` is not invoked when ``__aenter__`` raises.
        """
        if self.auto_connect:
            await self.connect()
        if self.auto_initialize:
            try:
                self.session_info = await self.initialize()
            except BaseException:
                try:
                    await self.disconnect()
                except Exception as cleanup_error:
                    # Never let cleanup mask the original initialization error
                    logger.warning(
                        f"Failed to disconnect session {self.session_id} "
                        f"after initialization error: {cleanup_error}"
                    )
                raise
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
        """Exit the async context manager.

        Args:
            exc_type: The exception type, if an exception was raised.
            exc_val: The exception value, if an exception was raised.
            exc_tb: The exception traceback, if an exception was raised.
        """
        await self.disconnect()

    async def connect(self) -> None:
        """Connect the underlying connector unless it is already connected."""
        if self.connector.is_connected:
            return
        self.status = SessionStatus.CONNECTING
        try:
            await self.connector.connect()
        except BaseException:
            # Do not leave the session stuck in CONNECTING after a failed attempt
            self.status = SessionStatus.DISCONNECTED
            raise
        self.status = SessionStatus.CONNECTED

    async def disconnect(self) -> None:
        """Disconnect the underlying connector if it is connected."""
        if not self.connector.is_connected:
            return
        await self.connector.disconnect()
        self.status = SessionStatus.DISCONNECTED

    @property
    def is_connected(self) -> bool:
        """Connection state as reported by the connector."""
        return self.connector.is_connected

    @abstractmethod
    async def initialize(self) -> Dict[str, Any]:
        """
        Negotiate with the backend, discover tools, etc.
        Return session information (can be an empty dict).
        
        `self.tools` need to be set in this method.
        """
        raise NotImplementedError("Sub-class must implement this method")
    
    async def list_tools(self) -> List[BaseTool]:
        """
        Return tools discovered during `initialize()`.

        Runs `initialize()` first when the tool list is still empty.
        """
        if not self.tools:
            self.session_info = await self.initialize()
        return self.tools
    
    async def call_tool(self, tool_name: str, parameters=None) -> ToolResult:
        """Run the named tool with ``parameters`` and record the activity time.

        Raises:
            ValueError: If no tool with ``tool_name`` is known to this session.
        """
        parameters = parameters or {}
        
        # Ensure tools are initialized before calling
        if not self.tools:
            logger.debug(f"Tools not initialized for session {self.session_id}, initializing now...")
            self.session_info = await self.initialize()
        
        tool_map = {t.schema.name: t for t in self.tools}
        if tool_name not in tool_map:
            raise ValueError(f"Unknown tool: {tool_name}")
        result = await tool_map[tool_name].arun(**parameters)
        self._touch()
        return result
 
    # Called after a tool call returns, to refresh the last-activity timestamp
    def _touch(self):
        self._last_activity = datetime.utcnow()

    @property
    def info(self) -> SessionInfo:
        """Snapshot of this session's identity, status and timestamps."""
        return SessionInfo(
            session_id=self.session_id,
            backend_type=getattr(self, "backend_type", BackendType.NOT_SET),
            status=self.status,
            created_at=self._created_at,
            last_activity=self._last_activity,
            metadata=self.session_info or {},
        )

================================================
FILE: anytool/grounding/core/system/__init__.py
================================================
from .provider import SystemProvider
from .tool import SYSTEM_TOOLS

__all__ = [
    "SystemProvider",
    "SYSTEM_TOOLS",
]

================================================
FILE: anytool/grounding/core/system/provider.py
================================================
from typing import List, Dict, Any
from ..provider import Provider
from ..types import BackendType, SessionConfig
from ..grounding_client import GroundingClient
from .tool import SYSTEM_TOOLS, _BaseSystemTool
from ..exceptions import GroundingError, ErrorCode


class SystemProvider(Provider):
    """
    Provider exposing the system-level query tools.

    It is session-less: tools are created once in the constructor and served
    to every caller regardless of session name.
    """
    def __init__(self, client: GroundingClient):
        super().__init__(BackendType.SYSTEM, {})
        # One instance per class listed in SYSTEM_TOOLS, each given the client
        system_tools: List[_BaseSystemTool] = []
        for tool_cls in SYSTEM_TOOLS:
            system_tools.append(tool_cls(client))
        self._tools: List[_BaseSystemTool] = system_tools

    async def initialize(self):
        """Mark the provider as initialized."""
        self.is_initialized = True

    async def create_session(self, session_config: SessionConfig):
        """Always raises: this provider has no sessions."""
        raise GroundingError(
            "SystemProvider does not support sessions",
            code=ErrorCode.CONFIG_INVALID,
        )

    async def list_tools(self, session_name: str | None = None):
        """Return the system tools; ``session_name`` is ignored."""
        return self._tools

    async def call_tool(
        self,
        session_name: str,
        tool_name: str,
        parameters: Dict[str, Any] | None = None,
    ):
        """Run the system tool called ``tool_name``; ``session_name`` is ignored.

        Raises:
            GroundingError: With TOOL_NOT_FOUND when no such tool exists.
        """
        registry = {}
        for tool in self._tools:
            registry[tool.schema.name] = tool

        if tool_name in registry:
            arguments = parameters or {}
            return await registry[tool_name].arun(**arguments)

        raise GroundingError(
            f"System tool '{tool_name}' not found",
            code=ErrorCode.TOOL_NOT_FOUND,
        )

    async def close_session(self, session_name: str) -> None:
        """No-op: there is no session to close."""
        return None

================================================
FILE: anytool/grounding/core/system/tool.py
================================================
from ..tool.local_tool import LocalTool
from ..types import BackendType, ToolResult, ToolStatus
from ..grounding_client import GroundingClient


class _BaseSystemTool(LocalTool):
    """Common base for system tools: a LocalTool that holds a GroundingClient."""

    backend_type = BackendType.SYSTEM

    def __init__(self, client: GroundingClient):
        super().__init__(handle_errors=True, verbose=False)
        self._client = client

    @property
    def client(self) -> GroundingClient:
        """The GroundingClient passed to the constructor."""
        return self._client


class ListProvidersTool(_BaseSystemTool):
    """System tool that reports the keys of the client's provider mapping."""

    _name = "list_providers"
    _description = "List all registered backend providers"

    async def _arun(self) -> ToolResult:
        """Return the provider keys as a comma-separated string."""
        providers = self.client.list_providers()
        joined = ", ".join(list(providers.keys()))
        return ToolResult(status=ToolStatus.SUCCESS, content=joined)


class ListBackendToolsTool(_BaseSystemTool):
    """System tool that lists the tool names of a single backend."""

    _name = "list_backend_tools"
    _description = "List static tools for a backend"

    async def _arun(self, backend: str) -> ToolResult:
        """Return the backend's tool names as a comma-separated string.

        Args:
            backend: Backend name; matched case-insensitively against BackendType values.

        Returns:
            A SUCCESS result with the names, or an ERROR result for an unknown backend.
        """
        try:
            be = BackendType(backend.lower())
        except ValueError:
            # Pass status by keyword, like every other ToolResult construction
            # in this module, so this does not rely on positional field order.
            return ToolResult(status=ToolStatus.ERROR, error=f"Unknown backend '{backend}'")

        tools = await self.client.list_backend_tools(be)
        names = [t.schema.name for t in tools]
        return ToolResult(
            status=ToolStatus.SUCCESS,
            content=", ".join(names),
        )


class ListSessionToolsTool(_BaseSystemTool):
    """System tool that lists the tool names available in one session."""

    _name = "list_session_tools"
    _description = "List tools (incl. dynamic) for a session"

    async def _arun(self, session_id: str) -> ToolResult:
        """Return the session's tool names as a comma-separated string."""
        session_tools = await self.client.list_session_tools(session_id)
        tool_names = []
        for tool in session_tools:
            tool_names.append(tool.schema.name)
        return ToolResult(status=ToolStatus.SUCCESS, content=", ".join(tool_names))


class ListAllBackendToolsTool(_BaseSystemTool):
    """System tool that lists tool names for every backend, one backend per line."""

    _name = "list_all_backend_tools"
    _description = "List static tools for every registered backend"

    async def _arun(self, use_cache: bool = False) -> ToolResult:
        """Return "<backend>: <tool, tool, ...>" lines joined by newlines.

        Args:
            use_cache: Forwarded to the client's ``list_all_backend_tools``.
        """
        tools_by_backend = await self.client.list_all_backend_tools(use_cache=use_cache)
        report = []
        for backend, tools in tools_by_backend.items():
            report.append(f"{backend.value}: {', '.join(t.schema.name for t in tools)}")
        return ToolResult(status=ToolStatus.SUCCESS, content="\n".join(report))


# Registry of the system tool classes defined in this module. Each entry is a
# class (not an instance) whose constructor takes a GroundingClient.
SYSTEM_TOOLS: list[type[_BaseSystemTool]] = [
    ListProvidersTool,
    ListBackendToolsTool,
    ListSessionToolsTool,
    ListAllBackendToolsTool,
]

================================================
FILE: anytool/grounding/core/tool/__init__.py
================================================
from .base import BaseTool
from .local_tool import LocalTool
from .remote_tool import RemoteTool

__all__ = ["BaseTool", "LocalTool", "RemoteTool"]

================================================
FILE: anytool/grounding/core/tool/base.py
================================================
"""
BaseTool.
All pre-defined grounding atomic operations will inherit this tool class.
RemoteTool needs to pass in connector.
"""
import asyncio, time, inspect
from abc import ABC, abstractmethod
from functools import lru_cache
from typing import Any, ClassVar, Dict, Optional, TYPE_CHECKING
from pydantic import BaseModel, ConfigDict, Field, create_model

from ..types import BackendType, ToolResult, ToolSchema, ToolStatus
from ..exceptions import GroundingError, ErrorCode
from anytool.utils.logging import Logger
import jsonschema

if TYPE_CHECKING:
    from ..grounding_client import GroundingClient

logger = Logger.get_logger(__name__)


class ToolRuntimeInfo:
    """Plain holder for the runtime context bound to a tool instance.

    Attributes:
        backend: Backend type.
        session_name: Session name.
        server_name: Optional server name.
        grounding_client: Optional GroundingClient reference.
    """
    def __init__(
        self,
        backend: BackendType,
        session_name: str,
        server_name: Optional[str] = None,
        grounding_client: Optional['GroundingClient'] = None,
    ):
        self.backend, self.session_name = backend, session_name
        self.server_name, self.grounding_client = server_name, grounding_client

    def __repr__(self):
        # Only backend and session are shown; server and client are omitted
        return f"<ToolRuntimeInfo backend={self.backend.value} session={self.session_name}>"
    

class BaseTool(ABC):
    _name: ClassVar[str] = ""
    _description: ClassVar[str] = ""
    backend_type: ClassVar[BackendType] = BackendType.NOT_SET

    def __init__(self,
                 schema: Optional[ToolSchema] = None,
                 *,
                 verbose: bool = False,
                 handle_errors: bool = True) -> None:
        """Set up the tool, deriving a schema from class metadata when none is given.

        Args:
            schema: Pre-built schema; when falsy one is built from ``_name`` (or
                the lower-cased class name), ``_description``, the generated
                parameters schema and ``backend_type``.
            verbose: Enables the debug log emitted when wrapping results.
            handle_errors: When True, ``arun`` returns an ERROR result instead of raising.
        """
        self.verbose = verbose
        self.handle_errors = handle_errors

        if schema:
            self.schema: ToolSchema = schema
        else:
            self.schema = ToolSchema(
                name=self._name or self.__class__.__name__.lower(),
                description=self._description,
                parameters=self.get_parameters_schema(),
                backend_type=self.backend_type,
            )

        # No runtime context until bind_runtime_info() is called
        self._runtime_info: Optional[ToolRuntimeInfo] = None
        self._disable_outer_recording = True
    
    @property
    def name(self) -> str:
        """Tool name: taken from the schema when one is set, else the class-level ``_name``."""
        schema = getattr(self, 'schema', None)
        if schema:
            return schema.name
        return self._name
    
    @property
    def description(self) -> str:
        """Tool description: taken from the schema when one is set, else the class-level ``_description``."""
        schema = getattr(self, 'schema', None)
        if schema:
            return schema.description
        return self._description

    @classmethod
    @lru_cache
    def get_parameters_schema(cls) -> Dict[str, Any]:
        """Build a JSON schema for the tool's parameters from its run-method signature.

        The signature comes from ``_arun`` when the class overrides it,
        otherwise from an overridden ``_run``. ``self``, ``*args`` and
        ``**kwargs`` are ignored; unannotated parameters are typed as ``str``
        and parameters without a default are required.

        Returns:
            The pydantic-generated JSON schema, or ``{}`` when no parameters remain.

        Raises:
            ValueError: If the class overrides neither ``_arun`` nor ``_run``.
        """
        if cls._arun is not BaseTool._arun:
            source_fn = cls._arun
        elif cls._run is not BaseTool._run:
            source_fn = cls._run
        else:
            raise ValueError(
                f"{cls.__name__} must implement _run() or _arun() to define its parameters schema"
            )

        missing = inspect.Parameter.empty
        variadic_kinds = (inspect.Parameter.VAR_KEYWORD, inspect.Parameter.VAR_POSITIONAL)

        fields: dict[str, Any] = {}
        for param_name, param in inspect.signature(source_fn).parameters.items():
            if param_name == "self" or param.kind in variadic_kinds:
                continue
            annotation = str if param.annotation is missing else param.annotation
            # Ellipsis is how pydantic marks a field as required
            default = ... if param.default is missing else param.default
            fields[param_name] = (annotation, Field(default))

        if len(fields) == 0:
            return {}

        params_model: type[BaseModel] = create_model(
            f"{cls.__name__}Params",
            __config__=ConfigDict(arbitrary_types_allowed=True),
            **fields
        )
        return params_model.model_json_schema()

    def validate_parameters(self, params: Dict[str, Any]) -> None:
        """Validate ``params`` against the tool schema.

        Raises:
            GroundingError: With TOOL_EXECUTION_FAIL when the schema raises a
                jsonschema ValidationError; the original error is chained.
        """
        schema = self.schema
        try:
            schema.validate_parameters(params, raise_exc=True)
        except jsonschema.ValidationError as validation_error:
            raise GroundingError(
                f"Invalid parameters: {validation_error.message}",
                code=ErrorCode.TOOL_EXECUTION_FAIL,
                tool_name=self.schema.name,
            ) from validation_error

    def run(self, **kwargs):
        """Invoke the tool from synchronous code.

        Returns:
            The result of ``invoke(**kwargs)`` when no event loop is running in
            this thread. When called from inside a running loop, an
            ``asyncio.Task`` wrapping the invocation, which the caller must await.
        """
        # Detect the loop first instead of catching RuntimeError around
        # asyncio.run(): that handler also caught RuntimeErrors raised by the
        # tool itself (masking them) and left a never-awaited coroutine behind
        # when a loop was already running.
        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            return asyncio.run(self.invoke(**kwargs))
        return loop.create_task(self.invoke(**kwargs))

    def __call__(self, **kwargs):
        return self.run(**kwargs)

    async def __acall__(self, **kwargs):
        return await self.arun(**kwargs)
    
    async def arun(self, **kwargs) -> ToolResult:
        """Validate the arguments, execute ``_arun`` and return a ToolResult.

        Any exception from validation, execution, result wrapping or recording
        is turned into an ERROR ToolResult when ``handle_errors`` is set;
        otherwise it propagates.
        """
        started = time.time()
        try:
            self.validate_parameters(kwargs)
            raw_output = await self._arun(**kwargs)
            result = self._wrap_result(raw_output, time.time() - started)

            # Recording happens after wrapping and before the result is returned
            await self._auto_record_execution(kwargs, result, time.time() - started)

            return result
        except Exception as exc:
            if not self.handle_errors:
                raise
            return ToolResult(
                status=ToolStatus.ERROR,
                error=str(exc),
                metadata={"tool": self.schema.name},
            )

    # to be implemented by subclasses
    @abstractmethod
    async def _arun(self, **kwargs): ...
    
    def bind_runtime_info(
        self,
        backend: BackendType,
        session_name: str,
        server_name: Optional[str] = None,
        grounding_client: Optional['GroundingClient'] = None,
    ) -> 'BaseTool':
        """
        Attach runtime context so the tool can later be invoked without
        repeating backend/session/server.

        Args:
            backend: Backend type
            session_name: Session name
            server_name: Server name (for MCP)
            grounding_client: Optional reference to GroundingClient for direct invocation

        Returns:
            This tool instance, so calls can be chained.
        """
        info = ToolRuntimeInfo(
            backend=backend,
            session_name=session_name,
            server_name=server_name,
            grounding_client=grounding_client,
        )
        self._runtime_info = info
        return self
    
    @property
    def runtime_info(self) -> Optional['ToolRuntimeInfo']:
        """The bound runtime information, or None when nothing is bound."""
        bound = self._runtime_info
        return bound
    
    @property
    def is_bound(self) -> bool:
        """True once runtime information has been bound to this tool."""
        bound = self._runtime_info
        return bound is not None
    
    async def invoke(
        self, 
        parameters: Dict[str, Any] | None = None, 
        keep_session: bool = True,
        **kwargs
    ) -> ToolResult:
        """
        Invoke the tool, routing through the bound GroundingClient when there is one.

        ``parameters`` wins over ``**kwargs`` when it is non-empty. Without
        bound runtime info, or when the bound info carries no client, the tool
        is executed locally via ``arun``.
        """
        effective = parameters if parameters else kwargs
        client = self._runtime_info.grounding_client if self.is_bound else None

        if not client:
            return await self.arun(**effective)

        return await client.invoke_tool(
            tool=self,
            parameters=effective,
            keep_session=keep_session,
        )

    def _wrap_result(self, obj: Any, elapsed: float) -> ToolResult:
        """Turn a raw return value into a ToolResult stamped with the elapsed time.

        Args:
            obj: Value to wrap; an existing ToolResult is reused as-is.
            elapsed: Execution time to store on the result.
        """
        # A ready-made ToolResult is passed through; only its timing is updated.
        if isinstance(obj, ToolResult):
            obj.execution_time = elapsed
            return obj

        if self.verbose:
            logger.debug("[%s] done in %.2f s", self.schema.name, elapsed)

        # Bytes are decoded leniently so undecodable input cannot raise here.
        payload = obj.decode("utf-8", errors="replace") if isinstance(obj, (bytes, bytearray)) else obj

        return ToolResult(
            status=ToolStatus.SUCCESS,
            content=str(payload),
            execution_time=elapsed,
            metadata={"tool": self.schema.name},
        )
    
    async def _auto_record_execution(
        self,
        parameters: Dict[str, Any],
        result: ToolResult,
        execution_time: float,
    ):
        """
        Report a finished execution to the quality manager and, while a
        recording is active, to the RecordingManager.

        Args:
            parameters: Arguments the tool was called with.
            result: Outcome of the call.
            execution_time: Duration in seconds (converted to milliseconds
                for the quality manager).

        Errors raised while talking to the RecordingManager are logged at
        debug level and swallowed.
        """
        # Quality tracking comes first and receives the duration in milliseconds.
        await self._record_to_quality_manager(result, execution_time * 1000)

        # Trajectory recording is best-effort: nothing below may break the tool call.
        try:
            from anytool.recording import RecordingManager

            recording_active = RecordingManager.is_recording()
            if not recording_active:
                return

            # Tools that record their own intermediate steps (e.g. GUI agent) opt out here.
            if getattr(self, '_disable_outer_recording', False):
                logger.debug(f"Skipping outer recording for {self.schema.name} (intermediate steps recorded)")
                return

            # Start from the tool's own backend; bound runtime info overrides it (more accurate).
            backend_name = self.backend_type.value
            bound_server = None
            if self.is_bound and self._runtime_info:
                backend_name = self._runtime_info.backend.value
                bound_server = self._runtime_info.server_name

            # GUI tools that expose a connector get a screenshot attempt.
            # NOTE(review): the screenshot is captured but not passed to
            # record_tool_execution below - confirm whether that is intended.
            screenshot = None
            is_gui_with_connector = self.backend_type == BackendType.GUI and hasattr(self, 'connector')
            if is_gui_with_connector:
                try:
                    screenshot = await self.connector.get_screenshot()
                except Exception as shot_err:
                    logger.debug(f"Failed to capture screenshot: {shot_err}")

            await RecordingManager.record_tool_execution(
                tool_name=self.schema.name,
                backend=backend_name,
                parameters=parameters,
                result=result.content,
                server_name=bound_server,
                is_success=result.is_success,  # actual success status from the ToolResult
            )
        except Exception as record_err:
            logger.debug(f"Failed to auto-record tool execution: {record_err}")
    
    async def _record_to_quality_manager(
        self,
        result: ToolResult,
        execution_time_ms: float,
    ):
        """Forward one execution result to the quality manager, if there is one.

        Args:
            result: Outcome of the tool call.
            execution_time_ms: Duration of the call in milliseconds.

        Any failure is logged at debug level and swallowed so that quality
        tracking can never break the tool call itself.
        """
        try:
            from anytool.grounding.core.quality import get_quality_manager

            quality_manager = get_quality_manager()
            if not quality_manager:
                return
            await quality_manager.record_execution(self, result, execution_time_ms)
        except Exception as exc:
            logger.debug(f"Failed to record to quality manager: {exc}")

    # keep _run for backward-compatibility / thread-pool fallback
    def _run(self, **kwargs):
        raise NotImplementedError

    def __repr__(self):
        base = f"<Tool {self.schema.name} ({self.backend_type.value})"
        if self.is_bound:
            base += f" @ {self._runtime_info.session_name}"
        return base + ">"

    def __init_subclass__(cls, **kwargs):
        """
        Validate every subclass at class-creation time.

        - A subclass must provide its own _run or _arun; otherwise ValueError is raised.
        - backend_type left at NOT_SET is only logged at debug level, so that
          it can still be set at runtime (e.g. by RemoteTool).
        """
        super().__init_subclass__(**kwargs)

        inherits_async = cls._arun is BaseTool._arun
        inherits_sync = cls._run is BaseTool._run
        if inherits_async and inherits_sync:
            raise ValueError(f"{cls.__name__} must implement _run() or _arun()")

        if cls.backend_type is not BackendType.NOT_SET:
            return
        logger.debug(
            "%s.backend_type is NOT_SET; remember to override or set at runtime.",
            cls.__name__,
        )

================================================
FILE: anytool/grounding/core/tool/local_tool.py
================================================
"""
LocalTool.
Executes entirely inside this Python process.
"""
import asyncio
from typing import Any
from .base import BaseTool


class LocalTool(BaseTool):
    """Tool that executes inside the current Python process.

    A subclass may supply a synchronous ``_run``; the inherited ``_arun``
    then runs it in the event loop's default executor.
    """

    def _run(self, **kwargs):
        """Synchronous hook; this default always raises NotImplementedError."""
        raise NotImplementedError

    async def _dispatch_run(self, **kwargs) -> Any:
        """Pick the execution path based on which hooks the concrete class overrides.

        Raises:
            NotImplementedError: If the concrete class overrides neither
                ``_run`` nor ``_arun``.
        """
        concrete = self.__class__

        if concrete._arun is not LocalTool._arun:
            # NOTE(review): super()._arun resolves to the next _arun after
            # LocalTool in the MRO, not to the subclass override - confirm
            # this is the intended target.
            return await super()._arun(**kwargs)

        if concrete._run is LocalTool._run:
            raise NotImplementedError(
                f"{self.__class__.__name__} must implement _run() or _arun()"
            )

        # Synchronous implementation: run it in the default executor so the
        # event loop is not blocked.
        def _call_sync():
            return self._run(**kwargs)

        return await asyncio.get_running_loop().run_in_executor(None, _call_sync)

    async def _arun(self, **kwargs):
        """Async entry point; delegates to ``_dispatch_run``."""
        return await self._dispatch_run(**kwargs)

================================================
FILE: anytool/grounding/core/tool/remote_tool.py
================================================
"""
RemoteTool.
Wrapper around a connector that calls a remote tool.
"""
from typing import Optional
from anytool.utils.logging import Logger
from ..types import BackendType, ToolResult, ToolSchema, ToolStatus
from .base import BaseTool
from anytool.grounding.core.transport.connectors import BaseConnector

logger = Logger.get_logger(__name__)


class RemoteTool(BaseTool):
    """Tool whose execution is delegated to a remote endpoint through a connector."""

    backend_type = BackendType.NOT_SET

    def __init__(
        self,
        schema: ToolSchema | None = None,
        connector: Optional[BaseConnector] = None,
        remote_name: str = "",
        *,
        verbose: bool = False,
        backend: BackendType = BackendType.NOT_SET,
    ):
        """
        Args:
            schema: Tool schema; its name is the fallback for ``remote_name``.
            connector: Connector used to reach the remote tool. May be None,
                in which case ``_arun`` refuses to run.
            remote_name: Name passed to the connector's ``invoke``. Defaults
                to the schema name, or "" when there is no schema either.
            verbose: Forwarded to the base class.
            backend: Backend type stored on this instance.
        """
        self._conn = connector
        if not remote_name:
            remote_name = schema.name if schema else ""
        self._remote_name = remote_name
        self.backend_type = backend
        super().__init__(schema=schema, verbose=verbose)

    async def _arun(self, **kwargs):
        """Invoke the remote tool and normalise its reply into a ToolResult.

        Replies exposing both ``content`` and ``isError`` are flattened item
        by item into text. Any other reply is rendered as a string, with
        dicts, lists and tuples serialised as JSON where possible.

        Raises:
            RuntimeError: If the tool was created without a connector.
        """
        # Without a connector the tool can only run via grounding_client (on-demand startup).
        if self._conn is None:
            raise RuntimeError(
                f"Tool '{self.name}' has no connector. "
                "Use grounding_client.invoke_tool() to execute it with on-demand server startup."
            )

        raw = await self._conn.invoke(self._remote_name, kwargs)

        structured_reply = hasattr(raw, 'content') and hasattr(raw, 'isError')
        if structured_reply:
            rendered = []
            for part in (raw.content or []):
                # Text items contribute their text verbatim.
                if hasattr(part, 'text') and part.text:
                    rendered.append(part.text)
                    continue
                # Image-like items are only noted by size, not inlined.
                if hasattr(part, 'data'):
                    size = len(part.data) if part.data else 0
                    rendered.append(f"[Image data: {size} bytes]")
                    continue
                # Embedded resources are referenced by URI.
                if hasattr(part, 'resource'):
                    uri = getattr(part.resource, 'uri', 'unknown')
                    rendered.append(f"[Embedded resource: {uri}]")

            text = "\n".join(rendered)
            failed = getattr(raw, 'isError', False)

            return ToolResult(
                status=ToolStatus.ERROR if failed else ToolStatus.SUCCESS,
                content=text,
                error=text if failed else None,
            )

        if isinstance(raw, (dict, list, tuple)):
            # Containers: pretty JSON when serialisable, plain str() otherwise.
            import json
            try:
                text = json.dumps(raw, ensure_ascii=False, indent=2)
            except (TypeError, ValueError):
                text = str(raw)
        elif isinstance(raw, str):
            text = raw
        else:
            # Numbers, booleans and any unknown type.
            text = str(raw)

        return ToolResult(
            status=ToolStatus.SUCCESS,
            content=text,
        )

================================================
FILE: anytool/grounding/core/transport/connectors/__init__.py
================================================
from .base import BaseConnector
from .aiohttp_connector import AioHttpConnector

__all__ = [
    "BaseConnector", 
    "AioHttpConnector",
]

================================================
FILE: anytool/grounding/core/transport/connectors/aiohttp_connector.py
================================================
from typing import Any
from yarl import URL
import aiohttp

from ..task_managers import AioHttpConnectionManager
from .base import BaseConnector
from anytool.utils.logging import Logger
from pydantic import BaseModel

logger = Logger.get_logger(__name__)


class AioHttpConnector(BaseConnector[aiohttp.ClientSession]):
    """Generic HTTP-based connector with auto-reconnect & helper methods.

    Requests go through the ``aiohttp.ClientSession`` supplied by an
    ``AioHttpConnectionManager``. ``_request`` connects on demand when the
    connector is not connected yet.
    """

    def __init__(self, base_url: str, **session_kw):
        """
        Args:
            base_url: Root URL of the HTTP back-end; a trailing slash is stripped.
            **session_kw: Forwarded unchanged to AioHttpConnectionManager.
        """
        connection_manager = AioHttpConnectionManager(base_url, **session_kw)
        super().__init__(connection_manager)
        self.base_url = base_url.rstrip("/")
        
    async def connect(self) -> None:
        """Open the session, then probe ``base_url`` to check the server answers.

        Raises:
            ConnectionError: If the probe request fails or the server replies
                with a 5xx status. The connector is disconnected first.
        """
        await super().connect()
        try:
            # Explicit ClientTimeout is the documented form; a bare number is
            # only accepted by aiohttp for backward compatibility.
            probe_timeout = aiohttp.ClientTimeout(total=5)
            async with self._connection.get(self.base_url, timeout=probe_timeout) as resp:
                if resp.status >= 500:
                    raise ConnectionError(f"HTTP {resp.status}")
        except Exception as e:
            await self.disconnect()
            # Chain explicitly so the original failure stays visible in tracebacks.
            raise ConnectionError(f"Ping {self.base_url} failed: {e}") from e

    async def _request(
        self,
        method: str,
        path: str,
        *,
        json: Any | BaseModel | None = None, 
        data: Any | None = None,
        params: dict[str, Any] | None = None,
        **kw,
    ) -> aiohttp.ClientResponse:
        """Send one HTTP request relative to ``base_url``, connecting first if needed.

        Args:
            method: HTTP verb (upper-cased before sending).
            path: Path appended to ``base_url``; leading slashes are ignored.
            json: JSON payload; a Pydantic model is dumped to a dict first.
            data: Raw request body.
            params: Query-string parameters.
            **kw: Extra keyword arguments passed to the session's ``request``.

        Returns:
            The aiohttp response; reading and status checks are left to the caller.
        """
        if not self.is_connected:
            await self.connect()

        assert self._connection is not None            # for mypy
        url = URL(self.base_url) / path.lstrip("/")
        logger.debug("%s %s", method.upper(), url)
        return await self._connection.request(
            method.upper(),
            url,
            json=self._to_json_compatible(json), 
            data=data,
            params=params,
            **kw,
        )

    async def get_json(self, path: str, **kw) -> Any:
        """GET ``path`` and return the decoded JSON body.

        An optional ``response_model`` keyword (a Pydantic model class) makes
        the body get validated into that model. Non-2xx statuses raise via
        ``raise_for_status``.
        """
        response_model: type[BaseModel] | None = kw.pop("response_model", None)
        resp = await self._request("GET", path, **kw)
        resp.raise_for_status()
        data = await resp.json()
        return self._parse_as(data, response_model)

    async def get_bytes(self, path: str, **kw) -> bytes:
        """GET ``path`` and return the raw body; error statuses raise."""
        resp = await self._request("GET", path, **kw)
        resp.raise_for_status()
        return await resp.read()
    
    async def post_json(
        self,
        path: str,
        payload: Any | BaseModel,
        *,
        response_model: type[BaseModel] | None = None,
        **kw,
    ) -> Any | BaseModel:
        """POST ``payload`` as JSON and return the decoded JSON reply.

        Args:
            path: Path relative to ``base_url``.
            payload: JSON-serialisable object or Pydantic model.
            response_model: Optional Pydantic model class to validate the reply into.
            **kw: Extra keyword arguments for the underlying request.

        Returns:
            The decoded body (``None`` when it is not valid JSON), optionally
            parsed into ``response_model``.

        Raises:
            aiohttp.ClientResponseError: For any status >= 400. The message
                carries the ``output``/``message``/``error`` field of the
                error body when the body provides one.
        """
        resp = await self._request("POST", path, json=payload, **kw)
        
        try:
            data = await resp.json()
        except Exception:
            # Body is not JSON (or could not be read); fall back to no payload.
            data = None
        
        if resp.status >= 400:
            # Extract detailed error from the response body. The body may be
            # any JSON value, so only call .get() on actual objects; a list
            # or string body must not mask the HTTP error with AttributeError.
            detail = ""
            if isinstance(data, dict):
                detail = data.get("output") or data.get("message") or data.get("error") or ""
            elif isinstance(data, str):
                detail = data
            error_msg = f"{resp.status}, message='{resp.reason}'"
            if detail:
                error_msg += f", detail='{detail}'"
            raise aiohttp.ClientResponseError(
                resp.request_info,
                resp.history,
                status=resp.status,
                message=error_msg,
            )
        
        return self._parse_as(data, response_model)

    async def request(self, method: str, path: str, **kw) -> aiohttp.ClientResponse:
        """Public pass-through to ``_request``."""
        return await self._request(method, path, **kw)

    async def invoke(self, name: str, params: dict[str, Any]) -> Any:
        """
        Generic tool-invocation mapping for HTTP back-ends.

        name rule (verb is case-insensitive):
        - "GET /path"                -> GET, return JSON
        - "GET_TEXT /path"           -> GET, return str
        - "GET_BYTES /path"          -> GET, return bytes
        - "POST /path"               -> POST, payload = params (JSON)
        - "PUT|PATCH|DELETE /path"   -> handled by _handle_other_json
        - other                      -> default POST /{name}, payload = params

        Raises:
            ValueError: If ``name`` is empty or only whitespace.
        """
        verb_path = name.strip().split(maxsplit=1)
        if not verb_path:
            # An empty name would otherwise surface as an opaque IndexError.
            raise ValueError("Tool name must not be empty")

        verb = verb_path[0].upper()
        has_path = len(verb_path) == 2
        path = verb_path[1] if has_path else verb_path[0]

        if verb == "GET_BYTES":
            return await self.get_bytes(path, params=params)

        if verb == "GET_TEXT":
            resp = await self._request("GET", path, params=params)
            resp.raise_for_status()
            return await resp.text()

        if has_path:
            if verb == "GET":
                return await self.get_json(path, params=params)
            if verb == "POST":
                return await self.post_json(path, payload=params)
            if verb in {"PUT", "PATCH", "DELETE"}:
                return await self._handle_other_json(verb, path, params)

        # No recognised "VERB /path" form: treat the whole name as a POST path.
        return await self.post_json(name, payload=params)

    async def _handle_other_json(self, method: str, path: str, params: dict[str, Any]):
        """Fallback implementation for PUT/PATCH/DELETE returning JSON/text, can be overridden by subclasses."""
        resp = await self._request(method, path, json=params)
        resp.raise_for_status()
        try:
            return await resp.json()
        except Exception:
            # Not JSON: hand back the body as text instead.
            return await resp.text()

================================================
FILE: anytool/grounding/core/transport/connectors/base.py
================================================
"""
Base connector abstraction.

A connector is a very thin wrapper-class that owns a *connection manager*
(e.g. AioHttpConnectionManager, AsyncContextConnectionManager, …).
It exposes a unified `connect / disconnect / is_connected` lifecycle and
defines the abstract `invoke()` and `request()` methods which concrete
back-ends must implement.
"""
import asyncio
from abc import ABC, abstractmethod
from typing import Any, Generic, TypeVar, Type
from pydantic import BaseModel
from ..task_managers import BaseConnectionManager

T = TypeVar("T")        # The object returned by manager.start(): session / connection


class BaseConnector(ABC, Generic[T]):
    """
    Thin lifecycle wrapper around a *connection manager*.

    The manager creates and tears down the underlying session/connection;
    this class tracks the connected state and offers overridable hooks
    around connect and disconnect. Concrete subclasses implement the
    abstract `invoke()` and `request()` methods.
    """

    def __init__(self, connection_manager: BaseConnectionManager[T]):
        """
        Args:
            connection_manager: Manager that starts/stops the connection,
                e.g. an AioHttpConnectionManager instance.
        """
        self._connection_manager = connection_manager
        # Raw object handed back by the manager; kept so the established
        # long-lived connection can be reused across calls.
        self._connection: T | None = None
        self._connected = False

    async def connect(self) -> None:
        """Start the manager and store the connection it returns.

        Does nothing when already connected. If any step fails, the
        failure-cleanup hook runs and the exception is re-raised.
        """
        if self._connected:
            return

        try:
            await self._before_connect()
            self._connection = await self._connection_manager.start()
            await self._after_connect()
        except Exception:
            await self._cleanup_on_connect_failure()
            raise
        else:
            # Only flagged as connected once every step above succeeded.
            self._connected = True

    async def disconnect(self) -> None:
        """Stop the manager, drop the connection and mark the connector as disconnected.

        Does nothing when not connected.
        """
        if not self._connected:
            return

        await self._before_disconnect()

        manager = self._connection_manager
        if manager:
            await manager.stop()
            self._connection = None

        await self._after_disconnect()

        self._connected = False

    async def _before_connect(self) -> None:
        """Hook run before the manager is started. No-op by default."""

    async def _after_connect(self) -> None:
        """Hook run after the manager returned a connection. No-op by default."""

    async def _cleanup_on_connect_failure(self) -> None:
        """Hook run when connecting failed: best-effort stop of the manager, then drop the connection."""
        manager = self._connection_manager
        if manager:
            try:
                await manager.stop()
            except Exception:
                # Cleanup is best-effort; the original connect error is what matters.
                pass
        self._connection = None

    async def _before_disconnect(self) -> None:
        """Hook run before the manager is stopped. No-op by default."""

    async def _after_disconnect(self) -> None:
        """Hook run after the manager was stopped. No-op by default."""

    @property
    def is_connected(self) -> bool:
        """Whether `connect()` has completed successfully and no disconnect followed."""
        return self._connected

    @staticmethod
    def _to_json_compatible(obj: Any) -> Any:
        """
        Dump a Pydantic BaseModel to a dict (by_alias=True);
        every other value is returned untouched.
        """
        return obj.model_dump(by_alias=True) if isinstance(obj, BaseModel) else obj

    @staticmethod
    def _parse_as(data: Any, model_cls: "Type[BaseModel] | None" = None) -> Any:
        """
        Validate *data* into *model_cls* when that is a BaseModel subclass.
        For None or anything that is not such a class, *data* is returned as-is.
        """
        is_model_class = isinstance(model_cls, type) and issubclass(model_cls, BaseModel)
        if not is_model_class:
            return data
        return model_cls.model_validate(data)

    @abstractmethod
    async def invoke(self, name: str, params: dict[str, Any]) -> Any:
        """
        Single RPC entry point for all tools.
        Subclasses map it onto their actual call, such as call_tool / run_cmd.
        """
        raise NotImplementedError

    @abstractmethod
    async def request(self, *args: Any, **kwargs: Any) -> Any:
        """Low-level RPC / HTTP / WS request; implemented by child classes."""
        raise NotImplementedError("This connector has not implemented 'request'")

================================================
FILE: anytool/grounding/core/transport/task_managers/__init__.py
================================================
from .base import BaseConnectionManager
from .aiohttp_connection_manager import AioHttpConnectionManager
from .async_ctx import AsyncContextConnectionManager
from .placeholder import PlaceholderConnectionManager
from .noop import NoOpConnectionManager

__all__ = [
    "BaseConnectionManager", 
    "AioHttpConnectionManager",
    "AsyncContextConnectionManager",
    "PlaceholderConnectionManager",
    "NoOpConnectionManager",
]

================================================
FILE: anytool/grounding/core/transport/task_managers/aiohttp_connection_manager.py
================================================
"""
Long-lived aiohttp ClientSession manager based on AsyncContextConnectionManager.

It keeps a single ClientSession open during the lifetime of a backend
session, saving the overhead of creating and closing a TCP connection
for every request.
"""
from typing import Optional
import aiohttp

from .async_ctx import AsyncContextConnectionManager


class AioHttpConnectionManager(
    AsyncContextConnectionManager[aiohttp.ClientSession, ...]
):
    """Connection manager that keeps one aiohttp.ClientSession open for reuse."""

    def __init__(
        self,
        base_url: str,
        headers: Optional[dict[str, str]] = None,
        timeout: float = 30,
    ):
        """
        Args:
            base_url: Back-end URL; stored without a trailing slash.
            headers: Headers for the session; an empty dict when omitted.
            timeout: Total timeout used to build the session's ClientTimeout.
        """
        self.base_url = base_url.rstrip("/")
        session_options = {
            "timeout": aiohttp.ClientTimeout(total=timeout),
            "headers": headers or {},
        }
        super().__init__(aiohttp.ClientSession, **session_options)
        self._logger.debug(
            "Init AioHttpConnectionManager base_url=%s timeout=%s", self.base_url, timeout
        )

    async def _establish_connection(self) -> aiohttp.ClientSession:
        """Let the parent create and enter the ClientSession, then log it."""
        client_session = await super()._establish_connection()
        self._logger.debug("aiohttp ClientSession created")
        return client_session

    async def _close_connection(self) -> None:
        """Close the session explicitly, then run the parent's cleanup.

        A failing close() is logged as a warning and does not prevent the
        parent cleanup from running.
        """
        if self._ctx:
            import asyncio

            try:
                self._logger.debug("Closing aiohttp ClientSession")
                await self._ctx.close()
                # Short pause so aiohttp can finish its internal cleanup callbacks.
                await asyncio.sleep(0.1)
            except Exception as e:
                self._logger.warning(f"Error closing aiohttp ClientSession: {e}")

        await super()._close_connection()

================================================
FILE: anytool/grounding/core/transport/task_managers/async_ctx.py
================================================
"""
Generic connection manager based on an *async context manager*.
Give it any factory that returns an async–context-manager.  
"""
import sys
from typing import Any, Callable, Generic, Optional, ParamSpec, TypeVar
from .base import BaseConnectionManager

# BaseExceptionGroup only exists in Python 3.11+
if sys.version_info >= (3, 11):
    _BaseExceptionGroup = BaseExceptionGroup
else:
    # Dummy class for older Python versions
    class _BaseExceptionGroup(Exception):
        pass

T = TypeVar("T")                # Return type of the async context
P = ParamSpec("P")              # Parameter specification of the factory


class AsyncContextConnectionManager(Generic[T, P], BaseConnectionManager[T]):
    """Connection manager built around any factory that returns an async context manager.

    The factory is called with the stored arguments when the connection is
    established. The resulting context is entered at that point and exited
    again when the connection is closed.
    """

    def __init__(self,
                 ctx_factory: Callable[P, Any],
                 *args: P.args,
                 **kwargs: P.kwargs):
        """
        Args:
            ctx_factory: Callable returning an async context manager.
            *args: Positional arguments passed to ``ctx_factory``.
            **kwargs: Keyword arguments passed to ``ctx_factory``.
        """
        super().__init__()
        self._factory = ctx_factory
        self._factory_args = args
        self._factory_kwargs = kwargs
        self._ctx: Optional[Any] = None 

    def _factory_label(self) -> str:
        """Name of the factory for log messages.

        Not every callable has ``__name__`` (``functools.partial`` objects and
        instances with ``__call__`` do not), so fall back to ``repr`` instead
        of letting a log statement raise AttributeError.
        """
        factory = self._factory
        return getattr(factory, "__name__", None) or repr(factory)

    async def _establish_connection(self) -> T:
        """Create the context manager and enter it.

        Returns:
            Whatever the context's ``__aenter__`` returns.

        Raises:
            Exception: Any error from the factory or ``__aenter__`` is
                re-raised after logging. For errors recognised as
                TaskGroup/ExceptionGroup races, the partially created
                context is exited and cleared first.
        """
        label = self._factory_label()
        self._logger.debug("Creating context via %s", label)
        try:
            self._ctx = self._factory(*self._factory_args, **self._factory_kwargs)
            result: T = await self._ctx.__aenter__()
            self._logger.debug("Context %s entered successfully", label)
            return result
        except Exception as e:
            # Check if this is a benign ExceptionGroup/TaskGroup error
            # These occur during concurrent initialization and cleanup
            error_msg = str(e).lower()
            is_taskgroup_error = (
                "unhandled errors in a taskgroup" in error_msg or
                "cancel scope in a different task" in error_msg or
                "exceptiongroup" in type(e).__name__.lower()
            )
            
            if is_taskgroup_error:
                # This is a benign race condition during concurrent connection setup
                # Log at debug level and re-raise to trigger retry logic
                self._logger.debug(
                    f"Benign TaskGroup race condition during {label} connection: {type(e).__name__}"
                )
                # Clean up the partially created context
                if self._ctx is not None:
                    try:
                        await self._ctx.__aexit__(None, None, None)
                    except Exception:
                        pass  # Ignore cleanup errors
                    self._ctx = None
                raise
            else:
                # Real error - log at error level
                self._logger.error(f"Error establishing connection via {label}: {e}")
                raise

    async def _close_connection(self) -> None:
        """Exit the context manager if it exists.
        
        Uses try-finally to ensure ctx is cleared even if __aexit__ fails.
        This prevents resource leaks when cleanup encounters errors.
        Errors raised by ``__aexit__`` are logged and never re-raised.
        """
        if self._ctx is not None:
            # Resolved up front so that no log call below can itself fail on
            # a factory that lacks __name__ and abort the cleanup.
            label = self._factory_label()
            try:
                self._logger.debug("Exiting context %s", label)
                
                # Give subprocesses a moment to flush buffers before closing
                import asyncio
                await asyncio.sleep(0.05)
                
                # Try to exit the context, but catch all possible exceptions
                try:
                    await self._ctx.__aexit__(None, None, None)
                except BaseException as e:
                    # Catch absolutely everything including SystemExit, KeyboardInterrupt, etc.
                    # Check if it's a benign error
                    benign_error_types = (
                        BrokenPipeError, ConnectionResetError, ValueError, 
                        OSError, IOError, ProcessLookupError, RuntimeError,
                        GeneratorExit
                    )
                    
                    is_benign = False
                    
                    # Check direct exception type
                    if isinstance(e, benign_error_types):
                        is_benign = True
                    # Check for BaseExceptionGroup (Python 3.11+)
                    elif hasattr(e, 'exceptions'):
                        # It's an exception group, check all sub-exceptions
                        is_benign = all(isinstance(sub_e, benign_error_types) for sub_e in e.exceptions)
                    
                    if is_benign:
                        self._logger.debug(f"Benign cleanup error for {label}: {type(e).__name__}")
                    else:
                        self._logger.warning(f"Error during context exit for {label}: {type(e).__name__}: {e}")
                    
                    # Don't re-raise - we want cleanup to complete
                    
            except Exception as e:
                # Catch any other unexpected errors in the outer try block
                self._logger.warning(f"Unexpected error during cleanup for {label}: {e}")
            finally:
                self._ctx = None   

================================================
FILE: anytool/grounding/core/transport/task_managers/base.py
================================================
"""
Base connection manager for all backend connectors.

This module provides an abstract base class for different types of connection
managers used in all backend connectors.

Flow: start() → launch_connection_task() → call subclass _establish_connection() → notify ready → maintain connection until stop() → call subclass _close_connection() → cleanup
"""
import asyncio
from abc import ABC, abstractmethod
from typing import Generic, TypeVar
from anytool.utils.logging import Logger

T = TypeVar("T")


class BaseConnectionManager(Generic[T], ABC):
    """Abstract base class for connection managers.

    This class defines the interface for different types of connection managers
    used with all backend connectors.
    """

    def __init__(self):
        """Initialize a new connection manager."""
        self._ready_event = asyncio.Event()
        self._done_event = asyncio.Event()
        self._exception: Exception | None = None
        self._connection: T | None = None
        self._task: asyncio.Task | None = None
        self._logger = Logger.get_logger(f"{__name__}.{self.__class__.__name__}")

    @abstractmethod
    async def _establish_connection(self) -> T:
        """Establish the connection.

        This method should be implemented by subclasses to establish
        the specific type of connection needed.

        Returns:
            The established connection.

        Raises:
            Exception: If connection cannot be established.
        """
        pass

    @abstractmethod
    async def _close_connection(self) -> None:
        """Close the connection.

        This method should be implemented by subclasses to close
        the specific type of connection.

        """
        pass

    async def start(self, timeout: float | None = None) -> T:
        """Start the connection manager and establish a connection.

        Args:
            timeout: Optional timeout in seconds. If None, waits indefinitely.
                     If specified, will cancel the background task on timeout.

        Returns:
            The established connection.

        Raises:
            TimeoutError: If connection establishment times out.
            Exception: If connection cannot be established.
        """
        # Reset state
        self._ready_event.clear()
        self._done_event.clear()
        self._exception = None

        # Create a task to establish and maintain the connection
        self._task = asyncio.create_task(self._connection_task(), name=f"{self.__class__.__name__}_task")

        # Wait for the connection to be ready or fail (with optional timeout)
        try:
            if timeout is not None:
                await asyncio.wait_for(self._ready_event.wait(), timeout=timeout)
            else:
                await self._ready_event.wait()
        except asyncio.TimeoutError:
            # Timeout! Cancel the background task
            self._logger.warning(f"Connection establishment timed out after {timeout}s, cancelling...")
            if self._task and not self._task.done():
                self._task.cancel()
                try:
                    await asyncio.wait_for(self._task, timeout=2.0)  # Give it 2s to cleanup
                except (asyncio.CancelledError, asyncio.TimeoutError):
                    pass
                except Exception as e:
                    self._logger.debug(f"Error during task cancellation: {e}")
            raise TimeoutError(f"Connection establishment timed out after {timeout}s")

        # If there was an exception, raise it
        if self._exception:
            # Check if this is a benign TaskGroup race condition
            error_msg = str(self._exception).lower()
            is_benign_taskgroup_error = (
                "unhandled errors in a taskgroup" in error_msg or
                "cancel scope in a different task" in error_msg or
                "exceptiongroup" in type(self._exception).__name__.lower()
            )
            
            if is_benign_taskgroup_error:
                # Log as debug - this is expected and will be retried
                self._logger.debug(f"Benign TaskGroup race condition, will retry: {type(self._exception).__name__}")
            else:
                # Real error - log at error level
                self._logger.error(f"Failed to start connection: {self._exception}")
            
            raise self._exception

        # Return the connection
        if self._connection is None:
            error_msg = "Connection was not established"
            self._logger.error(error_msg)
            raise RuntimeError(error_msg)
            
        self._logger.info("Connection manager started successfully")
        return self._connection

    async def stop(self, timeout: float = 5.0) -> None:
        """Stop the connection manager and close the connection.
        
        Args:
            timeout: Maximum time to wait for cleanup (default 5s).
        
        Ensures all async resources (including aiohttp sessions) are properly closed.
        """
        if self._task and not self._task.done():
            self._task.cancel()
            try:
                await asyncio.wait_for(self._task, timeout=timeout)
            except asyncio.TimeoutError:
                self._logger.warning(f"Task cleanup timed out after {timeout}s")
            except asyncio.CancelledError:
                pass  # Expected
            except Exception as e:
                self._logger.warning(f"Error stopping task: {e}")

        # Wait for the connection to be done (with timeout)
        try:
            await asyncio.wait_for(self._done_event.wait(), timeout=timeout)
        except asyncio.TimeoutError:
            self._logger.warning(f"Done event wait timed out after {timeout}s")
        
        self._logger.info("Connection manager stopped")

    def get_streams(self) -> T | None:
        """Get the current connection streams.

        Returns:
            The current connection (typically a tuple of read_stream, write_stream) or None if not connected.
        """
        return self._connection

    async def _connection_task(self) -> None:
        """Run the connection task.

        This task establishes and maintains the connection until cancelled.
        """
        try:
            # Establish the connection
            self._connection = await self._establish_connection()
            self._logger.debug("Connection established")

            # Signal that the connection is ready
            self._ready_event.set()

            # Wait indefinitely until cancelled
            try:
                await asyncio.Event().wait()
            except asyncio.CancelledError:
                raise

        except asyncio.CancelledError:
            raise
        except Exception as e:
            # Store the exception
            self._exception = e
            
            # Check if this is a benign TaskGroup race condition
            error_msg = str(e).lower()
            is_benign_taskgroup_error = (
                "unhandled errors in a taskgroup" in error_msg or
                "cancel scope in a different task" in error_msg or
                "exceptiongroup" in type(e).__name__.lower()
            )
            
            if is_benign_taskgroup_error:
                # Log as debug - this is expected during concurrent connection setup
                self._logger.debug(f"Benign TaskGroup race condition in connection task: {type(e).__name__}")
            else:
                # Real error - log at error level
                self._logger.error(f"Connection task failed: {e}")
            
            # Signal that the connection is ready (with error)
            self._ready_event.set()

        finally:
            # Close the connection if it was established
            if self._connection is not None:
                try:
                    await self._close_connection()
                except Exception as e:
                    self._logger.warning(f"Error closing connection: {e}")
                self._connection = None

            # Signal that the connection is done
            self._done_event.set()

================================================
FILE: anytool/grounding/core/transport/task_managers/noop.py
================================================
"""No-op connection manager for local (in-process) connectors.

Local connectors execute commands directly via subprocess, so they don't
need a real network connection. This manager satisfies the
BaseConnectionManager interface that BaseConnector requires.
"""
import asyncio
from typing import Any
from .base import BaseConnectionManager


class NoOpConnectionManager(BaseConnectionManager[Any]):
    """Connection manager whose "connection" is just a truthy sentinel.

    It reports readiness as soon as the base class runs it, without opening
    any real connection.

    Used by LocalShellConnector and LocalGUIConnector.
    """

    async def _establish_connection(self) -> Any:
        """Hand back a sentinel instead of a real connection object."""
        sentinel = True
        return sentinel

    async def _close_connection(self) -> None:
        """There is no underlying resource, so closing is a no-op."""
        return None


================================================
FILE: anytool/grounding/core/transport/task_managers/placeholder.py
================================================
from typing import Any
from .base import BaseConnectionManager


class PlaceholderConnectionManager(BaseConnectionManager[Any]):
    """Stand-in connection manager that cannot actually connect.

    This is used by connectors that set up their real connection manager
    during the connect() phase.
    """

    async def _establish_connection(self) -> Any:
        """Always fail: a placeholder must be swapped out before it is started."""
        message = "PlaceholderConnectionManager should be replaced before use"
        raise NotImplementedError(message)

    async def _close_connection(self) -> None:
        """Nothing was ever opened, so there is nothing to close."""
        return None

================================================
FILE: anytool/grounding/core/types.py
================================================
from enum import Enum
from datetime import datetime
from typing import Any, Dict, Generic, List, TypeVar, Optional
import jsonschema
from pydantic import BaseModel, Field, ConfigDict

# Pydantic v2 compatibility
# Probe for RootModel and record the result in PYDANTIC_V2; the flag gates the
# ClientMessage definition at the bottom of this module.
# NOTE(review): ConfigDict is imported unconditionally above; if that import is
# also v2-only, this fallback branch can never be reached — confirm.
try:
    from pydantic import RootModel
    PYDANTIC_V2 = True
except ImportError:
    PYDANTIC_V2 = False


class BackendType(str, Enum):
    """Backend kinds; the ``str`` mixin makes members compare equal to their string values."""
    MCP = "mcp"
    SHELL = "shell"
    WEB = "web"
    GUI = "gui"
    SYSTEM = "system"
    NOT_SET = "not_set"  # presumably a placeholder for "no backend chosen" — confirm with callers


class ToolStatus(str, Enum):
    """Outcome flag stored on ToolResult.status (see ToolResult.is_success / is_error)."""
    SUCCESS = "success"
    ERROR = "error"


class SessionStatus(str, Enum):
    """Connection state recorded in SessionInfo.status."""
    CONNECTED = "connected"
    DISCONNECTED = "disconnected"
    CONNECTING = "connecting"
    
    
# Identifier aliases: both may be either a string or an integer.
ProgressToken = str | int
RequestId = str | int

# Type variables used to parameterise RpcMessage / Request / Notification below.
# Params may be a pydantic model, a plain dict, or None; the method is a str.
RequestParamsT = TypeVar("RequestParamsT", bound=BaseModel | Dict[str, Any] | None)
NotificationParamsT = TypeVar("NotificationParamsT", bound=BaseModel | Dict[str, Any] | None)
MethodT = TypeVar("MethodT", bound=str)


# Common base for the models in this module: carries a free-form ``metadata``
# dict and, through ``extra="allow"``, accepts fields that are not declared.
class BaseEntity(BaseModel):
    metadata: Dict[str, Any] = Field(default_factory=dict)
    model_config = ConfigDict(extra="allow")


# Base for JSON-RPC shaped models; pins the protocol version field to "2.0" by default.
class JsonRpcBase(BaseEntity):
    jsonrpc: str = "2.0"


# Generic message with a method name and a params payload.
# Generic argument order is [MethodT, RequestParamsT]: method type first, params type second.
class RpcMessage(JsonRpcBase, Generic[MethodT, RequestParamsT]):
    method: MethodT
    params: RequestParamsT


# An RpcMessage that additionally carries an optional request id.
class Request(RpcMessage[MethodT, RequestParamsT]):
    id: RequestId | None = None  # id is None means Notification


# An RpcMessage with no id field; adds nothing beyond RpcMessage itself.
class Notification(RpcMessage[MethodT, NotificationParamsT]):
    pass


# Marker base class for result models (ToolResult derives from it); adds no fields.
class Result(JsonRpcBase):
    pass


# Structured error payload: numeric code, human-readable message, optional extra data.
# presumably mirrors the JSON-RPC error object — confirm against producers.
class ErrorData(BaseEntity):
    code: int
    message: str
    data: Any | None = None


class ToolResult(Result):
    """Tool execution result"""
    status: ToolStatus
    content: Any = ""
    error: ErrorData | str | None = None
    execution_time: float | None = None

    @property
    def is_success(self) -> bool:
        """Whether ``status`` equals ToolStatus.SUCCESS."""
        return self.status == ToolStatus.SUCCESS

    @property
    def is_error(self) -> bool:
        """Whether ``status`` equals ToolStatus.ERROR."""
        return self.status == ToolStatus.ERROR


class SecurityPolicy(BaseEntity):
    """Allow/deny rules checked before running a command or contacting a domain."""
    allow_shell_commands: bool = True
    allow_network_access: bool = True
    allow_file_access: bool = True
    # Empty list means "no domain restriction" (see check()).
    allowed_domains: List[str] = Field(default_factory=list)
    # Tokens that make a command unacceptable; compared case-insensitively.
    blocked_commands: List[str] = Field(default_factory=list)
    sandbox_enabled: bool = False

    @classmethod
    def from_dict(cls, data: Dict) -> "SecurityPolicy":
        """
        Create SecurityPolicy from configuration dict.

        Keys that are not declared model fields are ignored.

        Supports two formats for blocked_commands:
        1. List format (applies to all OS): ["cmd1", "cmd2"]
        2. Dict format (OS-specific):
           {
               "common": ["cmd1", "cmd2"],
               "linux": ["cmd3"],
               "darwin": ["cmd4"],
               "windows": ["cmd5"]
           }

        When using dict format, merges 'common' commands with current OS-specific commands.
        Any other type for blocked_commands is replaced by an empty list.
        """
        import sys

        processed_data = {}
        for k, v in data.items():
            if k not in cls.model_fields:
                continue

            if k != "blocked_commands":
                processed_data[k] = v
                continue

            # Special handling for blocked_commands
            if isinstance(v, dict):
                # Dict format: merge common + OS-specific
                blocked_list = list(v.get("common", []))

                # Determine current OS
                system = sys.platform
                if system.startswith("linux"):
                    os_key = "linux"
                elif system == "darwin":
                    os_key = "darwin"
                elif system.startswith("win"):
                    os_key = "windows"
                else:
                    os_key = None

                # Merge OS-specific commands
                if os_key and os_key in v:
                    blocked_list.extend(v[os_key])

                processed_data[k] = blocked_list
            elif isinstance(v, list):
                # List format: use as-is
                processed_data[k] = v
            else:
                # Invalid format, use empty list
                processed_data[k] = []

        return cls(**processed_data)

    @staticmethod
    def _tokenize(command: str) -> List[str]:
        """Split ``command`` into lower-cased tokens.

        Uses POSIX shell-style splitting; if the command cannot be parsed
        (e.g. an unbalanced quote) it falls back to whitespace splitting so
        callers never see a ValueError.
        """
        import shlex

        try:
            parts = shlex.split(command, posix=True)
        except ValueError:
            parts = command.split()
        return [part.lower() for part in parts]

    def check(self, *, command: str | None = None, domain: str | None = None) -> bool:
        """
        return True if allowed, False if denied.
        Command check uses token-level matching to prevent simple space/escape bypasses.

        A command that cannot be shell-parsed is tokenised by whitespace
        instead of raising, matching find_dangerous_tokens().
        """
        # Shell / Python command check
        if command:
            if not self.allow_shell_commands:
                return False

            blocked_set = {b.lower() for b in self.blocked_commands}
            if any(tok in blocked_set for tok in self._tokenize(command)):
                return False

        # Network access check
        if domain:
            if not self.allow_network_access:
                return False
            # An empty allow-list places no restriction on the domain.
            if self.allowed_domains and domain not in self.allowed_domains:
                return False

        return True

    def find_dangerous_tokens(self, command: str) -> List[str]:
        """
        Find and return all dangerous tokens in the command.
        Returns empty list if no dangerous tokens found.
        """
        if not command:
            return []

        blocked_set = {b.lower() for b in self.blocked_commands}
        return [tok for tok in self._tokenize(command) if tok in blocked_set]


class ToolSchema(BaseEntity):
    name: str
    description: str | None = None
    parameters: Dict[str, Any] = Field(default_factory=dict)  # JSON Schema, optional
    return_schema: Dict[str, Any] = Field(default_factory=dict)
    examples: List[dict] = Field(default_factory=list)
    usage_hint: str | None = None
    latency_hint: str | None = None
    backend_type: BackendType
    security_policy: SecurityPolicy | None = None

    def validate_parameters(self, params: Dict[str, Any], *, raise_exc: bool = False) -> bool:
        """Validate ``params`` against this tool's JSON Schema.

        A tool without a parameter schema accepts only an empty ``params``.
        On failure the method returns False, or raises when ``raise_exc`` is
        set (ValueError for unexpected parameters, the jsonschema
        ValidationError otherwise).
        """
        if not self.parameters:
            # No schema declared: only "no arguments" is acceptable.
            if not params:
                return True
            if raise_exc:
                raise ValueError(f"Tool '{self.name}' does not accept any parameters, but got: {list(params.keys())}")
            return False

        try:
            jsonschema.validate(params, self.parameters)
        except jsonschema.ValidationError:
            if raise_exc:
                raise
            return False
        return True

    def is_allowed(self, *, command: str | None = None, domain: str | None = None) -> bool:
        """Consult the attached security policy; with no policy everything is allowed."""
        policy = self.security_policy
        if not policy:
            return True
        return policy.check(command=command, domain=domain)


# Static configuration for one backend session.
class SessionConfig(BaseEntity):
    session_name: str
    backend_type: BackendType
    connection_params: Dict[str, Any] = Field(default_factory=dict)
    timeout: int = 30  # presumably seconds — TODO confirm with the code that consumes it
    max_retries: int = 3
    auto_reconnect: bool = True
    auto_connect: bool = True
    health_check_interval: int = 5  # presumably seconds — TODO confirm
    custom_settings: Dict[str, Any] = Field(default_factory=dict)


# SessionConfig plus runtime state: current status and two timestamps.
class SessionInfo(SessionConfig):
    status: SessionStatus
    created_at: datetime
    last_activity: datetime


# Options for running inside a remote sandbox provider.
class SandboxOptions(BaseEntity):
    # NOTE(review): this field is declared required (no default), yet the text
    # below describes an environment-variable fallback when it is "not
    # provided". Any such fallback must live in the caller — confirm, or make
    # the field Optional.
    api_key: str
    """Direct API key for sandbox provider (e.g., E2B API key).
    If not provided, will use E2B_API_KEY environment variable."""
    
    # The defaults named in the two descriptions below are not applied here
    # (both fields default to None); presumably the consumer substitutes them.
    sandbox_template_id: Optional[str] = None
    """Template ID for the sandbox environment.
    Default: 'base'"""

    supergateway_command: Optional[str] = None
    """Command to run supergateway.
    Default: 'npx -y supergateway'"""


# ClientMessage: Only available in Pydantic v2
if PYDANTIC_V2:
    class ClientMessage(
        RootModel[
            # Generic order is [MethodT, ParamsT] (see RpcMessage): the method
            # is a string and params may be anything. The previous
            # ``[Any, str]`` ordering required ``params`` to be a string.
            Request[str, Any] | Notification[str, Any]
        ]
    ):
        """
        Unified deserialization entry: `ClientMessage.model_validate_json(raw_bytes)`
        """
else:
    # Pydantic v1 fallback: not used in current codebase
    ClientMessage = None  # type: ignore

================================================
FILE: anytool/llm/__init__.py
================================================
from .client import LLMClient

================================================
FILE: anytool/llm/client.py
================================================
import litellm
import json
import asyncio
import time
from typing import List, Sequence, Union, Dict, Optional
from dotenv import load_dotenv
from openai.types.chat import ChatCompletionToolParam

from anytool.grounding.core.types import ToolSchema, ToolResult, ToolStatus
from anytool.grounding.core.tool import BaseTool
from anytool.utils.logging import Logger

# Load variables from a .env file into the process environment at import time.
load_dotenv()

# Disable LiteLLM verbose logging to prevent stdout blocking with large tool schemas
litellm.set_verbose = False
litellm.suppress_debug_info = True

# Module-level logger shared by the helper functions below.
logger = Logger.get_logger(__name__)


def _sanitize_schema(params: Dict) -> Dict:
    """Sanitize tool parameter schema to comply with Claude API requirements.
    
    Fixes common issues:
    - Empty object schemas (no properties, no required)
    - Missing required fields for Claude compatibility
    """
    if not params:
        return {"type": "object", "properties": {}, "required": []}
    
    # Deep copy to avoid modifying the original
    import copy
    sanitized = copy.deepcopy(params)
    
    # Anthropic API requires top-level type to be 'object'
    # If it's not an object, wrap the schema as a property of an object
    top_level_type = sanitized.get("type")
    if top_level_type and top_level_type != "object":
        # Wrap non-object schema as a single property called "value"
        logger.debug(f"[SCHEMA_SANITIZE] Wrapping non-object schema (type={top_level_type}) into object")
        wrapped = {
            "type": "object",
            "properties": {
                "value": sanitized  # The original schema becomes a property
            },
            "required": ["value"]  # Make it required
        }
        sanitized = wrapped
    
    # If type is object but missing properties/required, add them
    if sanitized.get("type") == "object":
        if "properties" not in sanitized:
            sanitized["properties"] = {}
        if "required" not in sanitized:
            sanitized["required"] = []
    
    # Remove non-standard fields that may cause issues (like 'title')
    sanitized.pop("title", None)
    
    # Recursively sanitize nested properties
    if "properties" in sanitized and isinstance(sanitized["properties"], dict):
        for prop_name, prop_schema in list(sanitized["properties"].items()):
            if isinstance(prop_schema, dict):
                # Remove title from nested properties
                prop_schema.pop("title", None)
    
    return sanitized


def _schema_to_openai(schema: ToolSchema) -> ChatCompletionToolParam:
    """Build the OpenAI ChatCompletion ``tools`` entry for a ToolSchema."""
    if schema.parameters:
        parameters = _sanitize_schema(schema.parameters)
        # Debug aid: report when sanitisation dropped a top-level title.
        if "title" in schema.parameters and "title" not in parameters:
            logger.debug(f"Sanitized tool '{schema.name}': removed title")
    else:
        # Claude requires the parameters field even when the tool takes none.
        parameters = {"type": "object", "properties": {}, "required": []}

    return {
        "type": "function",
        "function": {
            "name": schema.name,
            "description": schema.description or "",
            "parameters": parameters,
        },
    }
    
def _prepare_tools_for_llmclient(
    tools: List[BaseTool] | None,
    fmt: str = "openai",
) -> tuple[Sequence[Union[ToolSchema, ChatCompletionToolParam]], Dict[str, BaseTool]]:
    """Turn BaseTool instances into what LLMClient sends to the model.

    Args:
        tools: BaseTool instance list (should be obtained from GroundingClient and bound to runtime_info)
                if None or empty list, return empty list
        fmt: output format, "openai" for OpenAI format

    Returns:
        ``(tool_specs, tool_map)``. For ``fmt="openai"`` the specs are OpenAI
        tool dicts and ``tool_map`` is keyed by the name shown to the LLM:
        names shared by several tools are prefixed with ``<server>__``, and a
        tool whose final name was already emitted is skipped. For any other
        ``fmt`` the raw schemas are returned, keyed by schema name.
    """
    if not tools:
        return [], {}

    if fmt != "openai":
        tool_map = {tool.schema.name: tool for tool in tools}
        return [tool.schema for tool in tools], tool_map

    # First pass: how many tools share each schema name?
    occurrences = {}
    for tool in tools:
        schema_name = tool.schema.name
        occurrences[schema_name] = occurrences.get(schema_name, 0) + 1

    # Second pass: emit each tool once under its (possibly prefixed) name.
    prepared = []
    by_llm_name = {}  # llm_name -> BaseTool
    for tool in tools:
        base_name = tool.schema.name

        llm_name = base_name
        if occurrences[base_name] > 1:
            bound_server = tool.is_bound and tool.runtime_info and tool.runtime_info.server_name
            llm_name = f"{bound_server or 'unknown'}__{base_name}"

        if llm_name in by_llm_name:
            logger.warning(f"[TOOL_DEDUP] Skipping duplicate tool: {llm_name}")
            continue

        spec = _schema_to_openai(tool.schema)
        spec["function"]["name"] = llm_name
        prepared.append(spec)
        by_llm_name[llm_name] = tool

        if llm_name != base_name:
            logger.info(f"[TOOL_RENAME] {base_name} -> {llm_name}")

    logger.info(f"[SCHEMA_SANITIZE] Prepared {len(prepared)} tools for LLM (from {len(tools)} total)")
    return prepared, by_llm_name

# Default character count above which a tool result is sent for LLM
# summarization (default of `summarize_threshold` / `summarize_threshold_chars`).
DEFAULT_SUMMARIZE_THRESHOLD_CHARS = 200000  # ~50K tokens, lowered from 400K to prevent context overflow
# Hard cap applied by _tool_result_to_message_async when summarization returns nothing.
MAX_TOOL_RESULT_CHARS = 200000  # Fallback truncation limit when summarization fails (~50K tokens)

async def _summarize_tool_result(
    content: str,
    tool_name: str,
    task: str = "",
    model: str = "openrouter/anthropic/claude-sonnet-4.5",
    timeout: float = 60.0
) -> Optional[str]:
    """Use LLM to summarize large tool results.

    Args:
        content: Raw tool output to condense.
        tool_name: Name of the tool, used in the prompt and in log messages.
        task: Optional user task; when given, the prompt asks the model to
            focus on information relevant to it.
        model: litellm model identifier used for the summarization call.
        timeout: Per-request timeout in seconds; the surrounding wait_for
            allows 5 extra seconds.

    Returns:
        ``"[SUMMARY of N chars]\\n<summary>"`` where N is the length of the
        original content, or None if summarization failed for any reason
        (error, timeout, or an empty model reply).
    """
    try:
        # Remember the size the tool actually returned; ``content`` may be
        # shortened below and must not be used for reporting afterwards.
        original_len = len(content)
        logger.info(f"Summarizing tool result from '{tool_name}': {original_len:,} chars")

        # Pre-truncate if content is too large for the model (leave room for prompt + output)
        # Assuming ~4 chars per token, 200K tokens limit, 8K output, ~500 tokens for prompt
        # Safe input limit: (200K - 8K - 0.5K) * 4 = ~766K chars, but be conservative at 400K
        max_input_chars = 400000
        if original_len > max_input_chars:
            logger.warning(f"Pre-truncating content for summarization: {original_len:,} -> {max_input_chars:,} chars")
            content = content[:max_input_chars] + f"\n\n[TRUNCATED for summarization: original was {original_len:,} chars]"

        task_hint = f"\n\nUser's task: {task}\nSummarize with focus on information relevant to this task." if task else ""

        prompt = f"""Tool '{tool_name}' returned a large result ({original_len:,} chars). Summarize it concisely.{task_hint}

**Guidelines:**
- Structured data (coordinates, steps, etc.): Keep key summary (totals, start/end), omit repetitive details.
- Markup content (HTML, XML): Extract text and key data only, ignore tags/scripts.
- Long documents: Keep structure outline and essential sections.
- Lists/arrays: Summarize count and most relevant items.
- Always preserve: numbers, URLs, file paths, IDs, key identifiers.

Content:
{content}

Concise summary:"""

        response = await asyncio.wait_for(
            litellm.acompletion(
                model=model,
                messages=[{"role": "user", "content": prompt}],
                timeout=timeout
            ),
            timeout=timeout + 5
        )

        summary = (response.choices[0].message.content or "").strip()
        if not summary:
            # An empty reply carries no information; report failure so the
            # caller can fall back to truncation instead.
            logger.warning(f"Summarization failed for '{tool_name}': empty summary returned")
            return None

        result = f"[SUMMARY of {original_len:,} chars]\n{summary}"

        logger.info(f"Tool result summarized: {original_len:,} -> {len(result):,} chars")
        return result

    except Exception as e:
        logger.warning(f"Summarization failed for '{tool_name}': {e}")
        return None


async def _tool_result_to_message_async(
    result: ToolResult, 
    *, 
    tool_call_id: str, 
    tool_name: str,
    task: str = "",
    summarize_threshold: int = DEFAULT_SUMMARIZE_THRESHOLD_CHARS,
    summarize_model: str = "openrouter/anthropic/claude-sonnet-4.5",
    enable_summarization: bool = True
) -> Dict:
    """Render a ToolResult as an OpenAI ``tool`` message, condensing oversized output.

    Args:
        result: Tool execution result
        tool_call_id: OpenAI tool_call ID
        tool_name: Tool name
        task: User's original task for context-aware summarization
        summarize_threshold: If content exceeds this, use LLM summarization
        summarize_model: Model to use for summarization
        enable_summarization: Whether to enable LLM summarization

    Returns:
        OpenAI ChatCompletion tool message (text only)
    """
    # Flatten the result to text: errors get an "[ERROR]" prefix, non-string
    # content is serialised as JSON.
    if result.is_error:
        text_content = f"[ERROR] {result.error or 'unknown error'}"
    elif isinstance(result.content, str):
        text_content = result.content
    else:
        text_content = json.dumps(result.content, ensure_ascii=False, default=str)

    original_len = len(text_content)

    if original_len > summarize_threshold and enable_summarization:
        condensed = await _summarize_tool_result(text_content, tool_name, task, summarize_model)
        if condensed:
            text_content = condensed
        elif original_len > MAX_TOOL_RESULT_CHARS:
            # Summarization produced nothing: cut the text down, reserving room
            # for the notice so the total stays within the cap.
            truncate_msg = f"\n\n[TRUNCATED: Original content was {original_len:,} chars, showing first {MAX_TOOL_RESULT_CHARS:,}]"
            keep = MAX_TOOL_RESULT_CHARS - len(truncate_msg)
            text_content = text_content[:keep] + truncate_msg
            logger.warning(f"Tool result truncated for '{tool_name}': {original_len:,} -> {len(text_content):,} chars (summarization failed)")

    message = {
        "role": "tool",
        "name": tool_name,
        "content": text_content,
        "tool_call_id": tool_call_id,
    }
    return message

async def _execute_tool_call(
    tool: BaseTool,
    openai_tool_call: Dict,
) -> ToolResult:
    """Run the tool named by an LLM-produced tool_call and return its result.

    Args:
        tool: BaseTool instance (must be obtained from GroundingClient and bound to runtime_info)
        openai_tool_call: LLMClient usable tool_call object, contains id, type, function etc. fields

    Raises:
        ValueError: If ``tool`` is not bound to runtime_info.
    """
    if not tool.is_bound:
        raise ValueError(
            f"Tool '{tool.schema.name}' is not bound to runtime_info. "
            f"Please ensure tools are obtained from GroundingClient.list_tools() "
            f"with bind_runtime_info=True"
        )

    # Arguments usually arrive as a JSON string; decode them into a dict.
    func = openai_tool_call["function"]
    arguments = func.get("arguments", "{}")
    if isinstance(arguments, str):
        arguments = json.loads(arguments or "{}")

    # Drop arguments the tool's JSON Schema does not declare.
    if isinstance(arguments, dict) and tool.schema.parameters:
        schema_params = tool.schema.parameters
        declares_properties = isinstance(schema_params, dict) and "properties" in schema_params
        valid_params = set(schema_params["properties"].keys()) if declares_properties else set()

        # "skip_visual_analysis" is always removed; other names are removed
        # only when the schema lists properties and the name is not among them.
        rejected = [
            param_name
            for param_name in list(arguments.keys())
            if param_name == "skip_visual_analysis"
            or (valid_params and param_name not in valid_params)
        ]

        for param in rejected:
            arguments.pop(param)
            logger.debug(
                f"Removed parameter '{param}' from {tool.schema.name} "
                f"(not in tool schema)"
            )

    return await tool.invoke(
        parameters=arguments,
        keep_session=True
    )


class LLMClient:
    """LLMClient class for single round call"""
    def __init__(
        self, 
        model: str = "openrouter/anthropic/claude-sonnet-4.5", 
        enable_thinking: bool = False,
        rate_limit_delay: float = 0.0,
        max_retries: int = 3,
        retry_delay: float = 1.0,
        timeout: float = 120.0,
        summarize_threshold_chars: int = DEFAULT_SUMMARIZE_THRESHOLD_CHARS,
        enable_tool_result_summarization: bool = True,
        **litellm_kwargs
    ):
        """
        Args:
            model: LLM model identifier
            enable_thinking: Whether to enable extended thinking mode
            rate_limit_delay: Minimum delay between API calls in seconds (0 = no delay)
            max_retries: Maximum number of retries on rate limit errors
            retry_delay: Initial delay between retries in seconds (exponential backoff)
            timeout: Request timeout in seconds (default: 120s)
            summarize_threshold_chars: If tool result exceeds this threshold, use LLM to 
                                       summarize the result (default: 50000 chars ≈ 12.5K tokens)
            enable_tool_result_summarization: Whether to enable LLM-based summarization for 
                                              large tool results (default: True)
            **litellm_kwargs: Additional litellm parameters
        """
        self.model = model
        self.enable_thinking = enable_thinking
        self.rate_limit_delay = rate_limit_delay
        self.max_retries = max_retries
        self.retry_delay = retry_delay
        self.timeout = timeout
        self.summarize_threshold_chars = summarize_threshold_chars
        self.enable_tool_result_summarization = enable_tool_result_summarization
        self.litellm_kwargs = litellm_kwargs
        self._logger = Logger.get_logger(__name__)
        self._last_call_time = 0.0
    
    async def _rate_limit(self):
        """Apply rate limiting by adding delay between API calls"""
        if self.rate_limit_delay > 0:
            current_time = time.time()
            time_since_last_call = current_time - self._last_call_time
            
            if time_since_last_call < self.rate_limit_delay:
                sleep_time = self.rate_limit_delay - time_since_last_call
                self._logger.debug(f"Rate limiting: waiting {sleep_time:.2f}s before next API call")
                await asyncio.sleep(sleep_time)
            
            self._last_call_time = time.time()
    
    async def _call_with_retry(self, **completion_kwargs):
        """Call LLM with backoff retry on rate limit errors
        
        Timeout and retry strategy:
        - Single call timeout: self.timeout (default 120s)
        - Rate limit retry delays: 60s, 90s, 120s
        - Total max time: timeout * max_retries + sum(retry_delays)
        """
        last_exception = None
        
        for attempt in range(self.max_retries):
            try:
                # Add timeout to the completion call
                response = await asyncio.wait_for(
                    litellm.acompletion(**completion_kwargs),
                    timeout=self.timeout
                )
                return response
            except asyncio.TimeoutError:
                self._logger.error(
                    f"LLM call timed out after {self.timeout}s (attempt {attempt + 1}/{self.max_retries})"
                )
                last_exception = TimeoutError(f"LLM call timed out after {self.timeout}s")
                if attempt < self.max_retries - 1:
                    # Retry on timeout with shorter delay
                    self._logger.info(f"Retrying after {self.retry_delay}s delay...")
                    await asyncio.sleep(self.retry_delay)
                    continue
                else:
                    raise last_exception
            except Exception as e:
                last_exception = e
                error_str = str(e).lower()
                
                # Check if it's a retryable error
                is_rate_limit = any(
                    keyword in error_str 
                    for keyword in ['rate limit', 'rate_limit', 'too many requests', '429']
                )
                
                is_overloaded = any(
                    keyword in error_str
                    for keyword in ['overloaded', '500', '502', '503', '504', 'internal server error', 'service unavailable']
                )
                
                if attempt < self.max_retries - 1 and (is_rate_limit or is_overloaded):
                    # Determine backoff delay based on error type
                    if is_rate_limit:
                        # Use longer backoff for rate limits to cross rate limit windows
                        backoff_delay = 60 + (attempt * 30)  # 60s, 90s, 120s
                        error_type = "Rate limit"
                    else:  # is_overloaded
                        # Use exponential backoff for server errors
                        backoff_delay = min(5 * (2 ** attempt), 60)  # 5s, 10s, 20s, max 60s
                        error_type = "Server overload"
                    
                    self._logger.warning(
                        f"{error_type} error (attempt {attempt + 1}/{self.max_retries}), "
                        f"waiting {backoff_delay}s before retry..."
                    )
                    await asyncio.sleep(backoff_delay)
                    continue
                else:
                    # Not a retryable error, or max retries reached
                    if attempt >= self.max_retries - 1:
                        self._logger.error(f"Max retries ({self.max_retries}) reached, giving up")
                    raise
        
        raise last_exception
    
    async def complete(
        self,
        messages: List[Dict] | str, 
        tools: List[BaseTool] | None = None,
        execute_tools: bool = True,
        summary_prompt: Optional[str] = None,
        tool_result_callback: Optional[callable] = None,
        **kwargs
    ) -> Dict:
        """
        Single-round LLM call with optional tool execution.
        
        Args:
            messages: conversation history (List[Dict] for standard OpenAI format, or str for text format)
            tools: BaseTool instance list (must be obtained from GroundingClient and bound to runtime_info)
                if None or empty list, only perform conversation, no tools
            execute_tools: if LLM returns tool_calls, whether to automatically execute tools
            summary_prompt: Optional custom prompt for requesting iteration summary. 
                If provided, will request summary after tool execution.
                If None, no summary will be requested.
            tool_result_callback: Optional async callback to process tool results after execution.
                Signature: async def callback(result: ToolResult, tool_name: str, tool_call: Dict, backend: str) -> ToolResult
            **kwargs: additional parameters for litellm completion
        """
        # 1. Process messages
        if isinstance(messages, str):
            current_messages = [{"role": "user", "content": messages}]
            user_task = messages
        elif isinstance(messages, list):
            current_messages = messages.copy()
            # Extract first user message as task for context-aware summarization
            user_task = next(
                (m.get("content", "") for m in messages if m.get("role") == "user"),
                ""
            )
        else:
            raise ValueError("messages must be List[Dict] or str")
        
        # 2. prepare base litellm completion kwargs
        completion_kwargs = {
            "model": kwargs.get("model", self.model),
            **self.litellm_kwargs,
        }
        
        # Add thinking/reasoning_effort only if explicitly enabled and not using tools
        enable_thinking = kwargs.get("enable_thinking", self.enable_thinking)
        
        # 3. if tools are provided, add them to the request
        llm_tools = None
        tool_map = {}  # llm_name -> BaseTool
        if tools:
            llm_tools, tool_map = _prepare_tools_for_llmclient(tools, fmt="openai")
            if llm_tools:
                completion_kwargs["tools"] = llm_tools
                completion_kwargs["tool_choice"] = kwargs.get("tool_choice", "auto")
                # Disable thinking when using tools to avoid format conflicts
                enable_thinking = False
                self._logger.debug(f"Prepared {len(llm_tools)} tools for LLM")
            else:
                self._logger.warning("Tools provided but none could be prepared for LLM")
        
        # Add thinking parameters if enabled
        if enable_thinking:
            completion_kwargs["reasoning_effort"] = kwargs.get("reasoning_effort", "medium")
        
        # 4. Apply rate limiting
        await self._rate_limit()
        
        # 5. Call LLM with retry (single round)
        completion_kwargs["messages"] = current_messages
        response = await self._call_with_retry(**completion_kwargs)
        
        if not response.choices:
            raise ValueError("LLM response has no choices")
        
        response_message = response.choices[0].message
        
        # 6. Build assistant message
        assistant_message = {
            "role": "assistant",
            "content": response_message.content or "",
        }
        
        tool_calls = getattr(response_message, 'tool_calls', None)
        if tool_calls:
            assistant_message["tool_calls"] = [
                {
                    "id": tc.id,
                    "type": "function",
                    "function": {
                        "name": tc.function.name,
                        "arguments": tc.function.arguments
                    }
                }
                for tc in tool_calls
            ]
        
        # Add assistant message to conversation
        current_messages.append(assistant_message)
        
        # 7. Execute tools if requested
        tool_results = []
        if execute_tools and tool_calls and tools:
            self._logger.info(f"Executing {len(tool_calls)} tool calls...")
            
            for tool_call in tool_calls:
                tool_name = tool_call.function.name
                
                # Extract tool metadata and check visual analysis request
                tool_obj = tool_map.get(tool_name)
                backend = None
                server_name = None
                
                if tool_obj:
                    try:
                        # Prefer runtime_info if bound
                        if getattr(tool_obj, 'is_bound', False) and getattr(tool_obj, 'runtime_info', None):
                            backend = tool_obj.runtime_info.backend.value
                            server_name = tool_obj.runtime_info.server_name
                        else:
                            backend = tool_obj.backend_type.value if hasattr(tool_obj, 'backend_type') else None
                    except Exception:
                        pass
                
                # Log tool execution
                try:
                    if isinstance(tool_call.function.arguments, str):
                        safe_args_str = tool_call.function.arguments.strip() or "{}"
                        args = json.loads(safe_args_str)
                    else:
                        args = tool_call.function.arguments
                    
                    args_str = json.dumps(args, ensure_ascii=False)[:200]
                    self._logger.info(f"Calling {tool_name} with args: {args_str}")
                except:
                    pass
                
                if tool_name not in tool_map:
                    result = ToolResult(
                        status=ToolStatus.ERROR,
                        error=f"Tool '{tool_name}' not found"
                    )
                else:
                    try:
                        result = await _execute_tool_call(
                            tool=tool_map[tool_name],
                            openai_tool_call={
                                "id": tool_call.id,
                                "type": "function",
                                "function": {
                                    "name": tool_call.function.name,
                                    "arguments": tool_call.function.arguments
                                }
                            }
                        )

                        # Apply tool result callback if provided
                        if tool_result_callback and not result.is_error:
                            try:
                                result = await tool_result_callback(
                                    result=result,
                                    tool_name=tool_name,
                                    tool_call=tool_call,
                                    backend=backend
                                )
                            except Exception as e:
                                self._logger.warning(f"Tool result callback failed for {tool_name}: {e}")
                    except Exception as e:
                        result = ToolResult(
                            status=ToolStatus.ERROR,
                            error=str(e)
                        )
                
                # Use async version with LLM summarization for large results
                tool_message = await _tool_result_to_message_async(
                    result, 
                    tool_call_id=tool_call.id, 
                    tool_name=tool_name,
                    task=user_task,
                    summarize_threshold=self.summarize_threshold_chars,
                    summarize_model=self.model,
                    enable_summarization=self.enable_tool_result_summarization
                )
                current_messages.append(tool_message)
                
                # Store result
                tool_results.append({
                    "tool_call": tool_call,
                    "result": result,
                    "message": tool_message,
                    "backend": backend,
                    "server_name": server_name,
                })
            
            self._logger.info(f"Tool execution completed, {len(tool_results)} tools executed")
        
        # 8. Request summary if provided and tools were executed
        iteration_summary = None
        
        if summary_prompt and tool_results:
            self._logger.debug("Requesting iteration summary from LLM")
            summary_message = {
                "role": "system",
                "content": summary_prompt
            }
            current_messages.append(summary_message)
            
            # Apply rate limiting before summary call
            await self._rate_limit()
            
            # Call LLM to generate summary (without tools)
            summary_kwargs = {
                **self.litellm_kwargs,
                "model": self.model,
                "messages": current_messages,
                "tools": [], 
                "tool_choice": "none",
            }
            
            summary_response = await self._call_with_retry(**summary_kwargs)
            
            if summary_response.choices:
                summary_message = summary_response.choices[0].message
                iteration_summary = summary_message.content or ""
                
                # Add summary response to messages
                current_messages.append({
                    "role": "assistant",
                    "content": iteration_summary
                })
                
                self._logger.debug(f"Generated iteration summary: {iteration_summary[:100]}...")
                
        # 9. Return single-round result        
        return {
            "message": assistant_message,
            "tool_results": tool_results,
            "messages": current_messages,
            "has_tool_calls": bool(tool_calls),
            "iteration_summary": iteration_summary
        }
    
    @staticmethod
    def format_messages_to_text(messages: List[Dict]) -> str:
        """Format conversation history to readable text (for logging/debugging)"""
        formatted = ""
        for msg in messages:
            role = msg.get("role", "unknown").upper()
            content = msg.get("content", "")
            formatted += f"[{role}]\n{content}\n\n"
        return formatted

================================================
FILE: anytool/local_server/README.md
================================================
# AnyTool Local Server (Desktop Version)

## 1. Introduction

The AnyTool Local Server is a **lightweight, cross-platform** Flask service that launches on the host workstation and exposes a uniform HTTP interface for controlling the native desktop environment. By translating REST calls into deterministic GUI actions—mouse and keyboard synthesis, window management, screenshot capture, file I/O—it enables higher-level AnyTool agents to interact with real software instead of simulated environments.

**Supported platforms:** Windows 10/11, macOS 11+ (Intel & Apple Silicon) and mainstream Linux distributions (X11/Wayland).

## 2. System Architecture

* **PlatformAdapter** abstracts OS-specific primitives (Windows, macOS, Linux).
* **Accessibility Helper** queries the UI accessibility tree for semantic information.
* **Screenshot Helper** captures full or partial screenshots (PNG).
* **Recorder** streams screen recordings for offline analysis.
* **Health / Feature Checker** validates runtime capabilities and permissions.

## 3. REST Endpoints

| Path | Method | Semantics |
|------|--------|-----------|
| `/` | GET | Liveness probe |
| `/platform` | GET | Return host OS metadata |
| `/execute` | POST | Execute a PyAutoGUI script fragment |
| `/execute_with_verification` | POST | Execute fragment and verify via template matching |
| `/run_python` | POST | Run arbitrary Python within a sandbox |
| `/run_bash_script` | POST | Run shell script (optional conda activation) |
| `/screenshot` | GET | Return PNG screenshot (full or ROI) |
| `/cursor_position` | GET | Current mouse coordinates |
| `/screen_size` | GET/POST | Query or set virtual screen resolution |
| `/list_directory` | POST | List directory contents |

*See* `main.py` *for ~20 additional endpoints.*

## 4. Setup & Launch

> [!NOTE]  
> Python version: 3.12  
> The server needs accessibility and screen-recording permissions (macOS: *System Settings ▸ Privacy & Security*).

### Dependency Installation
```bash
cd anytool/local_server
pip install -r requirements.txt
```

### Launching the Server
*Python entry point* (run from the repository root, or anywhere the `anytool` package is importable)
```bash
python -m anytool.local_server.main \
       --host 127.0.0.1 --port 5000   # flags optional; defaults read from config.json
```

*Bash helper script*
```bash
./run.sh              # reads config.json then starts the service
```

Press `Ctrl+C` at any time to gracefully stop the server.

---

## 5. Configuration
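Runtime options live in `config.json`. The `//` comments below are explanatory only; strict JSON does not allow comments, so leave them out of the real file: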
```json
{
  "server": {
    "host": "127.0.0.1",    // listening address (0.0.0.0 for all interfaces)
    "port": 5000,           // default port
    "debug": false          // verbose Flask logs
  }
}
```

================================================
FILE: anytool/local_server/__init__.py
================================================
from .main import app, run_server

__all__ = ["app", "run_server"]

================================================
FILE: anytool/local_server/config.json
================================================
{
  "server": {
    "host": "127.0.0.1",
    "port": 5000,
    "debug": false,
    "threaded": true
  }
}


================================================
FILE: anytool/local_server/feature_checker.py
================================================
import platform
import subprocess
import tempfile
from typing import Dict, Any

from anytool.utils.logging import Logger
logger = Logger.get_logger(__name__)

platform_name = platform.system()


class FeatureChecker:
    """Probe which local capabilities are usable and remember each answer.

    Every ``check_*`` method returns the cached answer when ``use_cache`` is
    true and one exists; otherwise it runs the probe, stores the outcome in
    ``self._cache`` under the feature's name, logs it, and returns it.
    """

    def __init__(self, platform_adapter=None, accessibility_helper=None):
        self._cache = {}
        self.platform = platform_name
        self.accessibility_helper = accessibility_helper
        self.platform_adapter = platform_adapter

    def _store(self, feature, value):
        """Save ``value`` as the cached answer for ``feature`` and return it."""
        self._cache[feature] = value
        return value

    def check_screenshot_available(self, use_cache: bool = True) -> bool:
        """True when pyautogui and PIL import and pyautogui reports a non-empty screen."""
        key = 'screenshot'
        if use_cache and key in self._cache:
            return self._cache[key]

        try:
            import pyautogui
            from PIL import Image  # not used below; an ImportError here marks the feature unavailable

            screen = pyautogui.size()
            usable = screen.width > 0 and screen.height > 0

            self._cache[key] = usable
            logger.info(f"Screenshot check: {'available' if usable else 'unavailable'}")
            return usable

        except ImportError as exc:
            logger.warning(f"Screenshot unavailable - missing dependency: {exc}")
        except Exception as exc:
            logger.error(f"Screenshot check failed: {exc}")
        return self._store(key, False)

    def check_shell_available(self, use_cache: bool = True) -> bool:
        """True when a trivial ``echo test`` command exits with status 0."""
        key = 'shell'
        if use_cache and key in self._cache:
            return self._cache[key]

        try:
            on_windows = self.platform == "Windows"
            probe = ['cmd', '/c', 'echo', 'test'] if on_windows else ['echo', 'test']

            completed = subprocess.run(probe, capture_output=True, timeout=2, text=True)

            ok = completed.returncode == 0
            self._cache[key] = ok
            logger.info(f"Shell check: {'available' if ok else 'unavailable'}")
            return ok

        except FileNotFoundError as exc:
            logger.warning(f"Shell check failed - command not found: {exc}")
        except Exception as exc:
            logger.error(f"Shell check failed: {exc}")
        return self._store(key, False)

    def check_python_available(self, use_cache: bool = True) -> bool:
        """True when one of the candidate interpreter commands answers ``--version`` with status 0."""
        key = 'python'
        if use_cache and key in self._cache:
            return self._cache[key]

        # Candidate commands, tried in order; the first one that works wins.
        if self.platform == "Windows":
            candidates = ['py', 'python', 'python3']
        else:
            candidates = ['python3', 'python']

        for exe in candidates:
            try:
                completed = subprocess.run(
                    [exe, '--version'],
                    capture_output=True,
                    timeout=2,
                    text=True,
                )

                if completed.returncode == 0:
                    # The banner is taken from stdout, falling back to stderr when stdout is empty.
                    banner = completed.stdout.strip() or completed.stderr.strip()
                    self._cache[key] = True
                    logger.info(f"Python check: available ({exe} - {banner})")
                    return True

            except FileNotFoundError:
                continue
            except Exception as exc:
                logger.debug(f"Error testing {exe}: {exc}")
                continue

        logger.warning("Python check failed - no valid Python interpreter found")
        return self._store(key, False)

    def check_file_ops_available(self, use_cache: bool = True) -> bool:
        """True when bytes written to a temporary file can be read back unchanged."""
        key = 'file_ops'
        if use_cache and key in self._cache:
            return self._cache[key]

        try:
            with tempfile.NamedTemporaryFile(mode='w+b', delete=True) as scratch:
                payload = b'test data'
                scratch.write(payload)
                scratch.flush()

                scratch.seek(0)
                round_trip_ok = scratch.read() == payload

                self._cache[key] = round_trip_ok
                logger.info(f"File operations check: {'available' if round_trip_ok else 'unavailable'}")
                return round_trip_ok

        except PermissionError as exc:
            logger.warning(f"File operations check failed - permission denied: {exc}")
        except Exception as exc:
            logger.error(f"File operations check failed: {exc}")
        return self._store(key, False)

    def check_window_mgmt_available(self, use_cache: bool = True) -> bool:
        """True when the platform adapter exposes at least one window-management method."""
        key = 'window_mgmt'
        if use_cache and key in self._cache:
            return self._cache[key]

        try:
            if not self.platform_adapter:
                logger.warning("Window management check failed - no platform adapter loaded")
                return self._store(key, False)

            wanted = ('activate_window', 'close_window', 'list_windows')
            found = [name for name in wanted if hasattr(self.platform_adapter, name)]

            # One supported method is enough for the feature to count as available.
            supported = bool(found)
            self._cache[key] = supported

            if supported:
                logger.info(f"Window management check: {'available' if supported else 'unavailable'} - supported methods: {', '.join(found)}")
            else:
                logger.warning("Window management check failed - platform adapter missing required methods")

            return supported

        except Exception as exc:
            logger.error(f"Window management check failed: {exc}")
            return self._store(key, False)

    def check_recording_available(self, use_cache: bool = True) -> bool:
        """True when the platform adapter has both ``start_recording`` and ``stop_recording``."""
        key = 'recording'
        if use_cache and key in self._cache:
            return self._cache[key]

        try:
            if not self.platform_adapter:
                logger.warning("Recording check failed - no platform adapter loaded")
                return self._store(key, False)

            adapter = self.platform_adapter
            can_record = hasattr(adapter, 'start_recording') and hasattr(adapter, 'stop_recording')

            self._cache[key] = can_record
            logger.info(f"Recording check: {'available' if can_record else 'unavailable'}")
            return can_record

        except Exception as exc:
            logger.error(f"Recording check failed: {exc}")
            return self._store(key, False)

    def check_accessibility_available(self, use_cache: bool = True) -> bool:
        """Return what the accessibility helper's ``is_available()`` reports (False without a helper)."""
        key = 'accessibility'
        if use_cache and key in self._cache:
            return self._cache[key]

        try:
            if not self.accessibility_helper:
                logger.warning("Accessibility check failed - no accessibility helper loaded")
                return self._store(key, False)

            reported = self.accessibility_helper.is_available()
            self._cache[key] = reported
            logger.info(f"Accessibility check: {'available' if reported else 'unavailable'}")
            return reported

        except Exception as exc:
            logger.error(f"Accessibility check failed: {exc}")
            return self._store(key, False)

    def check_platform_adapter_available(self, use_cache: bool = True) -> bool:
        """True when a platform adapter object was supplied (i.e. it is not None)."""
        key = 'platform_adapter'
        if use_cache and key in self._cache:
            return self._cache[key]

        present = self.platform_adapter is not None
        self._cache[key] = present
        logger.info(f"Platform adapter check: {'available' if present else 'unavailable'}")
        return present

    def check_all_features(self, use_cache: bool = True) -> Dict[str, bool]:
        """Run every probe in a fixed order and return ``{feature name: result}``."""
        logger.info(f"Checking all features (platform: {self.platform})")

        probes = (
            ('accessibility', self.check_accessibility_available),
            ('screenshot', self.check_screenshot_available),
            ('recording', self.check_recording_available),
            ('shell', self.check_shell_available),
            ('python', self.check_python_available),
            ('file_ops', self.check_file_ops_available),
            ('window_mgmt', self.check_window_mgmt_available),
            ('platform_adapter', self.check_platform_adapter_available),
        )
        results = {name: probe(use_cache) for name, probe in probes}

        total_count = len(results)
        available_count = sum(1 for outcome in results.values() if outcome)
        logger.info(f"Feature check completed: {available_count}/{total_count} features available")

        return results

    def clear_cache(self):
        """Forget all cached probe results so the next checks run afresh."""
        self._cache.clear()
        logger.debug("Feature check cache cleared")

    def get_feature_report(self) -> Dict[str, Any]:
        """Return platform details, per-feature results, and available/unavailable counts."""
        results = self.check_all_features()

        host = {
            'system': self.platform,
            'release': platform.release(),
            'version': platform.version(),
            'machine': platform.machine(),
            'processor': platform.processor(),
        }

        total = len(results)
        working = sum(1 for outcome in results.values() if outcome)

        return {
            'platform': host,
            'features': results,
            'summary': {
                'total': total,
                'available': working,
                'unavailable': total - working,
            },
        }

================================================
FILE: anytool/local_server/health_checker.py
================================================
import requests
import os
from pathlib import Path
from typing import Dict, Tuple, Optional
from anytool.utils.logging import Logger
from anytool.local_server.feature_checker import FeatureChecker

logger = Logger.get_logger(__name__)

from anytool.utils.display import colorize as _c


class HealthStatus:
    """Outcome of one health check: is the feature present, and did its endpoint respond.

    ``endpoint_available`` is ``None`` when the endpoint was not tested.
    """
    def __init__(self, feature_available: bool, endpoint_available: Optional[bool], 
                 endpoint_detail: str = ""):
        self.endpoint_detail = endpoint_detail
        self.endpoint_available = endpoint_available
        self.feature_available = feature_available

    @property
    def fully_available(self) -> bool:
        """Fully available: the feature is present and the endpoint test passed."""
        if not self.feature_available:
            return self.feature_available
        # An untested endpoint (None) does not count as available.
        return self.endpoint_available == True

    def __str__(self):
        if not self.feature_available:
            return "Feature N/A"
        if self.endpoint_available is None:
            return "Feature OK (endpoint not tested)"
        if self.endpoint_available:
            return f"OK ({self.endpoint_detail})"
        return f"Endpoint failed: {self.endpoint_detail}"


class HealthChecker:
    """Health checker with functional testing"""
    
    def __init__(self, feature_checker: FeatureChecker, 
                 base_url: str = "http://127.0.0.1:5000",
                 auto_cleanup: bool = True,
                 test_output_dir: str = None):
        self.feature_checker = feature_checker
        self.base_url = base_url
        self.results = {}
        self.auto_cleanup = auto_cleanup
        
        # set the test output directory
        if test_output_dir:
            self.test_output_dir = Path(test_output_dir)
        else:
            current_dir = Path(__file__).parent
            self.test_output_dir = current_dir / "temp"
        
        # create the directory
        self.test_output_dir.mkdir(exist_ok=True)
        
        self.temp_files = []  # Track temporary files for cleanup
        
        logger.info(f"Health checker initialized. Test output: {self.test_output_dir}, Auto-cleanup: {auto_cleanup}")
    
    def _get_test_file_path(self, filename: str) -> str:
        """Get path for a test file"""
        filepath = str(self.test_output_dir / filename)
        self._register_temp_file(filepath)
        return filepath
    
    def _register_temp_file(self, filepath: str):
        """Register a temporary file for later cleanup"""
        if filepath and filepath not in self.temp_files:
            self.temp_files.append(filepath)
    
    def cleanup_temp_files(self):
        """Clean up all temporary test files"""
        if not self.auto_cleanup:
            logger.info(f"Auto-cleanup disabled. Test files kept in: {self.test_output_dir}")
            return
        
        cleaned = 0
        for filepath in self.temp_files:
            try:
                if os.path.exists(filepath):
                    os.remove(filepath)
                    cleaned += 1
                    logger.debug(f"Cleaned up: {filepath}")
            except Exception as e:
                logger.warning(f"Failed to clean up {filepath}: {e}")
        
        self.temp_files.clear()
        
        # if the directory is empty, delete it
        try:
            if self.test_output_dir.exists() and not any(self.test_output_dir.iterdir()):
                self.test_output_dir.rmdir()
                logger.debug(f"Removed empty directory: {self.test_output_dir}")
        except:
            pass
        
        if cleaned > 0:
            logger.info(f"Cleaned up {cleaned} test files")
    
    def check_screenshot(self) -> Tuple[bool, str]:
        """Functionally test screenshot - actually take a screenshot and verify"""
        # 1. Check feature first
        if not self.feature_checker.check_screenshot_available():
            return False, "Feature N/A"
        
        # 2. Save screenshot to test directory
        screenshot_path = self._get_test_file_path("test_screenshot.png")
        
        try:
            response = requests.get(f"{self.base_url}/screenshot", timeout=10)
            
            if response.status_code != 200:
                return False, f"HTTP {response.status_code}"
            
            # 3. Save to file
            with open(screenshot_path, 'wb') as f:
                f.write(response.content)
            
            # 4. Verify it's actually an image
            content_type = response.headers.get('Content-Type', '')
            if 'image' not in content_type:
                return False, f"Invalid content type: {content_type}"
            
            # 5. Check file size (should be > 1KB)
            size_kb = len(response.content) / 1024
            if size_kb < 1:
                return False, "Image too small"
            
            logger.info(f"Screenshot saved: {screenshot_path} ({size_kb:.1f}KB)")
            return True, f"OK ({size_kb:.1f}KB)"
            
        except requests.exceptions.Timeout:
            return False, "Timeout"
        except Exception as e:
            return False, f"Error: {str(e)[:30]}"
    
    def check_cursor_position(self) -> Tuple[bool, str]:
        """Test cursor position"""
        if not self.feature_checker.check_screenshot_available():
            return False, "Feature N/A"
        
        try:
            response = requests.get(f"{self.base_url}/cursor_position", timeout=5)
            if response.status_code != 200:
                return False, f"HTTP {response.status_code}"
            
            data = response.json()
            if 'x' in data and 'y' in data:
                return True, f"({data['x']}, {data['y']})"
            return False, "Invalid response"
        except Exception as e:
            return False, str(e)[:30]
    
    def check_screen_size(self) -> Tuple[bool, str]:
        """Test screen size"""
        if not self.feature_checker.check_screenshot_available():
            return False, "Feature N/A"
        
        try:
            response = requests.get(f"{self.base_url}/screen_size", timeout=5)
            if response.status_code != 200:
                return False, f"HTTP {response.status_code}"
            
            data = response.json()
            if 'width' in data and 'height' in data:
                return True, f"{data['width']}x{data['height']}"
            return False, "Invalid response"
        except Exception as e:
            return False, str(e)[:30]
    
    def check_shell_command(self) -> Tuple[bool, str]:
        """Functionally test shell command execution"""
        if not self.feature_checker.check_shell_available():
            return False, "Feature N/A"
        
        try:
            response = requests.post(
                f"{self.base_url}/execute",
                json={"command": "echo hello_test", "shell": True},
                timeout=5
            )
            
            if response.status_code != 200:
                return False, f"HTTP {response.status_code}"
            
            data = response.json()
            output = data.get('output', '').strip()
            
            # Verify the command actually executed
            if 'hello_test' in output:
                return True, "Command executed"
            return False, "Command failed"
            
        except Exception as e:
            return False, str(e)[:30]
    
    def check_python_execution(self) -> Tuple[bool, str]:
        """Functionally test Python code execution"""
        if not self.feature_checker.check_python_available():
            return False, "Feature N/A"
        
        try:
            test_code = 'print("test_output_123")'
            response = requests.post(
                f"{self.base_url}/run_python",
                json={"code": test_code},
                timeout=5
            )
            
            if response.status_code != 200:
                return False, f"HTTP {response.status_code}"
            
            data = response.json()
            content = data.get('content', '')
            
            # Verify Python executed correctly
            if 'test_output_123' in content:
                return True, "Python executed"
            return False, "Execution failed"
            
        except Exception as e:
            return False, str(e)[:30]
    
    def check_bash_script(self) -> Tuple[bool, str]:
        """Functionally test Bash script execution"""
        if not self.feature_checker.check_shell_available():
            return False, "Feature N/A"
        
        try:
            response = requests.post(
                f"{self.base_url}/run_bash_script",
                json={"script": "echo bash_test_456"},
                timeout=5
            )
            
            if response.status_code != 200:
                return False, f"HTTP {response.status_code}"
            
            data = response.json()
            output = data.get('output', '')
            
            if 'bash_test_456' in output:
                return True, "Bash executed"
            return False, "Execution failed"
            
        except Exception as e:
            return False, str(e)[:30]
    
    def check_file_operations(self) -> Tuple[bool, str]:
        """Test file operations"""
        if not self.feature_checker.check_file_ops_available():
            return False, "Feature N/A"
        
        try:
            # Test list directory
            response = requests.post(
                f"{self.base_url}/list_directory",
                json={"path": "."},
                timeout=5
            )
            
            if response.status_code != 200:
                return False, f"HTTP {response.status_code}"
            
            data = response.json()
            if 'items' in data and isinstance(data['items'], list):
                return True, f"{len(data['items'])} items"
            return False, "Invalid response"
            
        except Exception as e:
            return False, str(e)[:30]
    
    def check_desktop_path(self) -> Tuple[bool, str]:
        """Test desktop path"""
        if not self.feature_checker.check_file_ops_available():
            return False, "Feature N/A"
        
        try:
            response = requests.get(f"{self.base_url}/desktop_path", timeout=5)
            if response.status_code != 200:
                return False, f"HTTP {response.status_code}"
            
            data = response.json()
            path = data.get('path', '')
            if path and os.path.exists(path):
                return True, "Path valid"
            return False, "Path not found"
        except Exception as e:
            return False, str(e)[:30]
    
    def check_window_management(self) -> Tuple[bool, str]:
        """Test window management"""
        if not self.feature_checker.check_window_mgmt_available():
            return False, "Feature N/A"
        
        try:
            # Just test if endpoint responds (window may not exist)
            response = requests.post(
                f"{self.base_url}/setup/activate_window",
                json={"window_name": "NonExistentWindow"},
                timeout=5
            )
            
            # 200 (success), 404 (not found), 501 (not supported) are all acceptable
            if response.status_code in [200, 404, 501]:
                return True, f"API available"
            return False, f"HTTP {response.status_code}"
        except Exception as e:
            return False, str(e)[:30]
    
    def check_recording(self) -> Tuple[bool, str]:
        """Functionally test recording - actually start and stop recording"""
        if not self.feature_checker.check_recording_available():
            return False, "Feature N/A"
        
        recording_path = self._get_test_file_path("test_recording.mp4")
        
        try:
            # 1. Start recording
            response = requests.post(f"{self.base_url}/start_recording", json={}, timeout=10)
            
            if response.status_code == 501:
                return False, "Not supported"
            
            if response.status_code != 200:
                return False, f"Start failed: {response.status_code}"
            
            # 2. Wait a bit
            import time
            time.sleep(3.0)  # Record for 3 seconds
            
            # 3. Stop recording
            response = requests.post(f"{self.base_url}/end_recording", json={}, timeout=15)
            
            if response.status_code == 200:
                # Save the recording file
                with open(recording_path, 'wb') as f:
                    f.write(response.content)
                
                size_kb = len(response.content) / 1024
                logger.info(f"Recording saved: {recording_path} ({size_kb:.1f}KB)")
                return True, f"OK ({size_kb:.1f}KB)"
            else:
                return False, f"Stop failed: {response.status_code}"
                
        except Exception as e:
            # Try to stop recording in case of error
            try:
                requests.post(f"{self.base_url}/end_recording", json={}, timeout=5)
            except:
                pass
            return False, str(e)[:30]
    
    def check_accessibility(self) -> Tuple[bool, str]:
        """Test accessibility tree"""
        if not self.feature_checker.check_accessibility_available():
            return False, "Feature N/A"
        
        try:
            response = requests.get(f"{self.base_url}/accessibility?max_depth=1", timeout=10)
            
            if response.status_code != 200:
                return False, f"HTTP {response.status_code}"
            
            data = response.json()
            if 'error' in data:
                return False, "Permission denied"
            
            # Should have some tree structure
            if 'platform' in data or 'children' in data:
                return True, "Tree available"
            return False, "Invalid response"
            
        except Exception as e:
            return False, str(e)[:30]
    
    def check_health_endpoint(self) -> Tuple[bool, str]:
        """Test health check endpoint"""
        try:
            response = requests.get(f"{self.base_url}/", timeout=5)
            if response.status_code == 200:
                data = response.json()
                if data.get('status') == 'ok':
                    return True, "OK"
            return False, f"HTTP {response.status_code}"
        except Exception as e:
            return False, str(e)[:30]
    
    def check_platform_info(self) -> Tuple[bool, str]:
        """Test platform info endpoint"""
        try:
            response = requests.get(f"{self.base_url}/platform", timeout=5)
            if response.status_code == 200:
                data = response.json()
                if 'system' in data:
                    return True, data['system']
            return False, f"HTTP {response.status_code}"
        except Exception as e:
            return False, str(e)[:30]
    
    def check_all(self, test_endpoints: bool = True) -> Dict[str, HealthStatus]:
        """
        Check all features with functional testing
        
        Args:
            test_endpoints: Whether to test endpoints (False only checks features)
        
        Returns:
            {Feature name: HealthStatus}
        """
        results = {}
        
        if not test_endpoints:
            # Only check features, not endpoints
            feature_results = self.feature_checker.check_all_features()
            for name, available in feature_results.items():
                results[name] = HealthStatus(available, None, "")
            self.results = results
            return results
        
        # Functional tests
        test_functions = {
            'Health Check': self.check_health_endpoint,
            'Platform Info': self.check_platform_info,
            'Screenshot': self.check_screenshot,
            'Cursor Position': self.check_cursor_position,
            'Screen Size': self.check_screen_size,
            'Shell Command': self.check_shell_command,
            'Python Execution': self.check_python_execution,
            'Bash Script': self.check_bash_script,
            'File Operations': self.check_file_operations,
            'Desktop Path': self.check_desktop_path,
            'Window Management': self.check_window_management,
            'Recording': self.check_recording,
            'Accessibility': self.check_accessibility,
        }
        
        for name, test_func in test_functions.items():
            success, detail = test_func()
            
            # Determine feature availability
            if detail == "Feature N/A":
                feature_available = False
                endpoint_available = None
            else:
                feature_available = True
                endpoint_available = success
            
            results[name] = HealthStatus(feature_available, endpoint_available, detail)
        
        # Clean up temporary files
        self.cleanup_temp_files()
        
        self.results = results
        return results
    
    def print_results(self, results: Dict[str, HealthStatus] = None, 
                     show_endpoint_details: bool = False):
        """Print check results"""
        if results is None:
            results = self.results
        
        if not results:
            return
        
        total = len(results)
        feature_available = sum(1 for s in results.values() if s.feature_available)
        fully_available = sum(1 for s in results.values() if s.fully_available)
        
        # Categorize
        basic = ['Health Check', 'Platform Info']
        
        # Basic Features
        print()
        print(_c("  - Basic", 'c', bold=True))
        basic_items = []
        for name in basic:
            if name in results:
                status = results[name]
                # Use colored dot instead of emoji
                if status.fully_available:
                    icon = _c("●", 'g')
                elif not status.feature_available:
                    icon = _c("●", 'rd')
                elif status.endpoint_available is None:
                    icon = _c("●", 'y')
                else:
                    icon = _c("●", 'y')
                
                text = _c(name, 'gr' if not status.feature_available else '')
                basic_items.append((icon, text, status))
        
        # Display in rows of 4
        for i in range(0, len(basic_items), 4):
            line_items = []
            for j in range(4):
                if i + j < len(basic_items):
                    icon, text, status = basic_items[i + j]
                    line_items.append(f"{icon} {text:<15}")
            print("     " + " ".join(line_items))
        
        # Show details if requested
        if show_endpoint_details:
            for name in basic:
                if name in results:
                    status = results[name]
                    print(f"       {_c('·', 'gr')} {name}: {_c(str(status), 'gr')}")
        
        # Advanced Features
        print()
        print(_c("  - Advanced", 'c', bold=True))
        advanced_items = []
        for name, status in results.items():
            if name not in basic:
                # Use colored dot instead of emoji
                if status.fully_available:
                    icon = _c("●", 'g')
                elif not status.feature_available:
                    icon = _c("●", 'rd')
                elif status.endpoint_available is None:
                    icon = _c("●", 'y')
                else:
                    icon = _c("●", 'y')
                
                text = _c(name, 'gr' if not status.feature_available else '')
                advanced_items.append((icon, text, status))
        
        # Display in rows of 4
        for i in range(0, len(advanced_items), 4):
            line_items = []
            for j in range(4):
                if i + j < len(advanced_items):
                    icon, text, _ = advanced_items[i + j]
                    line_items.append(f"{icon} {text:<15}")
            print("     " + " ".join(line_items))
        
        # Show details if requested
        if show_endpoint_details:
            for name, status in results.items():
                if name not in basic:
                    print(f"       {_c('·', 'gr')} {name}: {_c(str(status), 'gr')}")
        
        # Summary
        from anytool.utils.display import print_separator
        print()
        print_separator()
        print(f"  {_c('Summary:', 'c', bold=True)} {_c(str(feature_available) + '/' + str(total), 'g' if feature_available == total else 'y')} features available", end='')
        if any(s.endpoint_available is not None for s in results.values()):
            print(f", {_c(str(fully_available) + '/' + str(total), 'g' if fully_available == total else 'y')} fully functional")
        else:
            print()
        print_separator()
        
        # Legend
        print(f"  {_c('Legend:', 'gr')} {_c('●', 'g')} Available  {_c('●', 'y')} Partial/Untested  {_c('●', 'rd')} Unavailable")
        
        # Test files info
        if self.temp_files and not self.auto_cleanup:
            print()
            print(f"  {_c('Test files saved:', 'y')} {self.test_output_dir}")
            print(f"  {_c(str(len(self.temp_files)) + ' file(s) available for inspection', 'gr')}")
        
        print()
    
    def get_summary(self) -> dict:
        """Get summary"""
        if not self.results:
            return {}
        
        total = len(self.results)
        feature_available = sum(1 for s in self.results.values() if s.feature_available)
        fully_available = sum(1 for s in self.results.values() if s.fully_available)
        
        return {
            'total': total,
            'feature_available': feature_available,
            'fully_available': fully_available,
            'details': {k: str(v) for k, v in self.results.items()}
        }
    
    def get_simple_features_dict(self) -> Dict[str, bool]:
        """Get simple feature dict (for banner display)"""
        return self.feature_checker.check_all_features()

================================================
FILE: anytool/local_server/main.py
================================================
import os
import platform
import shlex
import subprocess
import signal
import time
import json
import uuid
from datetime import datetime
from flask import Flask, request, jsonify, send_file, abort
import pyautogui
import threading
from io import BytesIO
import tempfile

from anytool.utils.logging import Logger
from anytool.local_server.utils import AccessibilityHelper, ScreenshotHelper
from anytool.local_server.platform_adapters import get_platform_adapter
from anytool.local_server.health_checker import HealthChecker
from anytool.local_server.feature_checker import FeatureChecker

# Host OS name as reported by platform.system(); compared against "Windows"
# and "Darwin" below and against "Linux" in the request handlers.
platform_name = platform.system()

# Flask application that serves every route defined in this module.
app = Flask(__name__)
app.config['MAX_CONTENT_LENGTH'] = 500 * 1024 * 1024  # 500MB

# NOTE(review): presumably removes pyautogui's built-in delay after each call
# (and the extra catch-up delay on macOS) - confirm against the pyautogui docs.
pyautogui.PAUSE = 0
if platform_name == "Darwin":
    pyautogui.DARWIN_CATCH_UP_TIME = 0

logger = Logger.get_logger(__name__)

# NOTE(review): TIMEOUT is not used in the visible part of this file;
# presumably seconds - confirm at the point of use.
TIMEOUT = 1800
# Starts as None; nothing in the visible part of this file assigns it.
recording_process = None

# Recording output location: %TEMP% (falling back to C:\Temp) on Windows,
# /tmp everywhere else.
if platform_name == "Windows":
    recording_path = os.path.join(os.environ.get('TEMP', 'C:\\Temp'), 'recording.mp4')
else:
    recording_path = "/tmp/recording.mp4"

# Shared helper instances used by the request handlers.
accessibility_helper = AccessibilityHelper()
screenshot_helper = ScreenshotHelper()
platform_adapter = get_platform_adapter()

# Feature detection is built on the adapter and accessibility helper above.
feature_checker = FeatureChecker(
    platform_adapter=platform_adapter,
    accessibility_helper=accessibility_helper
)


def get_conda_activation_prefix(conda_env: str = None) -> str:
    """
    Build the shell prefix that activates a conda environment on this platform.
    
    Args:
        conda_env: Conda environment name (e.g., 'myenv')
    
    Returns:
        A command prefix ending in ' && ', or an empty string when conda_env
        is empty. When none of the known install locations exists, the prefix
        is a plain 'conda activate <env> && '.
    """
    if not conda_env:
        return ""
    
    on_windows = platform_name == "Windows"
    
    if on_windows:
        # Windows: look for the activate.bat shipped with common installs.
        candidates = [
            os.path.expandvars("%USERPROFILE%\\miniconda3\\Scripts\\activate.bat"),
            os.path.expandvars("%USERPROFILE%\\anaconda3\\Scripts\\activate.bat"),
            "C:\\ProgramData\\Miniconda3\\Scripts\\activate.bat",
            "C:\\ProgramData\\Anaconda3\\Scripts\\activate.bat",
        ]
    else:
        # Linux/macOS: look for conda.sh, which is sourced before activating.
        candidates = [
            os.path.expanduser("~/miniconda3/etc/profile.d/conda.sh"),
            os.path.expanduser("~/anaconda3/etc/profile.d/conda.sh"),
            "/opt/conda/etc/profile.d/conda.sh",
            "/usr/local/miniconda3/etc/profile.d/conda.sh",
            "/usr/local/anaconda3/etc/profile.d/conda.sh",
        ]
    
    # First candidate that exists on disk, if any.
    located = next((path for path in candidates if os.path.exists(path)), None)
    
    if not located:
        # Fallback: assume conda is reachable from the shell as-is.
        return f'conda activate {conda_env} && '
    
    if on_windows:
        return f'call "{located}" {conda_env} && '
    return f'source "{located}" && conda activate {conda_env} && '


def wrap_script_with_conda(script: str, conda_env: str = None) -> str:
    """
    Return `script` arranged to run inside the given conda environment.

    With no conda_env the script is returned unchanged. On Windows the
    activation prefix from get_conda_activation_prefix() is prepended. On
    other platforms the script is embedded in a bash wrapper that sources
    conda.sh first; if no conda.sh is found, a warning is logged and the
    script is returned unchanged.
    """
    if not conda_env:
        return script
    
    if platform_name == "Windows":
        return f"{get_conda_activation_prefix(conda_env)}{script}"
    
    candidates = (
        os.path.expanduser("~/miniconda3/etc/profile.d/conda.sh"),
        os.path.expanduser("~/anaconda3/etc/profile.d/conda.sh"),
        os.path.expanduser("~/opt/anaconda3/etc/profile.d/conda.sh"),
        "/opt/conda/etc/profile.d/conda.sh",
    )
    conda_sh = next((path for path in candidates if os.path.exists(path)), None)
    
    if not conda_sh:
        # Conda not found - log warning and execute script directly without conda
        logger.warning(f"Conda environment '{conda_env}' requested but conda not found. Executing with system Python.")
        return script
    
    # The wrapper sources conda.sh directly; a failed activation is ignored
    # ("|| true") so the user script still runs.
    return f"""#!/bin/bash
# Initialize conda
if [ -f "{conda_sh}" ]; then
    . "{conda_sh}"
    conda activate {conda_env} 2>/dev/null || true
fi

# Run user script
{script}
"""


health_checker = None

@app.route('/', methods=['GET'])
def health_check():
    """Root endpoint: report service status together with feature availability.

    Features come from health_checker when it is set; otherwise they come
    straight from feature_checker with use_cache=True.
    """
    feature_flags = (
        health_checker.get_simple_features_dict()
        if health_checker
        else feature_checker.check_all_features(use_cache=True)
    )
    
    body = {
        'status': 'ok',
        'service': 'AnyTool Desktop Server',
        'version': '1.0.0',
        'platform': platform_name,
        'features': feature_flags,
        'timestamp': datetime.now().isoformat()
    }
    return jsonify(body)

@app.route('/platform', methods=['GET'])
def get_platform():
    """Return host platform details as JSON.

    The base fields come from the platform module. If the platform adapter
    provides get_system_info(), its entries are merged on top.
    """
    details = dict(
        system=platform_name,
        release=platform.release(),
        version=platform.version(),
        machine=platform.machine(),
        processor=platform.processor(),
    )
    
    adapter_has_info = platform_adapter and hasattr(platform_adapter, 'get_system_info')
    if adapter_has_info:
        details.update(platform_adapter.get_system_info())
    
    return jsonify(details)

@app.route('/execute', methods=['POST'])
@app.route('/setup/execute', methods=['POST'])
def execute_command():
    """Run the command from the JSON request body and return its result.

    Request keys read: 'command' (string or list), 'shell' (default False)
    and 'timeout' (default 120). Replies with stdout, stderr and the return
    code; HTTP 408 when the command times out, HTTP 500 on any other error
    raised while running it.
    """
    payload = request.json
    use_shell = payload.get('shell', False)
    # The default matches the mode: a string for shell runs, a list otherwise.
    command = payload.get('command', "" if use_shell else [])
    timeout = payload.get('timeout', 120)
    
    # A string command without a shell is split into an argument list.
    if isinstance(command, str) and not use_shell:
        command = shlex.split(command)
    
    # Expand user directory
    if isinstance(command, list):
        command[:] = [
            os.path.expanduser(arg) if arg.startswith("~/") else arg
            for arg in command
        ]
    
    try:
        run_options = dict(
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            shell=use_shell,
            text=True,
            timeout=timeout,
        )
        if platform_name == "Windows":
            # Only referenced on Windows, as in the original branch.
            run_options['creationflags'] = subprocess.CREATE_NO_WINDOW
        
        completed = subprocess.run(command, **run_options)
        
        return jsonify({
            'status': 'success',
            'output': completed.stdout,
            'error': completed.stderr,
            'returncode': completed.returncode
        })
    except subprocess.TimeoutExpired:
        return jsonify({
            'status': 'error',
            'message': f'Command timeout after {timeout} seconds'
        }), 408
    except Exception as exc:
        return jsonify({
            'status': 'error',
            'message': str(exc)
        }), 500

@app.route('/execute_with_verification', methods=['POST'])
@app.route('/setup/execute_with_verification', methods=['POST'])
def execute_command_with_verification():
    """Execute command and verify the result based on provided verification criteria.

    Request keys read: 'command', 'shell' (default False), 'verification'
    (dict, default empty), 'max_wait_time' (default 10) and 'check_interval'
    (default 1). The command runs once with a fixed 120-second timeout.

    Supported verification keys:
        'window_exists':   a window whose title contains this text
                           (case-insensitive) must be listed.
        'command_success': this shell command must exit with code 0.

    The checks are repeated every check_interval seconds until all pass or
    max_wait_time has elapsed. Replies with the command result, plus
    'verification' and 'wait_time' when verification was requested; HTTP 500
    when verification fails or an error occurs.
    """
    data = request.json
    shell = data.get('shell', False)
    command = data.get('command', "" if shell else [])
    verification = data.get('verification', {})
    max_wait_time = data.get('max_wait_time', 10) # Maximum wait time in seconds
    check_interval = data.get('check_interval', 1) # Check interval in seconds
    
    if isinstance(command, str) and not shell:
        command = shlex.split(command)
    
    # Expand user directory
    if isinstance(command, list):
        for i, arg in enumerate(command):
            if arg.startswith("~/"):
                command[i] = os.path.expanduser(arg)
    
    # Execute the main command
    try:
        run_options = dict(
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            shell=shell,
            text=True,
            timeout=120,
        )
        if platform_name == "Windows":
            # Only referenced on Windows.
            run_options['creationflags'] = subprocess.CREATE_NO_WINDOW
        result = subprocess.run(command, **run_options)
        
        # If no verification is needed, return immediately
        if not verification:
            return jsonify({
                'status': 'success',
                'output': result.stdout,
                'error': result.stderr,
                'returncode': result.returncode
            })
        
        # Wait and verify the result
        start_time = time.time()
        while time.time() - start_time < max_wait_time:
            verification_passed = True
            
            # Check window existence if specified
            if 'window_exists' in verification:
                window_name = verification['window_exists']
                try:
                    if platform_name == 'Linux':
                        wmctrl_result = subprocess.run(
                            ['wmctrl', '-l'],
                            capture_output=True,
                            text=True,
                            check=True
                        )
                        if window_name.lower() not in wmctrl_result.stdout.lower():
                            verification_passed = False
                    elif platform_adapter:
                        # Use platform adapter to check window existence
                        windows = platform_adapter.list_windows() if hasattr(platform_adapter, 'list_windows') else []
                        if not any(window_name.lower() in str(w).lower() for w in windows):
                            verification_passed = False
                except Exception:
                    # Any failure to inspect windows counts as "not verified".
                    # No longer a bare except, so KeyboardInterrupt and
                    # SystemExit are not swallowed.
                    verification_passed = False
            
            # Check command execution if specified. This runs even when the
            # window check already failed.
            if 'command_success' in verification:
                verify_cmd = verification['command_success']
                try:
                    verify_result = subprocess.run(
                        verify_cmd,
                        shell=True,
                        capture_output=True,
                        text=True,
                        timeout=5
                    )
                    if verify_result.returncode != 0:
                        verification_passed = False
                except Exception:
                    # Timeouts and launch errors count as "not verified".
                    verification_passed = False
            
            if verification_passed:
                return jsonify({
                    'status': 'success',
                    'output': result.stdout,
                    'error': result.stderr,
                    'returncode': result.returncode,
                    'verification': 'passed',
                    'wait_time': time.time() - start_time
                })
            
            time.sleep(check_interval)
        
        # Verification failed
        return jsonify({
            'status': 'verification_failed',
            'output': result.stdout,
            'error': result.stderr,
            'returncode': result.returncode,
            'verification': 'failed',
            'wait_time': max_wait_time
        }), 500
        
    except Exception as e:
        return jsonify({
            'status': 'error',
            'message': str(e)
        }), 500

def _get_machine_architecture() -> str:
    """Get the machine architecture, e.g., x86_64, arm64, aarch64, i386, etc.
    Returns 'amd' for x86/AMD architectures, 'arm' for ARM architectures, or 'unknown'.
    """
    architecture = platform.machine().lower()
    if architecture in ['amd32', 'amd64', 'x86', 'x86_64', 'x86-64', 'x64', 'i386', 'i686']:
        return 'amd'
    elif architecture in ['arm64', 'aarch64', 'aarch32']:
        return 'arm'
    else:
        return 'unknown'

@app.route('/setup/launch', methods=["POST"])
def launch_app():
    """Spawn an application without waiting for it to exit.

    Request JSON:
        command: Command to run, either a string or a list of arguments.
        shell: When true, ``command`` is passed to ``subprocess.Popen`` with
            ``shell=True`` (default False). A string command without
            ``shell`` is tokenised with ``shlex.split``.

    Returns:
        200 with status 'success' once the process has been spawned,
        400 when no command is supplied,
        500 (JSON) when preparing or spawning the command fails.
    """
    data = request.json
    if not isinstance(data, dict):
        data = {}
    shell = data.get("shell", False)
    command = data.get("command", "" if shell else [])

    if not command:
        return jsonify({'status': 'error', 'message': 'command required'}), 400

    try:
        # Tokenising can raise ValueError (unbalanced quotes); keep it inside
        # the try so the client gets the JSON error payload.
        if isinstance(command, str) and not shell:
            command = shlex.split(command)

        if isinstance(command, list):
            # Expand a leading "~/" in each string argument; non-string
            # entries are left for Popen to reject.
            command = [
                os.path.expanduser(arg) if isinstance(arg, str) and arg.startswith("~/") else arg
                for arg in command
            ]

            # ARM architecture compatibility: replace google-chrome with chromium
            if 'google-chrome' in command and _get_machine_architecture() == 'arm':
                command[command.index('google-chrome')] = 'chromium'
                logger.info("ARM architecture detected: replacing 'google-chrome' with 'chromium'")

        subprocess.Popen(command, shell=shell)
        cmd_str = command if shell else " ".join(map(str, command))
        logger.info(f"Application launched successfully: {cmd_str}")
        return jsonify({
            'status': 'success',
            'message': f'{cmd_str} launched successfully'
        })
    except Exception as e:
        logger.error(f"Application launch failed: {str(e)}")
        return jsonify({
            'status': 'error',
            'message': str(e)
        }), 500

@app.route("/run_python", methods=['POST'])
def run_python():
    """Run a snippet of Python code in a child interpreter and return its output.

    Request JSON:
        code: Python source to execute (required).
        timeout: Seconds to wait for the child process (default 30).
        working_dir: Working directory for the child (default: current directory).
        env: Extra environment variables merged over ``os.environ``.
        conda_env: Conda environment to activate first; if no activation
            prefix can be obtained, the system Python is used instead.

    Returns:
        200 with 'content' (stdout followed by stderr) and 'returncode';
        'status' is 'success' only when the return code is 0.
        400 when no code is supplied, 408 on timeout, 500 on any other error.
    """
    data = request.json
    if not isinstance(data, dict):
        data = {}
    code = data.get('code', None)
    timeout = data.get('timeout', 30)
    working_dir = data.get('working_dir', None)
    env = data.get('env', None)
    conda_env = data.get('conda_env', None)

    if not code:
        return jsonify({'status': 'error', 'message': 'Code not supplied!'}), 400

    # Generate unique filename
    if platform_name == "Windows":
        temp_filename = os.path.join(tempfile.gettempdir(), f"python_exec_{uuid.uuid4().hex}.py")
    else:
        temp_filename = f"/tmp/python_exec_{uuid.uuid4().hex}.py"

    try:
        # Python reads source files as UTF-8 by default, so write UTF-8
        # explicitly instead of relying on the locale encoding.
        with open(temp_filename, 'w', encoding='utf-8') as f:
            f.write(code)

        # Prepare environment variables
        exec_env = os.environ.copy()
        if env:
            exec_env.update(env)

        # Resolve the conda activation prefix once; an empty prefix means
        # conda is not usable and we fall back to the system interpreter.
        activation_cmd = get_conda_activation_prefix(conda_env) if conda_env else None
        if conda_env and not activation_cmd:
            logger.warning(f"Conda environment '{conda_env}' requested but conda not found. Using system Python.")

        if activation_cmd:
            if platform_name == "Windows":
                # Windows: use cmd with activation
                argv = ['cmd', '/c', f'{activation_cmd}python "{temp_filename}"']
            else:
                # Linux/macOS: use bash with activation
                argv = ['/bin/bash', '-c', f'{activation_cmd}python3 "{temp_filename}"']
        else:
            # No conda activation needed
            python_cmd = 'python' if platform_name == "Windows" else 'python3'
            argv = [python_cmd, temp_filename]

        result = subprocess.run(
            argv,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            timeout=timeout,
            cwd=working_dir or os.getcwd(),
            env=exec_env
        )

        output = result.stdout + result.stderr

        return jsonify({
            'status': 'success' if result.returncode == 0 else 'error',
            'content': output or "Code executed successfully (no output)",
            'returncode': result.returncode
        })

    except subprocess.TimeoutExpired:
        return jsonify({
            'status': 'error',
            'message': f'Execution timeout after {timeout} seconds'
        }), 408
    except Exception as e:
        return jsonify({
            'status': 'error',
            'message': str(e)
        }), 500
    finally:
        # Best-effort cleanup on every path; a failed delete must not mask
        # the response that is already being returned.
        try:
            os.remove(temp_filename)
        except OSError:
            pass

@app.route("/run_bash_script", methods=['POST'])
def run_bash_script():
    """Write a bash script to a temporary file, run it, and return its output.

    Request JSON:
        script: Script text to execute (required).
        timeout: Seconds to wait for the script (default 30).
        working_dir: Working directory for the script (default: current directory).
        env: Extra environment variables merged over ``os.environ``.
        conda_env: Passed to ``wrap_script_with_conda`` together with the script.

    Returns:
        200 with 'output' (stdout and stderr merged), an empty 'error' and
        'returncode'; 'status' is 'success' only when the return code is 0.
        400 when no script is supplied; 500 with returncode -1 on timeout
        or any other failure.
    """
    data = request.json
    if not isinstance(data, dict):
        data = {}
    script = data.get('script', None)
    timeout = data.get('timeout', 30)
    working_dir = data.get('working_dir', None)
    env = data.get('env', None)
    conda_env = data.get('conda_env', None)

    if not script:
        return jsonify({'status': 'error', 'message': 'Script not supplied!'}), 400

    # Generate unique filename
    if platform_name == "Windows":
        temp_filename = os.path.join(tempfile.gettempdir(), f"bash_exec_{uuid.uuid4().hex}.sh")
    else:
        temp_filename = f"/tmp/bash_exec_{uuid.uuid4().hex}.sh"

    try:
        # Wrap script with conda activation if needed
        final_script = wrap_script_with_conda(script, conda_env)

        # newline='\n' disables newline translation so the file is not
        # written with "\r\n" endings on Windows, which bash cannot parse;
        # UTF-8 avoids depending on the locale encoding.
        with open(temp_filename, 'w', encoding='utf-8', newline='\n') as f:
            f.write(final_script)

        os.chmod(temp_filename, 0o755)

        if platform_name == "Windows":
            shell_cmd = ['bash', temp_filename]
        else:
            shell_cmd = ['/bin/bash', temp_filename]

        # Prepare environment variables
        exec_env = os.environ.copy()
        if env:
            exec_env.update(env)

        result = subprocess.run(
            shell_cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,  # stderr is folded into 'output'
            text=True,
            timeout=timeout,
            cwd=working_dir or os.getcwd(),
            env=exec_env
        )

        return jsonify({
            'status': 'success' if result.returncode == 0 else 'error',
            'output': result.stdout,
            'error': "",
            'returncode': result.returncode
        })

    except subprocess.TimeoutExpired:
        return jsonify({
            'status': 'error',
            'output': f'Script execution timed out after {timeout} seconds',
            'error': "",
            'returncode': -1
        }), 500
    except Exception as e:
        return jsonify({
            'status': 'error',
            'output': f'Failed to execute script: {str(e)}',
            'error': "",
            'returncode': -1
        }), 500
    finally:
        # Best-effort cleanup on every path (the file may not exist if
        # writing it failed).
        try:
            os.unlink(temp_filename)
        except OSError:
            pass
        
@app.route('/screenshot', methods=['GET'])
def capture_screen_with_cursor():
    """Capture screenshot (including mouse cursor)

    The image is captured to a uniquely named temporary PNG via
    ``screenshot_helper.capture(..., with_cursor=True)``, read into memory,
    and served as ``image/png``. The temporary file is removed on every path.

    Returns:
        The PNG on success; 500 with a JSON error if the helper reports
        failure or an exception is raised.
    """
    tmp_path = os.path.join(tempfile.gettempdir(), f"screenshot_{uuid.uuid4().hex}.png")
    try:
        if not screenshot_helper.capture(tmp_path, with_cursor=True):
            return jsonify({'status':'error','message':'Screenshot failed'}), 500

        # Load into memory so the temp file can be deleted before the
        # response is streamed.
        with open(tmp_path, 'rb') as f:
            buf = BytesIO(f.read())
        return send_file(buf, mimetype='image/png')

    except Exception as e:
        logger.error(f"Screenshot failed: {str(e)}")
        return jsonify({
            'status': 'error',
            'message': str(e)
        }), 500
    finally:
        # Also covers a failed capture that left a partial file behind.
        try:
            os.remove(tmp_path)
        except OSError:
            pass

@app.route('/cursor_position', methods=['GET'])
def get_cursor_position():
    """Return the cursor coordinates reported by ``screenshot_helper`` as JSON.

    Responds with ``x``, ``y`` and status 'success', or 500 with the
    exception text if anything fails.
    """
    try:
        cursor_x, cursor_y = screenshot_helper.get_cursor_position()
        payload = {'x': cursor_x, 'y': cursor_y, 'status': 'success'}
        return jsonify(payload)
    except Exception as exc:
        failure = {'status': 'error', 'message': str(exc)}
        return jsonify(failure), 500

@app.route('/screen_size', methods=['POST', 'GET'])
def get_screen_size():
    """Return the screen dimensions reported by ``screenshot_helper`` as JSON.

    Responds with ``width``, ``height`` and status 'success', or 500 with
    the exception text if anything fails.
    """
    try:
        screen_w, screen_h = screenshot_helper.get_screen_size()
        payload = {'width': screen_w, 'height': screen_h, 'status': 'success'}
        return jsonify(payload)
    except Exception as exc:
        failure = {'status': 'error', 'message': str(exc)}
        return jsonify(failure), 500

# Accessibility Tree
@app.route("/accessibility", methods=["GET"])
def get_accessibility_tree():
    """Return the accessibility tree produced by ``accessibility_helper``.

    Query parameters:
        max_depth: Integer depth limit forwarded to ``get_tree`` (default 10).

    Responds with the tree as JSON, or 500 with the exception text on failure.
    """
    try:
        depth_limit = request.args.get('max_depth', 10, type=int)
        return jsonify(accessibility_helper.get_tree(max_depth=depth_limit))
    except Exception as exc:
        logger.error(f"Failed to get accessibility tree: {str(exc)}")
        failure = {
            'status': 'error',
            'message': str(exc)
        }
        return jsonify(failure), 500

# File Operations
@app.route('/list_directory', methods=['POST'])
def list_directory():
    """Describe the entries of a directory.

    Request JSON:
        path: Directory to list (default '.'); a leading ``~`` is expanded.

    Responds with the expanded path and one item per entry carrying
    ``name``, ``is_dir``, ``is_file`` and ``size`` (``None`` unless the
    entry is a regular file). Any failure yields 500 with the exception text.
    """
    data = request.json
    path = data.get('path', '.')

    try:
        path = os.path.expanduser(path)

        def describe(entry_name):
            # Build the metadata record for a single directory entry.
            full_path = os.path.join(path, entry_name)
            regular_file = os.path.isfile(full_path)
            return {
                'name': entry_name,
                'is_dir': os.path.isdir(full_path),
                'is_file': regular_file,
                'size': os.path.getsize(full_path) if regular_file else None
            }

        items = [describe(entry_name) for entry_name in os.listdir(path)]

        return jsonify({
            'status': 'success',
            'path': path,
            'items': items
        })
    except Exception as exc:
        return jsonify({
            'status': 'error',
            'message': str(exc)
        }), 500

@app.route('/file', methods=['POST'])
def file_operation():
    """Perform a simple file query.

    Request JSON:
        operation: 'read' (default) returns the file's text content;
            'exists' reports whether the path exists.
        path: Target path (required); a leading ``~`` is expanded.

    Responds 400 when the path is missing or the operation is unknown,
    and 500 with the exception text on any other failure.
    """
    data = request.json
    operation = data.get('operation', 'read')
    path = data.get('path')

    if not path:
        return jsonify({'status': 'error', 'message': 'Path required'}), 400

    path = os.path.expanduser(path)

    try:
        if operation == 'exists':
            return jsonify({
                'status': 'success',
                'exists': os.path.exists(path)
            })

        if operation != 'read':
            return jsonify({
                'status': 'error',
                'message': f'Unknown operation: {operation}'
            }), 400

        with open(path, 'r') as handle:
            text = handle.read()
        return jsonify({
            'status': 'success',
            'content': text
        })
    except Exception as exc:
        return jsonify({
            'status': 'error',
            'message': str(exc)
        }), 500

@app.route('/desktop_path', methods=['POST', 'GET'])
def get_desktop_path():
    """Return the expanded ``~/Desktop`` path for the current user as JSON."""
    try:
        payload = {
            'status': 'success',
            'path': os.path.expanduser("~/Desktop")
        }
        return jsonify(payload)
    except Exception as exc:
        failure = {
            'status': 'error',
            'message': str(exc)
        }
        return jsonify(failure), 500

@app.route("/setup/activate_window", methods=['POST'])
def activate_window():
    """Activate window

    Request JSON:
        window_name: Name of the window to activate (required).
        strict: Forwarded to the platform adapter (default False).
        by_class: Forwarded to the adapter as ``by_class`` only when the
            adapter's ``activate_window`` declares that parameter
            (default False).

    Returns:
        The adapter's result dict (200 when its status is 'success', else 400),
        400 when ``window_name`` is missing, 501 when the platform adapter
        cannot activate windows, 500 on an unexpected error.
    """
    data = request.json
    if not isinstance(data, dict):
        data = {}
    window_name = data.get("window_name")
    strict = data.get("strict", False)
    by_class_name = data.get("by_class", False)

    if not window_name:
        return jsonify({'status': 'error', 'message': 'window_name required'}), 400

    try:
        if platform_adapter and hasattr(platform_adapter, 'activate_window'):
            adapter_kwargs = {'strict': strict}
            if by_class_name:
                # Not every adapter necessarily accepts by_class, so only
                # forward it when the method signature declares it.
                import inspect
                try:
                    accepted = inspect.signature(platform_adapter.activate_window).parameters
                except (TypeError, ValueError):
                    accepted = {}
                if 'by_class' in accepted:
                    adapter_kwargs['by_class'] = by_class_name

            result = platform_adapter.activate_window(window_name, **adapter_kwargs)
            if result['status'] == 'success':
                return jsonify(result)
            else:
                return jsonify(result), 400
        else:
            return jsonify({
                'status': 'error',
                'message': f'Window activation not supported on {platform_name}'
            }), 501
    except Exception as e:
        logger.error(f"Window activation failed: {str(e)}")
        return jsonify({'status': 'error', 'message': str(e)}), 500

@app.route("/setup/close_window", methods=["POST"])
def close_window():
    """Ask the platform adapter to close a window.

    Request JSON:
        window_name: Name of the window to close (required).
        strict: Forwarded to the adapter (default False).
        by_class: Read from the request but not used below.

    Responds with the adapter's result (404 when its status is not
    'success'), 400 when ``window_name`` is missing, 501 when the adapter
    cannot close windows, 500 on an unexpected error.
    """
    data = request.json
    window_name = data.get("window_name")
    strict = data.get("strict", False)
    # NOTE(review): parsed but never passed to the adapter - confirm whether
    # the adapters' close_window supports class-name matching.
    by_class_name = data.get("by_class", False)

    if not window_name:
        return jsonify({'status': 'error', 'message': 'window_name required'}), 400

    try:
        if not (platform_adapter and hasattr(platform_adapter, 'close_window')):
            return jsonify({
                'status': 'error',
                'message': f'Window closing not supported on {platform_name}'
            }), 501

        outcome = platform_adapter.close_window(window_name, strict=strict)
        if outcome['status'] != 'success':
            return jsonify(outcome), 404
        return jsonify(outcome)
    except Exception as exc:
        logger.error(f"Window closing failed: {str(exc)}")
        return jsonify({'status': 'error', 'message': str(exc)}), 500

@app.route('/window_size', methods=['POST'])
def get_window_size():
    """Return a width/height pair as JSON.

    Despite the route name, the values come from
    ``screenshot_helper.get_screen_size()``.
    """
    try:
        dimensions = screenshot_helper.get_screen_size()
        size_w, size_h = dimensions
        payload = {
            'status': 'success',
            'width': size_w,
            'height': size_h
        }
        return jsonify(payload)
    except Exception as exc:
        return jsonify({'status': 'error', 'message': str(exc)}), 500

@app.route('/wallpaper', methods=['POST'])
@app.route('/setup/change_wallpaper', methods=['POST'])
def set_wallpaper():
    """Ask the platform adapter to change the desktop wallpaper.

    Request JSON:
        path: Image path handed to the adapter's ``set_wallpaper`` (required).

    Responds with the adapter's result (400 when its status is not
    'success'), 400 when ``path`` is missing, 501 when the adapter cannot
    set wallpapers, 500 on an unexpected error.
    """
    data = request.json
    image_path = data.get('path')

    if not image_path:
        return jsonify({'status': 'error', 'message': 'path required'}), 400

    try:
        if not (platform_adapter and hasattr(platform_adapter, 'set_wallpaper')):
            return jsonify({
                'status': 'error',
                'message': f'Wallpaper setting not supported on {platform_name}'
            }), 501

        outcome = platform_adapter.set_wallpaper(image_path)
        if outcome['status'] != 'success':
            return jsonify(outcome), 400
        return jsonify(outcome)
    except Exception as exc:
        logger.error(f"Failed to set wallpaper: {str(exc)}")
        return jsonify({'status': 'error', 'message': str(exc)}), 500

# Screen Recording
@app.route('/start_recording', methods=['POST'])
def start_recording():
    """Begin a screen recording through the platform adapter.

    Responds 501 when the adapter has no ``start_recording``, 400 when a
    previously started recording process is still running, and 500 when the
    adapter reports failure or raises. On success the process returned by
    the adapter is stored in the module-level ``recording_process``.
    """
    global recording_process

    # The platform adapter must provide recording support.
    adapter_can_record = platform_adapter and hasattr(platform_adapter, 'start_recording')
    if not adapter_can_record:
        return jsonify({
            'status': 'error',
            'message': f'Recording not supported on {platform_name}'
        }), 501

    # Refuse to start while an earlier recording process is still alive.
    already_recording = recording_process and recording_process.poll() is None
    if already_recording:
        return jsonify({
            'status': 'error',
            'message': 'Recording is already in progress.'
        }), 400

    # Remove any stale output file; failure to delete is logged, not fatal.
    if os.path.exists(recording_path):
        try:
            os.remove(recording_path)
        except OSError as e:
            logger.error(f"Cannot delete old recording file: {e}")

    try:
        outcome = platform_adapter.start_recording(recording_path)

        if outcome['status'] != 'success':
            logger.error(f"Failed to start recording: {outcome.get('message', 'Unknown error')}")
            return jsonify({
                'status': 'error',
                'message': outcome.get('message', 'Failed to start recording')
            }), 500

        recording_process = outcome.get('process')
        logger.info("Recording started successfully")
        return jsonify({
            'status': 'success',
            'message': 'Recording started'
        })

    except Exception as exc:
        logger.error(f"Failed to start recording: {str(exc)}")
        return jsonify({
            'status': 'error',
            'message': str(exc)
        }), 500

@app.route('/end_recording', methods=['POST'])
def end_recording():
    """Stop the running screen recording and send back the recorded file.

    Responds 400 when no recording process is running, 500 when stopping
    fails or the output file is missing/empty, otherwise the recording file
    as an attachment. The module-level ``recording_process`` is cleared.
    """
    global recording_process

    def _recording_file_ready():
        # True once the output file exists and is non-empty.
        return os.path.exists(recording_path) and os.path.getsize(recording_path) > 0

    # Nothing to stop if no process was started or it has already exited.
    still_running = recording_process and recording_process.poll() is None
    if not still_running:
        recording_process = None
        return jsonify({
            'status': 'error',
            'message': 'No recording in progress'
        }), 400

    try:
        adapter_can_stop = platform_adapter and hasattr(platform_adapter, 'stop_recording')
        if adapter_can_stop:
            outcome = platform_adapter.stop_recording(recording_process)
            recording_process = None

            if outcome['status'] != 'success':
                logger.error(f"Failed to stop recording: {outcome.get('message', 'Unknown error')}")
                return jsonify(outcome), 500
        else:
            # Fallback: interrupt the process ourselves, escalating to kill
            # if it does not exit within 15 seconds.
            recording_process.send_signal(signal.SIGINT)
            try:
                recording_process.wait(timeout=15)
            except subprocess.TimeoutExpired:
                logger.warning("ffmpeg not responding, force terminating")
                recording_process.kill()
                recording_process.wait()
            recording_process = None

        # Poll up to 10 times, 0.5 s apart, for the output file to appear.
        polls = 0
        while polls < 10 and not _recording_file_ready():
            time.sleep(0.5)
            polls += 1

        if not _recording_file_ready():
            logger.error("Recording file is missing or empty")
            return abort(500, description="Recording file is missing or empty")

        logger.info("Recording ended, file saved")
        return send_file(recording_path, as_attachment=True)

    except Exception as exc:
        logger.error(f"Failed to end recording: {str(exc)}")
        if recording_process:
            # Best-effort teardown of a process that is still referenced.
            try:
                recording_process.kill()
                recording_process.wait()
            except:
                pass
            recording_process = None
        return jsonify({
            'status': 'error',
            'message': str(exc)
        }), 500

@app.route('/terminal', methods=['GET'])
def get_terminal_output():
    """Return the terminal text reported by the platform adapter.

    Responds 501 when the adapter has no ``get_terminal_output``, 404 when
    it returns nothing, 500 on an unexpected error.
    """
    try:
        if not (platform_adapter and hasattr(platform_adapter, 'get_terminal_output')):
            return jsonify({
                'status': 'error',
                'message': f'Terminal output not supported on {platform_name}'
            }), 501

        terminal_text = platform_adapter.get_terminal_output()
        if not terminal_text:
            return jsonify({
                'status': 'error',
                'message': f'No terminal output available on {platform_name}',
                'platform_note': 'Make sure a terminal window is open and active'
            }), 404

        return jsonify({'output': terminal_text, 'status': 'success'})
    except Exception as exc:
        logger.error(f"Failed to get terminal output: {str(exc)}")
        return jsonify({'status': 'error', 'message': str(exc)}), 500


@app.route("/setup/upload", methods=["POST"])
def upload_file():
    """Upload file

    Multipart form fields:
        file: The uploaded file (required).
        path: Target directory (default ``~/Desktop``); ``~`` is expanded
            and the directory is created if missing.

    Only the final component of the client-supplied filename is used, so the
    file is always written inside the target directory.

    Returns:
        200 with the saved path, 400 when no usable file/filename is given,
        500 on any other failure.
    """
    if 'file' not in request.files:
        return jsonify({'status': 'error', 'message': 'No file provided'}), 400

    file = request.files['file']
    if not file.filename:
        return jsonify({'status': 'error', 'message': 'No file selected'}), 400

    # Drop any directory part (either separator style) from the client's
    # filename: os.path.join would otherwise let an absolute or "../" name
    # escape target_path.
    safe_name = os.path.basename(file.filename.replace('\\', '/'))
    if safe_name in ('', '.', '..'):
        return jsonify({'status': 'error', 'message': 'Invalid file name'}), 400

    try:
        # Get target path
        target_path = request.form.get('path', os.path.expanduser('~/Desktop'))
        target_path = os.path.expanduser(target_path)

        # Ensure directory exists
        os.makedirs(target_path, exist_ok=True)

        # Save file
        file_path = os.path.join(target_path, safe_name)
        file.save(file_path)

        logger.info(f"File uploaded successfully: {file_path}")
        return jsonify({
            'status': 'success',
            'path': file_path,
            'message': 'File uploaded successfully'
        })
    except Exception as e:
        logger.error(f"File upload failed: {str(e)}")
        return jsonify({'status': 'error', 'message': str(e)}), 500

@app.route("/setup/download_file", methods=["POST"])
def download_file():
    """Download file

    Request JSON:
        path: File to send back as an attachment (required); ``~`` is
            expanded and relative paths are resolved against the current
            working directory.

    Returns:
        The file as an attachment, 400 when ``path`` is missing, 404 when it
        does not exist, 500 on any other failure.
    """
    data = request.json
    if not isinstance(data, dict):
        data = {}
    path = data.get('path')

    if not path:
        return jsonify({'status': 'error', 'message': 'path required'}), 400

    try:
        # Make the path absolute so the existence check and send_file agree:
        # Flask's send_file may resolve a relative path against the
        # application root rather than the working directory.
        path = os.path.abspath(os.path.expanduser(path))

        if not os.path.exists(path):
            return jsonify({'status': 'error', 'message': f'File not found: {path}'}), 404

        return send_file(path, as_attachment=True)
    except Exception as e:
        logger.error(f"File download failed: {str(e)}")
        return jsonify({'status': 'error', 'message': str(e)}), 500

@app.route("/setup/open_file", methods=['POST'])
def open_file():
    """Open file (using system default application)

    Request JSON:
        path: File to open (required); ``~`` is expanded.

    Uses ``open`` on macOS, ``xdg-open`` on Linux and ``os.startfile`` on
    Windows.

    Returns:
        200 once the opener has been invoked, 400 when ``path`` is missing,
        404 when it does not exist, 501 on any other platform, 500 on an
        unexpected error.
    """
    data = request.json
    if not isinstance(data, dict):
        data = {}
    path = data.get('path')

    if not path:
        return jsonify({'status': 'error', 'message': 'path required'}), 400

    try:
        path = os.path.expanduser(path)

        if not os.path.exists(path):
            return jsonify({'status': 'error', 'message': f'File not found: {path}'}), 404

        if platform_name == "Darwin":
            subprocess.Popen(['open', path])
        elif platform_name == "Linux":
            subprocess.Popen(['xdg-open', path])
        elif platform_name == "Windows":
            os.startfile(path)
        else:
            # Nothing was opened: report that instead of claiming success.
            return jsonify({
                'status': 'error',
                'message': f'File opening not supported on {platform_name}'
            }), 501

        logger.info(f"File opened successfully: {path}")
        return jsonify({
            'status': 'success',
            'message': f'File opened: {path}'
        })
    except Exception as e:
        logger.error(f"File opening failed: {str(e)}")
        return jsonify({'status': 'error', 'message': str(e)}), 500

def print_banner(host: str = "127.0.0.1", port: int = 5000, debug: bool = False):
    """Print the startup banner: server URL, platform, run mode and a stop hint.

    Args:
        host: Address the server is bound to; '0.0.0.0' adds an extra
            "all interfaces" line.
        port: Port shown in the URL.
        debug: Selects the 'Debug' or 'Production' mode label.
    """
    from anytool.utils.display import print_banner as display_banner, print_section, print_separator, colorize

    display_banner("AnyTool · Local Server")

    # Lines of the "Server" section, starting with the URL.
    info_lines = [colorize(f"http://{host}:{port}", 'g', bold=True)]

    if host == '0.0.0.0':
        listening_label = colorize('Listening on all interfaces', 'gr')
        bind_label = colorize('(0.0.0.0:' + str(port) + ')', 'y')
        info_lines.append(f"{listening_label} {bind_label}")

    platform_label = colorize(platform_name, 'gr')
    mode_text, mode_color = ('Debug', 'y') if debug else ('Production', 'g')
    mode_label = colorize(mode_text, mode_color)
    info_lines.append(f"{platform_label} · {mode_label}")

    print_section("Server", info_lines)

    print()
    print_separator()
    stop_hint = colorize('Press Ctrl+C to stop', 'gr')
    print(f"  {stop_hint}")
    print()

def run_health_check_async():
    """Run the health check on a daemon thread and return immediately.

    The worker sleeps for 2 seconds, runs ``health_checker.check_all`` with
    endpoint testing enabled, prints the results and logs a summary.
    """
    def _run():
        from anytool.utils.display import colorize
        time.sleep(2)

        print(colorize("\n  - Starting health check...\n", 'c', bold=True))

        report = health_checker.check_all(test_endpoints=True)
        health_checker.print_results(report, show_endpoint_details=False)

        stats = health_checker.get_summary()
        available = stats['fully_available']
        total = stats['total']
        logger.info(f"Health check completed: {available}/{total} fully available")

    threading.Thread(target=_run, daemon=True).start()

def run_server(host: str = "127.0.0.1", port: int = 5000, debug: bool = False):
    """
    Start the desktop control server.

    Creates the module-level ``health_checker``, prints the banner, starts
    the background health check (skipped in debug mode) and then runs the
    Flask app with threading enabled.

    Args:
        host: Listening address (127.0.0.1 for local, 0.0.0.0 for all interfaces)
        port: Listening port
        debug: Debug mode (display detailed logs)
    """
    global health_checker

    # The health checker targets loopback when listening on all interfaces.
    probe_host = '127.0.0.1' if host == '0.0.0.0' else host
    base_url = f"http://{probe_host}:{port}"
    health_checker = HealthChecker(feature_checker, base_url, auto_cleanup=False)

    print_banner(host, port, debug)

    if debug:
        pass  # no background health check in debug mode
    else:
        run_health_check_async()

    app.run(host=host, port=port, debug=debug, threaded=True)

def main():
    """Command-line entry point: parse arguments, load config, start the server.

    The config file is ``--config`` or ``config.json`` next to this module.
    When it exists, its ``server`` section supplies ``host``/``port`` for any
    option left at its built-in default, and ``debug`` is enabled if either
    the flag or the config sets it. If the config cannot be loaded the error
    is logged and the command-line values are used.
    """
    import argparse
    from anytool.config.utils import get_config_value

    parser = argparse.ArgumentParser(
        description='AnyTool Local Server - Desktop Control Server'
    )
    parser.add_argument('--host', type=str, default='127.0.0.1',
                       help='Server host (default: 127.0.0.1)')
    parser.add_argument('--port', type=int, default=5000,
                       help='Server port (default: 5000)')
    parser.add_argument('--debug', action='store_true',
                       help='Enable debug mode')
    parser.add_argument('--config', type=str,
                       help='Path to config.json file')

    args = parser.parse_args()

    config_path = args.config
    if not config_path:
        config_path = os.path.join(os.path.dirname(__file__), 'config.json')

    # Start from the command-line values; the config may override them.
    host, port, debug = args.host, args.port, args.debug

    if os.path.exists(config_path):
        try:
            with open(config_path, 'r') as f:
                config = json.load(f)
            server_config = get_config_value(config, 'server', {})

            # An option still at its built-in default yields to the config.
            cfg_host = args.host if args.host != '127.0.0.1' else get_config_value(server_config, 'host', '127.0.0.1')
            cfg_port = args.port if args.port != 5000 else get_config_value(server_config, 'port', 5000)
            cfg_debug = args.debug or get_config_value(server_config, 'debug', False)

            # Assign together so a failure above leaves the CLI values intact.
            host, port, debug = cfg_host, cfg_port, cfg_debug
        except Exception as e:
            logger.error(f"Failed to load config: {e}")

    # Started outside the try/with: a server failure must not be reported as
    # a config error nor trigger a second start, and the config file should
    # not stay open while the server runs.
    run_server(host=host, port=port, debug=debug)


# Run the command-line entry point when this module is executed as a script.
if __name__ == "__main__":
    main()

================================================
FILE: anytool/local_server/platform_adapters/__init__.py
================================================
import platform
from typing import Optional, Any

platform_name = platform.system()

# Start from "no adapter"; a successful import for the current OS overrides
# both values. An unrecognised platform keeps the defaults.
PlatformAdapter = None
ADAPTER_AVAILABLE = False

try:
    if platform_name == "Darwin":
        from .macos_adapter import MacOSAdapter as PlatformAdapter
        ADAPTER_AVAILABLE = True
    elif platform_name == "Linux":
        from .linux_adapter import LinuxAdapter as PlatformAdapter
        ADAPTER_AVAILABLE = True
    elif platform_name == "Windows":
        from .windows_adapter import WindowsAdapter as PlatformAdapter
        ADAPTER_AVAILABLE = True
except ImportError:
    # The adapter module (or one of its imports) is unavailable.
    PlatformAdapter = None
    ADAPTER_AVAILABLE = False

def get_platform_adapter() -> Optional[Any]:
    """Return a new ``PlatformAdapter`` instance, or ``None`` when no adapter is available."""
    if not (ADAPTER_AVAILABLE and PlatformAdapter):
        return None
    return PlatformAdapter()

# Public names of this package.
__all__ = ["PlatformAdapter", "get_platform_adapter", "ADAPTER_AVAILABLE"]


================================================
FILE: anytool/local_server/platform_adapters/linux_adapter.py
================================================
import subprocess
import os
from typing import Dict, Any, Optional, List
from anytool.utils.logging import Logger
from PIL import Image
import pyautogui

try:
    import pyatspi
    from pyatspi import Accessible, StateType, STATE_SHOWING
    import Xlib
    from Xlib import display, X
    LINUX_LIBS_AVAILABLE = True
except ImportError:
    LINUX_LIBS_AVAILABLE = False

logger = Logger.get_logger(__name__)


class LinuxAdapter:
    
    def __init__(self):
        """Record whether the optional Linux libraries were importable."""
        self.available = LINUX_LIBS_AVAILABLE
        if not self.available:
            logger.warning("Linux libraries are not fully installed, some features may not be available")
    
    def capture_screenshot_with_cursor(self, output_path: str) -> bool:
        """
        Capture the screen with pyautogui and overlay the mouse cursor via pyxcursor.

        The cursor overlay is best-effort: if it fails, the plain screenshot
        is still saved.

        Args:
            output_path: Output file path

        Returns:
            True if the screenshot was saved, False otherwise
        """
        try:
            screenshot = pyautogui.screenshot()

            try:
                import sys

                # pyxcursor is expected next to this file. Register the
                # directory once instead of growing sys.path on every call.
                adapter_dir = os.path.dirname(__file__)
                if adapter_dir not in sys.path:
                    sys.path.insert(0, adapter_dir)

                from pyxcursor import Xcursor

                cursor_obj = Xcursor()
                imgarray = cursor_obj.getCursorImageArrayFast()
                cursor_img = Image.fromarray(imgarray)
                cursor_x, cursor_y = pyautogui.position()
                # The cursor image is passed a second time as the paste mask
                screenshot.paste(cursor_img, (cursor_x, cursor_y), cursor_img)
                with_cursor = True
            except Exception as e:
                logger.warning(f"Failed to add cursor to screenshot: {e}")
                with_cursor = False

            screenshot.save(output_path)

            # Report success only after the file has actually been written
            if with_cursor:
                logger.info("Linux screenshot successfully (with cursor)")
            else:
                logger.info("Linux screenshot successfully (without cursor)")
            return True

        except Exception as e:
            logger.error(f"Linux screenshot failed: {e}")
            return False
    
    def activate_window(self, window_name: str, strict: bool = False, by_class: bool = False) -> Dict[str, Any]:
        """
        Bring a window to the foreground through the wmctrl command-line tool.

        Args:
            window_name: Name handed to wmctrl to select the window
            strict: Adds the F option letter to the wmctrl flags when True
            by_class: Adds the x option letter to the wmctrl flags when True

        Returns:
            Dict with 'status' ('success' or 'error') and a 'message'
        """
        # Option letters are packed into one argument, e.g. "-xFa"
        option_letters = ["-"]
        if by_class:
            option_letters.append("x")
        if strict:
            option_letters.append("F")
        option_letters.append("a")
        wmctrl_args = ["wmctrl", "".join(option_letters), window_name]

        try:
            subprocess.run(wmctrl_args, check=True, timeout=5)
            logger.info(f"Linux window activated successfully: {window_name}")
            return {'status': 'success', 'message': 'Window activated'}
        except subprocess.CalledProcessError as e:
            # wmctrl ran but exited with a non-zero status
            logger.warning(f"wmctrl command execution failed: {e}")
            return {'status': 'error', 'message': f'Window {window_name} not found or wmctrl failed'}
        except FileNotFoundError:
            # The wmctrl executable itself could not be launched
            logger.error("wmctrl not installed, please install: sudo apt install wmctrl")
            return {'status': 'error', 'message': 'wmctrl not installed'}
        except Exception as e:
            logger.error(f"Linux window activation failed: {e}")
            return {'status': 'error', 'message': str(e)}
    
    def close_window(self, window_name: str, strict: bool = False, by_class: bool = False) -> Dict[str, Any]:
        """
        Ask wmctrl to close a window.

        Args:
            window_name: Name handed to wmctrl to select the window
            strict: Adds the F option letter to the wmctrl flags when True
            by_class: Adds the x option letter to the wmctrl flags when True

        Returns:
            Dict with 'status' ('success' or 'error') and a 'message'
        """
        # Option letters are packed into one argument, e.g. "-xFc"
        option_letters = ["-"]
        if by_class:
            option_letters.append("x")
        if strict:
            option_letters.append("F")
        option_letters.append("c")
        wmctrl_args = ["wmctrl", "".join(option_letters), window_name]

        try:
            subprocess.run(wmctrl_args, check=True, timeout=5)
            logger.info(f"Linux window closed successfully: {window_name}")
            return {'status': 'success', 'message': 'Window closed'}
        except subprocess.CalledProcessError as e:
            # wmctrl ran but exited with a non-zero status
            logger.warning(f"wmctrl command execution failed: {e}")
            return {'status': 'error', 'message': f'Window {window_name} not found or wmctrl failed'}
        except FileNotFoundError:
            # The wmctrl executable itself could not be launched
            logger.error("wmctrl not installed")
            return {'status': 'error', 'message': 'wmctrl not installed'}
        except Exception as e:
            logger.error(f"Linux window close failed: {e}")
            return {'status': 'error', 'message': str(e)}
    
    def get_accessibility_tree(self, max_depth: int = 10, max_width: int = 50) -> Dict[str, Any]:
        """
        Serialize the AT-SPI accessibility tree starting at desktop 0.

        Args:
            max_depth: Deepest level that is still serialized
            max_width: Maximum number of children serialized per element

        Returns:
            {'tree': ..., 'platform': 'Linux'} on success, otherwise a dict
            with a single 'error' entry
        """
        if not LINUX_LIBS_AVAILABLE:
            return {'error': 'Linux accessibility libraries not available'}

        try:
            root = pyatspi.Registry.getDesktop(0)
            serialized = self._serialize_atspi_element(
                root,
                depth=0,
                max_depth=max_depth,
                max_width=max_width
            )
            return {'tree': serialized, 'platform': 'Linux'}
        except Exception as e:
            logger.error(f"Linux get accessibility tree failed: {e}")
            return {'error': str(e)}
    
    def _serialize_atspi_element(
        self,
        element: "Accessible",
        depth: int = 0,
        max_depth: int = 10,
        max_width: int = 50
    ) -> Optional[Dict[str, Any]]:
        """
        Serialize an AT-SPI element and its descendants to a dictionary.

        Role and name are mandatory; states, attributes, geometry, text and
        children are collected best-effort and skipped when a query fails.

        Args:
            element: AT-SPI accessible element
            depth: Current depth of `element` in the tree
            max_depth: Elements deeper than this are not serialized
            max_width: Maximum number of children serialized per element

        Returns:
            Serialized dictionary, or None when depth exceeds max_depth or
            the element's role/name cannot be read
        """
        if depth > max_depth:
            return None

        try:
            result = {
                'depth': depth,
                'role': element.getRoleName(),
                'name': element.name,
            }

            # Query the state set once; it feeds both the state names and the
            # visibility check below. A failing query means "no states"
            # rather than dropping the whole element.
            try:
                states = list(element.getState().get_states())
            except Exception:
                states = []

            try:
                # Enum names are split at the first underscore and the
                # remainder is lowercased
                result['states'] = [
                    StateType._enum_lookup[st].split('_', 1)[1].lower()
                    for st in states if st in StateType._enum_lookup
                ]
            except Exception:
                result['states'] = []

            # Get attributes
            try:
                attributes = element.get_attributes()
                if attributes:
                    result['attributes'] = dict(attributes)
            except Exception:
                result['attributes'] = {}

            # Get position and size (only for elements currently showing)
            if STATE_SHOWING in states:
                try:
                    component = element.queryComponent()
                    bbox = component.getExtents(pyatspi.XY_SCREEN)
                    result['position'] = {'x': bbox[0], 'y': bbox[1]}
                    result['size'] = {'width': bbox[2], 'height': bbox[3]}
                except Exception:
                    pass

            # Get text content, dropping U+FFFC / U+FFFD placeholder characters
            try:
                text_obj = element.queryText()
                text = text_obj.getText(0, text_obj.characterCount)
                if text:
                    result['text'] = text.replace("\ufffc", "").replace("\ufffd", "")
            except Exception:
                pass

            # Recursively serialize at most max_width children
            result['children'] = []
            try:
                child_count = min(element.childCount, max_width)
                for i in range(child_count):
                    try:
                        child = element.getChildAtIndex(i)
                        child_data = self._serialize_atspi_element(
                            child,
                            depth + 1,
                            max_depth,
                            max_width
                        )
                        if child_data:
                            result['children'].append(child_data)
                    except Exception as e:
                        logger.debug(f"Cannot serialize child element {i}: {e}")
                        continue
            except Exception as e:
                logger.debug(f"Cannot get child elements: {e}")

            return result

        except Exception as e:
            logger.debug(f"Failed to serialize element (depth={depth}): {e}")
            return None
    
    def get_screen_size(self) -> Dict[str, int]:
        """
        Get the screen size in pixels.

        Uses Xlib when the optional Linux libraries are available and
        pyautogui otherwise.

        Returns:
            Dict with 'width' and 'height'; falls back to 1920x1080 when the
            size cannot be determined
        """
        try:
            if LINUX_LIBS_AVAILABLE:
                d = display.Display()
                try:
                    screen = d.screen()
                    return {
                        'width': screen.width_in_pixels,
                        'height': screen.height_in_pixels
                    }
                finally:
                    # Every Display() opens a new X connection; release it so
                    # repeated calls do not accumulate open connections.
                    d.close()

            # Use pyautogui as fallback
            size = pyautogui.size()
            return {'width': size.width, 'height': size.height}

        except Exception as e:
            logger.error(f"Failed to get screen size: {e}")
            return {'width': 1920, 'height': 1080}  # Default value
    
    def list_windows(self) -> List[Dict[str, Any]]:
        """
        List windows reported by `wmctrl -l`.

        Each output line is split into four whitespace-separated fields;
        lines with fewer than four fields are skipped.

        Returns:
            List of dicts with 'id', 'desktop', 'hostname' and 'title';
            empty when wmctrl is missing, fails, or times out
        """
        try:
            # Same 5 second bound as the other wmctrl calls in this class, so
            # a hung wmctrl cannot block the caller indefinitely.
            result = subprocess.run(
                ['wmctrl', '-l'],
                capture_output=True,
                text=True,
                check=True,
                timeout=5
            )

            windows = []
            for line in result.stdout.strip().split('\n'):
                if not line:
                    continue
                # Split at most three times so the title keeps its spaces
                parts = line.split(None, 3)
                if len(parts) >= 4:
                    windows.append({
                        'id': parts[0],
                        'desktop': parts[1],
                        'hostname': parts[2],
                        'title': parts[3]
                    })

            return windows

        except FileNotFoundError:
            logger.error("wmctrl not installed")
            return []
        except Exception as e:
            logger.error(f"List windows failed: {e}")
            return []
    
    def get_terminal_output(self) -> Optional[str]:
        """
        Read the text of a terminal in the active gnome-terminal-server frame.

        Returns:
            The terminal text with trailing whitespace removed, or None when
            the libraries are unavailable, no text is found, or an error occurs
        """
        if not LINUX_LIBS_AVAILABLE:
            return None

        try:
            for app in pyatspi.Registry.getDesktop(0):
                # Only the gnome-terminal-server application is of interest
                if app.getRoleName() != "application" or app.name != "gnome-terminal-server":
                    continue

                for frame in app:
                    if frame.getRoleName() != "frame":
                        continue
                    if not frame.getState().contains(pyatspi.STATE_ACTIVE):
                        continue

                    # The first terminal whose text can be read decides the result
                    for terminal in self._find_terminals(frame):
                        try:
                            text_iface = terminal.queryText()
                            content = text_iface.getText(0, text_iface.characterCount)
                            return content.rstrip() if content else None
                        except:
                            continue

            return None

        except Exception as e:
            logger.error(f"Failed to get terminal output: {e}")
            return None
    
    def _find_terminals(self, element) -> List["Accessible"]:
        """
        Recursively collect elements whose role name is "terminal".

        Args:
            element: Element whose subtree is searched (itself included)

        Returns:
            Terminals found so far; a failure while walking the subtree ends
            the walk at that point and keeps what was already collected
        """
        terminals = []
        try:
            if element.getRoleName() == "terminal":
                terminals.append(element)

            for i in range(element.childCount):
                child = element.getChildAtIndex(i)
                terminals.extend(self._find_terminals(child))
        except Exception as e:
            # Best-effort search: leave a trace instead of failing silently
            logger.debug(f"Terminal search stopped early: {e}")

        return terminals
    
    def set_wallpaper(self, image_path: str) -> Dict[str, Any]:
        """
        Set the desktop wallpaper through gsettings (GNOME).

        Args:
            image_path: Image path; `~` is expanded and the path is made absolute

        Returns:
            Dict with 'status' ('success' or 'error') and a 'message'
        """
        try:
            from pathlib import Path

            image_path = os.path.abspath(os.path.expanduser(image_path))

            if not os.path.exists(image_path):
                return {'status': 'error', 'message': f'Image not found: {image_path}'}

            # as_uri() percent-encodes characters such as spaces, '#' and '%'
            # that would otherwise yield a malformed file:// URI.
            picture_uri = Path(image_path).as_uri()

            # Use gsettings to set wallpaper (GNOME)
            subprocess.run([
                'gsettings', 'set',
                'org.gnome.desktop.background',
                'picture-uri',
                picture_uri
            ], check=True, timeout=5)

            logger.info(f"Linux wallpaper set successfully: {image_path}")
            return {'status': 'success', 'message': 'Wallpaper set successfully'}

        except Exception as e:
            logger.error(f"Linux set wallpaper failed: {e}")
            return {'status': 'error', 'message': str(e)}
    
    def get_system_info(self) -> Dict[str, Any]:
        """
        Collect basic Linux system information.

        The distribution name is read from /etc/os-release (PRETTY_NAME) and
        the kernel release from `uname -r`.

        Returns:
            Dict with 'platform', 'distro', 'kernel' and 'available', or
            'platform' and 'error' on failure
        """
        try:
            # Distribution name; any problem reading the file yields a placeholder
            try:
                with open('/etc/os-release', 'r') as f:
                    os_info = {
                        key: value.strip('"')
                        for key, value in (
                            line.strip().split('=', 1) for line in f if '=' in line
                        )
                    }
                distro = os_info.get('PRETTY_NAME', 'Unknown Linux')
            except:
                distro = 'Unknown Linux'

            # Kernel release as printed by uname
            uname_result = subprocess.run(
                ['uname', '-r'],
                capture_output=True,
                text=True
            )
            kernel = uname_result.stdout.strip()

            info = {
                'platform': 'Linux',
                'distro': distro,
                'kernel': kernel,
                'available': self.available
            }
            return info

        except Exception as e:
            logger.error(f"Failed to get system information: {e}")
            return {
                'platform': 'Linux',
                'error': str(e)
            }
    
    def start_recording(self, output_path: str) -> Dict[str, Any]:
        """
        Start recording the X11 screen to a video file with ffmpeg.

        Args:
            output_path: Path of the video file ffmpeg writes (overwritten if present)

        Returns:
            Dict with 'status' and 'message'; on success also 'process', the
            running ffmpeg Popen handle to hand to stop_recording()
        """
        try:
            # Fail early with an actionable message when ffmpeg is missing
            try:
                subprocess.run(['ffmpeg', '-version'],
                             capture_output=True,
                             check=True,
                             timeout=5)
            except (subprocess.CalledProcessError, FileNotFoundError):
                return {
                    'status': 'error',
                    'message': 'ffmpeg not installed. Install with: sudo apt install ffmpeg'
                }

            # get_screen_size() already covers the Xlib -> pyautogui ->
            # 1920x1080 fallback chain, so it is reused here.
            screen = self.get_screen_size()
            screen_width, screen_height = screen['width'], screen['height']

            # Record the X display this process is attached to; ':0.0' is
            # only the fallback when DISPLAY is unset or empty.
            x_display = os.environ.get('DISPLAY') or ':0.0'

            command = [
                'ffmpeg',
                '-y',
                # stderr is a pipe that is not read while recording, so keep
                # ffmpeg from filling it with progress output.
                '-loglevel', 'error',
                '-f', 'x11grab',
                '-draw_mouse', '1',
                '-s', f'{screen_width}x{screen_height}',
                '-i', x_display,
                '-c:v', 'libx264',
                '-preset', 'ultrafast',
                '-r', '30',
                output_path
            ]

            process = subprocess.Popen(
                command,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.PIPE,
                text=True
            )

            # Give ffmpeg a moment to fail on bad arguments or an unreachable display
            import time
            time.sleep(1)

            if process.poll() is not None:
                error_output = process.stderr.read() if process.stderr else "Unknown error"
                return {
                    'status': 'error',
                    'message': f'Failed to start recording: {error_output}'
                }

            logger.info(f"Linux recording started: {output_path}")
            return {
                'status': 'success',
                'message': 'Recording started',
                'process': process
            }

        except Exception as e:
            logger.error(f"Linux start recording failed: {e}")
            return {
                'status': 'error',
                'message': str(e)
            }
    
    def stop_recording(self, process) -> Dict[str, Any]:
        """
        Stop a recording process by sending it SIGINT.

        Args:
            process: Popen handle of the recorder (the 'process' entry
                returned by start_recording())

        Returns:
            Dict with 'status' ('success' or 'error') and a 'message'
        """
        try:
            import signal

            if not process or process.poll() is not None:
                return {
                    'status': 'error',
                    'message': 'No recording in progress'
                }

            process.send_signal(signal.SIGINT)

            try:
                # communicate() drains and closes the piped stderr while
                # waiting, so a full pipe cannot block the recorder's shutdown.
                process.communicate(timeout=15)
            except subprocess.TimeoutExpired:
                logger.warning("ffmpeg did not respond to SIGINT, killing process")
                process.kill()
                process.communicate()

            logger.info("Linux recording stopped successfully")
            return {
                'status': 'success',
                'message': 'Recording stopped'
            }

        except Exception as e:
            logger.error(f"Linux stop recording failed: {e}")
            return {
                'status': 'error',
                'message': str(e)
            }
    
    def get_running_applications(self) -> List[Dict[str, Any]]:
        """
        Get a list of running applications, one entry per distinct process name.

        Processes without an executable path, or whose name starts with '[',
        are skipped.

        Returns:
            List of dicts with 'name', 'pid', 'path' and 'cmdline'; empty
            when psutil is missing or the scan fails
        """
        try:
            import psutil

            apps = []
            seen_names = set()

            for proc in psutil.process_iter(['pid', 'name', 'exe', 'cmdline']):
                try:
                    pinfo = proc.info
                    # Fields may come back as None; normalise them so one odd
                    # process cannot abort the whole scan with a TypeError.
                    name = pinfo['name'] or ''
                    exe = pinfo['exe']

                    # Skip kernel processes and system daemons
                    if not exe or name.startswith('['):
                        continue

                    # Skip duplicates
                    if name in seen_names:
                        continue

                    seen_names.add(name)

                    apps.append({
                        'name': name,
                        'pid': pinfo['pid'],
                        'path': exe,
                        'cmdline': ' '.join(pinfo.get('cmdline') or [])
                    })

                except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
                    pass

            return apps

        except ImportError:
            logger.warning("psutil not installed, cannot get running applications")
            return []
        except Exception as e:
            logger.error(f"Failed to get running applications list: {e}")
            return []

================================================
FILE: anytool/local_server/platform_adapters/macos_adapter.py
================================================
import subprocess
import os
from typing import Dict, Any, Optional, List
from anytool.utils.logging import Logger

# Optional macOS libraries. MACOS_LIBS_AVAILABLE is True only when both
# imports succeed; it is False as soon as either one raises ImportError.
try:
    import AppKit
    import atomacos
    MACOS_LIBS_AVAILABLE = True
except ImportError:
    MACOS_LIBS_AVAILABLE = False

logger = Logger.get_logger(__name__)

_warning_shown = False


class MacOSAdapter:
    def __init__(self):
        """Record library availability; warn about missing libraries once per process."""
        global _warning_shown
        self.available = MACOS_LIBS_AVAILABLE
        if MACOS_LIBS_AVAILABLE or _warning_shown:
            return
        logger.warning("macOS libraries are not fully installed, some features may not be available")
        logger.info("To install missing libraries, run: pip install pyobjc-framework-Cocoa atomacos")
        _warning_shown = True
    
    def capture_screenshot_with_cursor(self, output_path: str) -> bool:
        """
        Take a screenshot with the macOS `screencapture` command.

        Args:
            output_path: Output file path

        Returns:
            True when the command succeeded, False otherwise
        """
        # Flags: -C include cursor, -x no sound, -m main display only
        command = ["screencapture", "-C", "-x", "-m", output_path]
        try:
            subprocess.run(command, check=True)
        except Exception as e:
            logger.error(f"macOS screenshot failed: {e}")
            return False
        try:
            logger.info(f"macOS screenshot successfully: {output_path}")
        except Exception as e:
            logger.error(f"macOS screenshot failed: {e}")
            return False
        return True
    
    def activate_window(self, window_name: str, strict: bool = False) -> Dict[str, Any]:
        """
        Activate an application or window through AppleScript (System Events).

        The script first tries a process whose name equals `window_name`; if
        that fails it looks through visible processes for a window whose
        title contains `window_name`.

        Args:
            window_name: Window name or application name
            strict: Accepted for interface compatibility; this implementation
                does not use it

        Returns:
            Dict with 'status' ('success' or 'error') and a 'message'
        """
        try:
            # Escape backslashes and double quotes so the name stays inside
            # the AppleScript string literal instead of ending it early.
            safe_name = window_name.replace('\\', '\\\\').replace('"', '\\"')

            script = f'''
            tell application "System Events"
                set appName to "{safe_name}"
                try
                    -- Try to activate application by name
                    set frontmost of first process whose name is appName to true
                    return "success"
                on error
                    -- Try to find window by title
                    set foundWindow to false
                    repeat with theProcess in (every process whose visible is true)
                        try
                            tell theProcess
                                repeat with theWindow in windows
                                    if name of theWindow contains appName then
                                        set frontmost of theProcess to true
                                        set foundWindow to true
                                        exit repeat
                                    end if
                                end repeat
                            end tell
                        end try
                        if foundWindow then exit repeat
                    end repeat
                    
                    if foundWindow then
                        return "success"
                    else
                        return "not found"
                    end if
                end try
            end tell
            '''

            result = subprocess.run(
                ['osascript', '-e', script],
                capture_output=True,
                text=True,
                timeout=10
            )

            # The script prints "success" or "not found"
            if "success" in result.stdout:
                logger.info(f"macOS window activated successfully: {window_name}")
                return {'status': 'success', 'message': 'Window activated'}
            else:
                logger.warning(f"macOS window not found: {window_name}")
                return {'status': 'error', 'message': f'Window {window_name} not found'}

        except Exception as e:
            logger.error(f"macOS window activation failed: {e}")
            return {'status': 'error', 'message': str(e)}
    
    def close_window(self, window_name: str, strict: bool = False) -> Dict[str, Any]:
        """
        Quit an application through AppleScript, force-killing it if the quit hangs.

        Args:
            window_name: Application name to quit
            strict: Accepted for interface compatibility; this implementation
                does not use it

        Returns:
            Dict with 'status' ('success' or 'error') and a 'message'
        """
        try:
            # Escape backslashes and double quotes so the name stays inside
            # the AppleScript string literal instead of ending it early.
            safe_name = window_name.replace('\\', '\\\\').replace('"', '\\"')

            script = f'''
            tell application "{safe_name}"
                quit
            end tell
            '''

            subprocess.run(['osascript', '-e', script], check=True, timeout=5)
            logger.info(f"macOS window/application closed successfully: {window_name}")
            return {'status': 'success', 'message': 'Window/Application closed'}

        except subprocess.TimeoutExpired:
            # The polite quit hung, so asking again would only hang again.
            # Kill by name directly; an argument list avoids shell quoting.
            try:
                subprocess.run(['killall', window_name], check=True, timeout=5)
                logger.info(f"macOS application force closed: {window_name}")
                return {'status': 'success', 'message': 'Application force closed'}
            except Exception as e2:
                logger.error(f"macOS force close failed: {e2}")
                return {'status': 'error', 'message': str(e2)}

        except Exception as e:
            logger.error(f"macOS close window failed: {e}")
            return {'status': 'error', 'message': str(e)}
    
    def get_accessibility_tree(self, max_depth: int = 10) -> Dict[str, Any]:
        """
        Serialize the accessibility tree of the currently active application.

        Args:
            max_depth: Deepest level that is still serialized

        Returns:
            Dict with 'app_name', 'bundle_id', 'tree' and 'platform' on
            success, otherwise a dict containing an 'error' entry
        """
        if not MACOS_LIBS_AVAILABLE:
            return {'error': 'macOS accessibility libraries not available'}

        try:
            active_app = AppKit.NSWorkspace.sharedWorkspace().activeApplication()
            if not active_app:
                return {'error': 'No active application'}

            app_name = active_app.get('NSApplicationName', 'Unknown')
            bundle_id = active_app.get('NSApplicationBundleIdentifier', '')

            logger.info(f"Getting accessibility tree: {app_name} ({bundle_id})")

            # The application reference is looked up by bundle ID only
            if not bundle_id:
                return {'error': 'Cannot find application without bundle ID'}

            try:
                app_ref = atomacos.getAppRefByBundleId(bundle_id)
                tree = self._serialize_ax_element(app_ref, depth=0, max_depth=max_depth)
                return {
                    'app_name': app_name,
                    'bundle_id': bundle_id,
                    'tree': tree,
                    'platform': 'macOS'
                }
            except Exception as e:
                logger.error(f"Cannot get app reference: {e}")
                return {
                    'error': f'Cannot get app reference: {e}',
                    'app_name': app_name,
                    'bundle_id': bundle_id
                }

        except Exception as e:
            logger.error(f"macOS get accessibility tree failed: {e}")
            return {'error': str(e)}
    
    def _serialize_ax_element(self, element, depth: int = 0, max_depth: int = 10) -> Optional[Dict[str, Any]]:
        """
        Serialize macOS accessibility element to dictionary
        
        Args:
            element: AX element
            depth: Current depth
            max_depth: Maximum depth
            
        Returns:
            Serialized dictionary
        """
        if depth > max_depth:
            return None
        
        try:
            result = {
                'depth': depth
            }
            
            # Get common attributes
            try:
                result['role'] = element.AXRole if hasattr(element, 'AXRole') else 'unknown'
            except:
                result['role'] = 'unknown'
            
            try:
                result['title'] = element.AXTitle if hasattr(element, 'AXTitle') else ''
            except:
                result['title'] = ''
            
            try:
                result['description'] = element.AXDescription if hasattr(element, 'AXDescription') else ''
            except:
                result['description'] = ''
            
            try:
                result['value'] = str(element.AXValue) if hasattr(element, 'AXValue') else ''
            except:
                result['value'] = ''
            
            try:
                result['enabled'] = element.AXEnabled if hasattr(element, 'AXEnabled') else False
            except:
                result['enabled'] = False
            
            try:
                result['focused'] = element.AXFocused if hasattr(element, 'AXFocused') else False
            except:
                result['focused'] = False
            
            # Position and size
            try:
                if hasattr(element, 'AXPosition'):
                    pos = element.AXPosition
                    result['position'] = {'x': pos.x, 'y': pos.y}
            except:
                pass
            
            try:
                if hasattr(element, 'AXSize'):
                    size = element.AXSize
                    result['size'] = {'width': size.width, 'height': size.height}
            except:
                pass
            
            # Recursively get child elements (with limit)
            result['children'] = []
            try:
                if hasattr(element, 'AXChildren') and element.AXChildren:
                    for i, child in enumerate(element.AXChildren[:30]):  # Limit to max 30 child elements
                        try:
                            child_data = self._serialize_ax_element(child, depth + 1, max_depth)
                            if child_data:
                                result['children'].append(child_data)
                        except Exception as e:
                            logger.debug(f"Cannot serialize child element {i}: {e}")
                            continue
            except Exception as e:
                logger.debug(f"Cannot get child elements: {e}")
            
            return result
            
        except Exception as e:
            logger.debug(f"Failed to serialize element (depth={depth}): {e}")
            return None
    
    def get_running_applications(self) -> List[Dict[str, str]]:
        """
        List running applications whose activation policy is "regular".

        Returns:
            List of dicts with 'name', 'bundle_id', 'pid' and 'active';
            empty when the lookup fails
        """
        try:
            running_apps = AppKit.NSWorkspace.sharedWorkspace().runningApplications()
            return [
                {
                    'name': app.localizedName() or 'Unknown',
                    'bundle_id': app.bundleIdentifier() or '',
                    'pid': app.processIdentifier(),
                    'active': app.isActive()
                }
                for app in running_apps
                if app.activationPolicy() == AppKit.NSApplicationActivationPolicyRegular
            ]
        except Exception as e:
            logger.error(f"Failed to get running applications list: {e}")
            return []
    
    def set_wallpaper(self, image_path: str) -> Dict[str, Any]:
        """
        Set the picture of every desktop through an AppleScript run by osascript.

        Args:
            image_path: Path of the image file; a leading ``~`` is expanded.

        Returns:
            ``{'status': 'success', 'message': ...}`` when osascript exits
            cleanly, otherwise ``{'status': 'error', 'message': ...}`` (file
            missing, non-zero exit status, timeout, ...).
        """
        try:
            image_path = os.path.expanduser(image_path)

            if not os.path.exists(image_path):
                return {'status': 'error', 'message': f'Image not found: {image_path}'}

            # The path ends up inside an AppleScript string literal, so a raw
            # backslash or double quote would terminate/alter the literal.
            # Escape backslashes first so the quote escapes are not doubled.
            escaped_path = image_path.replace('\\', '\\\\').replace('"', '\\"')

            # Use AppleScript to set wallpaper
            script = f'''
            tell application "System Events"
                tell every desktop
                    set picture to "{escaped_path}"
                end tell
            end tell
            '''

            subprocess.run(['osascript', '-e', script], check=True, timeout=10)
            logger.info(f"macOS wallpaper set successfully: {image_path}")
            return {'status': 'success', 'message': 'Wallpaper set successfully'}

        except Exception as e:
            logger.error(f"macOS set wallpaper failed: {e}")
            return {'status': 'error', 'message': str(e)}
    
    def get_system_info(self) -> Dict[str, Any]:
        """
        Collect basic macOS information from the ``sw_vers`` and ``sysctl`` tools.

        Returns:
            A dictionary with ``platform``, ``version`` (output of
            ``sw_vers -productVersion``), ``model`` (output of
            ``sysctl -n hw.model``) and ``available``. If anything raises, a
            dictionary with ``platform`` and ``error`` is returned instead.
        """
        def read_stdout(command):
            # Run a command and hand back its trimmed standard output
            completed = subprocess.run(command, capture_output=True, text=True)
            return completed.stdout.strip()

        try:
            os_version = read_stdout(['sw_vers', '-productVersion'])
            hw_model = read_stdout(['sysctl', '-n', 'hw.model'])

            info = {'platform': 'macOS'}
            info['version'] = os_version
            info['model'] = hw_model
            info['available'] = self.available
            return info

        except Exception as e:
            logger.error(f"Failed to get system information: {e}")
            return {
                'platform': 'macOS',
                'error': str(e)
            }
    
    def _detect_screen_device(self) -> str:
        """
        Work out the avfoundation input spec for screen capture, e.g. '1:none'.

        ``ffmpeg -f avfoundation -list_devices true -i ""`` prints its device
        list on stderr. Lines such as
        "[AVFoundation indev @ 0x...] [1] Capture screen 0" are scanned and the
        index of the first one found is used. If none is found, or the probe
        raises, '1:none' is returned as the default.
        """
        try:
            import re

            listing = subprocess.run(
                ['ffmpeg', '-f', 'avfoundation', '-list_devices', 'true', '-i', ''],
                stderr=subprocess.PIPE, text=True, timeout=5
            )

            # Matches "] [number] Capture screen" and captures the number
            capture_pattern = re.compile(r'\]\s*\[(\d+)\]\s*Capture screen')

            found_ids = []
            for raw_line in listing.stderr.splitlines():
                if 'Capture screen' not in raw_line or '[AVFoundation' not in raw_line:
                    continue
                hit = capture_pattern.search(raw_line)
                if not hit:
                    continue
                device_id = hit.group(1)
                found_ids.append(device_id)
                logger.info(f"Found screen capture device: {device_id} - {raw_line.strip()}")

            if not found_ids:
                logger.warning("No screen capture device found, using default '1:none'")
                return '1:none'  # Usually screen capture is device 1

            # The first screen capture device wins
            device = f'{found_ids[0]}:none'
            logger.info(f"Using screen capture device: {device}")
            return device

        except Exception as e:
            logger.warning(f"Failed to detect screen device: {e}, using default '1:none'")
            return '1:none'

    def start_recording(self, output_path: str) -> Dict[str, Any]:
        """
        Start recording the screen to ``output_path`` with ffmpeg (avfoundation).

        libx264 is used when ``ffmpeg -encoders`` lists it, otherwise mpeg4.
        After launching ffmpeg the method waits 1.5 seconds and treats an
        already-exited process as a start-up failure.

        Args:
            output_path: Destination video file (ffmpeg is run with ``-y``).

        Returns:
            ``{'status': 'success', 'message': ..., 'process': Popen}`` when
            ffmpeg is still running after the start-up wait; ``process`` is the
            handle ``stop_recording`` expects. Otherwise
            ``{'status': 'error', 'message': ...}``.
        """
        try:
            import time

            # Check if libx264 encoder is available
            encoder_probe = subprocess.run(
                ['ffmpeg', '-encoders'],
                capture_output=True,
                text=True,
                timeout=5
            )
            has_libx264 = 'libx264' in encoder_probe.stdout

            # The resolution is logged for diagnostics only; it is not passed
            # to ffmpeg.
            try:
                if MACOS_LIBS_AVAILABLE:
                    from AppKit import NSScreen
                    frame = NSScreen.mainScreen().frame()
                    width, height = int(frame.size.width), int(frame.size.height)
                    logger.info(f"Screen resolution: {width}x{height}")
                else:
                    width, height = 1920, 1080
                    logger.info(f"Using default resolution: {width}x{height}")
            except Exception:
                width, height = 1920, 1080
                logger.info(f"Using default resolution: {width}x{height}")

            # Detect screen capture device
            screen_dev = self._detect_screen_device()
            logger.info(f"Screen capture device: {screen_dev}")

            # Build ffmpeg command.
            # -nostats: ffmpeg writes a periodic progress line to stderr, and
            # nothing reads the stderr pipe while the recording runs. Without
            # this flag a long recording can fill the pipe buffer and block
            # ffmpeg. Error messages are still written to stderr.
            command = [
                'ffmpeg', '-y',
                '-nostats',
                '-f', 'avfoundation',
                '-capture_cursor', '1',
                '-capture_mouse_clicks', '1',
                '-framerate', '30',
                '-i', screen_dev,  # Use detected screen device
            ]

            if has_libx264:
                command.extend(['-c:v', 'libx264', '-pix_fmt', 'yuv420p'])
                logger.info("Using libx264 encoder")
            else:
                command.extend(['-c:v', 'mpeg4'])
                logger.info("Using mpeg4 encoder")

            command.extend(['-r', '30', output_path])

            logger.info(f"Starting recording with command: {' '.join(command)}")

            # stdin stays open so stop_recording can send 'q' to ffmpeg
            process = subprocess.Popen(
                command,
                stdin=subprocess.PIPE,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.PIPE,
                text=True
            )

            time.sleep(1.5)  # Give ffmpeg time to start (or fail)

            # Check if process exited early
            if process.poll() is not None:
                err = process.stderr.read() if process.stderr else ""
                logger.error(f"FFmpeg exited early with stderr: {err}")

                if "Operation not permitted" in err or "Screen Recording" in err:
                    return {
                        "status": "error",
                        "message": "Screen-recording permission denied. Please grant permission in System Settings → Privacy & Security → Screen Recording."
                    }

                # Check if it's a device error
                if "Input/output error" in err or "Invalid argument" in err or "does not exist" in err:
                    return {
                        "status": "error",
                        "message": f"Invalid screen capture device. Please ensure screen recording is enabled. Error: {err[:200]}"
                    }

                error_output = err or "Unknown error"
                return {
                    'status': 'error',
                    'message': f'Failed to start recording: {error_output[:300]}'
                }

            logger.info(f"macOS recording started successfully: {output_path}")
            return {
                'status': 'success',
                'message': 'Recording started',
                'process': process
            }

        except Exception as e:
            logger.error(f"macOS start recording failed: {e}")
            return {
                'status': 'error',
                'message': str(e)
            }
    
    def stop_recording(self, process) -> Dict[str, Any]:
        """
        Stop an ffmpeg recording process, escalating only as far as needed.

        The sequence is: write 'q' to ffmpeg's stdin and wait up to 5 seconds;
        if that times out or raises, send SIGINT and wait up to 20 seconds;
        finally kill the process.

        Args:
            process: The process object of the running recording (it must
                offer ``poll``, ``stdin``, ``wait``, ``send_signal``, ``kill``).

        Returns:
            ``{'status': 'success', 'message': 'Recording stopped'}`` once the
            process has ended, or an error dictionary when there is nothing to
            stop or an unexpected exception occurs.
        """
        try:
            import signal
            import time

            def interrupt_then_kill(responded_note, kill_note):
                # Fallback shared by both failure paths: SIGINT, then kill
                process.send_signal(signal.SIGINT)
                try:
                    process.wait(timeout=20)
                    if responded_note is not None:
                        logger.info(responded_note)
                except subprocess.TimeoutExpired:
                    logger.warning(kill_note)
                    process.kill()
                    process.wait()

            nothing_running = not process or process.poll() is not None
            if nothing_running:
                return {
                    'status': 'error',
                    'message': 'No recording in progress'
                }

            try:
                # Ask ffmpeg to finish the file and quit on its own
                process.stdin.write('q')
                process.stdin.flush()
                logger.info("Sent 'q' command to ffmpeg")

                process.wait(timeout=5)
                logger.info("ffmpeg exited gracefully")
                time.sleep(0.2)   # give ffmpeg time to flush the file

            except subprocess.TimeoutExpired:
                logger.warning("ffmpeg did not respond to 'q', trying SIGINT")
                interrupt_then_kill(
                    "ffmpeg responded to SIGINT",
                    "ffmpeg did not respond to SIGINT, killing process"
                )

            except Exception as e:
                logger.warning(f"Failed to send 'q': {e}, trying SIGINT")
                interrupt_then_kill(None, "Killing ffmpeg")

            time.sleep(0.5)

            logger.info("macOS recording stopped successfully")
            return {
                'status': 'success',
                'message': 'Recording stopped'
            }

        except Exception as e:
            logger.error(f"macOS stop recording failed: {e}")
            return {
                'status': 'error',
                'message': str(e)
            }
    
    def list_windows(self) -> List[Dict[str, Any]]:
        """
        List the windows of all visible processes via System Events.

        The AppleScript emits one line per window in the form
        ``<process name><TAB><window title>``. A tab/linefeed-delimited format
        is used instead of AppleScript's default comma-separated list output
        because window titles may themselves contain commas, which would
        mis-pair application names and titles when splitting on ','.

        Returns:
            A list of ``{'app_name': ..., 'window_title': ...}`` dictionaries;
            an empty list when the script fails or anything raises.
        """
        try:
            # Use AppleScript to get window list
            script = '''
            tell application "System Events"
                set outputText to ""
                repeat with theProcess in (every process whose visible is true)
                    try
                        set processName to name of theProcess
                        tell theProcess
                            repeat with theWindow in windows
                                try
                                    set windowTitle to (name of theWindow) as text
                                on error
                                    set windowTitle to ""
                                end try
                                set outputText to outputText & processName & tab & windowTitle & linefeed
                            end repeat
                        end tell
                    end try
                end repeat
                return outputText
            end tell
            '''

            result = subprocess.run(
                ['osascript', '-e', script],
                capture_output=True,
                text=True,
                timeout=10
            )

            windows = []
            if result.returncode == 0 and result.stdout:
                for record in result.stdout.split('\n'):
                    app_name, separator, window_title = record.partition('\t')
                    if not separator:
                        # Blank trailing line or anything not in record form
                        continue
                    windows.append({
                        'app_name': app_name.strip(),
                        'window_title': window_title.strip()
                    })

            return windows

        except Exception as e:
            logger.error(f"List windows failed: {e}")
            return []
    
    def get_terminal_output(self) -> Optional[str]:
        """
        Read the visible terminal contents from Terminal.app, else iTerm.

        Each application is queried through osascript with a 5 second timeout.
        The first non-empty, stripped output wins.

        Returns:
            The terminal text, or None when neither query yields text or an
            exception occurs.
        """
        try:
            # Contents of the selected tab of Terminal.app's front window
            terminal_script = '''
            tell application "Terminal"
                if (count of windows) > 0 then
                    try
                        set currentTab to selected tab of front window
                        set terminalOutput to contents of currentTab
                        return terminalOutput
                    on error
                        return ""
                    end try
                else
                    return ""
                end if
            end tell
            '''

            # Contents of the current session of iTerm's current window
            iterm_script = '''
            tell application "iTerm"
                if (count of windows) > 0 then
                    try
                        tell current session of current window
                            set terminalOutput to contents
                            return terminalOutput
                        end tell
                    on error
                        return ""
                    end try
                else
                    return ""
                end if
            end tell
            '''

            # Terminal.app is tried first; iTerm is the fallback
            for applescript in (terminal_script, iterm_script):
                completed = subprocess.run(
                    ['osascript', '-e', applescript],
                    capture_output=True,
                    text=True,
                    timeout=5
                )

                if completed.returncode != 0 or not completed.stdout:
                    continue

                text = completed.stdout.strip()
                if text:
                    return text

            return None

        except Exception as e:
            logger.error(f"Failed to get terminal output: {e}")
            return None

================================================
FILE: anytool/local_server/platform_adapters/pyxcursor.py
================================================
import os
import ctypes
import ctypes.util
import numpy as np

# struct and array are used to repack the Xlib pixel data into a byte array.
import struct, array

# ctypes counterparts of the C types used by the XFixesCursorImage structure.
PIXEL_DATA_PTR = ctypes.POINTER(ctypes.c_ulong)
Atom = ctypes.c_ulong


class XFixesCursorImage(ctypes.Structure):
    """
    ctypes layout of the ``XFixesCursorImage`` C structure.

    Reference declaration (see /usr/include/X11/extensions/Xfixes.h):

    typedef struct {
        short           x, y;
        unsigned short  width, height;
        unsigned short  xhot, yhot;
        unsigned long   cursor_serial;
        unsigned long   *pixels;
    if XFIXES_MAJOR >= 2
        Atom            atom;   /* Version >= 2 only */
        const char      *name;  /* Version >= 2 only */
    endif
    } XFixesCursorImage;

    The field order below follows that declaration.
    """
    _fields_ = [
        ('x', ctypes.c_short),
        ('y', ctypes.c_short),
        ('width', ctypes.c_ushort),
        ('height', ctypes.c_ushort),
        ('xhot', ctypes.c_ushort),
        ('yhot', ctypes.c_ushort),
        ('cursor_serial', ctypes.c_ulong),
        ('pixels', PIXEL_DATA_PTR),
        ('atom', Atom),
        ('name', ctypes.c_char_p),
    ]


class Display(ctypes.Structure):
    """Field-less placeholder type, used to build ``ctypes.POINTER(Display)``."""


class Xcursor:
    """
    Reads the current X11 cursor image through XFixesGetCursorImage.

    The Xfixes and X11 shared libraries are located with
    ``ctypes.util.find_library`` and the display is opened with XOpenDisplay.
    """
    display = None

    def __init__(self, display=None):
        """
        Load the libraries and open the display.

        Args:
            display: Display name as ``bytes`` or ``str``. When empty or None
                the ``DISPLAY`` environment variable is used.

        Raises:
            Exception: ``DISPLAY`` is not set, a library cannot be found, or
                the display cannot be opened.
        """
        if not display:
            try:
                display = os.environ["DISPLAY"].encode("utf-8")
            except KeyError:
                raise Exception("$DISPLAY not set.")
        elif isinstance(display, str):
            # XOpenDisplay is declared with a c_char_p argument (bytes only)
            display = display.encode("utf-8")

        # XFixeslib = ctypes.CDLL('libXfixes.so')
        XFixes = ctypes.util.find_library("Xfixes")
        if not XFixes:
            raise Exception("No XFixes library found.")
        self.XFixeslib = ctypes.cdll.LoadLibrary(XFixes)

        # xlib = ctypes.CDLL('libX11.so.6')
        x11 = ctypes.util.find_library("X11")
        if not x11:
            raise Exception("No X11 library found.")
        self.xlib = ctypes.cdll.LoadLibrary(x11)

        # Define ctypes' version of XFixesGetCursorImage function
        XFixesGetCursorImage = self.XFixeslib.XFixesGetCursorImage
        XFixesGetCursorImage.restype = ctypes.POINTER(XFixesCursorImage)
        XFixesGetCursorImage.argtypes = [ctypes.POINTER(Display)]
        self.XFixesGetCursorImage = XFixesGetCursorImage

        XOpenDisplay = self.xlib.XOpenDisplay
        XOpenDisplay.restype = ctypes.POINTER(Display)
        XOpenDisplay.argtypes = [ctypes.c_char_p]

        # XFree releases the image returned by XFixesGetCursorImage
        XFree = self.xlib.XFree
        XFree.restype = ctypes.c_int
        XFree.argtypes = [ctypes.c_void_p]
        self.XFree = XFree

        if not self.display:
            self.display = XOpenDisplay(display)  # (display) or (None)
            if not self.display:
                # A NULL display would otherwise be passed on to XFixes later
                raise Exception("Cannot open X display.")

    def _get_cursor_image_pointer(self):
        """Call XFixesGetCursorImage and return the (non-NULL) result pointer."""
        cursor_data = self.XFixesGetCursorImage(self.display)

        if not (cursor_data and cursor_data[0]):
            raise Exception("Cannot read XFixesGetCursorImage()")

        return cursor_data

    def argbdata_to_pixdata(self, data, len):
        """
        Repack ``len`` ARGB pixel words from ``data`` into R, G, B, A bytes.

        Args:
            data: Indexable sequence of integers; only the low 32 bits of
                each item are used.
            len: Number of pixels to convert.

        Returns:
            ``array.array('b')`` holding ``4 * len`` bytes, or None when
            ``data`` is None or ``len`` is below 1.
        """
        if data is None or len < 1:
            return None

        # Create byte array
        b = array.array('b', bytes(4 * len))

        for i in range(len):
            argb = data[i] & 0xffffffff
            struct.pack_into(
                "=BBBB", b, 4 * i,
                (argb >> 16) & 0xff,  # R
                (argb >> 8) & 0xff,   # G
                argb & 0xff,          # B
                (argb >> 24) & 0xff,  # A
            )

        return b

    def getCursorImageData(self):
        """
        Return the XFixesCursorImage structure of the current cursor.

        The structure refers to memory returned by XFixesGetCursorImage, which
        this method does not free (the caller may still read ``pixels``).

        Raises:
            Exception: XFixesGetCursorImage returned nothing.
        """
        # Note: the result is a pointer, take element [0]
        return self._get_cursor_image_pointer()[0]

    def getCursorImageArray(self):
        """
        Return the cursor as a ``(height, width, 4)`` uint8 array in R, G, B, A
        byte order, converted pixel by pixel.
        """
        cursor_ptr = self._get_cursor_image_pointer()
        try:
            data = cursor_ptr[0]
            # x, y = data.x, data.y
            height, width = data.height, data.width
            bytearr = self.argbdata_to_pixdata(data.pixels, height * width)
        finally:
            # The pixels were copied above, so the X image can be released
            self.XFree(cursor_ptr)

        if bytearr is None:
            # Cursor without pixels: nothing to convert
            return np.zeros((height, width, 4), dtype=np.uint8)

        return np.frombuffer(bytearr, dtype=np.uint8).reshape(height, width, 4).copy()

    def getCursorImageArrayFast(self):
        """
        Return the cursor as a ``(height, width, 4)`` uint8 array holding the
        first four bytes of every pixel word exactly as stored in memory
        (no channel reordering is performed).
        """
        cursor_ptr = self._get_cursor_image_pointer()
        try:
            data = cursor_ptr[0]
            # x, y = data.x, data.y
            height, width = data.height, data.width

            flat_type = ctypes.c_ulong * (height * width)
            raw = bytearray(ctypes.cast(data.pixels, ctypes.POINTER(flat_type))[0])
        finally:
            # The pixels were copied above, so the X image can be released
            self.XFree(cursor_ptr)

        # Each pixel occupies one unsigned long; its size is platform dependent
        word_size = ctypes.sizeof(ctypes.c_ulong)
        imgarray = np.array(raw).reshape(height, width, word_size)
        return imgarray[:, :, :4].copy()

    def saveImage(self, imgarray, text):
        """Save ``imgarray`` as an image file at path ``text`` using PIL."""
        from PIL import Image
        img = Image.fromarray(imgarray)
        img.save(text)


if __name__ == "__main__":
    # Manual check: write the current cursor image to cursor_image.png
    xcursor = Xcursor()
    xcursor.saveImage(xcursor.getCursorImageArrayFast(), 'cursor_image.png')


================================================
FILE: anytool/local_server/platform_adapters/windows_adapter.py
================================================
import os
import ctypes
import subprocess
from typing import Dict, Any, Optional, List
from anytool.utils.logging import Logger
from PIL import Image, ImageGrab

try:
    from pywinauto import Desktop
    import win32ui
    import win32gui
    import win32con
    import pygetwindow as gw
    WINDOWS_LIBS_AVAILABLE = True
except ImportError:
    WINDOWS_LIBS_AVAILABLE = False

logger = Logger.get_logger(__name__)


class WindowsAdapter:
    """Windows platform-specific functionality adapter"""
    
    def __init__(self):
        """Store whether the optional Windows libraries imported; warn if not."""
        libs_ok = WINDOWS_LIBS_AVAILABLE
        if not libs_ok:
            logger.warning("Windows libraries are not fully installed, some features may not be available")
        self.available = libs_ok
    
    def capture_screenshot_with_cursor(self, output_path: str) -> bool:
        """
        Grab the screen with ImageGrab, paste the cursor on top when possible,
        and save the result.

        A failure while drawing the cursor is only logged; the screenshot is
        still saved without it.

        Args:
            output_path: File path the image is saved to.

        Returns:
            True when the image was saved, False if grabbing or saving raised.
        """
        try:
            screenshot = ImageGrab.grab(bbox=None, include_layered_windows=True)

            # Cursor overlay is best effort
            try:
                if WINDOWS_LIBS_AVAILABLE:
                    cursor_img, hotspot = self._get_cursor()
                    if not cursor_img:
                        logger.info("Windows screenshot successfully (without cursor)")
                    else:
                        # Scale the cursor position by the device scale factor
                        scale = ctypes.windll.shcore.GetScaleFactorForDevice(0) / 100
                        cursor_x, cursor_y = win32gui.GetCursorPos()[0:2]
                        paste_at = (
                            round(cursor_x * scale - hotspot[0]),
                            round(cursor_y * scale - hotspot[1])
                        )
                        # The cursor image doubles as its own paste mask
                        screenshot.paste(cursor_img, paste_at, cursor_img)
                        logger.info("Windows screenshot successfully (with cursor)")
            except Exception as e:
                logger.warning(f"Cannot add cursor to screenshot: {e}")
                logger.info("Windows screenshot successfully (without cursor)")

            screenshot.save(output_path)
            return True

        except Exception as e:
            logger.error(f"Windows screenshot failed: {e}")
            return False
    
    def _get_cursor(self) -> tuple:
        """
        Render the current cursor into a 36x36 RGBA image and read its hotspot.

        Pure black pixels of the rendered bitmap are made fully transparent.

        Returns:
            ``(cursor_image, (hotspot_x, hotspot_y))``, or ``(None, (0, 0))``
            when any step raises.
        """
        try:
            hcursor = win32gui.GetCursorInfo()[1]

            # Query the hotspot while the handle is still valid. This used to
            # be done after DestroyIcon(hcursor), i.e. on a handle that had
            # already been passed to the destroy call.
            hotspot = win32gui.GetIconInfo(hcursor)[1:3]

            # NOTE(review): the DC obtained with GetDC(0) is not released in
            # this method — confirm whether a ReleaseDC call is needed.
            hdc = win32ui.CreateDCFromHandle(win32gui.GetDC(0))
            hbmp = win32ui.CreateBitmap()
            hbmp.CreateCompatibleBitmap(hdc, 36, 36)
            hdc_compatible = hdc.CreateCompatibleDC()
            hdc_compatible.SelectObject(hbmp)
            hdc_compatible.DrawIcon((0, 0), hcursor)

            bmpinfo = hbmp.GetInfo()
            bmpstr = hbmp.GetBitmapBits(True)
            cursor = Image.frombuffer(
                'RGB',
                (bmpinfo['bmWidth'], bmpinfo['bmHeight']),
                bmpstr, 'raw', 'BGRX', 0, 1
            ).convert("RGBA")

            # NOTE(review): hcursor comes from GetCursorInfo rather than from a
            # copy made here — confirm that destroying it is intended.
            win32gui.DestroyIcon(hcursor)
            win32gui.DeleteObject(hbmp.GetHandle())
            hdc_compatible.DeleteDC()

            # Make black pixels transparent
            pixdata = cursor.load()
            width, height = cursor.size
            for y in range(height):
                for x in range(width):
                    if pixdata[x, y] == (0, 0, 0, 255):
                        pixdata[x, y] = (0, 0, 0, 0)

            return (cursor, hotspot)

        except Exception as e:
            logger.debug(f"Failed to get cursor image: {e}")
            return (None, (0, 0))
    
    def activate_window(self, window_name: str, strict: bool = False) -> Dict[str, Any]:
        """
        Bring a window to the foreground, looked up by title with pygetwindow.

        Args:
            window_name: Title passed to ``getWindowsWithTitle``.
            strict: When True, only a window whose title equals
                ``window_name`` exactly is accepted; otherwise the first
                window returned by the lookup is used.

        Returns:
            ``{'status': 'success', ...}`` after activation, or
            ``{'status': 'error', 'message': ...}`` when the libraries are
            missing, no window qualifies, or activation raises.
        """
        if not WINDOWS_LIBS_AVAILABLE:
            return {'status': 'error', 'message': 'Windows libraries not available'}

        try:
            candidates = gw.getWindowsWithTitle(window_name)

            if not candidates:
                logger.warning(f"Window not found: {window_name}")
                return {'status': 'error', 'message': f'Window {window_name} not found'}

            if strict:
                # First candidate whose title is an exact match, if any
                target = next(
                    (candidate for candidate in candidates if candidate.title == window_name),
                    None
                )
                if not target:
                    return {'status': 'error', 'message': f'Window {window_name} not found (strict mode)'}
            else:
                target = candidates[0]

            target.activate()
            logger.info(f"Windows window activated successfully: {window_name}")
            return {'status': 'success', 'message': 'Window activated'}

        except Exception as e:
            logger.error(f"Windows window activation failed: {e}")
            return {'status': 'error', 'message': str(e)}
    
    def close_window(self, window_name: str, strict: bool = False) -> Dict[str, Any]:
        """
        Close a window, looked up by title with pygetwindow.

        Args:
            window_name: Title passed to ``getWindowsWithTitle``.
            strict: When True, only a window whose title equals
                ``window_name`` exactly is accepted; otherwise the first
                window returned by the lookup is used.

        Returns:
            ``{'status': 'success', ...}`` after closing, or
            ``{'status': 'error', 'message': ...}`` when the libraries are
            missing, no window qualifies, or closing raises.
        """
        if not WINDOWS_LIBS_AVAILABLE:
            return {'status': 'error', 'message': 'Windows libraries not available'}

        try:
            matches = gw.getWindowsWithTitle(window_name)

            if not matches:
                logger.warning(f"Window not found: {window_name}")
                return {'status': 'error', 'message': f'Window {window_name} not found'}

            if not strict:
                chosen = matches[0]
            else:
                # Keep only an exact title match
                exact = [match for match in matches if match.title == window_name]
                chosen = exact[0] if exact else None
                if not chosen:
                    return {'status': 'error', 'message': f'Window {window_name} not found (strict mode)'}

            chosen.close()
            logger.info(f"Windows window closed successfully: {window_name}")
            return {'status': 'success', 'message': 'Window closed'}

        except Exception as e:
            logger.error(f"Windows window close failed: {e}")
            return {'status': 'error', 'message': str(e)}
    
    def get_accessibility_tree(self, max_depth: int = 10, max_width: int = 50) -> Dict[str, Any]:
        """
        Build the accessibility tree starting at the pywinauto UIA desktop.

        Args:
            max_depth: Deepest level that is serialized.
            max_width: Maximum number of children serialized per element.

        Returns:
            ``{'tree': ..., 'platform': 'Windows'}`` on success, otherwise
            ``{'error': ...}`` (libraries missing or an exception occurred).
        """
        if not WINDOWS_LIBS_AVAILABLE:
            return {'error': 'Windows accessibility libraries not available'}

        try:
            # Root of the walk: the desktop seen through the "uia" backend
            root = Desktop(backend="uia")

            serialized = self._serialize_uia_element(
                root,
                depth=0,
                max_depth=max_depth,
                max_width=max_width,
                visited=set()
            )

            return {'tree': serialized, 'platform': 'Windows'}

        except Exception as e:
            logger.error(f"Windows get accessibility tree failed: {e}")
            return {'error': str(e)}
    
    def _serialize_uia_element(
        self, 
        element, 
        depth: int = 0, 
        max_depth: int = 10,
        max_width: int = 50,
        visited: set = None
    ) -> Optional[Dict[str, Any]]:
        """
        Serialize Windows UIA element to dictionary
        
        Args:
            element: UIA element
            depth: Current depth
            max_depth: Maximum depth
            max_width: Maximum width
            visited: Set of visited elements
            
        Returns:
            Serialized dictionary
        """
        if visited is None:
            visited = set()
        
        if depth > max_depth or element in visited:
            return None
        
        visited.add(element)
        
        try:
            result = {
                'depth': depth
            }
            
            # Get basic attributes
            try:
                result['class_name'] = element.class_name()
            except:
                result['class_name'] = 'unknown'
            
            try:
                result['name'] = element.window_text()
            except:
                result['name'] = ''
            
            # Get states
            states = {}
            state_methods = [
                'is_enabled', 'is_visible', 'is_minimized', 'is_maximized',
                'is_focused', 'is_checked', 'is_selected'
            ]
            
            for method_name in state_methods:
                if hasattr(element, method_name):
                    try:
                        method = getattr(element, method_name)
                        states[method_name] = method()
                    except:
                        pass
            
            if states:
                result['states'] = states
            
            # Get position and size
            try:
                rectangle = element.rectangle()
                result['position'] = {
                    'left': rectangle.left,
                    'top': rectangle.top
                }
                result['size'] = {
                    'width': rectangle.width(),
                    'height': rectangle.height()
                }
            except:
                pass
            
            # Recursively get child elements
            result['children'] = []
            try:
                children = element.children()
                for i, child in enumerate(children[:max_width]):
                    try:
                        child_data = self._serialize_uia_element(
                            child, 
                            depth + 1, 
                            max_depth,
                            max_width,
                            visited
                        )
                        if child_data:
                            result['children'].append(child_data)
                    except Exception as e:
                        logger.debug(f"Cannot serialize child element {i}: {e}")
                        continue
            except Exception as e:
                logger.debug(f"Cannot get child elements: {e}")
            
            return result
            
        except Exception as e:
            logger.debug(f"Failed to serialize element (depth={depth}): {e}")
            return None
    
    def list_windows(self) -> List[Dict[str, Any]]:
        """
        Describe every titled window reported by pygetwindow.

        Returns:
            One dictionary per window with ``title``, ``left``, ``top``,
            ``width``, ``height``, ``visible`` and ``active``. Windows with an
            empty title are left out. An empty list is returned when the
            Windows libraries are missing or the query raises.
        """
        if not WINDOWS_LIBS_AVAILABLE:
            return []

        try:
            described = []
            for win in gw.getAllWindows():
                if not win.title:
                    # Untitled windows are not reported
                    continue
                described.append({
                    'title': win.title,
                    'left': win.left,
                    'top': win.top,
                    'width': win.width,
                    'height': win.height,
                    'visible': win.visible,
                    'active': win.isActive
                })
            return described

        except Exception as e:
            logger.error(f"List windows failed: {e}")
            return []
    
    def set_wallpaper(self, image_path: str) -> Dict[str, Any]:
        """
        Set desktop wallpaper

        Args:
            image_path: Image path ('~' is expanded and the path is made absolute)

        Returns:
            Result dictionary with 'status' ('success' or 'error') and 'message'
        """
        try:
            image_path = os.path.abspath(os.path.expanduser(image_path))

            if not os.path.exists(image_path):
                return {'status': 'error', 'message': f'Image not found: {image_path}'}

            # Use Windows API to set wallpaper
            SPI_SETDESKWALLPAPER = 20
            SPIF_UPDATEINIFILE = 0x01
            SPIF_SENDCHANGE = 0x02
            applied = ctypes.windll.user32.SystemParametersInfoW(
                SPI_SETDESKWALLPAPER,
                0,
                image_path,
                SPIF_UPDATEINIFILE | SPIF_SENDCHANGE
            )

            # SystemParametersInfoW returns zero on failure; without this
            # check a rejected image would still be reported as success.
            if not applied:
                message = f'SystemParametersInfoW could not set wallpaper: {image_path}'
                logger.error(f"Windows set wallpaper failed: {message}")
                return {'status': 'error', 'message': message}

            logger.info(f"Windows wallpaper set successfully: {image_path}")
            return {'status': 'success', 'message': 'Wallpaper set successfully'}

        except Exception as e:
            logger.error(f"Windows set wallpaper failed: {e}")
            return {'status': 'error', 'message': str(e)}
    
    def get_system_info(self) -> Dict[str, Any]:
        """
        Collect basic details about the Windows installation.

        Returns:
            Dict with 'platform', 'version', 'release', 'edition' and
            'available'. If anything raises, a dict holding only 'platform'
            and 'error' is returned instead.
        """
        try:
            import platform as plat

            info = {'platform': 'Windows'}
            info['version'] = plat.version()
            info['release'] = plat.release()
            # win32_edition is looked up defensively in case the platform
            # module in use does not provide it
            if hasattr(plat, 'win32_edition'):
                info['edition'] = plat.win32_edition()
            else:
                info['edition'] = 'Unknown'
            info['available'] = self.available
            return info

        except Exception as e:
            logger.error(f"Failed to get system information: {e}")
            return {'platform': 'Windows', 'error': str(e)}
    
    def start_recording(self, output_path: str) -> Dict[str, Any]:
        """
        Start a full-screen recording with ffmpeg (gdigrab input).

        Args:
            output_path: File the video is written to (overwritten if present)

        Returns:
            On success: {'status': 'success', 'message': ..., 'process': Popen}.
            On failure: {'status': 'error', 'message': ...}.
        """
        try:
            # Probe for ffmpeg before building the capture command
            try:
                subprocess.run(['ffmpeg', '-version'],
                               capture_output=True,
                               check=True,
                               timeout=5,
                               creationflags=subprocess.CREATE_NO_WINDOW)
            except (subprocess.CalledProcessError, FileNotFoundError):
                return {
                    'status': 'error',
                    'message': 'ffmpeg not installed. Download from: https://ffmpeg.org/download.html'
                }
            except subprocess.TimeoutExpired:
                return {
                    'status': 'error',
                    'message': 'ffmpeg did not respond to the version check within 5 seconds'
                }

            # Reuse the adapter's own screen-size query (it falls back to
            # 1920x1080 when the metrics cannot be read)
            screen = self.get_screen_size()
            width, height = screen['width'], screen['height']

            command = [
                'ffmpeg',
                '-y',
                # stderr is piped but only read if ffmpeg exits early; keep
                # ffmpeg from writing periodic progress output that would
                # fill the pipe and stall a long recording.
                '-nostats',
                '-loglevel', 'error',
                '-f', 'gdigrab',
                '-draw_mouse', '1',
                '-framerate', '30',
                '-video_size', f'{width}x{height}',
                '-i', 'desktop',
                '-c:v', 'libx264',
                '-preset', 'ultrafast',
                '-pix_fmt', 'yuv420p',
                '-r', '30',
                output_path
            ]

            process = subprocess.Popen(
                command,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.PIPE,
                text=True,
                creationflags=subprocess.CREATE_NO_WINDOW
            )

            # Give ffmpeg a moment so an immediate failure can be detected
            import time
            time.sleep(1)

            if process.poll() is not None:
                error_output = process.stderr.read() if process.stderr else "Unknown error"
                return {
                    'status': 'error',
                    'message': f'Failed to start recording: {error_output}'
                }

            logger.info(f"Windows recording started: {output_path}")
            return {
                'status': 'success',
                'message': 'Recording started',
                'process': process
            }

        except Exception as e:
            logger.error(f"Windows start recording failed: {e}")
            return {
                'status': 'error',
                'message': str(e)
            }
    
    def stop_recording(self, process) -> Dict[str, Any]:
        """
        Stop a recording started by start_recording.

        Args:
            process: The Popen object returned under 'process' by start_recording

        Returns:
            {'status': 'success'|'error', 'message': ...}
        """
        try:
            if not process or process.poll() is not None:
                return {
                    'status': 'error',
                    'message': 'No recording in progress'
                }

            import signal
            try:
                process.send_signal(signal.CTRL_C_EVENT)
            except Exception:
                # Signal unavailable or not deliverable: fall back to terminate
                process.terminate()

            # communicate() drains the piped stderr while waiting. A plain
            # wait() can block forever if the child is stuck writing to a
            # full pipe.
            try:
                process.communicate(timeout=15)
            except subprocess.TimeoutExpired:
                logger.warning("ffmpeg did not respond, killing process")
                process.kill()
                process.communicate()

            logger.info("Windows recording stopped successfully")
            return {
                'status': 'success',
                'message': 'Recording stopped'
            }

        except Exception as e:
            logger.error(f"Windows stop recording failed: {e}")
            return {
                'status': 'error',
                'message': str(e)
            }
    
    def get_running_applications(self) -> List[Dict[str, str]]:
        """
        Enumerate running processes, one entry per distinct process name.

        Returns:
            List of dicts with 'name', 'pid' and 'path'. Empty when the
            Windows libraries or psutil are unavailable, or on failure.
        """
        if not WINDOWS_LIBS_AVAILABLE:
            return []

        try:
            import psutil

            ignored_names = ['System', 'Registry', 'svchost.exe', 'csrss.exe']
            applications = []
            recorded = set()

            for process in psutil.process_iter(['pid', 'name', 'exe']):
                try:
                    details = process.info
                    proc_name = details['name']
                    exe_path = details['exe']

                    # Drop entries without an executable path, the listed
                    # system processes, and names that were already recorded
                    if not exe_path or proc_name in ignored_names or proc_name in recorded:
                        continue

                    recorded.add(proc_name)
                    applications.append({
                        'name': proc_name,
                        'pid': details['pid'],
                        'path': exe_path or ''
                    })

                except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
                    pass

            return applications

        except ImportError:
            logger.warning("psutil not installed, cannot get running applications")
            return []
        except Exception as e:
            logger.error(f"Failed to get running applications list: {e}")
            return []
    
    def get_screen_size(self) -> Dict[str, int]:
        """
        Query the screen dimensions through user32.GetSystemMetrics.

        Returns:
            {'width': ..., 'height': ...}; 1920x1080 is returned when the
            query raises.
        """
        try:
            user32 = ctypes.windll.user32
            # Index 0 is SM_CXSCREEN, index 1 is SM_CYSCREEN
            dimensions = {'width': user32.GetSystemMetrics(0)}
            dimensions['height'] = user32.GetSystemMetrics(1)
            return dimensions
        except Exception as e:
            logger.error(f"Failed to get screen size: {e}")
            # Default value
            return {'width': 1920, 'height': 1080}
    
    def get_terminal_output(self) -> Optional[str]:
        """
        Best-effort substitute for terminal output on Windows.

        Live terminal content is not read. Two command-history sources are
        tried in order: the PowerShell PSReadLine history file, then the
        output of `doskey /history`.

        Returns:
            A labelled history string from the first source that yields
            text, or None when neither does.
        """
        try:
            # Source 1: PowerShell history file (last 50 lines)
            try:
                ps_history_file = os.path.expanduser(
                    '~\\AppData\\Roaming\\Microsoft\\Windows\\PowerShell\\PSReadLine\\ConsoleHost_history.txt'
                )
                if os.path.exists(ps_history_file):
                    with open(ps_history_file, 'r', encoding='utf-8', errors='ignore') as handle:
                        tail = ''.join(handle.readlines()[-50:])
                        if tail:
                            return f"PowerShell History (last 50 commands):\n{tail}"
            except Exception as e:
                logger.debug(f"Cannot read PowerShell history: {e}")

            # Source 2: Command Prompt history via doskey
            try:
                doskey = subprocess.run(
                    ['doskey', '/history'],
                    capture_output=True,
                    text=True,
                    timeout=2,
                    creationflags=subprocess.CREATE_NO_WINDOW
                )
                if doskey.returncode == 0 and doskey.stdout:
                    return f"Command Prompt History:\n{doskey.stdout}"
            except Exception as e:
                logger.debug(f"Cannot get Command Prompt history: {e}")

            logger.warning("Windows terminal output is limited - only command history available")
            return None

        except Exception as e:
            logger.error(f"Failed to get terminal output: {e}")
            return None


================================================
FILE: anytool/local_server/requirements.txt
================================================
# Local server dependencies (cross-platform)
flask>=3.1.0
pyautogui>=0.9.54
pydantic>=2.12.0
requests>=2.32.0

# # macOS-specific dependencies (local server)
# pyobjc-core>=12.0; sys_platform == 'darwin'
# pyobjc-framework-cocoa>=12.0; sys_platform == 'darwin'
# pyobjc-framework-quartz>=12.0; sys_platform == 'darwin'
# atomacos>=3.2.0; sys_platform == 'darwin'

# # Linux-specific dependencies (local server)
# python-xlib>=0.33; sys_platform == 'linux'
# pyatspi>=2.38.0; sys_platform == 'linux'
# numpy>=1.24.0; sys_platform == 'linux'

# # Windows-specific dependencies (local server)
# pywinauto>=0.6.8; sys_platform == 'win32'
# pywin32>=306; sys_platform == 'win32'
# PyGetWindow>=0.0.9; sys_platform == 'win32'

================================================
FILE: anytool/local_server/run.sh
================================================
#!/bin/bash
# Start the AnyTool local server.
# Resolves the project root from this script's location, installs the
# requirements if flask cannot be imported, then runs the server module
# from the project root.
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
PROJECT_ROOT="$( cd "$SCRIPT_DIR/../.." && pwd )"

# Check Python
if ! command -v python3 &> /dev/null; then
    echo "Error: python3 not installed" >&2
    exit 1
fi

# Check if dependencies are installed
if ! python3 -c "import flask" &> /dev/null; then
    echo "Installing dependencies..."
    # Use the pip of the same interpreter that will run the server;
    # a bare pip3 may belong to a different Python installation.
    python3 -m pip install -q -r "$SCRIPT_DIR/requirements.txt" || {
        echo "Failed to install dependencies" >&2
        exit 1
    }
fi

# Set PYTHONPATH and start server.
# Only append the previous value when it is non-empty: a trailing ':' would
# add the current directory to the import path.
export PYTHONPATH="$PROJECT_ROOT${PYTHONPATH:+:$PYTHONPATH}"
cd "$PROJECT_ROOT" || {
    echo "Error: cannot change directory to $PROJECT_ROOT" >&2
    exit 1
}
exec python3 -m anytool.local_server.main

================================================
FILE: anytool/local_server/utils/__init__.py
================================================
from .accessibility import AccessibilityHelper
from .screenshot import ScreenshotHelper

__all__ = ["AccessibilityHelper", "ScreenshotHelper"]

================================================
FILE: anytool/local_server/utils/accessibility.py
================================================
import platform
from anytool.utils.logging import Logger
from typing import Dict, Any, Optional

logger = Logger.get_logger(__name__)

platform_name = platform.system()


class AccessibilityHelper:
    """
    Cross-platform helper around the platform adapters' accessibility trees.

    On construction the adapter matching ``platform.system()`` is imported and
    instantiated (macOS, Linux or Windows). The remaining methods work on the
    dictionary returned by ``get_tree``: they expect the root node under the
    'tree' key and child nodes under each node's 'children' key.
    """

    def __init__(self):
        self.platform = platform_name
        self.adapter = None

        try:
            if platform_name == "Darwin":
                from ..platform_adapters.macos_adapter import MacOSAdapter
                self.adapter = MacOSAdapter()
            elif platform_name == "Linux":
                from ..platform_adapters.linux_adapter import LinuxAdapter
                self.adapter = LinuxAdapter()
            elif platform_name == "Windows":
                from ..platform_adapters.windows_adapter import WindowsAdapter
                self.adapter = WindowsAdapter()
        except ImportError as e:
            # Missing platform libraries leave the helper without an adapter
            logger.warning(f"Failed to import platform adapter: {e}")

    def get_tree(self, max_depth: int = 10) -> Dict[str, Any]:
        """
        Fetch the accessibility tree from the platform adapter.

        Args:
            max_depth: Depth limit forwarded to the adapter

        Returns:
            The adapter's result, or a dict with 'error' and 'platform' when
            no adapter is loaded or the adapter raises.
        """
        if not self.adapter:
            return {
                'error': f'No adapter available for {self.platform}',
                'platform': self.platform
            }

        try:
            return self.adapter.get_accessibility_tree(max_depth=max_depth)
        except Exception as e:
            logger.error(f"Failed to get accessibility tree: {e}")
            return {
                'error': str(e),
                'platform': self.platform
            }

    def is_available(self) -> bool:
        """Return True when an adapter is loaded and reports itself available."""
        return self.adapter is not None and hasattr(self.adapter, 'available') and self.adapter.available

    def find_element_by_name(self, tree: Dict[str, Any], name: str) -> Optional[Dict[str, Any]]:
        """Return the first node (depth-first) whose 'name' equals ``name``, or None."""
        if not tree or 'tree' not in tree:
            return None

        return self._search_tree(tree['tree'], 'name', name)

    def find_element_by_role(self, tree: Dict[str, Any], role: str) -> Optional[Dict[str, Any]]:
        """Return the first node (depth-first) whose 'role' equals ``role``, or None."""
        if not tree or 'tree' not in tree:
            return None

        return self._search_tree(tree['tree'], 'role', role)

    def _search_tree(self, node: Dict[str, Any], key: str, value: str) -> Optional[Dict[str, Any]]:
        """Depth-first search for the first node where ``node[key] == value``."""
        if not node:
            return None

        # Check current node
        if key in node and node[key] == value:
            return node

        # Recursively search child nodes
        if 'children' in node:
            for child in node['children']:
                result = self._search_tree(child, key, value)
                if result:
                    return result

        return None

    def flatten_tree(self, tree: Dict[str, Any]) -> list:
        """
        Flatten the tree into a list of nodes in depth-first (pre-order) order.

        Each entry is a shallow copy of the node without its 'children' key.
        Returns an empty list when ``tree`` is empty or has no 'tree' key.
        """
        if not tree or 'tree' not in tree:
            return []

        result = []
        self._flatten_node(tree['tree'], result)
        return result

    def _flatten_node(self, node: Dict[str, Any], result: list):
        """Recursively flatten nodes"""
        if not node:
            return

        # Add current node (remove children)
        node_copy = {k: v for k, v in node.items() if k != 'children'}
        result.append(node_copy)

        # Recursively process child nodes
        if 'children' in node:
            for child in node['children']:
                self._flatten_node(child, result)

    def get_visible_elements(self, tree: Dict[str, Any]) -> list:
        """
        Return the flattened nodes considered visible on the current platform.

        The check depends on ``self.platform``: Linux looks for 'showing' in
        the node's 'states', macOS uses the 'enabled' flag, and Windows reads
        'is_visible' from the 'states' mapping.
        """
        all_elements = self.flatten_tree(tree)

        visible = []
        for element in all_elements:
            if self.platform == "Linux":
                if 'states' in element and 'showing' in element.get('states', []):
                    visible.append(element)
            elif self.platform == "Darwin":
                if element.get('enabled', False):
                    visible.append(element)
            elif self.platform == "Windows":
                if element.get('states', {}).get('is_visible', False):
                    visible.append(element)

        return visible

    def get_clickable_elements(self, tree: Dict[str, Any]) -> list:
        """
        Return the flattened nodes whose role contains a clickable role name.

        Matching is a case-insensitive substring test, so a role such as
        'AXMenuItem' or 'Button' matches.
        """
        all_elements = self.flatten_tree(tree)

        # Every entry must be lower-case because the role is lower-cased
        # before comparison; mixed-case entries could never match.
        clickable_roles = [
            'button', 'push-button', 'toggle-button', 'radio-button',
            'link', 'menu-item', 'axbutton', 'axlink', 'axmenuitem'
        ]

        clickable = []
        for element in all_elements:
            # A node may carry an explicit None role; treat it as no role
            role = str(element.get('role') or '').lower()
            if any(cr in role for cr in clickable_roles):
                clickable.append(element)

        return clickable

    def get_statistics(self, tree: Dict[str, Any]) -> Dict[str, Any]:
        """
        Summarise a tree.

        Returns:
            Dict with 'total_elements', 'visible_elements',
            'clickable_elements', 'roles' (role -> count, nodes without a
            role are counted under 'unknown') and 'platform'.
        """
        all_elements = self.flatten_tree(tree)

        # Count roles
        roles = {}
        for element in all_elements:
            role = element.get('role', 'unknown')
            roles[role] = roles.get(role, 0) + 1

        return {
            'total_elements': len(all_elements),
            'visible_elements': len(self.get_visible_elements(tree)),
            'clickable_elements': len(self.get_clickable_elements(tree)),
            'roles': roles,
            'platform': self.platform
        }


================================================
FILE: anytool/local_server/utils/screenshot.py
================================================
import platform
import os
import logging
from typing import Optional, Tuple
from PIL import Image
import pyautogui

logger = logging.getLogger(__name__)

platform_name = platform.system()


class ScreenshotHelper:
    """
    Screenshot utilities built on pyautogui, Pillow and the platform adapters.

    On construction the adapter matching ``platform.system()`` is imported and
    instantiated; it is used for screenshots that include the mouse cursor.
    Every method reports failure through its return value and logs the error
    instead of raising.
    """

    def __init__(self):
        self.platform = platform_name
        self.adapter = None

        try:
            if platform_name == "Darwin":
                from ..platform_adapters.macos_adapter import MacOSAdapter
                self.adapter = MacOSAdapter()
            elif platform_name == "Linux":
                from ..platform_adapters.linux_adapter import LinuxAdapter
                self.adapter = LinuxAdapter()
            elif platform_name == "Windows":
                from ..platform_adapters.windows_adapter import WindowsAdapter
                self.adapter = WindowsAdapter()
        except ImportError as e:
            # Missing platform libraries leave the helper without an adapter
            logger.warning(f"Failed to import platform adapter: {e}")

    @staticmethod
    def _ensure_parent_dir(output_path: str) -> None:
        """Create the parent directory of ``output_path`` if it names one."""
        parent = os.path.dirname(output_path)
        # A bare filename has an empty dirname, and os.makedirs('') raises
        if parent:
            os.makedirs(parent, exist_ok=True)

    def capture(self, output_path: str, with_cursor: bool = True) -> bool:
        """
        Capture the full screen to a file.

        Args:
            output_path: Output path; its parent directory is created if needed
            with_cursor: Use the platform adapter (cursor included) when one
                         is loaded; otherwise pyautogui is used

        Returns:
            Whether successful
        """
        try:
            # Ensure directory exists
            self._ensure_parent_dir(output_path)

            if with_cursor and self.adapter:
                # Use platform-specific method to capture screenshot (with cursor)
                return self.adapter.capture_screenshot_with_cursor(output_path)
            else:
                # Use pyautogui to capture screenshot (without cursor)
                screenshot = pyautogui.screenshot()
                screenshot.save(output_path)
                logger.info(f"Screenshot successfully (without cursor): {output_path}")
                return True

        except Exception as e:
            logger.error(f"Screenshot failed: {e}")
            return False

    def capture_region(
        self,
        output_path: str,
        x: int,
        y: int,
        width: int,
        height: int
    ) -> bool:
        """
        Capture specified screen region

        Args:
            output_path: Output path; its parent directory is created if needed
            x: Starting x coordinate
            y: Starting y coordinate
            width: Width
            height: Height

        Returns:
            Whether successful
        """
        try:
            self._ensure_parent_dir(output_path)

            screenshot = pyautogui.screenshot(region=(x, y, width, height))
            screenshot.save(output_path)
            logger.info(f"Region screenshot successfully: {output_path}")
            return True

        except Exception as e:
            logger.error(f"Region screenshot failed: {e}")
            return False

    def get_screen_size(self) -> Tuple[int, int]:
        """
        Get screen size

        Returns:
            (width, height); (1920, 1080) when the size cannot be read
        """
        try:
            size = pyautogui.size()
            return (size.width, size.height)
        except Exception as e:
            logger.error(f"Failed to get screen size: {e}")
            return (1920, 1080)  # Default value

    def get_cursor_position(self) -> Tuple[int, int]:
        """
        Get cursor position

        Returns:
            (x, y); (0, 0) when the position cannot be read
        """
        try:
            pos = pyautogui.position()
            return (pos.x, pos.y)
        except Exception as e:
            logger.error(f"Failed to get cursor position: {e}")
            return (0, 0)

    def capture_to_base64(self, with_cursor: bool = True) -> Optional[str]:
        """
        Capture screenshot and convert to base64

        Args:
            with_cursor: Whether to include cursor

        Returns:
            Base64 encoded image string, or None on failure
        """
        import tempfile
        import base64

        tmp_path = None
        try:
            # Create temporary file (only its name is needed; capture() writes it)
            with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
                tmp_path = tmp.name

            # Capture screenshot
            if not self.capture(tmp_path, with_cursor):
                return None

            # Read and encode
            with open(tmp_path, 'rb') as f:
                return base64.b64encode(f.read()).decode('utf-8')

        except Exception as e:
            logger.error(f"Failed to convert screenshot to base64: {e}")
            return None
        finally:
            # Always delete the temporary file, including on the error paths
            if tmp_path and os.path.exists(tmp_path):
                try:
                    os.remove(tmp_path)
                except OSError as e:
                    logger.debug(f"Cannot remove temporary screenshot {tmp_path}: {e}")

    def compare_screenshots(self, path1: str, path2: str) -> float:
        """
        Compare similarity between two screenshots

        Both images are converted to RGB; the second is resized to the first
        one's size when they differ. The score is 1 minus the per-pixel RMS
        difference divided by its maximum.

        Args:
            path1: First image path
            path2: Second image path

        Returns:
            Similarity (0-1), 1 means identical; 0.0 on failure
        """
        try:
            from PIL import ImageChops
            import math

            # Normalise the mode so difference() accepts the pair and the
            # histogram always has three 256-bin bands. The files are closed
            # once the converted copies exist.
            with Image.open(path1) as first, Image.open(path2) as second:
                img1 = first.convert('RGB')
                img2 = second.convert('RGB')

            # Ensure same size
            if img1.size != img2.size:
                # Resize to same size
                img2 = img2.resize(img1.size)

            # Calculate difference
            diff = ImageChops.difference(img1, img2)

            # The histogram is the bands' 256-bin histograms concatenated, so
            # the pixel value of bin i is i % 256. Weighting by i itself would
            # give identical images a large "difference".
            stat = diff.histogram()
            sum_of_squares = sum(
                count * ((index % 256) ** 2)
                for index, count in enumerate(stat)
            )

            # Calculate RMS
            rms = math.sqrt(sum_of_squares / float(img1.size[0] * img1.size[1]))

            # Normalize to 0-1: the per-pixel maximum is sqrt(3 * 255^2) ~= 441.67
            similarity = 1 - (rms / (255.0 * math.sqrt(3)))

            return max(0.0, min(1.0, similarity))

        except Exception as e:
            logger.error(f"Failed to compare screenshots: {e}")
            return 0.0

    def annotate_screenshot(
        self,
        input_path: str,
        output_path: str,
        annotations: list
    ) -> bool:
        """
        Add annotations to screenshot

        Args:
            input_path: Input image path
            output_path: Output image path
            annotations: List of annotations, each annotation is a dict:
                        {'type': 'rectangle'/'text', 'x': int, 'y': int,
                         'width': int, 'height': int, 'text': str, 'color': tuple}

        Returns:
            Whether successful
        """
        try:
            from PIL import ImageDraw, ImageFont

            # The context manager closes the source file once saving is done
            with Image.open(input_path) as img:
                draw = ImageDraw.Draw(img)

                for annotation in annotations:
                    ann_type = annotation.get('type', 'rectangle')
                    color = annotation.get('color', (255, 0, 0))

                    if ann_type == 'rectangle':
                        x = annotation.get('x', 0)
                        y = annotation.get('y', 0)
                        width = annotation.get('width', 100)
                        height = annotation.get('height', 100)

                        draw.rectangle(
                            [(x, y), (x + width, y + height)],
                            outline=color,
                            width=2
                        )

                    elif ann_type == 'text':
                        x = annotation.get('x', 0)
                        y = annotation.get('y', 0)
                        text = annotation.get('text', '')

                        # Fall back to Pillow's built-in font when Arial
                        # cannot be loaded
                        try:
                            font = ImageFont.truetype("Arial.ttf", 20)
                        except Exception:
                            font = ImageFont.load_default()

                        draw.text((x, y), text, fill=color, font=font)

                img.save(output_path)

            logger.info(f"Annotated screenshot successfully: {output_path}")
            return True

        except Exception as e:
            logger.error(f"Failed to annotate screenshot: {e}")
            return False

================================================
FILE: anytool/platform/__init__.py
================================================
from .system_info import SystemInfoClient, get_system_info, get_screen_size
from .recording import RecordingClient, RecordingContextManager
from .screenshot import ScreenshotClient, AutoScreenshotWrapper
from .config import get_local_server_config, get_client_base_url

__all__ = [
    # System Info
    "SystemInfoClient",
    "get_system_info",
    "get_screen_size",
            
    # Recording
    "RecordingClient",
    "RecordingContextManager",
    
    # Screenshot
    "ScreenshotClient",
    "AutoScreenshotWrapper",
    
    # Config
    "get_local_server_config",
    "get_client_base_url",
]

================================================
FILE: anytool/platform/config.py
================================================
import os
import json
from typing import Dict, Any
from anytool.utils.logging import Logger

logger = Logger.get_logger(__name__)

def get_local_server_config() -> Dict[str, Any]:
    """
    Resolve the local server's host, port and debug flag.

    Sources are tried in this order:
    1. Environment variable LOCAL_SERVER_URL, parsed into host/port
       (debug is always False for this source)
    2. The 'server' section of ../local_server/config.json, relative to this file
    3. Built-in defaults (127.0.0.1:5000, debug False)

    Returns:
        Dict with 'host', 'port' and 'debug'
    """
    # Environment override (for OSWorld/remote VM integration)
    url_from_env = os.getenv("LOCAL_SERVER_URL")
    if url_from_env:
        try:
            # Expected form: "http://localhost:5000"
            from urllib.parse import urlparse
            pieces = urlparse(url_from_env)
            host = pieces.hostname
            if not host:
                host = '127.0.0.1'
            port = pieces.port
            if not port:
                port = 5000
            logger.debug(f"Using LOCAL_SERVER_URL: {host}:{port}")
            return {'host': host, 'port': port, 'debug': False}
        except Exception as e:
            logger.warning(f"Failed to parse LOCAL_SERVER_URL: {e}")

    # Config file shipped next to the local server package
    try:
        config_path = os.path.abspath(
            os.path.join(os.path.dirname(__file__), '../local_server/config.json')
        )

        if os.path.exists(config_path):
            with open(config_path, 'r') as f:
                server_section = json.load(f).get('server', {})
                resolved = {}
                resolved['host'] = server_section.get('host', '127.0.0.1')
                resolved['port'] = server_section.get('port', 5000)
                resolved['debug'] = server_section.get('debug', False)
                return resolved
    except Exception as e:
        logger.debug(f"Failed to read local server config: {e}")

    # Nothing usable was found: fall back to defaults
    return {'host': '127.0.0.1', 'port': 5000, 'debug': False}


def get_client_base_url() -> str:
    """
    Build the base URL a client uses to reach the local server.

    Sources are tried in this order:
    1. Environment variable LOCAL_SERVER_URL, returned unchanged
    2. Host and port from get_local_server_config(), formatted as
       http://<host>:<port>

    A configured host of 0.0.0.0 is replaced by localhost, since clients
    cannot connect to the wildcard address.

    Returns:
        Base URL string
    """
    url_from_env = os.getenv("LOCAL_SERVER_URL")
    if url_from_env:
        return url_from_env

    server = get_local_server_config()
    port = server['port']
    # Convert 0.0.0.0 to localhost for client
    host = 'localhost' if server['host'] == '0.0.0.0' else server['host']

    return f"http://{host}:{port}"

================================================
FILE: anytool/platform/recording.py
================================================
import aiohttp
from typing import Optional
from anytool.utils.logging import Logger
from .config import get_client_base_url

logger = Logger.get_logger(__name__)


class RecordingClient:
    """
    Client for screen recording via HTTP API.
    
    This client directly calls the local server's recording endpoints:
    - POST /start_recording
    - POST /end_recording
    
    A single aiohttp session is created lazily and reused across calls;
    call close() (or use the client as an async context manager) to release it.
    """
    
    # Minimum total timeout (seconds) applied to /end_recording, whose response
    # carries the whole video file.
    _END_RECORDING_MIN_TIMEOUT = 60
    
    def __init__(
        self,
        base_url: Optional[str] = None,
        timeout: int = 30
    ):
        """
        Initialize recording client.
        
        Args:
            base_url: Base URL of the local server
                     (default: read from local_server/config.json or env LOCAL_SERVER_URL)
            timeout: Request timeout in seconds
        """
        # Get base_url: priority is explicit > env > config file
        if base_url is None:
            base_url = get_client_base_url()
        
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout
        self._session: Optional[aiohttp.ClientSession] = None
    
    async def _get_session(self) -> aiohttp.ClientSession:
        """Get or create aiohttp session."""
        if self._session is None or self._session.closed:
            self._session = aiohttp.ClientSession(
                timeout=aiohttp.ClientTimeout(total=self.timeout)
            )
        return self._session
    
    async def start_recording(self, auto_cleanup: bool = True) -> bool:
        """
        Start screen recording.
        
        Args:
            auto_cleanup: If True, automatically end previous recording if one is in progress
        
        Returns:
            True if the server answered HTTP 200, False on any other status
            or on any exception (which is logged, not raised).
        """
        try:
            session = await self._get_session()
            url = f"{self.base_url}/start_recording"
            
            async with session.post(url) as response:
                if response.status == 200:
                    logger.info("Screen recording started")
                    return True
                
                error_text = await response.text()
                already_running = (
                    response.status == 400
                    and auto_cleanup
                    and "already in progress" in error_text.lower()
                )
                if not already_running:
                    logger.error(f"Failed to start recording: HTTP {response.status} - {error_text}")
                    return False
                
                logger.warning("Recording already in progress, stopping previous recording...")
                
                # Try to end the previous recording
                video_bytes = await self.end_recording()
                if video_bytes:
                    logger.info("Previous recording ended successfully, retrying start...")
                else:
                    logger.warning("Failed to end previous recording, but will retry start anyway...")
                
                # Retry starting recording (without auto_cleanup to avoid infinite loop)
                return await self.start_recording(auto_cleanup=False)
        
        except Exception as e:
            logger.error(f"Failed to start recording: {e}")
            return False
    
    async def end_recording(self, dest: Optional[str] = None) -> Optional[bytes]:
        """
        End screen recording and optionally save to file.
        
        Args:
            dest: Optional file path to write the video to. Missing parent
                  directories are created.
        
        Returns:
            The recorded video bytes, or None if the request failed, the
            server returned a non-200 status, or the file could not be saved.
        """
        import os
        
        try:
            session = await self._get_session()
            url = f"{self.base_url}/end_recording"
            
            # Use longer timeout for end_recording (file may be large), but never
            # shorter than a larger timeout the caller configured on the client.
            if self.timeout is None:
                end_timeout = self._END_RECORDING_MIN_TIMEOUT
            else:
                end_timeout = max(self._END_RECORDING_MIN_TIMEOUT, self.timeout)
            
            async with session.post(url, timeout=aiohttp.ClientTimeout(total=end_timeout)) as response:
                if response.status == 200:
                    video_bytes = await response.read()
                    
                    # Save to file if destination provided
                    if dest:
                        try:
                            # Same convention as ScreenshotClient.capture_to_file:
                            # make sure the target directory exists before writing.
                            os.makedirs(os.path.dirname(dest) or '.', exist_ok=True)
                            with open(dest, "wb") as f:
                                f.write(video_bytes)
                            logger.info(f"Recording saved to: {dest}")
                        except Exception as e:
                            logger.error(f"Failed to save recording file: {e}")
                            return None
                    
                    logger.info("Screen recording ended")
                    return video_bytes
                else:
                    error_text = await response.text()
                    logger.error(f"Failed to end recording: HTTP {response.status} - {error_text}")
                    return None
        
        except Exception as e:
            logger.error(f"Failed to end recording: {e}")
            return None
    
    async def close(self):
        """Close the HTTP session."""
        if self._session and not self._session.closed:
            await self._session.close()
            # Give aiohttp time to finish cleanup callbacks
            import asyncio
            await asyncio.sleep(0.25)
            logger.debug("Recording client session closed")
    
    async def __aenter__(self):
        """Context manager entry."""
        return self
    
    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit; closes the session and never suppresses exceptions."""
        await self.close()
        return False


class RecordingContextManager:
    """
    Async context manager that records the screen for the duration of a block.
    
    On entry it starts a recording through a RecordingClient and yields that
    client; on exit it ends the recording (saving to ``output_path`` when one
    is set) and closes the client.
    """

    def __init__(
        self,
        base_url: Optional[str] = None,
        output_path: Optional[str] = None,
        timeout: Optional[int] = None
    ):
        """
        Initialize recording context manager.
        
        Args:
            base_url: Base URL of the local server (default: from config)
            output_path: Path to save recording
                         (default: config.recording.screen_recording_path, if set)
            timeout: Request timeout in seconds
                     (default: RecordingClient's own default)
        """
        # Load output_path from config if not provided
        if output_path is None:
            try:
                from anytool.config import get_config
                config = get_config()
                if config.recording.screen_recording_path:
                    output_path = config.recording.screen_recording_path
            except Exception as e:
                # Best effort: recording still works without a save path.
                logger.debug(f"Could not load recording output path from config: {e}")
        
        # Only forward timeout when the caller supplied one. Passing None through
        # would override RecordingClient's default and disable the aiohttp timeout.
        client_kwargs = {"base_url": base_url}
        if timeout is not None:
            client_kwargs["timeout"] = timeout
        
        self.client = RecordingClient(**client_kwargs)
        self.output_path = output_path
        self.recording_started = False
    
    async def __aenter__(self) -> RecordingClient:
        """Start recording on context entry and return the underlying client."""
        success = await self.client.start_recording()
        if success:
            self.recording_started = True
            logger.info("Recording context started")
        else:
            logger.warning("Failed to start recording in context")
        
        return self.client
    
    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Stop recording on context exit; never suppresses exceptions."""
        try:
            if self.recording_started:
                try:
                    await self.client.end_recording(dest=self.output_path)
                    logger.info("Recording context ended")
                except Exception as e:
                    logger.error(f"Failed to end recording in context: {e}")
        finally:
            # Release the HTTP session even if ending the recording was interrupted.
            await self.client.close()
        return False

================================================
FILE: anytool/platform/screenshot.py
================================================
"""
Screenshot client for capturing screens via HTTP API.

This module provides a screenshot client that captures screenshots by calling
the local_server's /screenshot endpoint.

Always uses HTTP API (like RecordingClient):
- Local: http://127.0.0.1:5000/screenshot
- Remote: http://remote-vm:5000/screenshot
"""
import aiohttp
from typing import Optional
from anytool.utils.logging import Logger
from .config import get_client_base_url

logger = Logger.get_logger(__name__)


class ScreenshotClient:
    """
    HTTP client for the local server's screen endpoints.

    Endpoints used:
    - GET /screenshot
    - GET /screen_size

    The aiohttp session is created on first use and reused until close().
    """

    def __init__(
        self,
        base_url: Optional[str] = None,
        timeout: int = 10
    ):
        """
        Set up the client.

        Args:
            base_url: Base URL of local_server; when omitted it is resolved
                      through get_client_base_url()
            timeout: Request timeout (seconds)
        """
        resolved_url = get_client_base_url() if base_url is None else base_url

        self.base_url = resolved_url.rstrip("/")
        self.timeout = timeout
        self._session = None

        logger.debug(f"ScreenshotClient initialized: {self.base_url}")

    async def _get_session(self) -> aiohttp.ClientSession:
        """Return the live aiohttp session, creating a new one if needed."""
        current = self._session
        if current is not None and not current.closed:
            return current

        self._session = aiohttp.ClientSession(
            timeout=aiohttp.ClientTimeout(total=self.timeout)
        )
        return self._session

    @staticmethod
    def _is_valid_image_response(content_type: str, data: Optional[bytes]) -> bool:
        """
        Decide whether a response body looks like a PNG or JPEG image.

        The body is accepted when it starts with the PNG or JPEG magic bytes,
        or, failing that, when the Content-Type header mentions image/png or
        image/jpeg.

        Args:
            content_type: HTTP Content-Type header
            data: Response data bytes

        Returns:
            True if the data is accepted as an image, False otherwise
            (including empty or non-bytes data)
        """
        if not (isinstance(data, (bytes, bytearray)) and data):
            return False

        # PNG signature, then JPEG signature
        magic_prefixes = (b"\x89PNG\r\n\x1a\n", b"\xff\xd8\xff")
        if data.startswith(magic_prefixes):
            return True

        # No recognised signature: trust the declared content type
        return bool(content_type) and (
            "image/png" in content_type or "image/jpeg" in content_type
        )

    async def capture(self) -> Optional[bytes]:
        """
        Fetch a screenshot from GET {base_url}/screenshot.

        Returns:
            Image bytes, or None when the request fails, the status is not
            200, or the body does not pass image validation
        """
        try:
            session = await self._get_session()
            url = f"{self.base_url}/screenshot"

            logger.debug(f"Requesting screenshot: {url}")

            async with session.get(url) as response:
                if response.status != 200:
                    error_text = await response.text()
                    logger.error(f"Failed to capture screenshot: HTTP {response.status} - {error_text}")
                    return None

                content_type = response.headers.get("Content-Type", "")
                image = await response.read()

                if not self._is_valid_image_response(content_type, image):
                    logger.error("Invalid screenshot format received")
                    return None

                logger.debug(f"Screenshot captured: {len(image)} bytes")
                return image

        except Exception as e:
            logger.error(f"Failed to capture screenshot: {e}")
            return None

    async def capture_to_file(self, output_path: str) -> bool:
        """
        Capture a screenshot and write it to output_path.

        Parent directories are created as needed.

        Returns:
            True when the file was written, False when capture or saving failed
        """
        try:
            image = await self.capture()
            if not image:
                return False

            import os
            parent_dir = os.path.dirname(output_path) or '.'
            os.makedirs(parent_dir, exist_ok=True)
            with open(output_path, 'wb') as out:
                out.write(image)
            logger.info(f"Screenshot saved to: {output_path}")
            return True
        except Exception as e:
            logger.error(f"Failed to save screenshot to file: {e}")
            return False

    async def get_screen_size(self) -> tuple[int, int]:
        """
        Query GET {base_url}/screen_size.

        Returns:
            (width, height); falls back to (1920, 1080) on any failure, and
            to 1920 / 1080 individually for keys missing from the response
        """
        fallback = (1920, 1080)
        try:
            session = await self._get_session()
            url = f"{self.base_url}/screen_size"

            async with session.get(url) as response:
                if response.status != 200:
                    logger.warning("Failed to get screen size, using default")
                    return fallback

                payload = await response.json()
                width = payload.get('width', 1920)
                height = payload.get('height', 1080)
                logger.debug(f"Screen size: {width}x{height}")
                return (width, height)

        except Exception as e:
            logger.error(f"Failed to get screen size: {e}")
            return fallback

    async def close(self):
        """Close the HTTP session if one is open."""
        session = self._session
        if session and not session.closed:
            await session.close()
            logger.debug("Screenshot client session closed")

    async def __aenter__(self):
        """Enter the async context; returns this client."""
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Leave the async context: close the session, do not suppress errors."""
        await self.close()
        return False


class AutoScreenshotWrapper:
    """
    Wrapper that automatically captures screenshots after backend calls.
    
    This wrapper can be used to wrap any backend tool/session and automatically
    capture screenshots after each operation. Attributes that the wrapper does
    not define itself are looked up on the wrapped tool.
    
    Usage:
        # Wrap a backend tool
        wrapped_tool = AutoScreenshotWrapper(
            tool=gui_tool,
            screenshot_client=screenshot_client,
            on_screenshot=lambda screenshot: recorder.record_step(...)
        )
        
        # Use wrapped tool normally
        result = await wrapped_tool.execute(...)
        # Screenshot is automatically captured and handled
    """
    
    def __init__(
        self,
        tool,
        screenshot_client: Optional[ScreenshotClient] = None,
        on_screenshot=None,
        enabled: bool = True
    ):
        """
        Initialize auto-screenshot wrapper.
        
        Args:
            tool: The tool/session to wrap
            screenshot_client: Screenshot client to use (created if None)
            on_screenshot: Callback function(screenshot_bytes) called after each
                           screenshot; may be a plain function or a coroutine function
            enabled: Whether auto-screenshot is enabled
        """
        self._tool = tool
        self._screenshot_client = screenshot_client or ScreenshotClient()
        self._on_screenshot = on_screenshot
        self._enabled = enabled
    
    def __getattr__(self, name):
        """Delegate attribute access to wrapped tool."""
        # __getattr__ only runs after normal lookup has failed. If `_tool` itself
        # is missing (e.g. the instance was created without __init__ by copy or
        # pickle), reading self._tool here would re-enter __getattr__ forever.
        try:
            tool = self.__dict__["_tool"]
        except KeyError:
            raise AttributeError(name) from None
        return getattr(tool, name)
    
    async def _capture_and_notify(self):
        """
        Capture screenshot and notify callback.
        
        Does nothing when auto-screenshot is disabled. Failures are logged as
        warnings and never propagate to the caller.
        """
        import inspect
        
        if not self._enabled:
            return
        
        try:
            screenshot = await self._screenshot_client.capture()
            if screenshot and self._on_screenshot:
                outcome = self._on_screenshot(screenshot)
                # Support both sync and async callbacks: only await when the
                # callback actually returned something awaitable.
                if inspect.isawaitable(outcome):
                    await outcome
        except Exception as e:
            logger.warning(f"Failed to auto-capture screenshot: {e}")
    
    async def execute(self, *args, **kwargs):
        """
        Execute tool and auto-capture screenshot.
        
        Returns:
            Whatever the wrapped tool's execute() returned
        """
        # Execute original method
        result = await self._tool.execute(*args, **kwargs)
        
        # Capture screenshot after execution
        await self._capture_and_notify()
        
        return result
    
    async def _arun(self, *args, **kwargs):
        """
        Run tool and auto-capture screenshot.
        
        Returns:
            Whatever the wrapped tool's _arun() returned
        """
        # Execute original method
        result = await self._tool._arun(*args, **kwargs)
        
        # Capture screenshot after execution
        await self._capture_and_notify()
        
        return result
    
    def enable(self):
        """Enable auto-screenshot."""
        self._enabled = True
    
    def disable(self):
        """Disable auto-screenshot."""
        self._enabled = False

================================================
FILE: anytool/platform/system_info.py
================================================
import aiohttp
from typing import Optional, Dict, Any
from anytool.utils.logging import Logger
from .config import get_client_base_url

logger = Logger.get_logger(__name__)


class SystemInfoClient:
    """
    HTTP client for system information exposed by the local server.

    Endpoints used:
    - GET /platform         (platform info; optionally cached on the client)
    - GET /screen_size
    - GET /cursor_position
    """

    def __init__(
        self,
        base_url: Optional[str] = None,
        timeout: int = 10
    ):
        """
        Set up the client.

        Args:
            base_url: Base URL of the local server; when omitted it is
                      resolved through get_client_base_url()
            timeout: Request timeout in seconds
        """
        resolved_url = get_client_base_url() if base_url is None else base_url

        self.base_url = resolved_url.rstrip("/")
        self.timeout = timeout
        self._session: Optional[aiohttp.ClientSession] = None
        self._cached_info: Optional[Dict[str, Any]] = None

    async def _get_session(self) -> aiohttp.ClientSession:
        """Return the live aiohttp session, creating a new one if needed."""
        current = self._session
        if current is not None and not current.closed:
            return current

        self._session = aiohttp.ClientSession(
            timeout=aiohttp.ClientTimeout(total=self.timeout)
        )
        return self._session

    async def get_system_info(self, use_cache: bool = True) -> Optional[Dict[str, Any]]:
        """
        Fetch the JSON document served at GET {base_url}/platform.

        Args:
            use_cache: When True, return a previously cached result if there
                       is one, and cache a freshly fetched result

        Returns:
            The parsed JSON response, or None on a non-200 status or any error
        """
        if use_cache and self._cached_info:
            logger.debug("Using cached system info")
            return self._cached_info

        try:
            session = await self._get_session()
            url = f"{self.base_url}/platform"

            async with session.get(url) as response:
                if response.status != 200:
                    error_text = await response.text()
                    logger.error(f"Failed to get system info: HTTP {response.status} - {error_text}")
                    return None

                info = await response.json()

                # Remember the result only when the caller opted into caching
                if use_cache:
                    self._cached_info = info

                logger.debug(f"System info retrieved: {info.get('system')}")
                return info

        except Exception as e:
            logger.error(f"Failed to get system info: {e}")
            return None

    async def get_screen_size(self) -> Optional[Dict[str, int]]:
        """
        Fetch the screen size from GET {base_url}/screen_size.

        Returns:
            Dict with 'width' and 'height', or None on failure
        """
        try:
            session = await self._get_session()
            url = f"{self.base_url}/screen_size"

            async with session.get(url) as response:
                if response.status != 200:
                    error_text = await response.text()
                    logger.error(f"Failed to get screen size: HTTP {response.status} - {error_text}")
                    return None

                payload = await response.json()
                logger.debug(f"Screen size: {payload.get('width')}x{payload.get('height')}")
                return {
                    "width": payload.get("width"),
                    "height": payload.get("height")
                }

        except Exception as e:
            logger.error(f"Failed to get screen size: {e}")
            return None

    async def get_cursor_position(self) -> Optional[Dict[str, int]]:
        """
        Fetch the cursor position from GET {base_url}/cursor_position.

        Returns:
            Dict with 'x' and 'y', or None on failure
        """
        try:
            session = await self._get_session()
            url = f"{self.base_url}/cursor_position"

            async with session.get(url) as response:
                if response.status != 200:
                    error_text = await response.text()
                    logger.error(f"Failed to get cursor position: HTTP {response.status} - {error_text}")
                    return None

                payload = await response.json()
                return {
                    "x": payload.get("x"),
                    "y": payload.get("y")
                }

        except Exception as e:
            logger.error(f"Failed to get cursor position: {e}")
            return None

    def clear_cache(self):
        """Drop the cached platform info so the next lookup hits the server."""
        self._cached_info = None
        logger.debug("System info cache cleared")

    async def close(self):
        """Close the HTTP session if one is open."""
        session = self._session
        if session and not session.closed:
            await session.close()
            logger.debug("System info client session closed")

    async def __aenter__(self):
        """Enter the async context; returns this client."""
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Leave the async context: close the session, do not suppress errors."""
        await self.close()
        return False

async def get_system_info(base_url: Optional[str] = None) -> Optional[Dict[str, Any]]:
    """
    One-shot helper: fetch system info with a temporary SystemInfoClient.

    The client is closed before returning and its cache is bypassed.

    Args:
        base_url: Base URL of the local server (resolved by the client when None)

    Returns:
        The system info dict, or None on failure
    """
    client = SystemInfoClient(base_url=base_url)
    async with client:
        info = await client.get_system_info(use_cache=False)
        return info


async def get_screen_size(base_url: Optional[str] = None) -> Optional[Dict[str, int]]:
    """
    One-shot helper: fetch the screen size with a temporary SystemInfoClient.

    The client is closed before returning.

    Args:
        base_url: Base URL of the local server (resolved by the client when None)

    Returns:
        Dict with 'width' and 'height', or None on failure
    """
    client = SystemInfoClient(base_url=base_url)
    async with client:
        size = await client.get_screen_size()
        return size

================================================
FILE: anytool/prompts/__init__.py
================================================
# Re-export GroundingAgentPrompts so it can be imported from anytool.prompts directly.
from anytool.prompts.grounding_agent_prompts import GroundingAgentPrompts

# Names exported by `from anytool.prompts import *`.
__all__ = ["GroundingAgentPrompts"]

================================================
FILE: anytool/prompts/grounding_agent_prompts.py
================================================
from typing import List


class GroundingAgentPrompts:
    """
    Prompt templates used by the grounding agent.

    Holds the static system prompt and the completion token, plus static
    builder methods that format the per-iteration, visual-analysis,
    final-summary and workspace prompts.
    """
    
    # Token the model emits to signal that the task is finished
    TASK_COMPLETE = "<COMPLETE>"
    
    SYSTEM_PROMPT = f"""You are a Grounding Agent. Execute tasks using tools.

# Tool Execution

- Select appropriate tools from descriptions and schemas
- Provide correct parameters
- Call multiple tools if needed
- Tools execute immediately, results appear in next iteration
- If you need results to decide next action, wait for next iteration

# Tool Selection Tips

- **MCP tools** and **Shell tools** are typically faster and more accurate when applicable
- **GUI tools** offer finer-grained control and can handle tasks not covered by MCP/shell tools
- Choose based on the task requirements and tool availability; prefer MCP/shell when they fit well

# Visual Analysis Control

GUI tools auto-analyze screenshots to extract information.

To skip analysis when NOT needed, add parameter:
```json
{{"task_description": "...", "skip_visual_analysis": true}}
```

**Decision Rule:**
- Task goal is OPERATIONAL (open/navigate/click/show): Skip analysis
- Task goal requires KNOWLEDGE EXTRACTION (read/extract/save data): Keep analysis

**Examples:**
- "Open settings page": Operational only, skip analysis
- "Open settings and record all values": Needs knowledge, keep analysis
- "Navigate to GitHub homepage": Operational only, skip analysis
- "Search Python tutorials and save top 5 titles": Needs knowledge, keep analysis

**Key principle:** If you need to extract information FROM the screen for subsequent steps or user reporting, keep analysis (don't skip).
**Note:** Only GUI tools support this parameter. Other backend tools ignore it.

# Task Completion

After each iteration, evaluate if the task is complete:

**If task is COMPLETE:**
- Write a response summarizing what was accomplished
- Include the completion token `{TASK_COMPLETE}` on a new line at the end of your response
- Example response format:
  ```
  I have successfully completed the task. The file has been created at /path/to/file.txt with the requested content.
  
  {TASK_COMPLETE}
  ```

**If task is NOT complete:**
- Continue by calling the appropriate tools
- Do NOT output `{TASK_COMPLETE}`
- Tool results will appear in the next iteration

The token `{TASK_COMPLETE}` signals that no further iterations are needed."""
    
    @staticmethod
    def iteration_summary(
        instruction: str,
        iteration: int,
        max_iterations: int
    ) -> str:
        """
        Format the prompt asking the LLM for a structured per-iteration summary.

        Args:
            instruction: The original task text
            iteration: Current iteration number
            max_iterations: Total number of iterations allowed
        """
        return f"""Based on the original task and the tool execution results in the conversation above, generate a structured iteration summary.

**Original Task:**
{instruction}

**Progress:** Iteration {iteration} of {max_iterations}

**Generate Summary in This Format:**

## Iteration {iteration} Progress

Actions taken: <what tools were called and what they did>

Knowledge obtained (COMPLETE and SPECIFIC):
- File locations: <ALL file paths/names created/read/modified with exact locations, or "None">
- Visual content: <EXTRACT ALL visible information from screenshots - text, data, lists, tables, results, or "N/A">
- Data retrieved: <ALL key data/results from searches/queries with specific values, numbers, names, or "N/A">
- URLs/Links: <ALL important URLs, links, or identifiers found, or "N/A">
- System state: <important state changes, error messages, status indicators, or "N/A">

Errors encountered: <any errors or issues from tool execution, or "None">

CRITICAL GUIDELINES:
- This summary is for preserving knowledge for subsequent iterations
- Extract ALL concrete information from tool outputs in the conversation above
- Filenames, paths, URLs - use exact values from tool outputs
- Visual content - extract actual text/data visible, not just "saw something"
- Search results - include specific data, not vague descriptions
- The next iteration cannot see current tool outputs - this summary is the ONLY source of knowledge"""
    
    @staticmethod
    def visual_analysis(
        tool_name: str,
        num_screenshots: int,
        task_description: str = ""
    ) -> str:
        """
        Format the prompt asking the LLM to extract knowledge from screenshots.

        Args:
            tool_name: Tool name that generated the screenshots
            num_screenshots: Number of screenshots (wording is singular for 1)
            task_description: Original task description; when non-empty a
                              task-context paragraph is included
        """
        if num_screenshots == 1:
            screenshot_text = "screenshot"
            these_text = "this screenshot"
        else:
            screenshot_text = f"{num_screenshots} screenshots"
            these_text = "these screenshots"
        
        task_context = ""
        if task_description:
            task_context = f"""
**Original Task**: {task_description}

Focus on extracting information RELEVANT to this task. Prioritize content that helps accomplish the goal.
"""
        
        return f"""Extract the KNOWLEDGE and INFORMATION from {these_text}. This will be passed to the next iteration so it can continue working with the information (search, analyze, save, etc.). Without this extraction, the visual content would only be viewable by humans and unusable for subsequent operations.
{task_context}
**EXTRACT all visible knowledge content** (prioritize task-relevant information):
1. **Text content**: Articles, documentation, code, messages, descriptions - extract the actual text
2. **Data points**: Numbers, statistics, measurements, values, percentages - be specific
3. **List items**: Names, titles, entries in lists/search results/files - list them out
4. **Structured data**: Information from tables, charts, forms - describe what they contain
5. **Key information**: URLs, paths, names, IDs, dates, labels - anything useful for next steps

**IGNORE interface elements**:
- Buttons, menus, toolbars, navigation bars
- UI design, layout, colors, styling
- Non-informational visual elements

**Goal**: Extract usable knowledge that enables the next agent to work with this information programmatically. Be SPECIFIC and COMPLETE, but FOCUS on what's relevant to the task.

{screenshot_text.capitalize()} from tool '{tool_name}'"""
    
    @staticmethod
    def final_summary(
        instruction: str,
        iterations: int
    ) -> str:
        """
        Format the prompt asking the LLM for the final, cross-iteration summary.

        Args:
            instruction: The original task text
            iterations: Number of iterations that were run
        """
        return f"""Based on the complete conversation history above (including all {iterations} iteration summaries and tool executions), generate a comprehensive final summary.

## Final Task Summary

Task: {instruction}

What was accomplished: <comprehensive description of all completed actions across all iterations>

Key information obtained: <all important information discovered>
- Files: <files created/read/modified with paths, or "N/A">
- Data: <important data/results obtained, or "N/A">
- Findings: <key discoveries or insights, or "N/A">

Issues encountered: <any errors or issues, or "None">

Result: <"Success" or "Incomplete">

Guidelines:
- Consolidate information from ALL iteration summaries
- Include concrete deliverables (file paths, data, etc.)
- Be comprehensive but concise
- Focus on what the user cares about"""
    
    @staticmethod
    def workspace_directory(workspace_dir: str) -> str:
        """
        Describe the working directory to the agent.

        Paths containing "/root" (or "LiveMCPBench/root") are treated as a
        benchmark workspace that holds all task files; any other path is
        described as a place for intermediate results.
        """
        benchmark_markers = ("/root", "LiveMCPBench/root")
        is_benchmark = any(marker in workspace_dir for marker in benchmark_markers)
        
        if not is_benchmark:
            # Normal mode: workspace is for intermediate results
            return f"""**Working Directory**: `{workspace_dir}`
- Persist intermediate results here; later iterations/backends can read what you saved earlier
- Note: User's personal files are NOT here - search in ~/Desktop, ~/Documents, ~/Downloads, etc."""
        
        # Benchmark mode: all task files are in workspace directory
        return f"""**Working Directory**: `{workspace_dir}`
- All task files (input/output) are located in this directory
- Read from and write to this directory for all file operations"""
    
    @staticmethod
    def workspace_matching_files(matching_files: List[str]) -> str:
        """
        Format an alert listing workspace files that match the task requirements.
        """
        quoted = ', '.join(f"`{name}`" for name in matching_files)
        return f"""**Workspace Alert**: Files matching task requirements found: {quoted}
- Read these files to verify if they satisfy the task
- If satisfied, mark task as completed
- If not satisfied, modify or recreate as needed"""
    
    @staticmethod
    def workspace_recent_files(total_files: int, recent_files: List[str]) -> str:
        """
        Format a note about recently modified files (at most 15 names are listed).
        """
        shown = recent_files[:15]
        quoted = ', '.join(f"`{name}`" for name in shown)
        return f"""**Workspace Info**: {total_files} files exist, {len(recent_files)} recently modified
Recent files: {quoted}
Consider checking recent files before creating new ones"""
    
    @staticmethod
    def workspace_file_list(files: List[str]) -> str:
        """
        Format a one-line listing of existing files.

        At most 15 names are listed; any remainder is reported as a count.
        """
        limit = 15
        rendered = ', '.join(f"`{name}`" for name in files[:limit])
        hidden = len(files) - limit
        if hidden > 0:
            rendered += f" (and {hidden} more)"
        return f"**Workspace Info**: {len(files)} existing file(s): {rendered}"
    
    @staticmethod
    def iteration_feedback(
        iteration: int,
        llm_summary: str,
        add_guidance: bool = True
    ) -> str:
        """
        Wrap an iteration summary as the message handed to the next iteration.

        Args:
            iteration: Number of the iteration that was summarised
            llm_summary: Summary text produced for that iteration
            add_guidance: When True, append instructions for the next iteration
        """
        summary_block = f"""## Iteration {iteration} Summary

{llm_summary}"""
        
        if not add_guidance:
            return summary_block
        
        guidance_block = f"""
---
Now continue with iteration {iteration + 1}. You can see the full conversation history above. Based on all progress so far, decide whether to:
- Call more tools if the task is not yet complete
- Output {GroundingAgentPrompts.TASK_COMPLETE} if the task is fully accomplished"""
        
        return summary_block + guidance_block

================================================
FILE: anytool/recording/__init__.py
================================================
"""
Public interface of the anytool.recording package.

Re-exports the recording manager, the two recorder classes, and the
trajectory / agent-action helper functions listed in __all__.

Component overview:

    RecordingManager
      ├── internal management of platform.RecordingClient
      ├── internal management of platform.ScreenshotClient  
      ├── internal management of TrajectoryRecorder
      └── internal management of ActionRecorder
"""

# Manager that automatically records tool executions
from .manager import RecordingManager

# Low-level components (advanced users)
from .recorder import TrajectoryRecorder
from .action_recorder import ActionRecorder

# Trajectory utility functions
from .utils import (
    load_trajectory_from_jsonl,
    load_metadata,
    format_trajectory_for_export,
    analyze_trajectory,
    load_recording_session,
    filter_trajectory,
    extract_errors,
    generate_summary_report,
)

# Agent action utility functions
from .action_recorder import (
    load_agent_actions,
    analyze_agent_actions,
    format_agent_actions,
)

# Names exported by `from anytool.recording import *`.
__all__ = [
    # Manager
    'RecordingManager',
    
    # Recorders
    'TrajectoryRecorder',
    'ActionRecorder',
    
    # Trajectory utils
    'load_trajectory_from_jsonl',
    'load_metadata',
    'format_trajectory_for_export',
    'analyze_trajectory',
    'load_recording_session',
    'filter_trajectory',
    'extract_errors',
    'generate_summary_report',
    
    # Agent action utils
    'load_agent_actions',
    'analyze_agent_actions',
    'format_agent_actions',
]

================================================
FILE: anytool/recording/action_recorder.py
================================================
"""
Agent Action Recorder

Records agent decision-making processes, reasoning, and outputs.
Focuses on high-level agent behaviors rather than low-level tool executions.
"""

import datetime
import json
from typing import Any, Dict, Optional
from pathlib import Path

from anytool.utils.logging import Logger

logger = Logger.get_logger(__name__)


class ActionRecorder:
    """
    Records agent actions and decision-making processes.

    This recorder captures the 'thinking' layer of the agent:
    - Task planning and decomposition
    - Tool selection reasoning
    - Evaluation decisions

    Every recorded action is appended as one JSON object per line to
    ``agent_actions.jsonl`` inside the trajectory directory.
    """

    def __init__(self, trajectory_dir: Path):
        """
        Initialize action recorder.

        Args:
            trajectory_dir: Directory to save action records. It is created
                (including parents) if it does not exist yet.
        """
        # Coerce so that a plain string path works as well as a Path.
        self.trajectory_dir = Path(trajectory_dir)
        self.actions_file = self.trajectory_dir / "agent_actions.jsonl"
        self.step_counter = 0

        # Ensure directory exists
        self.trajectory_dir.mkdir(parents=True, exist_ok=True)

    async def record_action(
        self,
        agent_name: str,
        action_type: str,
        input_data: Optional[Dict[str, Any]] = None,
        reasoning: Optional[Dict[str, Any]] = None,
        output_data: Optional[Dict[str, Any]] = None,
        metadata: Optional[Dict[str, Any]] = None,
        related_tool_steps: Optional[list] = None,
        correlation_id: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
        Record an agent action.

        Args:
            agent_name: Name of the agent performing the action
            action_type: Type of action (plan | execute | evaluate | monitor)
            input_data: Input data the agent received (simplified)
            reasoning: Agent's reasoning process (structured)
            output_data: Agent's output/decision (structured)
            metadata: Additional metadata (LLM model, tokens, duration, etc.)
            related_tool_steps: List of tool execution step numbers related to this action
            correlation_id: Optional correlation ID to link related events

        Returns:
            The record that was appended to the JSONL file. Optional sections
            (input, reasoning, output, metadata, related_tool_steps) are only
            present when the corresponding argument was non-empty.
        """
        self.step_counter += 1
        timestamp = datetime.datetime.now().isoformat()

        action_info = {
            "step": self.step_counter,
            "timestamp": timestamp,
            "agent_name": agent_name,
            "agent_type": self._infer_agent_type(agent_name),
            "action_type": action_type,
            # Fall back to a generated ID so every record can be correlated.
            "correlation_id": correlation_id or f"action_{self.step_counter}_{timestamp}",
        }

        # Input and output get a tighter string limit than reasoning.
        if input_data:
            action_info["input"] = self._truncate_data(input_data, max_length=1000)

        if reasoning:
            action_info["reasoning"] = self._truncate_data(reasoning, max_length=2000)

        if output_data:
            action_info["output"] = self._truncate_data(output_data, max_length=1000)

        # Metadata is stored as given (no truncation).
        if metadata:
            action_info["metadata"] = metadata

        # Related tool steps allow correlation with the tool trajectory.
        if related_tool_steps:
            action_info["related_tool_steps"] = related_tool_steps

        await self._append_to_file(action_info)

        logger.debug(
            f"Recorded {action_type} action from {agent_name} (step {self.step_counter})"
        )

        return action_info

    def _infer_agent_type(self, agent_name: str) -> str:
        """
        Derive a coarse agent type from keywords in the agent name.

        The first matching keyword wins, checked in the order
        host, grounding, eval, coordinator; otherwise "unknown" is returned.
        """
        name_lower = agent_name.lower()

        for keyword in ("host", "grounding", "eval", "coordinator"):
            if keyword in name_lower:
                return keyword
        return "unknown"

    def _truncate_data(self, data: Any, max_length: int) -> Any:
        """
        Return a copy of ``data`` in which every string longer than
        ``max_length`` is cut and suffixed with "... [truncated]".

        Dicts and lists are traversed recursively; any other value is
        returned unchanged.
        """
        if isinstance(data, str):
            if len(data) > max_length:
                return data[:max_length] + "... [truncated]"
            return data

        if isinstance(data, dict):
            return {key: self._truncate_data(value, max_length) for key, value in data.items()}

        if isinstance(data, list):
            return [self._truncate_data(item, max_length) for item in data]

        return data

    async def _append_to_file(self, action_info: Dict[str, Any]):
        """
        Append action to JSONL file.

        Values that the JSON encoder cannot handle natively (datetime, Path,
        enums, arbitrary objects) are written as their ``str()`` form instead
        of aborting the record with a TypeError.
        """
        # Serialize first so a failure never leaves a partial line behind.
        line = json.dumps(action_info, ensure_ascii=False, default=str)
        with open(self.actions_file, "a", encoding="utf-8") as f:
            f.write(line + "\n")

    def get_step_count(self) -> int:
        """Get current step count."""
        return self.step_counter


def load_agent_actions(trajectory_dir: str) -> list:
    """
    Load agent actions from a trajectory directory.

    Reads ``agent_actions.jsonl`` line by line. Blank lines are ignored, and
    lines that are not valid JSON are skipped with a warning so that a single
    corrupt or partially written record does not discard the rest of the log.

    Args:
        trajectory_dir: Directory expected to contain ``agent_actions.jsonl``.

    Returns:
        The decoded records in file order. An empty list is returned when the
        file does not exist or cannot be read at all.
    """
    actions_file = Path(trajectory_dir) / "agent_actions.jsonl"

    if not actions_file.exists():
        logger.warning(f"Agent actions file not found: {actions_file}")
        return []

    actions = []
    skipped = 0
    try:
        with open(actions_file, "r", encoding="utf-8") as f:
            for line_number, line in enumerate(f, start=1):
                line = line.strip()
                if not line:
                    continue
                try:
                    actions.append(json.loads(line))
                except json.JSONDecodeError as e:
                    # Keep going: the remaining lines are independent records.
                    skipped += 1
                    logger.warning(
                        f"Skipping malformed line {line_number} in {actions_file}: {e}"
                    )
    except Exception as e:
        logger.error(f"Failed to load agent actions from {actions_file}: {e}")
        return []

    if skipped:
        logger.warning(f"Skipped {skipped} malformed line(s) in {actions_file}")
    logger.info(f"Loaded {len(actions)} agent actions from {actions_file}")
    return actions


def analyze_agent_actions(actions: list) -> Dict[str, Any]:
    """
    Summarize a list of agent action records.

    Args:
        actions: Action records (dicts); a falsy value is treated as no actions.

    Returns:
        A dict with ``total_actions`` plus per-``agent_name`` and
        per-``action_type`` occurrence counts (``by_agent`` / ``by_type``).
        Records missing either field are counted under "unknown".
    """
    records = actions if actions else []
    agent_tally = {}
    type_tally = {}

    for record in records:
        for tally, field in ((agent_tally, "agent_name"), (type_tally, "action_type")):
            label = record.get(field, "unknown")
            tally[label] = tally.get(label, 0) + 1

    return {
        "total_actions": len(records),
        "by_agent": agent_tally,
        "by_type": type_tally,
    }


def format_agent_actions(actions: list, format_type: str = "compact") -> str:
    """
    Format agent actions for display.

    Args:
        actions: Action records as loaded from ``agent_actions.jsonl``.
        format_type: "compact" for one line per action (with a preview of the
            reasoning "thought", at most 60 characters), or "detailed" for a
            multi-line block per action including reasoning, output and
            metadata as indented JSON.

    Returns:
        The formatted text, or "No agent actions recorded" when ``actions``
        is empty.

    Raises:
        ValueError: If ``format_type`` is not recognised (only checked when
            there is at least one action).
    """
    if not actions:
        return "No agent actions recorded"

    if format_type == "compact":
        preview_limit = 60
        lines = []
        for action in actions:
            step = action.get("step", "?")
            agent = action.get("agent_name", "?")
            action_type = action.get("action_type", "?")

            # Reasoning is normally a dict; anything else carries no "thought".
            reasoning = action.get("reasoning")
            thought = reasoning.get("thought", "") if isinstance(reasoning, dict) else ""

            key_info = ""
            if thought:
                thought_text = str(thought)
                preview = thought_text[:preview_limit]
                # Only mark the preview as cut when something was actually dropped.
                if len(thought_text) > preview_limit:
                    preview += "..."
                key_info = f": {preview}"

            lines.append(f"Step {step}: [{agent}] {action_type}{key_info}")

        return "\n".join(lines)

    elif format_type == "detailed":
        lines = []
        for action in actions:
            lines.append(f"\n{'='*60}")
            lines.append(f"Step {action.get('step', '?')}: {action.get('agent_name', '?')}")
            lines.append(f"Type: {action.get('action_type', '?')}")
            lines.append(f"Time: {action.get('timestamp', '?')}")

            # Optional sections, emitted in a fixed order when present.
            for field, title in (
                ("reasoning", "Reasoning"),
                ("output", "Output"),
                ("metadata", "Metadata"),
            ):
                if action.get(field):
                    lines.append(f"\n{title}:")
                    lines.append(json.dumps(action[field], indent=2, ensure_ascii=False))

        return "\n".join(lines)

    else:
        raise ValueError(f"Unknown format type: {format_type}")

================================================
FILE: anytool/recording/manager.py
================================================
import datetime
import json
import ast
import types
from typing import Any, Dict, List, Optional
from pathlib import Path

from anytool.utils.logging import Logger
from .recorder import TrajectoryRecorder
from .action_recorder import ActionRecorder

logger = Logger.get_logger(__name__)


class RecordingManager:
    # Global instance management (singleton pattern)
    _global_instance: Optional['RecordingManager'] = None
    
    def __init__(
        self,
        enabled: bool = True,
        task_id: str = "",
        log_dir: str = "./logs/recordings",
        backends: Optional[List[str]] = None,
        enable_screenshot: bool = True,
        enable_video: bool = False,
        enable_conversation_log: bool = True,
        auto_save_interval: int = 10,
        server_url: Optional[str] = None,
        agent_name: str = "GroundingAgent",
    ):
        """
        Initialize automatic recording manager
        
        Args:
            enabled: whether to enable recording
            task_id: task ID (for naming recording directory)
            log_dir: log directory path
            backends: list of backends to record (None = all)
                    (optional: "mcp", "gui", "shell", "system", "web")
            enable_screenshot: whether to enable screenshot (through platform.ScreenshotClient)
            enable_video: whether to enable video recording (through platform.RecordingClient)
            enable_conversation_log: whether to save LLM conversations to conversations.jsonl (default: True)
            auto_save_interval: automatic save interval (steps)
            server_url: local server address (None = read from config/environment variables)
            agent_name: name of the agent performing the recording (default: "GroundingAgent")
        """
        self.enabled = enabled
        self.task_id = task_id
        self.log_dir = log_dir
        self.backends = set(backends) if backends else {"mcp", "gui", "shell", "system", "web"}
        self.enable_screenshot = enable_screenshot
        self.enable_video = enable_video
        self.enable_conversation_log = enable_conversation_log
        self.auto_save_interval = auto_save_interval
        self.server_url = server_url
        self.agent_name = agent_name
        
        # internal state
        self._recorder: Optional[TrajectoryRecorder] = None
        self._action_recorder: Optional[ActionRecorder] = None
        self._is_started = False
        self._step_counter = 0
        
        # registered LLM clients (for automatic recording)
        self._registered_llm_clients = []
        # Store original methods for restoration
        self._original_methods = {}
        
        # video/screenshot clients (internal management)
        self._recording_client = None
        self._screenshot_client = None
        
        # Register as global instance
        RecordingManager._global_instance = self

    @classmethod
    def is_recording(cls) -> bool:
        """
        Check if there is an active recording session
        
        Returns:
            bool: True if recording is active
        """
        return cls._global_instance is not None and cls._global_instance._is_started
    
    @classmethod
    async def record_retrieved_tools(
        cls,
        task_instruction: str,
        tools: List[Any],
        search_debug_info: Optional[Dict[str, Any]] = None,
    ):
        """
        Record the tools retrieved for a task
        
        Args:
            task_instruction: The task instruction used for retrieval
            tools: List of retrieved tools
            search_debug_info: Debug info from search (similarity scores, LLM selections)
        """
        instance = cls._global_instance
        if not instance or not instance._is_started or not instance._recorder:
            return
        
        # Extract tool info
        tool_info = []
        for tool in tools:
            info = {
                "name": getattr(tool, "name", str(tool)),
            }
            if hasattr(tool, "backend_type"):
                info["backend"] = tool.backend_type.value if hasattr(tool.backend_type, "value") else str(tool.backend_type)
            if hasattr(tool, "_runtime_info") and tool._runtime_info:
                info["server_name"] = tool._runtime_info.server_name
            tool_info.append(info)
        
        # Build metadata
        metadata = {
            "instruction": task_instruction[:500],  # Truncate long instructions
            "count": len(tools),
            "tools": tool_info,
        }
        
        # Add search debug info if available
        if search_debug_info:
            metadata["search_debug"] = {
                "search_mode": search_debug_info.get("search_mode", ""),
                "total_candidates": search_debug_info.get("total_candidates", 0),
                "mcp_count": search_debug_info.get("mcp_count", 0),
                "non_mcp_count": search_debug_info.get("non_mcp_count", 0),
                "llm_filter": search_debug_info.get("llm_filter", {}),
                "tool_scores": search_debug_info.get("tool_scores", []),
            }
        
        # Save to metadata
        await instance._recorder.add_metadata("retrieved_tools", metadata)
        
        logger.info(f"Recorded {len(tools)} retrieved tools (with search debug info: {search_debug_info is not None})")
    
    @classmethod
    async def record_iteration_context(
        cls,
        iteration: int,
        messages_input: List[Dict[str, Any]],
        messages_output: List[Dict[str, Any]],
        llm_response_summary: Dict[str, Any],
        max_content_length: int = 5000,
    ):
        """
        Record a single iteration's LLM conversation to conversations.jsonl (real-time).
        
        Args:
            iteration: Iteration number
            messages_input: Messages sent to LLM
            messages_output: Messages after LLM response  
            llm_response_summary: Summary of LLM response
            max_content_length: Max length for message content truncation
        """
        instance = cls._global_instance
        if not instance or not instance._is_started or not instance._recorder:
            return
        
        # Check if conversation recording is enabled
        if not getattr(instance, 'enable_conversation_log', True):
            return
        
        def truncate_message_content(messages: List[Dict]) -> List[Dict]:
            """Truncate message content to avoid huge log files."""
            result = []
            for msg in messages:
                new_msg = {"role": msg.get("role", "unknown")}
                content = msg.get("content", "")
                
                if isinstance(content, str):
                    if len(content) > max_content_length:
                        new_msg["content"] = content[:max_content_length] + f"... [truncated, total {len(content)} chars]"
                    else:
                        new_msg["content"] = content
                elif isinstance(content, list):
                    # Handle multi-part content (e.g., with images)
                    new_content = []
                    for item in content:
                        if isinstance(item, dict):
                            if item.get("type") == "image":
                                new_content.append({"type": "image", "note": "[image data omitted]"})
                            elif item.get("type") == "text":
                                text = item.get("text", "")
                                if len(text) > max_content_length:
                                    new_content.append({
                                        "type": "text",
                                        "text": text[:max_content_length] + f"... [truncated, total {len(text)} chars]"
                                    })
                                else:
                                    new_content.append(item)
                            else:
                                new_content.append(item)
                        else:
                            new_content.append(item)
                    new_msg["content"] = new_content
                else:
                    new_msg["content"] = str(content)[:max_content_length]
                
                if "tool_calls" in msg:
                    new_msg["tool_calls"] = msg["tool_calls"]
                
                result.append(new_msg)
            return result
        
        # Build record
        import datetime
        record = {
            "iteration": iteration,
            "timestamp": datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S"),
            "llm_response_summary": llm_response_summary,
            "messages_input": truncate_message_content(messages_input),
            "messages_output": truncate_message_content(messages_output),
        }
        
        # Append to conversations.jsonl (real-time)
        conv_file = instance._recorder.trajectory_dir / "conversations.jsonl"
        try:
            with open(conv_file, "a", encoding="utf-8") as f:
                f.write(json.dumps(record, ensure_ascii=False))
                f.write("\n")
        except Exception as e:
            logger.debug(f"Failed to write conversation log: {e}")
    
    @classmethod
    async def record_tool_execution(
        cls,
        tool_name: str,
        backend: str,
        parameters: Dict[str, Any],
        result: Any,
        server_name: Optional[str] = None,
        is_success: bool = True,
        metadata: Optional[Dict[str, Any]] = None,
    ):
        """
        Record tool execution (internal method, called by BaseTool automatically)
        
        Args:
            tool_name: Name of the tool
            backend: Backend type (gui, shell, mcp, etc.)
            parameters: Tool parameters
            result: Tool execution result (content or error message)
            server_name: Server name for MCP backend
            is_success: Whether the tool execution was successful (default: True for backward compatibility)
            metadata: Tool result metadata (e.g. intermediate_steps for GUI)
        """
        if not cls._global_instance or not cls._global_instance._is_started:
            return
        
        instance = cls._global_instance
        
        # Check if should record this backend
        if backend not in instance.backends:
            return
        
        # Create mock tool_call and result objects for compatibility with existing _record_* methods
        class MockFunctionCall:
            def __init__(self, name, arguments):
                self.name = name
                self.arguments = arguments
        
        class MockToolCall:
            def __init__(self, name, arguments):
                self.function = MockFunctionCall(name, arguments)
        
        class MockResult:
            def __init__(self, content, is_success=True, metadata=None):
                self.content = content
                self.is_success = is_success
                self.is_error = not is_success
                self.error = content if not is_success else None
                self.metadata = metadata or {}
        
        tool_call = MockToolCall(tool_name, parameters)
        mock_result = MockResult(result, is_success=is_success, metadata=metadata)
        
        try:
            if backend == "mcp":
                server = server_name or "unknown"
                await instance._record_mcp(tool_call, mock_result, server)
            elif backend == "gui":
                await instance._record_gui(tool_call, mock_result)
            elif backend == "shell":
                await instance._record_shell(tool_call, mock_result)
            elif backend == "system":
                await instance._record_system(tool_call, mock_result)
            elif backend == "web":
                await instance._record_web(tool_call, mock_result)
            
            instance._step_counter += 1
        except Exception as e:
            logger.debug(f"Failed to record tool execution: {e}")
    
    @staticmethod
    def _parse_arguments(arg_data):
        """Safely parse tool_call.function.arguments which may be JSON string.

        Handles:
        1. Proper JSON strings with true/false/null
        2. Python literal strings (produced by OpenAI) using ast.literal_eval
        3. Already-dict objects (returned by SDK)
        """
        if not isinstance(arg_data, str):
            return arg_data or {}

        # First, try JSON
        try:
            return json.loads(arg_data)
        except json.JSONDecodeError:
            pass

        # Fallback to Python literal
        try:
            return ast.literal_eval(arg_data)
        except Exception:
            logger.debug("Failed to parse arguments, returning raw string")
            return {"raw": arg_data}
    
    async def start(self, task_id: Optional[str] = None):
        """Start automatic recording.

        Sets up the trajectory recorder, the agent action recorder and, when
        enabled, the video and screenshot clients, then writes the initial
        metadata (task_id, backends, start_time) and marks the session started.
        The call is a no-op when recording is disabled or already started;
        note that a provided ``task_id`` is stored even in that case.

        Args:
            task_id: If provided, override the current task_id for this recording session. This allows
                     external callers (e.g. Coordinator) to specify a meaningful task identifier without
                     having to recreate the RecordingManager instance.

        Raises:
            Exception: Any error raised during setup is logged and re-raised.
        """
        # Allow dynamic update of task_id before recording actually starts
        if task_id:
            self.task_id = task_id
        if not self.enabled or self._is_started:
            return
        
        try:
            # check server availability (only when video or screenshot is enabled)
            # The check itself only logs; it does not abort the start.
            if self.enable_video or self.enable_screenshot:
                await self._check_server_availability()
            
            self._recorder = TrajectoryRecorder(
                task_name=self.task_id,
                log_dir=self.log_dir,
                enable_screenshot=self.enable_screenshot,
                enable_video=self.enable_video,
                server_url=self.server_url,
            )
            
            # create action recorder for agent decision tracking
            # (writes into the same directory as the trajectory recorder)
            self._action_recorder = ActionRecorder(
                trajectory_dir=Path(self._recorder.get_trajectory_dir())
            )
            
            
            # create video client (internal management)
            # A failed start is only reported as a warning; setup continues.
            if self.enable_video:
                from anytool.platform import RecordingClient
                self._recording_client = RecordingClient(base_url=self.server_url)
                success = await self._recording_client.start_recording()
                if success:
                    logger.info("Video recording started")
                else:
                    logger.warning("Video recording failed to start")
            
            # create screenshot client (internal management)
            if self.enable_screenshot:
                from anytool.platform import ScreenshotClient
                self._screenshot_client = ScreenshotClient(base_url=self.server_url)
                logger.debug("Screenshot client ready")
            
            # save initial metadata
            await self._recorder.add_metadata("task_id", self.task_id)
            await self._recorder.add_metadata("backends", list(self.backends))
            await self._recorder.add_metadata("start_time", datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S"))

            # Capture and save initial screenshot if enabled
            # Best-effort: a capture failure is logged at debug level only.
            if self.enable_screenshot and self._screenshot_client:
                try:
                    init_shot = await self._screenshot_client.capture()
                    if init_shot:
                        await self._recorder.save_init_screenshot(init_shot)
                        logger.debug("Initial screenshot saved")
                except Exception as e:
                    logger.debug(f"Failed to capture initial screenshot: {e}")
            
            # Only flagged as started once every setup step above succeeded.
            self._is_started = True
            logger.info(f"Recording started: {self._recorder.get_trajectory_dir()}")
            
        except Exception as e:
            logger.error(f"Recording failed to start: {e}")
            raise
    
    async def _check_server_availability(self):
        """Check if local server is available"""
        try:
            from anytool.platform import SystemInfoClient

            # Use context manager to ensure aiohttp session is closed, avoiding warning of unclosed session
            async with SystemInfoClient(base_url=self.server_url) as client:
                info = await client.get_system_info()

            if info:
                logger.info(f"Server connected ({info.get('platform', 'unknown')})")
            else:
                logger.warning("Server not responding, video/screenshot functionality unavailable")
        
        except Exception:
            logger.warning("Cannot connect to server, video/screenshot functionality unavailable")
    
    async def stop(self):
        """Stop automatic recording"""
        if not self.enabled or not self._is_started:
            return
        
        try:
            # stop video recording and save
            if self._recording_client:
                try:
                    video_path = None
                    if self._recorder:
                        video_path = str(Path(self._recorder.get_trajectory_dir()) / "screen_recording.mp4")
                    
                    video_bytes = await self._recording_client.end_recording(dest=video_path)
                    if video_bytes and video_path:
                        video_size_mb = len(video_bytes) / (1024 * 1024)
                        logger.info(f"Video recording saved: {video_path} ({video_size_mb:.2f} MB)")
                except Exception as e:
                    logger.warning(f"Video recording failed to save: {e}")

            # close RecordingClient session, avoid unclosed session warning
            try:
                if self._recording_client:
                    await self._recording_client.close()
            except Exception as e:
                logger.debug(f"Failed to close RecordingClient session: {e}")
            
            # close screenshot client
            if self._screenshot_client:
                try:
                    await self._screenshot_client.close()
                except Exception as e:
                    logger.debug(f"Screenshot client failed to close: {e}")
                finally:
                    self._screenshot_client = None
            
            # finalize trajectory recording
            if self._recorder:
                # save final metadata
                await self._recorder.add_metadata("end_time", datetime.datetime.now().isoformat())
                await self._recorder.add_metadata("total_steps", self._step_counter)
                
                # generate summary
                await self.generate_summary()
                
                # finalize recording
                await self._recorder.finalize()
                
                logger.info(f"Recording completed: {self._recorder.get_trajectory_dir()}")
            
            # Restore original methods for registered LLM clients
            for client in self._registered_llm_clients:
                client_id = id(client)
                if client_id in self._original_methods:
                    try:
                        # Restore original complete method
                        original_method = self._original_methods[client_id]
                        client.complete = original_method
                    except Exception as e:
                        logger.debug(f"Failed to restore original method for LLM client: {e}")
            
            # Clear registered clients and original methods
            self._registered_llm_clients.clear()
            self._original_methods.clear()
            
            self._is_started = False
            self._recorder = None
            self._action_recorder = None
            
        except Exception as e:
            logger.error(f"Recording failed to stop: {e}")
    
    def register_to_llm(self, llm_client):
        if not self.enabled:
            return
        
        # Check if already registered to avoid double-wrapping
        if id(llm_client) in self._original_methods:
            logger.warning(f"LLM client {llm_client} is already registered, skipping")
            return
        
        # Save original complete method for restoration
        original_complete = llm_client.complete
        self._original_methods[id(llm_client)] = original_complete
        
        # Wrap complete method
        async def wrapped_complete(self_client, *args, **kwargs):
            # Call original method
            response = await original_complete(*args, **kwargs)
            
            # Automatically record tool calls
            if response.get("tool_results"):
                await self._auto_record_tool_results(response["tool_results"])
            
            return response
        
        # Replace method with properly bound method
        llm_client.complete = types.MethodType(wrapped_complete, llm_client)
        self._registered_llm_clients.append(llm_client)
    
    async def _auto_record_tool_results(self, tool_results: List[Dict]):
        """
        Internal method: automatically record tool execution results from LLM client
        
        This is called by register_to_llm() wrapper. Tool results should contain
        backend and server_name information.
        """
        if not self._recorder or not self._is_started:
            return
        
        for tool_result in tool_results:
            # Get necessary information from tool_result
            tool_call = tool_result.get("tool_call")
            result = tool_result.get("result")
            backend = tool_result.get("backend")
            server_name = tool_result.get("server_name")
            
            if not tool_call or not result:
                logger.warning("Tool result missing 'tool_call' or 'result', skipping")
                continue
            
            if not backend:
                logger.warning(
                    f"Tool result missing 'backend' field, skipping recording. "
                    f"Tool: {tool_call.function.name}. "
                    f"Ensure your LLM client provides backend information in tool_results."
                )
                continue
            
            # Extract metadata for embedding intermediate_steps (GUI)
            result_metadata = result.metadata if hasattr(result, 'metadata') else None
            
            await RecordingManager.record_tool_execution(
                tool_name=tool_call.function.name,
                backend=backend,
                parameters=self._parse_arguments(tool_call.function.arguments),
                result=result.content if hasattr(result, 'content') else str(result),
                server_name=server_name,
                is_success=result.is_success if hasattr(result, 'is_success') else True,
                metadata=result_metadata,
            )

    async def _record_mcp(self, tool_call, result, server: str):
        tool_name = tool_call.function.name
        parameters = self._parse_arguments(tool_call.function.arguments)
        
        command = f"{server}.{tool_name}"
        result_str = str(result.content) if result.is_success else str(result.error)
        result_brief = result_str[:200] + "..." if len(result_str) > 200 else result_str
        
        is_actual_success = result.is_success and not result_str.startswith("ERROR:")
        
        step_info = await self._recorder.record_step(
            backend="mcp",
            tool=tool_name,
            command=command,
            result={
                "status": "success" if is_actual_success else "error",
                "output": result_brief,
            },
            parameters=parameters,
            extra={
                "server": server,
            },
            auto_screenshot=self.enable_screenshot
        )
        
        # Add agent_name to step_info
        step_info["agent_name"] = self.agent_name
    
    async def _record_gui(self, tool_call, result):
        tool_name = tool_call.function.name
        parameters = self._parse_arguments(tool_call.function.arguments)
        
        # Extract actual pyautogui command (from action_history)
        command = "gui_agent"
        if result.is_success and hasattr(result, 'metadata') and result.metadata:
            action_history = result.metadata.get("action_history", [])
            if action_history:
                # Get last successful execution action
                for action in reversed(action_history):
                    planned_action = action.get("planned_action", {})
                    execution_result = action.get("execution_result", {})
                    
                    if planned_action.get("action_type") == "PYAUTOGUI_COMMAND":
                        cmd = planned_action.get("command", "")
                        if cmd and execution_result.get("status") == "success":
                            command = cmd
                            break
                    elif execution_result.get("status") == "success":
                        action_type = planned_action.get("action_type", "")
                        if action_type and action_type not in ["WAIT", "DONE", "FAIL"]:
                            params = planned_action.get("parameters", {})
                            if params:
                                param_str = ", ".join([f"{k}={v}" for k, v in list(params.items())[:2]])
                                command = f"{action_type}({param_str})"
                            else:
                                command = action_type
                            break
        
        result_str = str(result.content) if result.is_success else str(result.error)
        
        is_actual_success = result.is_success
        if result.is_success:
            first_200_chars = result_str[:200] if result_str else ""
            critical_failure_patterns = ["Task failed", "CRITICAL ERROR:", "FATAL:"]
            has_critical_failure = any(pattern in first_200_chars for pattern in critical_failure_patterns)
            is_actual_success = not has_critical_failure
        
        # Extract intermediate_steps from metadata for embedding in traj.jsonl
        extra = {}
        if hasattr(result, 'metadata') and result.metadata:
            intermediate_steps = result.metadata.get("intermediate_steps")
            if intermediate_steps:
                extra["intermediate_steps"] = intermediate_steps
        
        step_info = await self._recorder.record_step(
            backend="gui",
            tool="gui_agent",
            command=command,
            result={
                "status": "success" if is_actual_success else "error",
                "output": result_str,
            },
            parameters=parameters,
            auto_screenshot=self.enable_screenshot,
            extra=extra if extra else None,
        )
        
        step_info["agent_name"] = self.agent_name
    
    async def _record_shell(self, tool_call, result):
        tool_name = tool_call.function.name
        parameters = self._parse_arguments(tool_call.function.arguments)
        
        task = parameters.get("task", tool_name)
        exit_code = 0 if result.is_success else 1
        
        stdout = str(result.content) if result.is_success else ""
        stderr = str(result.error) if result.is_error else ""
        
        command = task  
        if hasattr(result, 'metadata') and result.metadata:
            code_history = result.metadata.get("code_history", [])
            if code_history:
                # Try to find the last successful execution
                found_success = False
                for code_info in reversed(code_history):
                    if code_info.get("status") == "success":
                        lang = code_info.get("lang", "bash")
                        code = code_info.get("code", "")
                        # String format code block: ```lang\ncode\n```
                        command = f"```{lang}\n{code}\n```"
                        found_success = True
                        break
                
                # If no successful execution found, use last code block
                if not found_success and code_history:
                    last_code = code_history[-1]
                    lang = last_code.get("lang", "bash")
                    code = last_code.get("code", "")
                    command = f"```{lang}\n{code}\n```"
        
        stdout_brief = stdout[:200] + "..." if len(stdout) > 200 else stdout
        stderr_brief = stderr[:200] + "..." if len(stderr) > 200 else stderr
        
        is_actual_success = result.is_success
        if result.is_success:
            first_200_chars = stdout[:200] if stdout else ""
            critical_failure_patterns = ["Task failed after", "[TASK_FAILED:"]
            has_critical_failure = any(pattern in first_200_chars for pattern in critical_failure_patterns)
            is_actual_success = not has_critical_failure
        
        step_info = await self._recorder.record_step(
            backend="shell",
            tool="shell_agent",
            command=command,
            result={
                "status": "success" if is_actual_success else "error",
                "exit_code": exit_code,
                "stdout": stdout_brief,
                "stderr": stderr_brief,
            },
            auto_screenshot=self.enable_screenshot
        )
        
        step_info["agent_name"] = self.agent_name
    
    async def _record_system(self, tool_call, result):
        tool_name = tool_call.function.name
        parameters = self._parse_arguments(tool_call.function.arguments)
        
        command = tool_name
        if parameters:
            key_params = []
            for key in ['path', 'file', 'directory', 'name', 'provider', 'backend']:
                if key in parameters and parameters[key]:
                    key_params.append(f"{parameters[key]}")
            if key_params:
                command = f"{tool_name}({', '.join(key_params[:2])})"
        
        result_str = str(result.content) if result.is_success else str(result.error)
        result_brief = result_str[:200] + "..." if len(result_str) > 200 else result_str
        
        is_actual_success = result.is_success
        if result.is_success and result_str:
            is_actual_success = not result_str.startswith("ERROR:")
        
        step_info = await self._recorder.record_step(
            backend="system",
            tool=tool_name,
            command=command,
            result={
                "status": "success" if is_actual_success else "error",
                "output": result_brief,
            },
            auto_screenshot=self.enable_screenshot
        )
        
        step_info["agent_name"] = self.agent_name
    
    async def _record_web(self, tool_call, result):
        tool_name = tool_call.function.name
        parameters = self._parse_arguments(tool_call.function.arguments)
        
        query = parameters.get("query", "")
        command = query if query else "deep_research"
        
        result_str = str(result.content) if result.is_success else str(result.error)
        
        is_actual_success = result.is_success
        if result.is_success and result_str:
            is_actual_success = not result_str.startswith("ERROR:")
        
        step_info = await self._recorder.record_step(
            backend="web",
            tool="deep_research_agent",
            command=command,
            result={
                "status": "success" if is_actual_success else "error",
                "output": result_str,  # Full output preserved for training/replay
            },
            auto_screenshot=self.enable_screenshot
        )
        
        # Add agent_name to step_info
        step_info["agent_name"] = self.agent_name
    
    async def add_metadata(self, key: str, value: Any):
        if self._recorder:
            await self._recorder.add_metadata(key, value)
    
    async def save_plan(self, plan: Dict[str, Any], agent_name: str = "GroundingAgent"):
        """
        Save agent plan to recording directory.
        This integrates planning information with execution trajectory.
        
        Args:
            plan: The plan data (usually containing task_updates or plan steps)
            agent_name: Name of the agent creating the plan
        """
        if not self._recorder or not self._is_started:
            logger.warning("Cannot save plan: recording not started")
            return
        
        try:
            plan_dir = Path(self._recorder.get_trajectory_dir()) / "plans"
            plan_dir.mkdir(exist_ok=True)
            
            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
            plan_data = {
                "version": timestamp,
                "created_at": datetime.datetime.now().isoformat(),
                "created_by": agent_name,
                "plan": plan
            }
            
            # Save versioned plan
            plan_file = plan_dir / f"plan_{timestamp}.json"
            with open(plan_file, 'w', encoding='utf-8') as f:
                json.dump(plan_data, f, indent=2, ensure_ascii=False)
            
            # Save current plan (latest)
            current_plan_file = plan_dir / "current_plan.json"
            with open(current_plan_file, 'w', encoding='utf-8') as f:
                json.dump(plan_data, f, indent=2, ensure_ascii=False)
            
            logger.debug(f"Saved plan to recording: {plan_file.name}")
        except Exception as e:
            logger.error(f"Failed to save plan: {e}")
    
    async def log_decision(
        self, 
        agent_name: str, 
        decision: str, 
        context: Optional[Dict[str, Any]] = None
    ):
        """
        Log agent decision with optional context.
        This provides insight into agent reasoning process.
        
        Args:
            agent_name: Name of the agent making the decision
            decision: Description of the decision
            context: Additional context information
        """
        if not self._recorder or not self._is_started:
            logger.warning("Cannot log decision: recording not started")
            return
        
        try:
            traj_dir = Path(self._recorder.get_trajectory_dir())
            log_file = traj_dir / "decisions.log"
            
            timestamp = datetime.datetime.now().isoformat()
            log_entry = f"[{timestamp}] {agent_name}: {decision}"
            if context:
                log_entry += f"\n  Context: {json.dumps(context, ensure_ascii=False)}"
            log_entry += "\n"
            
            with open(log_file, 'a', encoding='utf-8') as f:
                f.write(log_entry)
            
            logger.debug(f"Logged decision from {agent_name}")
        except Exception as e:
            logger.error(f"Failed to log decision: {e}")
    
    async def record_agent_action(
        self,
        agent_name: str,
        action_type: str,
        input_data: Optional[Dict[str, Any]] = None,
        reasoning: Optional[Dict[str, Any]] = None,
        output_data: Optional[Dict[str, Any]] = None,
        metadata: Optional[Dict[str, Any]] = None,
        related_tool_steps: Optional[list] = None,
        correlation_id: Optional[str] = None,
    ) -> Optional[Dict[str, Any]]:
        """
        Record an agent's action and decision-making process.
        
        Args:
            agent_name: Name of the agent performing the action
            action_type: Type of action (plan | execute | evaluate | monitor)
            input_data: Input data the agent received (simplified)
            reasoning: Agent's reasoning process (structured)
            output_data: Agent's output/decision (structured)
            metadata: Additional metadata (LLM model, tokens, duration, etc.)
            related_tool_steps: List of tool execution step numbers related to this action
            correlation_id: Optional correlation ID to link related events
            
        Returns:
            The recorded action info, or None if recording not started
        """
        if not self._action_recorder or not self._is_started:
            logger.debug("Cannot record agent action: recording not started")
            return None
        
        try:
            action_info = await self._action_recorder.record_action(
                agent_name=agent_name,
                action_type=action_type,
                input_data=input_data,
                reasoning=reasoning,
                output_data=output_data,
                metadata=metadata,
                related_tool_steps=related_tool_steps,
                correlation_id=correlation_id,
            )
            
            logger.debug(f"Recorded agent action: {agent_name} - {action_type}")
            return action_info
            
        except Exception as e:
            logger.error(f"Failed to record agent action: {e}")
            return None
    
    async def generate_summary(self) -> Dict[str, Any]:
        """
        Generate a comprehensive summary of the recording session.
        """
        if not self._recorder or not self._is_started:
            logger.warning("Cannot generate summary: recording not started")
            return {}
        
        try:
            from .action_recorder import load_agent_actions, analyze_agent_actions
            from .utils import load_trajectory_from_jsonl, analyze_trajectory
            
            traj_dir = self._recorder.get_trajectory_dir()
            
            # Load all recorded data
            trajectory = load_trajectory_from_jsonl(f"{traj_dir}/traj.jsonl")
            agent_actions = load_agent_actions(traj_dir)
            
            # Analyze data
            traj_stats = analyze_trajectory(trajectory)
            action_stats = analyze_agent_actions(agent_actions)
            
            # Build summary
            summary = {
                "task_id": self.task_id,
                "start_time": self._recorder.metadata.get("start_time", ""),
                "end_time": self._recorder.metadata.get("end_time", ""),
                "trajectory": {
                    "total_steps": traj_stats.get("total_steps", 0),
                    "success_count": traj_stats.get("success_count", 0),
                    "success_rate": traj_stats.get("success_rate", 0),
                    "by_backend": traj_stats.get("backends", {}),
                    "by_tool": traj_stats.get("tools", {}),
                },
                "agent_actions": {
                    "total_actions": action_stats.get("total_actions", 0),
                    "by_agent": action_stats.get("by_agent", {}),
                    "by_type": action_stats.get("by_type", {}),
                }
            }
            
            # Save summary to file
            summary_file = Path(traj_dir) / "summary.json"
            with open(summary_file, 'w', encoding='utf-8') as f:
                json.dump(summary, f, indent=2, ensure_ascii=False)
            
            logger.info(f"Generated summary: {summary_file}")
            return summary
            
        except Exception as e:
            logger.error(f"Failed to generate summary: {e}")
            return {}
    
    async def __aenter__(self):
        """Async context manager entry: await self.start() and return this manager."""
        await self.start()
        return self
    
    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit: await self.stop(); exceptions from the block are not suppressed."""
        await self.stop()
        # Returning False lets an exception raised inside the `async with` body propagate
        return False
    
    @property
    def recording_status(self) -> bool:
        """Whether recording is currently marked as started (the ``_is_started`` flag)."""
        return self._is_started
    
    @property
    def trajectory_dir(self) -> Optional[str]:
        if self._recorder:
            return str(self._recorder.get_trajectory_dir())
        return None
    
    @property
    def recording_client(self):
        """Return the recording client held by this manager (``self._recording_client``)."""
        return self._recording_client
    
    @property
    def screenshot_client(self):
        """Return the screenshot client held by this manager (``self._screenshot_client``)."""
        return self._screenshot_client
    
    @property
    def step_count(self) -> int:
        """Get current step count (the value of ``self._step_counter``)."""
        return self._step_counter


# Names exported by a star-import of this module
__all__ = [
    'RecordingManager',
]

================================================
FILE: anytool/recording/recorder.py
================================================
import datetime
import json
from typing import Any, Dict, List, Optional
from pathlib import Path

from anytool.utils.logging import Logger

logger = Logger.get_logger(__name__)


class TrajectoryRecorder:
    """
    Records the tool-execution steps of one task into a per-run directory.

    The directory is "<log_dir>/<task_name>_<timestamp>" (just the timestamp
    when no task name is given). It receives "metadata.json", "traj.jsonl"
    (one JSON object per recorded step), a "screenshots" sub-directory when
    screenshots are enabled and "recording.mp4" when video is enabled.

    Can be used as an async context manager; leaving the block calls
    finalize().
    """

    def __init__(
        self,
        task_name: str = "",
        log_dir: str = "./logs/trajectories",
        enable_screenshot: bool = True,
        enable_video: bool = False,
        server_url: Optional[str] = None,
    ):
        """
        Initialize trajectory recorder

        Creates the trajectory directory (and the screenshots directory when
        enabled) and writes the initial metadata.json.

        Args:
            task_name: task name (optional, will be saved in metadata)
            log_dir: log directory
            enable_screenshot: whether to save screenshots (through platform.ScreenshotClient)
            enable_video: whether to enable video recording (through platform.RecordingClient)
            server_url: local_server address (None = read from config/environment variables)
        """
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

        # Simplify naming rule: add prefix if task_name is provided, otherwise use timestamp only
        if task_name:
            folder_name = f"{task_name}_{timestamp}"
        else:
            folder_name = timestamp

        self.trajectory_dir = Path(log_dir) / folder_name
        self.trajectory_dir.mkdir(parents=True, exist_ok=True)

        # Create screenshots directory
        if enable_screenshot:
            self.screenshots_dir = self.trajectory_dir / "screenshots"
            self.screenshots_dir.mkdir(exist_ok=True)
        else:
            self.screenshots_dir = None

        # Config
        self.task_name = task_name
        self.enable_screenshot = enable_screenshot
        self.enable_video = enable_video
        self.server_url = server_url

        # Trajectory data
        self.steps: List[Dict] = []
        self.step_counter = 0

        # Metadata
        self.metadata = {
            "task_name": task_name,
            "start_time": datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S"),
            "enable_screenshot": enable_screenshot,
            "enable_video": enable_video,
        }

        # Video recorder (lazy initialization)
        self._video_recorder = None

        # Save initial metadata
        self._save_metadata()

    async def record_step(
        self,
        backend: str,
        tool: str,
        command: str,
        result: Optional[Dict[str, Any]] = None,
        parameters: Optional[Dict[str, Any]] = None,
        screenshot: Optional[bytes] = None,
        extra: Optional[Dict[str, Any]] = None,
        auto_screenshot: bool = False,
    ) -> Dict[str, Any]:
        """
        Record one step operation

        The step is appended to self.steps and written to traj.jsonl
        immediately. The caller's `extra` dict is left unmodified.

        Args:
            backend: backend type (gui/shell/mcp/web/system)
            tool: tool name (name of BaseTool)
            command: human-readable core command
            result: execution result
            parameters: tool parameters
            screenshot: screenshot bytes (if provided)
            extra: extra information (e.g. server field for MCP)
            auto_screenshot: whether to automatically capture screenshot (through platform.ScreenshotClient)

        Returns:
            The step dict that was recorded.
        """
        self.step_counter += 1
        step_num = self.step_counter
        timestamp = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S")

        # Work on a shallow copy: the pops below must not strip keys from the
        # dict the caller passed in.
        extra = dict(extra) if extra else {}

        step_info = {
            "step": step_num,
            "timestamp": timestamp,
            "backend": backend,
        }

        # MCP needs to record server (between backend and tool)
        if "server" in extra:
            step_info["server"] = extra.pop("server")

        # General fields
        step_info["tool"] = tool  # BaseTool name
        step_info["command"] = command  # human-readable core command

        # parameters unified write to top level
        if parameters:
            step_info["parameters"] = parameters
        elif "parameters" in extra:
            step_info["parameters"] = extra.pop("parameters")

        # Execution result remains original
        step_info["result"] = result or {}

        # Other extra information (e.g. coordinates/url) only added when needed
        if extra:
            step_info.update(extra)

        # Automatic screenshot (if enabled and no screenshot provided)
        if auto_screenshot and screenshot is None and self.enable_screenshot:
            screenshot = await self._capture_screenshot()

        # Save screenshot
        if screenshot and self.enable_screenshot and self.screenshots_dir:
            screenshot_filename = f"step_{step_num:03d}.png"
            screenshot_path = self.screenshots_dir / screenshot_filename
            with open(screenshot_path, "wb") as f:
                f.write(screenshot)
            step_info["screenshot"] = f"screenshots/{screenshot_filename}"

        # Add to trajectory
        self.steps.append(step_info)

        # Save to traj.jsonl in real time
        await self._append_to_traj_file(step_info)

        return step_info

    async def _capture_screenshot(self) -> Optional[bytes]:
        """
        Capture screenshot automatically through platform.ScreenshotClient

        Returns the value of the client's capture(), or None when the client
        cannot be created or the capture fails.
        """
        try:
            from anytool.platform import ScreenshotClient

            # Lazy initialization screenshot client. The attribute is created
            # on the first attempt only; a failed attempt stores None so it is
            # not retried on every step.
            if not hasattr(self, '_screenshot_client'):
                try:
                    self._screenshot_client = ScreenshotClient(base_url=self.server_url)
                except Exception as e:
                    logger.debug(f"Failed to create screenshot client: {e}")
                    self._screenshot_client = None
                    return None

            if self._screenshot_client is None:
                return None

            return await self._screenshot_client.capture()

        except Exception as e:
            # Best effort: a missing screenshot must not fail the step
            logger.debug(f"Failed to capture screenshot: {e}")
            return None

    async def save_init_screenshot(self, screenshot: bytes, filename: str = "init.png"):
        """Save initial screenshot to screenshots dir and update metadata."""
        if not (self.enable_screenshot and self.screenshots_dir and screenshot):
            return
        try:
            filepath = self.screenshots_dir / filename
            with open(filepath, "wb") as f:
                f.write(screenshot)
            # Update metadata
            self.metadata["init_screenshot"] = f"screenshots/{filename}"
            self._save_metadata()
        except Exception as e:
            logger.debug(f"Failed to save initial screenshot: {e}")

    async def _append_to_traj_file(self, step_info: Dict[str, Any]):
        """Add step to traj.jsonl file (one JSON object per line)"""
        traj_file = self.trajectory_dir / "traj.jsonl"
        with open(traj_file, "a", encoding="utf-8") as f:
            f.write(json.dumps(step_info, ensure_ascii=False))
            f.write("\n")

    def _save_metadata(self):
        """Save metadata to metadata.json, overwriting the previous content"""
        metadata_file = self.trajectory_dir / "metadata.json"
        with open(metadata_file, "w", encoding="utf-8") as f:
            json.dump(self.metadata, f, indent=2, ensure_ascii=False)

    async def start_video_recording(self):
        """
        Start video recording (through platform.RecordingClient)

        Does nothing when video is disabled. If the recorder cannot be
        started, no video recorder is kept.
        """
        if not self.enable_video:
            return

        try:
            from anytool.recording.video import VideoRecorder

            video_path = self.trajectory_dir / "recording.mp4"
            self._video_recorder = VideoRecorder(str(video_path), base_url=self.server_url)

            success = await self._video_recorder.start()
            if not success:
                self._video_recorder = None

        except Exception as e:
            logger.warning(f"Video recording failed to start: {e}")
            self._video_recorder = None

    async def stop_video_recording(self):
        """Stop video recording, if one is running"""
        if self._video_recorder:
            try:
                await self._video_recorder.stop()
            except Exception as e:
                # Best effort: a failing stop must not break finalize(),
                # but leave a trace of what went wrong.
                logger.debug(f"Failed to stop video recording: {e}")
            finally:
                self._video_recorder = None

    async def add_metadata(self, key: str, value: Any):
        """Add metadata and persist metadata.json"""
        self.metadata[key] = value
        self._save_metadata()

    async def finalize(self):
        """
        Finalize recording, save final information

        Writes end time, total step count and per-backend step counts to the
        metadata, then releases the screenshot client and stops video
        recording.
        """
        self.metadata["end_time"] = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
        self.metadata["total_steps"] = self.step_counter

        # Backend statistics
        backend_counts = {}
        for step in self.steps:
            backend = step.get("backend", "unknown")
            backend_counts[backend] = backend_counts.get(backend, 0) + 1
        self.metadata["backend_counts"] = backend_counts

        self._save_metadata()

        # Close internal ScreenshotClient, avoid unclosed session warning
        await self._cleanup_screenshot_client()

        # Stop video recording
        await self.stop_video_recording()

        logger.info(f"Recording completed: {self.trajectory_dir} (steps: {self.step_counter})")

    async def _cleanup_screenshot_client(self):
        """Cleanup screenshot client resources"""
        if hasattr(self, '_screenshot_client') and self._screenshot_client:
            try:
                await self._screenshot_client.close()
            except Exception as e:
                logger.debug(f"Failed to close screenshot client: {e}")
            finally:
                self._screenshot_client = None

    def __del__(self):
        """Ensure resources are cleaned up even if finalize() is not called"""
        # Note: This is a safety net. Best practice is to call finalize() explicitly.
        # Only a warning is emitted here; nothing is actually released.
        if hasattr(self, '_video_recorder') and self._video_recorder:
            logger.warning(
                f"TrajectoryRecorder for {self.trajectory_dir} was not finalized properly. "
                "Consider calling finalize() or using async context manager."
            )

    def get_trajectory_dir(self) -> str:
        """Get trajectory directory path"""
        return str(self.trajectory_dir)

    async def __aenter__(self):
        """Async context manager entry"""
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Async context manager exit - ensures finalize() is called"""
        await self.finalize()
        return False

async def record_gui_step(
    recorder: TrajectoryRecorder,
    command: str,
    task_description: str,
    result: Dict[str, Any] = None,
    screenshot: Optional[bytes] = None,
    max_steps: int = 10,
    tool: str = "gui_agent",
) -> Dict[str, Any]:
    """
    Record a GUI step through the given recorder.

    The task description and max_steps are stored as the step parameters.

    Args:
        recorder: recorder instance
        command: actual executed pyautogui command (e.g. "pyautogui.moveTo(960, 540)")
        task_description: task description
        result: execution result
        screenshot: screenshot
        max_steps: maximum number of steps
        tool: tool name

    Returns:
        The step dict returned by recorder.record_step()
    """
    step_fields = {
        "backend": "gui",
        "tool": tool,
        "command": command,
        "result": result,
        "parameters": {
            "task_description": task_description,
            "max_steps": max_steps,
        },
        "screenshot": screenshot,
    }
    return await recorder.record_step(**step_fields)


async def record_shell_step(
    recorder: TrajectoryRecorder,
    command: str,
    exit_code: int,
    stdout: Optional[str] = None,
    stderr: Optional[str] = None,
    screenshot: Optional[bytes] = None,
    tool: str = "shell_agent",
) -> Dict[str, Any]:
    """
    Record a Shell step through the given recorder.

    The status is "success" for exit code 0 and "error" otherwise. Output
    streams longer than 200 characters are shortened to a preview.

    Args:
        recorder: recorder instance
        command: command executed
        exit_code: exit code
        stdout: standard output (simplified version, not saved completely)
        stderr: standard error (simplified version)
        screenshot: screenshot
        tool: tool name

    Returns:
        The step dict returned by recorder.record_step()
    """
    def _shorten(text):
        # None and short texts pass through unchanged
        if text and len(text) > 200:
            return text[:200] + "..."
        return text

    outcome = {
        "status": "success" if exit_code == 0 else "error",
        "exit_code": exit_code,
        "stdout": _shorten(stdout),
        "stderr": _shorten(stderr),
    }

    return await recorder.record_step(
        backend="shell",
        tool=tool,
        command=command,
        result=outcome,
        screenshot=screenshot,
    )

async def record_mcp_step(
    recorder: TrajectoryRecorder,
    server: str,
    tool_name: str,
    parameters: Dict[str, Any],
    result: Any,
    screenshot: Optional[bytes] = None,
) -> Dict[str, Any]:
    """
    Record one MCP tool call as a step on the "mcp" backend.

    The step's command is "<server>.<tool_name>", the status is always
    recorded as "success", and the output is ``str(result)`` cut to 200
    characters (with "..." appended when it was cut).

    Args:
        recorder: recorder instance the step is written to
        server: MCP server name (also stored under the step's extra data)
        tool_name: tool name
        parameters: tool parameters, forwarded unchanged
        result: execution result; only its string form is stored
        screenshot: optional screenshot bytes, forwarded unchanged

    Returns:
        Whatever ``recorder.record_step`` returns.
    """
    qualified_name = f"{server}.{tool_name}"

    output_text = str(result)
    if len(output_text) > 200:
        output_text = output_text[:200] + "..."

    return await recorder.record_step(
        backend="mcp",
        tool=tool_name,
        command=qualified_name,
        result={"status": "success", "output": output_text},
        parameters=parameters,
        screenshot=screenshot,
        extra={"server": server},
    )


async def record_web_step(
    recorder: TrajectoryRecorder,
    query: str,
    result: Dict[str, Any],
    screenshot: Optional[bytes] = None,
    tool: str = "deep_research_agent",
) -> Dict[str, Any]:
    """
    Record one web (deep research) query as a step on the "web" backend.

    Args:
        recorder: recorder instance the step is written to
        query: search query; stored verbatim as the step's command
        result: execution result, forwarded unchanged
        screenshot: optional screenshot bytes, forwarded unchanged
        tool: tool name stored with the step

    Returns:
        Whatever ``recorder.record_step`` returns.
    """
    step_fields = dict(
        backend="web",
        tool=tool,
        command=query,  # the query itself doubles as the command
        result=result,
        screenshot=screenshot,
    )
    return await recorder.record_step(**step_fields)

================================================
FILE: anytool/recording/utils.py
================================================
import json
import os
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from anytool.utils.logging import Logger

logger = Logger.get_logger(__name__)

def load_trajectory_from_jsonl(jsonl_path: str) -> List[Dict[str, Any]]:
    """
    Load trajectory steps from a JSON Lines file.

    Blank lines are ignored. A line that is not valid JSON (for example a
    final line truncated by an interrupted write) is skipped with a warning
    instead of discarding every step that did parse.

    Args:
        jsonl_path: path to the ``.jsonl`` file

    Returns:
        The parsed steps in file order; an empty list when the file does not
        exist or cannot be read.
    """
    # Check if file exists first
    if not os.path.exists(jsonl_path):
        logger.debug(f"No trajectory file found at {jsonl_path} (this is normal for knowledge-only tasks)")
        return []

    trajectory: List[Dict[str, Any]] = []
    skipped = 0

    try:
        with open(jsonl_path, "r", encoding="utf-8") as f:
            for line_no, line in enumerate(f, start=1):
                line = line.strip()
                if not line:
                    continue
                try:
                    trajectory.append(json.loads(line))
                except json.JSONDecodeError as e:
                    # Keep going: one bad record should not hide the rest.
                    skipped += 1
                    logger.warning(f"Skipping malformed line {line_no} in {jsonl_path}: {e}")
    except Exception as e:
        # I/O or decoding failure on the file itself: same contract as before.
        logger.error(f"Failed to load trajectory from {jsonl_path}: {e}")
        return []

    if skipped:
        logger.info(f"Loaded {len(trajectory)} steps from {jsonl_path} ({skipped} malformed lines skipped)")
    else:
        logger.info(f"Loaded {len(trajectory)} steps from {jsonl_path}")
    return trajectory


def load_metadata(trajectory_dir: str) -> Optional[Dict[str, Any]]:
    """
    Read ``metadata.json`` from a trajectory directory.

    Args:
        trajectory_dir: directory expected to contain ``metadata.json``

    Returns:
        The parsed JSON content, or None (after logging a warning) when the
        file cannot be opened or parsed.
    """
    metadata_path = os.path.join(trajectory_dir, "metadata.json")

    try:
        with open(metadata_path, "r", encoding="utf-8") as handle:
            return json.load(handle)
    except Exception as e:
        logger.warning(f"Failed to load metadata from {metadata_path}: {e}")

    return None


def format_trajectory_for_export(
    trajectory: List[Dict[str, Any]],
    format_type: str = "compact"
) -> str:
    """
    Render a trajectory as text in one of the supported layouts.

    Args:
        trajectory: list of step dictionaries
        format_type: "compact", "detailed" or "markdown"

    Returns:
        The formatted trajectory.

    Raises:
        ValueError: if ``format_type`` is not one of the supported names
    """
    if format_type == "compact":
        return _format_compact(trajectory)

    if format_type == "detailed":
        return _format_detailed(trajectory)

    if format_type == "markdown":
        return _format_markdown(trajectory)

    raise ValueError(f"Unknown format type: {format_type}")


def _format_compact(trajectory: List[Dict[str, Any]]) -> str:
    """Compact format: one line per step."""
    lines = []
    for step in trajectory:
        step_num = step.get("step", "?")
        backend = step.get("backend", "?")
        server = step.get("server")
        tool = step.get("tool", "?")
        result_status = "success" if step.get("result", {}).get("status") == "success" else "error"
        
        # Include server name for MCP backend
        backend_str = f"{backend}@{server}" if server else backend
        lines.append(f"Step {step_num}: [{backend_str}] {tool} -> {result_status}")
    
    return "\n".join(lines)


def _format_detailed(trajectory: List[Dict[str, Any]]) -> str:
    """Detailed format: multiple lines per step with parameters."""
    lines = []
    for step in trajectory:
        step_num = step.get("step", "?")
        timestamp = step.get("timestamp", "?")
        backend = step.get("backend", "?")
        server = step.get("server")
        tool = step.get("tool", "?")
        command = step.get("command", "?")
        parameters = step.get("parameters", {})
        result = step.get("result", {})
        
        from anytool.utils.display import Box, BoxStyle
        
        box = Box(width=66, style=BoxStyle.ROUNDED, color='bl')
        lines.append("")
        lines.append(box.top_line(0))
        lines.append(box.text_line(f"Step {step_num} ({timestamp})", align='center', indent=0, text_color='c'))
        lines.append(box.separator_line(0))
        lines.append(box.text_line(f"Backend: {backend}", indent=0))
        if server:
            lines.append(box.text_line(f"Server: {server}", indent=0))
        lines.append(box.text_line(f"Tool: {tool}", indent=0))
        lines.append(box.text_line(f"Command: {command}", indent=0))
        lines.append(box.separator_line(0))
        # Parameters and result can be multi-line
        param_str = json.dumps(parameters, indent=2)
        for param_line in param_str.split('\n'):
            lines.append(box.text_line(param_line, indent=0))
        lines.append(box.separator_line(0))
        result_str = json.dumps(result, indent=2)
        for result_line in result_str.split('\n'):
            lines.append(box.text_line(result_line, indent=0))
        lines.append(box.bottom_line(0))
    
    return "\n".join(lines)


def _format_markdown(trajectory: List[Dict[str, Any]]) -> str:
    """Markdown format: table format."""
    lines = [
        "# Trajectory",
        "",
        "| Step | Backend | Server | Tool | Status | Screenshot |",
        "|------|---------|--------|------|--------|------------|"
    ]
    
    for step in trajectory:
        step_num = step.get("step", "?")
        backend = step.get("backend", "?")
        server = step.get("server", "-")
        tool = step.get("tool", "?")
        result_status = "✓" if step.get("result", {}).get("status") == "success" else "✗"
        screenshot = "📷" if step.get("screenshot") else ""
        
        lines.append(f"| {step_num} | {backend} | {server} | {tool} | {result_status} | {screenshot} |")
    
    return "\n".join(lines)


def analyze_trajectory(trajectory: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Analyze trajectory and return statistics.

    Args:
        trajectory: list of step dictionaries

    Returns:
        A dictionary with:
            total_steps: number of steps
            success_count: steps whose result is a dict with status "success"
            success_rate: success_count / total_steps (0.0 when empty)
            backends: step count per ``backend`` value ("unknown" if absent)
            tools: step count per ``tool`` value ("unknown" if absent)
        For an empty trajectory the same keys are returned with zero/empty
        values, plus the legacy ``action_types`` key that the empty case
        historically used in place of ``tools``.
    """
    if not trajectory:
        return {
            "total_steps": 0,
            "success_count": 0,
            "success_rate": 0.0,
            "backends": {},
            "tools": {},
            # Legacy key kept so existing readers of the empty result still work.
            "action_types": {}
        }

    total_steps = len(trajectory)
    success_count = 0
    backends = {}
    tools = {}

    for step in trajectory:
        # Count successes; "result" may be present but null, so guard first.
        result = step.get("result")
        if isinstance(result, dict) and result.get("status") == "success":
            success_count += 1

        # Count backends
        backend = step.get("backend", "unknown")
        backends[backend] = backends.get(backend, 0) + 1

        # Count tool types
        tool = step.get("tool", "unknown")
        tools[tool] = tools.get(tool, 0) + 1

    return {
        "total_steps": total_steps,
        "success_count": success_count,
        "success_rate": success_count / total_steps if total_steps > 0 else 0.0,
        "backends": backends,
        "tools": tools
    }


def load_recording_session(recording_dir: str) -> Dict[str, Any]:
    """
    Load a recording directory into a single session dictionary.

    Reads, when present: ``traj.jsonl`` (and derives statistics from it),
    ``metadata.json``, every ``plans/plan_*.json`` in sorted filename order,
    and the lines of ``decisions.log``. Unreadable plan or decision files are
    logged as warnings and skipped.

    Args:
        recording_dir: Path to recording directory

    Returns:
        An empty dict when the directory does not exist; otherwise
        {
            "trajectory": List[Dict],
            "metadata": Dict or None,
            "plans": List[Dict],
            "decisions": List[str],
            "statistics": Dict
        }
    """
    root = Path(recording_dir)

    if not root.exists():
        logger.error(f"Recording directory not found: {recording_dir}")
        return {}

    # Trajectory and the statistics derived from it
    steps = []
    stats = {}
    traj_file = root / "traj.jsonl"
    if traj_file.exists():
        steps = load_trajectory_from_jsonl(str(traj_file))
        stats = analyze_trajectory(steps)

    # Metadata
    meta = None
    if (root / "metadata.json").exists():
        meta = load_metadata(str(root))

    # Plans, in sorted filename order
    plan_list = []
    plans_dir = root / "plans"
    if plans_dir.exists():
        for plan_file in sorted(plans_dir.glob("plan_*.json")):
            try:
                with open(plan_file, 'r', encoding='utf-8') as handle:
                    plan_list.append(json.load(handle))
            except Exception as e:
                logger.warning(f"Failed to load plan {plan_file}: {e}")

    # Decisions log, one entry per raw line (newlines kept)
    decision_lines = []
    decisions_file = root / "decisions.log"
    if decisions_file.exists():
        try:
            with open(decisions_file, 'r', encoding='utf-8') as handle:
                decision_lines = handle.readlines()
        except Exception as e:
            logger.warning(f"Failed to load decisions: {e}")

    return {
        "trajectory": steps,
        "metadata": meta,
        "plans": plan_list,
        "decisions": decision_lines,
        "statistics": stats
    }


def filter_trajectory(
    trajectory: List[Dict[str, Any]],
    backend: Optional[str] = None,
    tool: Optional[str] = None,
    status: Optional[str] = None,
    time_range: Optional[Tuple[str, str]] = None
) -> List[Dict[str, Any]]:
    """
    Return the steps that match every filter that was supplied.

    Args:
        trajectory: list of step dictionaries
        backend: keep only steps whose ``backend`` equals this value
        tool: keep only steps whose ``tool`` equals this value
        status: keep only steps whose result is a dict with this ``status``
        time_range: ``(start, end)`` pair; keeps steps whose ``timestamp``
            string lies between them inclusive (plain string comparison)

    Returns:
        The filtered steps. When no filter is given the input list itself is
        returned, not a copy. Steps with a missing/null result never match a
        status filter, and a missing/null timestamp is compared as "".
    """
    def _status_of(step: Dict[str, Any]) -> Any:
        # "result" can be present but null; treat that as "no status".
        result = step.get("result")
        return result.get("status") if isinstance(result, dict) else None

    filtered = trajectory

    if backend:
        filtered = [s for s in filtered if s.get("backend") == backend]

    if tool:
        filtered = [s for s in filtered if s.get("tool") == tool]

    if status:
        filtered = [s for s in filtered if _status_of(s) == status]

    if time_range:
        start_time, end_time = time_range
        filtered = [
            s for s in filtered
            if start_time <= (s.get("timestamp") or "") <= end_time
        ]

    return filtered


def extract_errors(trajectory: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Return the steps whose result is a dict with ``status == "error"``.

    Steps with a missing, null or non-dict result are not counted as errors
    (and no longer raise).
    """
    errors = []
    for step in trajectory:
        # "result" can be present but null, so guard before reading status.
        result = step.get("result")
        if isinstance(result, dict) and result.get("status") == "error":
            errors.append(step)
    return errors


def generate_summary_report(recording_dir: str, output_file: Optional[str] = None) -> str:
    """
    Build a Markdown summary of a recording session.

    Sections are emitted only when the corresponding data is present:
    Metadata, Statistics (with backend and tool distributions), Plans,
    Errors (first 5) and Decisions (first 10).

    Args:
        recording_dir: path to the recording directory
        output_file: when given, the report is also written to this path;
            a write failure is logged and does not affect the return value

    Returns:
        The report text, or "Error: Could not load recording session" when
        the session could not be loaded.
    """
    session = load_recording_session(recording_dir)

    if not session:
        return "Error: Could not load recording session"

    lines = []
    lines.append("# Recording Session Summary\n")

    # Metadata section
    if session["metadata"]:
        lines.append("## Metadata")
        metadata = session["metadata"]
        # Tolerate a null list and non-string entries when joining.
        backend_names = ', '.join(str(b) for b in (metadata.get('backends') or []))
        lines.append(f"- **Task ID**: {metadata.get('task_id', 'N/A')}")
        lines.append(f"- **Start Time**: {metadata.get('start_time', 'N/A')}")
        lines.append(f"- **End Time**: {metadata.get('end_time', 'N/A')}")
        lines.append(f"- **Total Steps**: {metadata.get('total_steps', 0)}")
        lines.append(f"- **Backends**: {backend_names}")
        lines.append("")

    # Statistics section
    if session["statistics"]:
        lines.append("## Statistics")
        stats = session["statistics"]
        lines.append(f"- **Total Steps**: {stats.get('total_steps', 0)}")
        lines.append(f"- **Success Count**: {stats.get('success_count', 0)}")
        lines.append(f"- **Success Rate**: {stats.get('success_rate', 0):.2%}")
        lines.append("")

        lines.append("### Backend Distribution")
        for backend, count in stats.get('backends', {}).items():
            lines.append(f"- {backend}: {count}")
        lines.append("")

        lines.append("### Tool Distribution")
        for tool, count in sorted(stats.get('tools', {}).items(), key=lambda x: x[1], reverse=True):
            lines.append(f"- {tool}: {count}")
        lines.append("")

    # Plans section
    if session["plans"]:
        lines.append(f"## Plans ({len(session['plans'])} total)")
        for i, plan in enumerate(session["plans"], 1):
            lines.append(f"### Plan {i}")
            lines.append(f"- Created: {plan.get('created_at', 'N/A')}")
            lines.append(f"- Created by: {plan.get('created_by', 'N/A')}")
            plan_data = plan.get('plan') or {}
            if 'task_updates' in plan_data:
                lines.append(f"- Tasks: {len(plan_data['task_updates'])}")
            lines.append("")

    # Errors section
    if session["trajectory"]:
        # Collect (step, result) pairs; a null or non-dict result is not an error.
        errors = []
        for step in session["trajectory"]:
            result = step.get("result")
            if isinstance(result, dict) and result.get("status") == "error":
                errors.append((step, result))

        if errors:
            lines.append(f"## Errors ({len(errors)} total)")
            for error, result in errors[:5]:  # Show first 5 errors
                lines.append(f"- Step {error.get('step')}: {error.get('backend')} - {error.get('tool')}")
                # Shell steps store their message under "stderr" rather than
                # "output"; str() keeps the slice below safe for any value.
                error_msg = str(result.get('output') or result.get('stderr') or 'No error message')
                lines.append(f"  ```\n  {error_msg[:200]}\n  ```")
            if len(errors) > 5:
                lines.append(f"  ... and {len(errors) - 5} more errors")
            lines.append("")

    # Decisions section
    if session["decisions"]:
        lines.append(f"## Decisions ({len(session['decisions'])} total)")
        for decision in session["decisions"][:10]:  # Show first 10 decisions
            lines.append(f"  {decision.strip()}")
        if len(session["decisions"]) > 10:
            lines.append(f"  ... and {len(session['decisions']) - 10} more decisions")
        lines.append("")

    report = "\n".join(lines)

    # Save to file if requested
    if output_file:
        try:
            with open(output_file, 'w', encoding='utf-8') as f:
                f.write(report)
            logger.info(f"Report saved to {output_file}")
        except Exception as e:
            logger.error(f"Failed to save report: {e}")

    return report


def compare_recordings(recording_dir1: str, recording_dir2: str) -> Dict[str, Any]:
    """
    Compare the trajectory statistics of two recording directories.

    Returns:
        A dict with a ``session1`` and ``session2`` summary (path, total
        steps, success rate, backend counts) and a ``differences`` entry
        holding session2 minus session1 for step count and success rate.
        Missing statistics are treated as zero/empty.
    """
    first_stats = load_recording_session(recording_dir1).get("statistics", {})
    second_stats = load_recording_session(recording_dir2).get("statistics", {})

    def _summary(path: str, stats: Dict[str, Any]) -> Dict[str, Any]:
        # Per-session view: where it came from plus its headline numbers.
        return {
            "path": path,
            "total_steps": stats.get("total_steps", 0),
            "success_rate": stats.get("success_rate", 0),
            "backends": stats.get("backends", {})
        }

    comparison = {
        "session1": _summary(recording_dir1, first_stats),
        "session2": _summary(recording_dir2, second_stats),
    }
    comparison["differences"] = {
        "step_diff": second_stats.get("total_steps", 0) - first_stats.get("total_steps", 0),
        "success_rate_diff": second_stats.get("success_rate", 0) - first_stats.get("success_rate", 0)
    }
    return comparison

================================================
FILE: anytool/recording/video.py
================================================
"""
Video Recorder

Communicates with local_server through platform.RecordingClient
Supports local and remote recording (through configuration LOCAL_SERVER_URL)
"""

from pathlib import Path
from typing import Optional

from anytool.utils.logging import Logger
from anytool.platform import RecordingClient

logger = Logger.get_logger(__name__)


class VideoRecorder:
    """
    Start/stop wrapper around a ``RecordingClient``.

    ``is_recording`` tracks whether a recording started by this object is
    believed to be running. The client is created on the first ``start()``
    and is closed and discarded by every ``stop()`` attempt that had a client.
    """

    def __init__(
        self,
        output_path: str,
        base_url: Optional[str] = None,
    ):
        """
        Initialize video recorder

        Args:
            output_path: output video path
            base_url: local_server address (None = read from config/environment variables)
        """
        self.output_path = Path(output_path)
        self.base_url = base_url
        self.is_recording = False
        self._client: Optional[RecordingClient] = None

    async def start(self) -> bool:
        """
        Start recording screen.

        Returns:
            True when recording was started; False when already recording,
            when the client reports failure, or when an exception occurs
            (exceptions are logged, not raised).
        """
        if self.is_recording:
            return False

        try:
            if self._client is None:
                self._client = RecordingClient(base_url=self.base_url)

            success = await self._client.start_recording()

            if success:
                self.is_recording = True
                logger.info("Video recording started")
                return True
            else:
                logger.warning("Video recording failed to start")
                return False

        except Exception as e:
            logger.warning(f"Video recording failed to start: {e}")
            return False

    async def stop(self) -> bool:
        """
        Stop recording screen and save the video to ``output_path``.

        Whatever the outcome, the client is closed and discarded and
        ``is_recording`` is reset, so the recorder can be started again.
        (Previously a failed stop left ``is_recording`` True with no client,
        which made every later ``start()`` and ``stop()`` a no-op.)

        Returns:
            True when the client returned video data; False when not
            recording, when there is no client, when no data came back, or
            when an exception occurs (exceptions are logged, not raised).
        """
        if not self.is_recording:
            return False

        client = self._client
        if client is None:
            # Nothing to stop; clear the stale flag rather than staying stuck.
            self.is_recording = False
            return False

        try:
            video_bytes = await client.end_recording(dest=str(self.output_path))

            if video_bytes:
                video_size_mb = len(video_bytes) / (1024 * 1024)
                logger.info(f"Video recording stopped ({video_size_mb:.2f} MB)")
                return True

            logger.warning("Video recording failed to stop")
            return False

        except Exception as e:
            logger.warning(f"Video recording failed to stop: {e}")
            return False
        finally:
            # The client is dropped below, so this session cannot be retried:
            # the flag must be cleared on failure as well as on success.
            self.is_recording = False
            try:
                await client.close()
            except Exception as close_error:
                # Best effort: a failed close must not mask the stop result.
                logger.debug(f"Ignoring error while closing recording client: {close_error}")
            self._client = None


__all__ = ['VideoRecorder']

================================================
FILE: anytool/recording/viewer.py
================================================
"""
Recording Viewer
Convenient tools for viewing and analyzing recording sessions.
"""

import json
from pathlib import Path
from typing import Optional, Dict, Any, List

from anytool.utils.logging import Logger
from .utils import load_recording_session, generate_summary_report
from .action_recorder import load_agent_actions, analyze_agent_actions, format_agent_actions

logger = Logger.get_logger(__name__)


class RecordingViewer:
    """
    Viewer for analyzing recording sessions.

    Provides convenient methods to:
    - Load and display recordings
    - Analyze agent behaviors
    - Generate reports

    The session (trajectory, metadata, plans, decisions, statistics) is
    loaded once in the constructor; agent actions are re-read from the
    recording directory by each method that needs them.
    """

    def __init__(self, recording_dir: str):
        """
        Initialize viewer with a recording directory.

        Args:
            recording_dir: Path to recording directory

        Raises:
            ValueError: if the directory does not exist
        """
        self.recording_dir = Path(recording_dir)

        if not self.recording_dir.exists():
            raise ValueError(f"Recording directory not found: {recording_dir}")

        # Load session data
        self.session = load_recording_session(str(self.recording_dir))

        logger.info(f"Loaded recording from {recording_dir}")

    def show_summary(self) -> str:
        """
        Display a summary of the recording.

        Returns:
            Formatted summary string, or "No metadata available" when the
            session has no metadata.
        """
        if not self.session.get("metadata"):
            return "No metadata available"

        metadata = self.session["metadata"]
        stats = self.session.get("statistics", {})

        lines = []
        lines.append("=" * 70)
        lines.append("RECORDING SUMMARY")
        lines.append("=" * 70)
        lines.append(f"Task ID: {metadata.get('task_id', 'N/A')}")
        lines.append(f"Start: {metadata.get('start_time', 'N/A')}")
        lines.append(f"End: {metadata.get('end_time', 'N/A')}")
        lines.append(f"Total Steps: {metadata.get('total_steps', 0)}")
        lines.append("")

        lines.append("Statistics:")
        lines.append(f"  - Success Rate: {stats.get('success_rate', 0):.2%}")
        lines.append(f"  - Success Count: {stats.get('success_count', 0)}/{stats.get('total_steps', 0)}")
        lines.append("")

        if stats.get("backends"):
            lines.append("Backend Usage:")
            for backend, count in sorted(stats["backends"].items(), key=lambda x: x[1], reverse=True):
                lines.append(f"  - {backend}: {count}")

        lines.append("=" * 70)

        return "\n".join(lines)

    def show_agent_actions(self, format_type: str = "compact", agent_name: Optional[str] = None) -> str:
        """
        Format the recorded agent actions, optionally for a single agent.

        Args:
            format_type: layout name passed through to ``format_agent_actions``
            agent_name: when given, only actions with this ``agent_name`` are shown

        Returns:
            A header plus the formatted actions, or a "No agent actions found"
            message when nothing matches.
        """
        actions = load_agent_actions(str(self.recording_dir))

        if agent_name:
            actions = [a for a in actions if a.get("agent_name") == agent_name]

        if not actions:
            return f"No agent actions found{' for ' + agent_name if agent_name else ''}"

        # Add header
        header = f"\nAGENT ACTIONS ({len(actions)} total)"
        if agent_name:
            header += f" - {agent_name}"
        header += "\n" + "=" * 70

        # Format actions
        formatted = format_agent_actions(actions, format_type)

        return header + "\n" + formatted

    def analyze_agents(self) -> str:
        """
        Summarize agent actions by agent and by action type.

        Returns:
            Text listing the total number of actions and, for each agent and
            each action type, its count and share of the total.
        """
        actions = load_agent_actions(str(self.recording_dir))
        stats = analyze_agent_actions(actions)
        # Read once with a default so a missing key cannot raise below.
        total_actions = stats.get('total_actions', 0)

        def _share(count: int) -> float:
            return (count / total_actions * 100) if total_actions > 0 else 0

        lines = []
        lines.append("\nAGENT ANALYSIS")
        lines.append("=" * 70)
        lines.append(f"Total Actions: {total_actions}")
        lines.append("")

        lines.append("By Agent:")
        for agent, count in sorted(stats.get('by_agent', {}).items(), key=lambda x: x[1], reverse=True):
            lines.append(f"  - {agent}: {count} ({_share(count):.1f}%)")
        lines.append("")

        lines.append("By Action Type:")
        for action_type, count in sorted(stats.get('by_type', {}).items(), key=lambda x: x[1], reverse=True):
            lines.append(f"  - {action_type}: {count} ({_share(count):.1f}%)")

        return "\n".join(lines)

    def generate_full_report(self, output_file: Optional[str] = None) -> str:
        """Build the Markdown summary report; see ``generate_summary_report``."""
        return generate_summary_report(str(self.recording_dir), output_file)

    def export_to_json(self, output_file: str):
        """Write the loaded session dictionary to ``output_file`` as indented UTF-8 JSON."""
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(self.session, f, indent=2, ensure_ascii=False)

        logger.info(f"Exported session to {output_file}")

    def show_timeline(self, max_events: int = 50) -> str:
        """
        Show agent actions and tool executions merged into one timeline.

        Events are ordered by their ``timestamp`` string. Only the first
        ``max_events`` are listed; the header reports how many are actually
        shown and a trailer reports how many were left out.

        Args:
            max_events: maximum number of events to list

        Returns:
            The formatted timeline.
        """
        # Load all events
        actions = load_agent_actions(str(self.recording_dir))
        trajectory = self.session.get("trajectory", [])

        # Combine all events with unified format
        timeline = []

        # Add agent actions
        for action in actions:
            timeline.append({
                "timestamp": action.get("timestamp", ""),
                "type": "agent_action",
                "agent_name": action.get("agent_name", ""),
                "agent_type": action.get("agent_type", "unknown"),
                "action_type": action.get("action_type", ""),
                "step": action.get("step"),
                "correlation_id": action.get("correlation_id", ""),
                "description": f"[{action.get('agent_type', '?').upper()}] {action.get('action_type', '?')}",
                "related_tool_steps": action.get("related_tool_steps", []),
            })

        # Add tool executions
        for traj_step in trajectory:
            # "result" can be present but null, so guard before reading status.
            result = traj_step.get("result")
            status = result.get("status", "") if isinstance(result, dict) else ""
            timeline.append({
                "timestamp": traj_step.get("timestamp", ""),
                "type": "tool_execution",
                "backend": traj_step.get("backend", ""),
                "tool": traj_step.get("tool", ""),
                "step": traj_step.get("step"),
                "agent_name": traj_step.get("agent_name", ""),
                "description": f"[TOOL:{traj_step.get('backend', '?').upper()}] {traj_step.get('tool', '?')}",
                "status": status,
            })

        # Sort by timestamp
        timeline.sort(key=lambda x: x.get("timestamp", ""))

        shown = timeline[:max_events]
        type_markers = {
            "agent_action": "🤖",
            "tool_execution": "🔧"
        }

        # Format output
        lines = []
        lines.append("\nUNIFIED TIMELINE")
        lines.append("=" * 100)
        # Report the number actually listed, not the requested maximum.
        lines.append(f"Total events: {len(timeline)} (showing first {len(shown)})")
        lines.append("")

        for item in shown:
            timestamp = item.get("timestamp", "N/A")
            # Keep HH:MM:SS from an ISO timestamp, else the last 8 characters.
            time_str = timestamp.split("T")[1][:8] if "T" in timestamp else timestamp[-8:]

            # Format line with type indicator
            type_marker = type_markers.get(item.get("type"), "•")

            desc = item.get("description", "")
            agent = item.get("agent_name", "")
            agent_type = item.get("agent_type", "")

            line = f"{time_str} {type_marker} {desc}"

            # Add agent info if available
            if agent and agent_type:
                line += f" (by {agent}/{agent_type})"
            elif agent:
                line += f" (by {agent})"

            lines.append(line)

            # Show correlations. Only agent actions carry related tool steps;
            # no timeline entry built above has any other correlation field.
            if item.get("related_tool_steps"):
                lines.append(f"         → tool steps: {item['related_tool_steps']}")

        if len(timeline) > max_events:
            lines.append(f"\n... and {len(timeline) - max_events} more events")

        return "\n".join(lines)

    def show_agent_flow(self, agent_name: Optional[str] = None) -> str:
        """
        Show the flow of a specific agent's actions and related events.

        Args:
            agent_name: when given, only actions with this ``agent_name`` are shown

        Returns:
            One entry per action in timestamp order: time, agent type, step
            and action type, followed by the reasoning thought (cut to 80
            characters) and selected output fields (cut to 60 characters)
            when present.
        """
        actions = load_agent_actions(str(self.recording_dir))

        if agent_name:
            actions = [a for a in actions if a.get("agent_name") == agent_name]

        lines = []
        lines.append(f"\nAGENT FLOW{' - ' + agent_name if agent_name else ''}")
        lines.append("=" * 100)

        # Sort by timestamp
        actions.sort(key=lambda x: x.get("timestamp", ""))

        for action in actions:
            timestamp = action.get("timestamp", "N/A").split("T")[1][:8] if "T" in action.get("timestamp", "") else "N/A"

            agent_type = action.get("agent_type", "?").upper()
            action_type = action.get("action_type", "?")
            step = action.get("step", "?")
            lines.append(f"{timestamp} [{agent_type}] Action #{step}: {action_type}")

            # Show reasoning if available
            if action.get("reasoning"):
                thought = action["reasoning"].get("thought", "")
                if thought:
                    # Mark truncation only when text was actually cut off.
                    suffix = "..." if len(thought) > 80 else ""
                    lines.append(f"         💭 {thought[:80]}{suffix}")

            # Show output
            if action.get("output"):
                output = action["output"]
                if isinstance(output, dict):
                    for key in ["message", "status", "evaluation"]:
                        if key in output:
                            lines.append(f"         📤 {key}: {str(output[key])[:60]}")

            lines.append("")

        return "\n".join(lines)


def view_recording(recording_dir: str):
    """
    Quick viewer for a recording: prints the summary, the agent analysis and
    the compact list of agent actions to stdout.

    Any exception is logged and printed as "Error: <message>" instead of
    being raised.
    """
    try:
        viewer = RecordingViewer(recording_dir)

        # Summary and analysis are each followed by a spacer.
        for render_section in (viewer.show_summary, viewer.analyze_agents):
            print(render_section())
            print("\n")

        print("Agent Actions (compact):")
        print(viewer.show_agent_actions(format_type="compact"))

    except Exception as e:
        logger.error(f"Failed to view recording: {e}")
        print(f"Error: {e}")


def compare_recordings(recording_dir1: str, recording_dir2: str) -> str:
    """
    Compare two recordings side by side.

    Args:
        recording_dir1: path to the first recording directory
        recording_dir2: path to the second recording directory

    Returns:
        Text showing each recording's task id and step count, followed by
        the step and success-rate differences (second minus first). On any
        exception the error is logged and "Error: <message>" is returned.
    """
    try:
        viewer1 = RecordingViewer(recording_dir1)
        viewer2 = RecordingViewer(recording_dir2)

        lines = []
        lines.append("=" * 70)
        lines.append("RECORDING COMPARISON")
        lines.append("=" * 70)
        lines.append("")

        # Compare metadata. The session stores metadata as None when no
        # metadata file was loaded, so .get(..., {}) alone would return None
        # and every comparison would end in the error branch.
        meta1 = viewer1.session.get("metadata") or {}
        meta2 = viewer2.session.get("metadata") or {}

        lines.append("Recording 1:")
        lines.append(f"  Task: {meta1.get('task_id', 'N/A')}")
        lines.append(f"  Steps: {meta1.get('total_steps', 0)}")
        lines.append("")

        lines.append("Recording 2:")
        lines.append(f"  Task: {meta2.get('task_id', 'N/A')}")
        lines.append(f"  Steps: {meta2.get('total_steps', 0)}")
        lines.append("")

        # Compare statistics
        stats1 = viewer1.session.get("statistics") or {}
        stats2 = viewer2.session.get("statistics") or {}

        lines.append("Differences:")
        lines.append(f"  Steps: {meta2.get('total_steps', 0) - meta1.get('total_steps', 0):+d}")
        lines.append(f"  Success Rate: {stats2.get('success_rate', 0) - stats1.get('success_rate', 0):+.2%}")

        return "\n".join(lines)

    except Exception as e:
        logger.error(f"Failed to compare recordings: {e}")
        return f"Error: {e}"


# Command-line entry point: expects the recording directory as the first argument.
if __name__ == "__main__":
    import sys

    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python -m anytool.recording.viewer <recording_dir>")
        sys.exit(1)

    view_recording(cli_args[0])

================================================
FILE: anytool/tool_layer.py
================================================
from __future__ import annotations

import asyncio
import traceback
import uuid
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional

from anytool.agents import GroundingAgent
from anytool.llm import LLMClient
from anytool.grounding.core.grounding_client import GroundingClient
from anytool.config import get_config, load_config
from anytool.config.loader import get_agent_config
from anytool.recording import RecordingManager
from anytool.utils.logging import Logger

logger = Logger.get_logger(__name__)


@dataclass
class AnyToolConfig:
    """
    Configuration values for ``AnyTool``, grouped by concern: LLM, grounding,
    backends, workspace, recording and logging.

    Every field has a default, so ``AnyToolConfig()`` is valid as-is. The only
    validation performed is that ``llm_model`` must be non-empty.

    Raises:
        ValueError: from ``__post_init__`` when ``llm_model`` is empty
    """
    # LLM Configuration
    llm_model: str = "openrouter/anthropic/claude-sonnet-4.5"
    llm_enable_thinking: bool = False
    llm_timeout: float = 120.0  # presumably seconds -- TODO confirm against LLMClient
    llm_max_retries: int = 3
    llm_rate_limit_delay: float = 0.0  # presumably seconds -- TODO confirm against LLMClient
    llm_kwargs: Dict[str, Any] = field(default_factory=dict)  # fresh dict per instance
    
    # Separate models for specific tasks (None = use llm_model)
    tool_retrieval_model: Optional[str] = None  # Model for tool retrieval LLM filter
    visual_analysis_model: Optional[str] = None  # Model for visual analysis
    
    # Grounding Configuration
    grounding_config_path: Optional[str] = None
    grounding_max_iterations: int = 20
    grounding_system_prompt: Optional[str] = None
    
    # Backend Configuration
    backend_scope: Optional[List[str]] = None  # None = All backends ["shell", "gui", "mcp", "web", "system"]
    
    # Workspace Configuration
    workspace_dir: Optional[str] = None
    
    # Recording Configuration
    enable_recording: bool = False
    recording_backends: Optional[List[str]] = None
    recording_log_dir: str = "./logs/recordings"
    enable_screenshot: bool = True
    enable_video: bool = True
    enable_conversation_log: bool = True  # Save LLM conversations to conversations.jsonl
    
    # Logging Configuration
    log_level: str = "INFO"
    log_to_file: bool = False
    log_file_path: Optional[str] = None
    
    def __post_init__(self):
        """Validate configuration: reject an empty ``llm_model``, then log the chosen model."""
        if not self.llm_model:
            raise ValueError("llm_model is required")
        
        logger.debug(f"AnyToolConfig initialized with model: {self.llm_model}")


class AnyTool:
    def __init__(self, config: Optional[AnyToolConfig] = None):
        """Create an AnyTool that is not yet initialized.

        Args:
            config: Settings to use. When omitted (or falsy) a default
                ``AnyToolConfig()`` is created.
        """
        self.config = config if config else AnyToolConfig()

        # Lifecycle flags: nothing is set up and no task is in flight yet.
        self._initialized = False
        self._running = False

        # Component slots; they start empty and are filled in by initialize().
        self._llm_client: Optional[LLMClient] = None
        self._grounding_client: Optional[GroundingClient] = None
        self._grounding_agent: Optional[GroundingAgent] = None
        self._recording_manager: Optional[RecordingManager] = None

        logger.debug("AnyTool instance created")
    
    async def initialize(self) -> None:
        """
        Build the LLM client, grounding client, optional recording manager
        and the GroundingAgent, then mark this instance as initialized.

        Calling this on an already-initialized instance only logs a warning
        and returns.

        Raises:
            Exception: Whatever a setup step raised. The error is logged,
                ``cleanup()`` is awaited, and the exception is re-raised.
        """
        if self._initialized:
            logger.warning("AnyTool already initialized")
            return
        
        logger.info("Initializing AnyTool...")
        
        try:
            # Main LLM client; llm_kwargs are forwarded verbatim.
            self._llm_client = LLMClient(
                model=self.config.llm_model,
                enable_thinking=self.config.llm_enable_thinking,
                rate_limit_delay=self.config.llm_rate_limit_delay,
                max_retries=self.config.llm_max_retries,
                timeout=self.config.llm_timeout,
                **self.config.llm_kwargs
            )
            logger.info(f"✓ LLM Client: {self.config.llm_model}")
            
            # Load grounding config
            # If custom config is provided, merge it with default configs
            # load_config supports multiple files and deep merges them (later files override earlier ones)
            if self.config.grounding_config_path:
                from anytool.config.loader import CONFIG_DIR
                from anytool.config.constants import CONFIG_GROUNDING, CONFIG_SECURITY
                # Load default configs + custom config (custom values will override defaults)
                grounding_config = load_config(
                    CONFIG_DIR / CONFIG_GROUNDING,
                    CONFIG_DIR / CONFIG_SECURITY,
                    self.config.grounding_config_path
                )
                logger.info(f"Merged custom grounding config: {self.config.grounding_config_path}")
            else:
                # Load default configs only
                grounding_config = get_config()
            
            self._grounding_client = GroundingClient(config=grounding_config)
            await self._grounding_client.initialize_all_providers()
            
            backends = list(self._grounding_client.list_providers().keys())
            logger.info(f"✓ Grounding Client: {len(backends)} backends")
            logger.debug(f"  Available backends: {[b.value for b in backends]}")
            
            if self.config.enable_recording:
                # task_id is left empty here; execute() assigns the real one
                # before each recording session is started.
                self._recording_manager = RecordingManager(
                    enabled=True,
                    task_id="",
                    log_dir=self.config.recording_log_dir,
                    backends=self.config.recording_backends,
                    enable_screenshot=self.config.enable_screenshot,
                    enable_video=self.config.enable_video,
                    enable_conversation_log=self.config.enable_conversation_log,
                    agent_name="AnyTool",
                )
                # Inject recording_manager to grounding_client for GUI intermediate steps
                self._grounding_client.recording_manager = self._recording_manager
                # Register to LLM client for auto-recording tool results
                self._recording_manager.register_to_llm(self._llm_client)
                logger.info(f"✓ Recording enabled: {len(self._recording_manager.backends or [])} backends")
            
            agent_config = get_agent_config("GroundingAgent")
            if agent_config:
                # max_iterations: the config-file value wins when present;
                # self.config.grounding_max_iterations is only the fallback.
                max_iterations = agent_config.get("max_iterations", self.config.grounding_max_iterations)
                # backend_scope: self.config.backend_scope > config file > built-in default
                backend_scope = self.config.backend_scope or agent_config.get("backend_scope") or ["gui", "shell", "mcp", "web", "system"]
                visual_analysis_timeout = agent_config.get("visual_analysis_timeout", 30.0)
                # Write the effective value back so later reads of self.config agree with the agent
                self.config.grounding_max_iterations = max_iterations
                logger.info(f"Loaded GroundingAgent config from config_agents.json (max_iterations={max_iterations}, visual_analysis_timeout={visual_analysis_timeout}s)")
            else:
                # Fall back to AnyToolConfig values. This branch runs whenever
                # get_agent_config() returns a falsy value.
                max_iterations = self.config.grounding_max_iterations
                backend_scope = self.config.backend_scope or ["gui", "shell", "mcp", "web", "system"]
                visual_analysis_timeout = 30.0
                logger.warning(f"config_agents.json not found, using default config (max_iterations={max_iterations})")
            
            # Create separate LLM client for tool retrieval if configured.
            # Only timeout and max_retries are shared with the main client;
            # enable_thinking, rate_limit_delay and llm_kwargs are not forwarded.
            tool_retrieval_llm = None
            if self.config.tool_retrieval_model:
                tool_retrieval_llm = LLMClient(
                    model=self.config.tool_retrieval_model,
                    timeout=self.config.llm_timeout,
                    max_retries=self.config.llm_max_retries,
                )
                logger.info(f"✓ Tool retrieval LLM: {self.config.tool_retrieval_model}")
            
            # recording_manager is None here when recording is disabled.
            self._grounding_agent = GroundingAgent(
                name="AnyTool-GroundingAgent",
                backend_scope=backend_scope,
                llm_client=self._llm_client,
                grounding_client=self._grounding_client,
                recording_manager=self._recording_manager,
                system_prompt=self.config.grounding_system_prompt,
                max_iterations=max_iterations,
                visual_analysis_timeout=visual_analysis_timeout,
                tool_retrieval_llm=tool_retrieval_llm,
                visual_analysis_model=self.config.visual_analysis_model,
            )
            logger.info(f"✓ GroundingAgent: {', '.join(backend_scope)}")
            
            self._initialized = True
            logger.info("="*60)
            logger.info("AnyTool ready to use!")
            logger.info("="*60)
            
        except Exception as e:
            # Release anything that was created before the failure, then
            # let the caller see the original exception.
            logger.error(f"Failed to initialize AnyTool: {e}")
            await self.cleanup()
            raise
    
    async def execute(
        self,
        task: str,
        context: Optional[Dict[str, Any]] = None,
        workspace_dir: Optional[str] = None,
        max_iterations: Optional[int] = None,
        task_id: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
        Execute a task with AnyTool.
        
        Args:
            task: Task instruction
            context: Additional context
            workspace_dir: Working directory
            max_iterations: Max iterations override
            task_id: External task ID for recording/logging. If None, generates a random one.
                     This allows external callers (e.g., OSWorld) to specify their own task ID
                     so recordings can be easily matched with benchmark results.
        """
        if not self._initialized:
            raise RuntimeError(
                "AnyTool not initialized. "
                "Call await tool_layer.initialize() first or use async with."
            )
        
        if self._running:
            raise RuntimeError("AnyTool is already running a task.")
        
        logger.info("="*60)
        logger.info(f"Task: {task[:100]}...")
        logger.info("="*60)
        
        self._running = True
        start_time = asyncio.get_event_loop().time()
        # Use external task_id if provided, otherwise generate one
        if task_id is None:
            task_id = f"task_{uuid.uuid4().hex[:8]}"
        logger.info(f"Task ID: {task_id}")
        
        try:
            execution_context = context or {}
            execution_context["task_id"] = task_id
            execution_context["instruction"] = task
            
            if max_iterations is not None:
                execution_context["max_iterations"] = max_iterations
            
            if self._recording_manager:
                if self._recording_manager.recording_status:
                    await self._recording_manager.stop()
                    logger.debug("Stopped previous recording session")
                
                self._recording_manager.task_id = task_id
                await self._recording_manager.start()
                logger.info(f"Recording started: {task_id}")
            
            if workspace_dir:
                execution_context["workspace_dir"] = workspace_dir
                logger.info(f"Workspace: {workspace_dir}")
            elif self.config.workspace_dir:
                execution_context["workspace_dir"] = self.config.workspace_dir
                logger.info(f"Workspace: {self.config.workspace_dir}")
            elif self._recording_manager and self._recording_manager.trajectory_dir:
                execution_context["workspace_dir"] = self._recording_manager.trajectory_dir
                logger.info(f"Workspace: {execution_context['workspace_dir']}")
            else:
                import tempfile
                from pathlib import Path
                workspace = Path(tempfile.gettempdir()) / "anytool_workspace" / task_id
                workspace.mkdir(parents=True, exist_ok=True)
                execution_context["workspace_dir"] = str(workspace)
                logger.info(f"Workspace: {execution_context['workspace_dir']}")
            
            logger.info(f"Executing with GroundingAgent (max {max_iterations or self.config.grounding_max_iterations} iterations)...")
            
            result = await self._grounding_agent.process(execution_context)
            
            execution_time = asyncio.get_event_loop().time() - start_time
            
            final_result = {
                **result,
                "task_id": task_id,
                "execution_time": execution_time,
            }
            
            status = result.get('status', 'unknown')
            iterations = result.get('iterations', 0)
            tool_count = len(result.get('tool_executions', []))
            
            logger.info("="*60)
            if status == "success":
                logger.info(
                    f"Task completed successfully! "
                    f"({iterations} iterations, {tool_count} tool calls, {execution_time:.2f}s)"
                )
            elif status == "incomplete":
                logger.warning(
                    f"Task incomplete after {iterations} iterations. "
                    f"Consider increasing max_iterations."
                )
            else:
                logger.error(f"Task failed: {result.get('error', 'Unknown error')}")
            logger.info("="*60)
            
            return final_result
            
        except Exception as e:
            execution_time = asyncio.get_event_loop().time() - start_time
            tb = traceback.format_exc(limit=10)
            logger.error(f"Task execution failed: {e}", exc_info=True)
            
            return {
                "status": "error",
                "error": str(e),
                "traceback": tb,
                "response": f"Task execution error: {str(e)}",
                "execution_time": execution_time,
                "task_id": task_id,
                "iterations": 0,
                "tool_executions": [],
            }
        
        finally:
            if self._recording_manager and self._recording_manager.recording_status:
                try:
                    await self._recording_manager.stop()
                    logger.debug(f"Recording stopped: {task_id}")
                except Exception as e:
                    logger.warning(f"Failed to stop recording: {e}")
            
            # Trigger quality evolution periodically
            await self._maybe_evolve_quality()
            
            self._running = False
    
    async def _maybe_evolve_quality(self) -> None:
        """Trigger quality evolution based on global execution count."""
        if not self._grounding_client or not self._grounding_client.quality_manager:
            return
        
        # Check if evolution should be triggered (every 10 global executions)
        if self._grounding_client.quality_manager.should_evolve():
            try:
                report = await self._grounding_client.evolve_quality()
                if report.get("recommendations"):
                    logger.info(f"Quality evolution: {report['recommendations']}")
            except Exception as e:
                logger.debug(f"Quality evolution skipped: {e}")
    
    async def cleanup(self) -> None:
        """
        Close all sessions and release resources.
        Automatically called when using context manager.
        """
        logger.info("Cleaning up AnyTool resources...")
        
        try:
            if self._grounding_client:
                await self._grounding_client.close_all_sessions()
                logger.debug("All grounding sessions closed")
            
            if self._recording_manager and self._recording_manager.recording_status:
                try:
                    await self._recording_manager.stop()
                    logger.debug("Recording manager stopped")
                except Exception as e:
                    logger.warning(f"Failed to stop recording: {e}")
            
            self._initialized = False
            self._running = False
            
            logger.info("AnyTool cleanup complete")
            
        except Exception as e:
            logger.error(f"Error during cleanup: {e}", exc_info=True)
    
    def is_initialized(self) -> bool:
        return self._initialized
    
    def is_running(self) -> bool:
        return self._running
    
    def get_config(self) -> AnyToolConfig:
        return self.config
    
    def list_backends(self) -> List[str]:
        if not self._initialized:
            raise RuntimeError("AnyTool not initialized")
        return [backend.value for backend in self._grounding_client.list_providers().keys()]
    
    def list_sessions(self) -> List[str]:
        if not self._initialized:
            raise RuntimeError("AnyTool not initialized")
        return self._grounding_client.list_sessions()
    
    async def __aenter__(self):
        """Context manager entry"""
        await self.initialize()
        return self
    
    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit"""
        await self.cleanup()
        return False
    
    def __repr__(self) -> str:
        status = "initialized" if self._initialized else "not initialized"
        if self._running:
            status = "running"
        backends = ", ".join(self.config.backend_scope) if self.config.backend_scope else "all"
        return f"<AnyTool(status={status}, backends={backends}, model={self.config.llm_model})>"

================================================
FILE: anytool/utils/cli_display.py
================================================
"""CLI Display utilities for AnyTool startup and interaction"""

from anytool.tool_layer import AnyToolConfig
from anytool.utils.display import Box, BoxStyle, colorize


class CLIDisplay:   
    @staticmethod
    def print_banner():
        """Print the startup banner: a rounded box with the product name and tagline."""
        frame = Box(width=70, style=BoxStyle.ROUNDED, color='c')
        heading = colorize("AnyTool", 'c', bold=True)
        tagline = "Universal Tool-Use Layer for AI Agents"

        # The heading is already colorized, so text_line gets no extra color for it.
        rows = [
            frame.top_line(indent=4),
            frame.empty_line(indent=4),
            frame.text_line(heading, align='center', indent=4, text_color=''),
            frame.text_line(tagline, align='center', indent=4, text_color='gr'),
            frame.empty_line(indent=4),
            frame.bottom_line(indent=4),
        ]

        print()
        for row in rows:
            print(row)
        print()
    
    @staticmethod
    def print_configuration(config: AnyToolConfig):
        """Print model, iteration limit and LLM timeout from ``config`` in a box.

        Args:
            config: Configuration whose ``llm_model``, ``grounding_max_iterations``
                and ``llm_timeout`` are shown.
        """
        frame = Box(width=70, style=BoxStyle.ROUNDED, color='bl')

        header = colorize("◉ System Configuration", 'c', bold=True)
        print(frame.text_line(header, align='center', indent=4, text_color=''))
        print(frame.separator_line(indent=4))

        # (label, displayed value, color key) per row
        settings = (
            ("AI Model", config.llm_model, 'bl'),
            ("Max Iterations", str(config.grounding_max_iterations), 'c'),
            ("LLM Timeout", f"{config.llm_timeout}s", 'c'),
        )
        for name, shown, tint in settings:
            row = f"  {name:20s} {colorize(shown, tint)}"
            print(frame.text_line(row, indent=4, text_color=''))

        print(frame.bottom_line(indent=4))
        print()
    
    @staticmethod
    def print_initialization_progress(steps: list, show_header: bool = True):
        """Print one row per initialization step with a status marker.

        Args:
            steps: Iterable of ``(description, status)`` pairs. ``"ok"`` shows a
                check mark, ``"error"`` a cross, anything else ``[...]``.
            show_header: When true, a title row and separator are printed first.
        """
        frame = Box(width=70, style=BoxStyle.ROUNDED, color='g')

        if show_header:
            heading = colorize("► Initializing Components", 'g', bold=True)
            print(frame.text_line(heading, align='center', indent=4, text_color=''))
            print(frame.separator_line(indent=4))

        for step, status in steps:
            # Default to the "pending" marker; override for the two known states.
            marker, tint = "[...]", 'y'
            if status == "ok":
                marker, tint = "✓", 'g'
            elif status == "error":
                marker, tint = "✗", 'rd'

            row = f"  {colorize(marker, tint)}  {step}"
            print(frame.text_line(row, indent=4, text_color=''))

        print(frame.bottom_line(indent=4))
        print()
    
    @staticmethod
    def print_result_summary(result: dict):
        box = Box(width=70, style=BoxStyle.ROUNDED, color='c')
        
        print()
        print(box.text_line(colorize("◈ Execution Summary", 'c', bold=True), 
                          align='center', indent=4, text_color=''))
        print(box.separator_line(indent=4))
        
        status = result.get("status", "unknown")
        status_colors = {
            "completed": 'g',
            "timeout": 'y',
            "error": 'rd',
            "max_iterations_reached": 'y',
        }
        status_color = status_colors.get(status, 'gr')
        status_display = colorize(status.upper(), status_color, bold=True)
        
        exec_time = result.get('execution_time', 0)
        result_lines = [
            f"  Status:          {status_display}",
            f"  Execution Time:  {colorize(f'{exec_time:.2f}s', 'c')}",
            f"  Iterations:      {colorize(str(result.get('iterations', 0)), 'y')}",
            f"  Completed Tasks: {colorize(str(result.get('completed_tasks', 0)), 'g')}",
        ]
        
        if result.get('evaluation_results'):
            result_lines.append(f"  Evaluations:     {colorize(str(len(result['evaluation_results'])), 'bl')}")
        
        for line in result_lines:
            print(box.text_line(line, indent=4, text_color=''))
        
        print(box.bottom_line(indent=4))
        print()
        
        # Print user response (the actual answer/result)
        if result.get('user_response'):
            response_box = Box(width=70, style=BoxStyle.ROUNDED, color='g')
            print(response_box.text_line(colorize("◈ Result", 'g', bold=True), 
                                       align='center', indent=4, text_color=''))
            print(response_box.separator_line(indent=4))
            
            user_response = result['user_response']
            for line in user_response.split('\n'):
                if line.strip():
                    display_line = f"  {line.strip()}"
                    print(response_box.text_line(display_line, indent=4, text_color=''))
            
            print(response_box.bottom_line(indent=4))
            print()
    
    @staticmethod
    def print_interactive_header():
        """Print the interactive-mode header box with the command cheat sheet."""
        frame = Box(width=70, style=BoxStyle.ROUNDED, color='c')

        heading = colorize("⌨ Interactive Mode", 'c', bold=True)
        print(frame.text_line(heading, align='center', indent=4, text_color=''))
        print(frame.separator_line(indent=4))

        # Command rows: colorized command name followed by a plain description.
        status_row = "    " + colorize("status", 'bl') + "  →  View system status"
        help_row = "    " + colorize("help", 'bl') + "    →  Show available commands"
        quit_row = "    " + colorize("quit", 'bl') + "    →  Exit interactive mode"

        body = (
            "",
            colorize("  Ready to execute your tasks!", 'g'),
            "",
            colorize("  Available Commands:", 'c', bold=True),
            status_row,
            help_row,
            quit_row,
            "",
            colorize("  ▸ Enter your task description below:", 'gr'),
            "",
        )
        for entry in body:
            print(frame.text_line(entry, indent=4, text_color=''))

        print(frame.bottom_line(indent=4))
        print()
    
    @staticmethod
    def print_task_header(query: str, title: str = "▶ Executing Task"):
        """Print a box with ``title`` centered above the task text ``query``.

        Args:
            query: Task description, printed on a single row as given.
            title: Heading shown at the top of the box.
        """
        frame = Box(width=70, style=BoxStyle.ROUNDED, color='g')
        heading = colorize(title, 'g', bold=True)

        print()
        print(frame.text_line(heading, align='center', indent=4, text_color=''))
        print(frame.separator_line(indent=4))
        # The query is framed by one empty row above and below.
        for body_row in ("", f"  {query}", ""):
            print(frame.text_line(body_row, indent=4, text_color=''))
        print(frame.bottom_line(indent=4))
    
    @staticmethod
    def print_system_ready():
        """Print the "System Ready" box listing what the real-time UI will show."""
        frame = Box(width=70, style=BoxStyle.ROUNDED, color='g')

        heading = colorize("◈ System Ready", 'g', bold=True)
        print(frame.text_line(heading, align='center', indent=4, text_color=''))
        print(frame.separator_line(indent=4))

        body = [
            "",
            colorize("  Real-time UI will display:", 'c'),
            "    § Agent activities and status",
            "    ⊕ Grounding backend operations",
            "    ⊞ Execution logs",
            "",
        ]
        for entry in body:
            print(frame.text_line(entry, indent=4, text_color=''))

        print(frame.bottom_line(indent=4))
        print()
    
    @staticmethod
    def print_status(agent):
        box = Box(width=70, style=BoxStyle.ROUNDED, color='bl')
        print()
        print(box.text_line(colorize("System Status", 'bl', bold=True), 
                          align='center', indent=4, text_color=''))
        print(box.separator_line(indent=4))
        
        status = agent.get_status()
        status_lines = [
            f"Initialized: {colorize('Yes' if status['initialized'] else 'No', 'g' if status['initialized'] else 'rd')}",
            f"Running: {colorize('Yes' if status['running'] else 'No', 'y' if status['running'] else 'g')}",
        ]
        
        if "agents" in status:
            status_lines.append(f"Agents: {colorize(', '.join(status['agents']), 'c')}")
        
        for line in status_lines:
            print(box.text_line(line, indent=4, text_color=''))
        
        print(box.bottom_line(indent=4))
        print()
    
    @staticmethod
    def print_help():
        box = Box(width=70, style=BoxStyle.ROUNDED, color='y')
        print()
        print(box.text_line(colorize("Available Commands", 'y', bold=True), 
                          align='center', indent=4, text_color=''))
        print(box.separator_line(indent=4))
        
        help_items = [
            (colorize("status", 'c'), "Show system status"),
            (colorize("help", 'c'), "Show this help message"),
            (colorize("quit/exit", 'c'), "Exit interactive mode"),
            ("", ""),
            (colorize("Other input", 'gr'), "Execute as task"),
        ]
        
        for cmd, desc in help_items:
            if cmd:
                print(box.text_line(f"  {cmd:20s} {desc}", indent=4, text_color=''))
            else:
                print(box.separator_line(indent=4))
        
        print(box.bottom_line(indent=4))
        print()

================================================
FILE: anytool/utils/display.py
================================================
from typing import Optional, List
from enum import Enum
import re


class Colors:
    """ANSI escape sequences used for terminal styling."""

    # Text attributes
    RESET = "\033[0m"
    BOLD = "\033[1m"
    DIM = "\033[2m"

    # Bright foreground colors (SGR 90-97)
    RED = "\033[91m"
    GREEN = "\033[92m"
    YELLOW = "\033[93m"
    BLUE = "\033[94m"
    MAGENTA = "\033[95m"
    CYAN = "\033[96m"
    WHITE = "\033[97m"
    GRAY = "\033[90m"

    # Softer tones from the 256-color palette (SGR 38;5;N foreground)
    GREEN_SOFT = "\033[38;5;78m"
    BLUE_SOFT = "\033[38;5;39m"
    CYAN_SOFT = "\033[38;5;51m"
    YELLOW_SOFT = "\033[38;5;222m"
    RED_SOFT = "\033[38;5;204m"
    MAGENTA_SOFT = "\033[38;5;141m"
    GRAY_SOFT = "\033[38;5;246m"


class BoxStyle(Enum):
    """Border styles understood by ``Box``."""

    # Rounded corners: ╭─╮╰╯
    ROUNDED = "rounded"
    # Square corners: ┌─┐└┘
    SQUARE = "square"
    # Double-line border: ╔═╗╚╝
    DOUBLE = "double"
    # Plain rules made of "=" characters, no side borders
    SIMPLE = "simple"


# Border glyphs per style. Keys: tl/tr/bl/br = corners, h = horizontal edge,
# v = vertical edge. BoxStyle.SIMPLE intentionally has no entry here.
BOX_CHARS = {
    BoxStyle.ROUNDED: dict(tl='╭', tr='╮', bl='╰', br='╯', h='─', v='│'),
    BoxStyle.SQUARE: dict(tl='┌', tr='┐', bl='└', br='┘', h='─', v='│'),
    BoxStyle.DOUBLE: dict(tl='╔', tr='╗', bl='╚', br='╝', h='═', v='║'),
}


def strip_ansi(text: str) -> str:
    """
    Remove ANSI escape sequences from a string.

    Args:
        text: Text that may contain ANSI escape sequences

    Returns:
        The text with every matched escape sequence removed
    """
    # Matches ESC followed by either a single control byte or a CSI sequence.
    return re.sub(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])', '', text)


def colorize(text: str, color: str = '', bold: bool = False) -> str:
    """Wrap ``text`` in an ANSI color sequence and append a reset code.

    Args:
        text: Text to style.
        color: Short color key ('r', 'b', 'd', 'g', 'bl', 'c', 'y', 'rd', 'm',
            'gr'). Any other value is inserted verbatim, so a raw escape
            sequence may be passed directly.
        bold: When true, the bold sequence is emitted before the color.

    Returns:
        The styled string, or ``text`` unchanged if styling fails.
    """
    try:
        color_map = {
            'r': Colors.RESET,
            'b': Colors.BOLD,
            'd': Colors.DIM,
            'g': Colors.GREEN_SOFT,
            'bl': Colors.BLUE_SOFT,
            'c': Colors.CYAN_SOFT,
            'y': Colors.YELLOW_SOFT,
            'rd': Colors.RED_SOFT,
            'm': Colors.MAGENTA_SOFT,
            'gr': Colors.GRAY_SOFT,
        }

        prefix = Colors.BOLD if bold else ''
        code = color_map.get(color, color)
        return f"{prefix}{code}{text}{Colors.RESET}"
    except Exception:
        # Best-effort styling: fall back to the plain text on ordinary errors,
        # but let KeyboardInterrupt/SystemExit propagate (a bare ``except:``
        # would swallow them).
        return text


class Box:
    """Builds the individual text rows of a bordered box for terminal output.

    Every method returns a string; nothing is printed here. For
    ``BoxStyle.SIMPLE`` the rows are plain rules/text without side borders.
    """

    def __init__(self,
                 width: int = 68,
                 style: BoxStyle = BoxStyle.ROUNDED,
                 color: str = 'bl',
                 padding: int = 2):
        """
        Args:
            width: Number of horizontal characters between the two corners.
            style: Border style.
            color: Color key passed to ``colorize`` for the border.
            padding: Spaces between a side border and the text.
        """
        self.width = width
        self.style = style
        self.color = color
        self.padding = padding
        # Styles without an entry in BOX_CHARS fall back to the rounded glyphs.
        self.chars = BOX_CHARS.get(style, BOX_CHARS[BoxStyle.ROUNDED])

    def top_line(self, indent: int = 2) -> str:
        """Return the colorized top border, indented by ``indent`` spaces."""
        margin = " " * indent
        if self.style == BoxStyle.SIMPLE:
            rule = "=" * self.width
        else:
            rule = self.chars['tl'] + self.chars['h'] * self.width + self.chars['tr']
        return colorize(margin + rule, self.color)

    def bottom_line(self, indent: int = 2) -> str:
        """Return the colorized bottom border, indented by ``indent`` spaces."""
        margin = " " * indent
        if self.style == BoxStyle.SIMPLE:
            rule = "=" * self.width
        else:
            rule = self.chars['bl'] + self.chars['h'] * self.width + self.chars['br']
        return colorize(margin + rule, self.color)

    def separator_line(self, indent: int = 2) -> str:
        """Return a colorized horizontal rule used between box sections."""
        margin = " " * indent
        if self.style == BoxStyle.SIMPLE:
            rule = "-" * self.width
        else:
            # A leading space stands where the left border glyph would be.
            rule = " " + self.chars['h'] * self.width
        return colorize(margin + rule, self.color)

    def empty_line(self, indent: int = 2) -> str:
        """Return a bordered blank row; an empty string for the SIMPLE style."""
        if self.style == BoxStyle.SIMPLE:
            return ""
        margin = " " * indent
        edge = self.chars['v']
        return colorize(margin + edge + " " * self.width + edge, self.color)

    def text_line(self, text: str, align: str = 'left', indent: int = 2, text_color: str = '') -> str:
        """Return one bordered row containing ``text``.

        Args:
            text: Row content; may already contain ANSI color codes.
            align: 'center', 'right', or anything else for left alignment.
            indent: Spaces placed before the left border.
            text_color: Color key applied to ``text``; empty leaves it as is.
        """
        margin = " " * indent
        inner_width = self.width - 2 * self.padding

        # Pad by the visible length, i.e. ignoring any ANSI codes in the text.
        visible_len = len(strip_ansi(text))
        slack = inner_width - visible_len

        shown = colorize(text, text_color) if text_color else text

        if align == 'center':
            lead = slack // 2
            content = " " * lead + shown + " " * (slack - lead)
        elif align == 'right':
            content = " " * slack + shown
        else:  # left
            content = shown + " " * slack

        gap = " " * self.padding
        if self.style == BoxStyle.SIMPLE:
            return margin + gap + content

        left_edge = colorize(margin + self.chars['v'], self.color)
        right_edge = colorize(self.chars['v'], self.color)
        return left_edge + gap + content + gap + right_edge

    def build(self,
              title: Optional[str] = None,
              lines: List[str] = None,
              footer: Optional[str] = None,
              indent: int = 2) -> str:
        """Assemble a complete box and return it as one newline-joined string.

        Args:
            title: Optional centered heading, framed by empty rows.
            lines: Optional body rows, left-aligned.
            footer: Optional centered footer, preceded by an empty row.
            indent: Spaces placed before every row.
        """
        rows = [self.top_line(indent)]

        if title:
            rows.append(self.empty_line(indent))
            rows.append(self.text_line(title, align='center', indent=indent, text_color='c'))
            rows.append(self.empty_line(indent))

        if lines:
            rows.extend(self.text_line(entry, indent=indent) for entry in lines)

        if footer:
            rows.append(self.empty_line(indent))
            rows.append(self.text_line(footer, align='center', indent=indent, text_color='gr'))

        rows.append(self.bottom_line(indent))

        return "\n".join(rows)


def print_box(title: Optional[str] = None,
              lines: List[str] = None,
              footer: Optional[str] = None,
              width: int = 68,
              style: BoxStyle = BoxStyle.ROUNDED,
              color: str = 'bl',
              indent: int = 2):
    """Render a box to standard output in a single ``print`` call.

    A ``Box`` is created from ``width``, ``style`` and ``color``; the
    remaining arguments (``title``, ``lines``, ``footer``, ``indent``) are
    forwarded unchanged to ``Box.build`` and the returned string is printed.
    """
    rendered = Box(width=width, style=style, color=color).build(
        title=title,
        lines=lines,
        footer=footer,
        indent=indent,
    )
    print(rendered)


def print_banner(title: str,
                 subtitle: Optional[str] = None,
                 width: int = 66,
                 style: BoxStyle = BoxStyle.ROUNDED,
                 color: str = 'bl',
                 indent: int = 2):
    """Print a framed banner surrounded by one blank line above and below.

    Inside the frame: an empty line, the centered ``title`` (text colour
    'c'), the centered ``subtitle`` (text colour 'gr') when one is given, and
    a closing empty line.
    """
    frame = Box(width=width, style=style, color=color)

    banner_rows = [
        "",  # leading blank line
        frame.top_line(indent),
        frame.empty_line(indent),
        frame.text_line(title, align='center', indent=indent, text_color='c'),
    ]
    if subtitle:
        banner_rows.append(
            frame.text_line(subtitle, align='center', indent=indent, text_color='gr')
        )
    banner_rows += [
        frame.empty_line(indent),
        frame.bottom_line(indent),
        "",  # trailing blank line
    ]

    for row in banner_rows:
        print(row)


def print_section(title: str,
                  content: List[str],
                  color: str = 'c',
                  indent: int = 2):
    """Print a bold, coloured "- <title>" heading followed by its content.

    The heading is preceded by a blank line and indented by ``indent``
    spaces; every entry of ``content`` is printed on its own line, indented
    three further spaces.
    """
    margin = " " * indent
    heading = colorize('- ' + title, color, bold=True)
    print(f"\n{margin}{heading}")

    entry_margin = margin + "   "
    for entry in content:
        print(f"{entry_margin}{entry}")


def print_separator(width: int = 68, color: str = 'bl', indent: int = 2):
    """Print a horizontal rule of ``width`` "─" characters after ``indent`` spaces.

    The indent and the rule are colourised together as one string.
    """
    rule = (" " * indent) + ("─" * width)
    print(colorize(rule, color))

================================================
FILE: anytool/utils/logging.py
================================================
import logging
import os
import sys
import threading
import json
from pathlib import Path
from datetime import datetime
from typing import Optional
from colorama import init

init(autoreset=True)


def _load_log_level_from_config() -> int:
    """
    Load log_level from config_grounding.json and convert to ANYTOOL_DEBUG value.
    Returns: 0 (WARNING), 1 (INFO), or 2 (DEBUG)
    """
    try:
        config_path = Path(__file__).parent.parent / "config" / "config_grounding.json"
        if config_path.exists():
            with open(config_path, 'r', encoding='utf-8') as f:
                config = json.load(f)
                log_level = config.get("log_level", "INFO").upper()
                
                # Convert log level string to ANYTOOL_DEBUG value
                level_map = {
                    "DEBUG": 2,
                    "INFO": 1,
                    "WARNING": 0,
                    "ERROR": 0,
                    "CRITICAL": 0
                }
                return level_map.get(log_level, 1)  # Default to INFO
    except Exception:
        # If any error occurs, silently return default INFO level
        pass
    return 1  # Default to INFO


# Verbosity switch: 0=WARNING, 1=INFO, 2=DEBUG.
# Seeded from config_grounding.json at import time; Logger.set_debug() and the
# ANYTOOL_DEBUG / DEBUG environment variables (handled at the bottom of this
# module) can override it afterwards.
ANYTOOL_DEBUG = _load_log_level_from_config()

# Default log directory and file-name pattern.
# The directory is expressed relative to this file: a "logs" folder two levels
# above this file's directory. It is not normalised here;
# Logger._get_default_log_file() applies os.path.abspath() to the final path.
DEFAULT_LOG_DIR = os.path.join(os.path.dirname(__file__), "..", "..", "logs")
DEFAULT_LOG_FILE_PATTERN = "anytool_{timestamp}.log"


class FlushFileHandler(logging.FileHandler):
    """FileHandler variant that explicitly flushes after every record.

    Intended for real-time logging: each emitted record is followed by a
    ``flush()`` call on the handler.
    """

    def emit(self, record):
        # Let the stock FileHandler write the record first ...
        logging.FileHandler.emit(self, record)
        # ... then flush straight away.
        self.flush()


class ColoredFormatter(logging.Formatter):
    """Formatter that wraps the whole formatted line in an ANSI colour.

    The colour is selected by the record's level name; unknown level names
    use the RESET sequence as their prefix. Every line ends with RESET.
    """

    COLORS = {
        'DEBUG': '\033[1;36m',    # Bold cyan
        'INFO': '\033[1;32m',     # Bold green
        'WARNING': '\033[1;33m',  # Bold yellow
        'ERROR': '\033[1;31m',    # Bold red
        'CRITICAL': '\033[1;35m', # Bold magenta
        'RESET': '\033[0m',
    }

    def format(self, record: logging.LogRecord) -> str:
        """Return the standard formatted text surrounded by colour codes."""
        plain_text = super().format(record)
        reset_code = self.COLORS["RESET"]
        start_code = self.COLORS.get(record.levelname, reset_code)
        return start_code + plain_text + reset_code


class Logger:
    """
    Thread-safe logger facade that:
    1. Configures handlers only once (lazy initialization).
    2. Ensures all subsequent loggers obtained via ``Logger.get_logger()``
       inherit the configured handlers.
    3. Dynamically adapts log levels according to ``ANYTOOL_DEBUG``.

    The logger that received the handlers (root or ``anytool``) and every
    handler created by this class are remembered, so that level changes reach
    them and reconfiguration / reset can detach and close them instead of
    leaking open log files.
    """

    _ROOT_NAME = "anytool"        # Package root name
    # Standard format: time with milliseconds | level | file:line number | message
    _LOG_FORMAT = (
        "%(asctime)s.%(msecs)03d [%(levelname)-8s] %(filename)s:%(lineno)d - %(message)s"
    )
    _DATE_FORMAT = "%Y-%m-%d %H:%M:%S"

    _lock = threading.Lock()
    _configured = False
    _registered: dict[str, logging.Logger] = {}
    # Logger that configure() attached its handlers to (None until configured).
    _target: Optional[logging.Logger] = None
    # Handlers created by configure() / add_file_handler(); only these are
    # closed when detached, foreign handlers are left for their owners.
    _owned_handlers: list[logging.Handler] = []

    @staticmethod
    def _get_default_log_file() -> str:
        """Generate default log file path with timestamp (to seconds)

        Log files are organized by the running script name:
        - logs/<script_name>/anytool_2025-10-24_15-30-00.log

        Returns:
            Absolute path of the log file (the file itself is not created here).
        """
        # Get the name of the main script
        script_name = "anytool"  # Default name
        try:
            import __main__
            if hasattr(__main__, "__file__") and __main__.__file__:
                # Extract script name without extension
                script_path = os.path.basename(__main__.__file__)
                script_name = os.path.splitext(script_path)[0]
        except Exception:
            # If can't get script name, use default
            pass

        # Create log directory path: logs/<script_name>/
        log_dir = os.path.join(DEFAULT_LOG_DIR, script_name)

        timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        filename = DEFAULT_LOG_FILE_PATTERN.format(timestamp=timestamp)
        return os.path.abspath(os.path.join(log_dir, filename))

    @classmethod
    def get_logger(cls, name: Optional[str] = None) -> logging.Logger:
        """Return a logger with *name* (defaults to ``anytool``).
        The first call triggers :meth:`configure` automatically."""
        if name is None:
            name = cls._ROOT_NAME

        # Decide under the lock whether configuration is needed, but call
        # configure() outside of it: the lock is not re-entrant.
        need_config = False
        with cls._lock:
            logger = cls._registered.get(name)
            if logger is None:
                logger = logging.getLogger(name)
                logger.propagate = True
                cls._registered[name] = logger
            if not cls._configured:
                need_config = True

        if need_config:
            cls.configure()
        return logger

    @classmethod
    def configure(
        cls,
        *,
        level: Optional[int] = None,
        fmt: Optional[str] = None,
        log_to_console: bool = True,
        log_to_file: Optional[str] = "auto",
        use_colors: bool = True,
        force_color: bool = False,
        force: bool = False,
        attach_to_root: bool = False,
    ) -> None:
        """
        Configure the logging system. Usually called automatically
        on first use; pass ``force=True`` to reconfigure explicitly.

        Args:
            level: Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
            fmt: Log format string
            log_to_console: Whether to output to console
            log_to_file: Log file path ("auto" auto-generate by date, None disable, or specify path)
            use_colors: Whether to use colors on console
            force_color: Force use of colors (even if not supported)
            force: Whether to force reconfiguration
            attach_to_root: Whether to attach to root logger

        If *attach_to_root* is ``True``, handlers are attached to the *root*
        logger (``""``). This makes every logger—regardless of its name—
        inherit the handlers (handy for standalone scripts) but will also
        surface logs from third-party libraries. Choose with care.

        When already configured and ``force`` is false, only the level is
        updated (and only if ``level`` is given).
        """
        with cls._lock:
            if cls._configured and not force:
                # Already configured and no need to force reconfiguration, only update level.
                if level is not None:
                    cls._update_level(cls._resolve_level(level))
                return

            resolved_level = cls._resolve_level(level)
            fmt_str = fmt or cls._LOG_FORMAT

            # Handle log_to_file parameter
            actual_log_file = None
            if log_to_file == "auto":
                actual_log_file = cls._get_default_log_file()
            elif log_to_file is not None:
                actual_log_file = log_to_file

            # Select the logger to attach handlers to (root logger or anytool).
            target_logger = (
                logging.getLogger() if attach_to_root else logging.getLogger(cls._ROOT_NAME)
            )

            # When the target changes between configurations, take our own
            # handlers off the previous one so output is not duplicated.
            previous_target = cls._target
            if previous_target is not None and previous_target is not target_logger:
                cls._release_handlers(previous_target, owned_only=True)

            # Clean up old handlers on the new target (ours get closed too).
            cls._release_handlers(target_logger)

            target_logger.setLevel(resolved_level)
            cls._target = target_logger

            # Construct Formatter
            color_supported = force_color or (use_colors and cls._stdout_supports_color())
            console_formatter = (
                ColoredFormatter(fmt_str, datefmt=cls._DATE_FORMAT) if color_supported
                else logging.Formatter(fmt_str, datefmt=cls._DATE_FORMAT)
            )
            file_formatter = logging.Formatter(fmt_str, datefmt=cls._DATE_FORMAT)

            # Console Handler
            if log_to_console:
                ch = logging.StreamHandler(sys.stdout)
                ch.setLevel(resolved_level)
                ch.setFormatter(console_formatter)
                target_logger.addHandler(ch)
                cls._owned_handlers.append(ch)

            # File Handler (with real-time flush)
            if actual_log_file:
                dir_path = os.path.dirname(actual_log_file)
                if dir_path:
                    os.makedirs(dir_path, exist_ok=True)
                fh = FlushFileHandler(actual_log_file, encoding="utf-8")
                fh.setLevel(resolved_level)
                fh.setFormatter(file_formatter)
                target_logger.addHandler(fh)
                cls._owned_handlers.append(fh)

                # Record log file location (first configuration only)
                if not cls._configured:
                    print(f"Log file enabled: {actual_log_file}")

            cls._configured = True

    @classmethod
    def set_debug(cls, debug_level: int = 2) -> None:
        """Dynamically switch debug level: 0 = WARNING, 1 = INFO, 2 = DEBUG.

        Values outside 0..2 are clamped into that range.
        """
        global ANYTOOL_DEBUG
        ANYTOOL_DEBUG = max(0, min(debug_level, 2))
        with cls._lock:
            cls._update_level(cls._resolve_level(None))

    @classmethod
    def add_file_handler(
        cls,
        filepath: str,
        logger_name: Optional[str] = None
    ) -> None:
        """
        Append a file handler to the given (default ``anytool``) logger.

        The parent directory of *filepath* is created when missing. The new
        handler starts at the logger's current level and uses the standard
        log format.

        Args:
            filepath: Log file path
            logger_name: Log logger name
        """
        logger = cls.get_logger(logger_name or cls._ROOT_NAME)

        dir_path = os.path.dirname(filepath)
        if dir_path:
            os.makedirs(dir_path, exist_ok=True)

        fh = FlushFileHandler(filepath, encoding="utf-8")
        fh.setLevel(logger.level)
        fh.setFormatter(logging.Formatter(cls._LOG_FORMAT, datefmt=cls._DATE_FORMAT))
        with cls._lock:
            logger.addHandler(fh)
            cls._owned_handlers.append(fh)

    @classmethod
    def reset_configuration(cls) -> None:
        """Remove all handlers and clear registered loggers.

        Handlers this class attached to the configured target logger are
        removed as well, even when that logger was never registered through
        :meth:`get_logger` (for example the root logger).
        """
        with cls._lock:
            if cls._target is not None:
                cls._release_handlers(cls._target, owned_only=True)
            for lg in cls._registered.values():
                cls._release_handlers(lg)
            cls._registered.clear()
            cls._target = None
            cls._configured = False

    @classmethod
    def _release_handlers(cls, owner: logging.Logger, *, owned_only: bool = False) -> None:
        """Detach handlers from *owner*, closing the ones this class created.

        With ``owned_only`` set, handlers installed by other code are left in
        place. Foreign handlers are never closed because their lifetime is
        not managed here. Callers must hold ``_lock``.
        """
        for handler in owner.handlers[:]:
            owned = handler in cls._owned_handlers
            if owned_only and not owned:
                continue
            owner.removeHandler(handler)
            if owned:
                cls._owned_handlers.remove(handler)
                handler.close()

    @staticmethod
    def _stdout_supports_color() -> bool:
        """True when stdout is a TTY and the NO_COLOR variable is unset or empty."""
        # sys.stdout can be None or a replacement object without isatty().
        isatty = getattr(sys.stdout, "isatty", None)
        return bool(isatty and isatty() and not os.getenv("NO_COLOR"))

    @classmethod
    def _resolve_level(cls, level: Optional[int]) -> int:
        """Translate *level* (or, when None, ``ANYTOOL_DEBUG``) into a logging level."""
        if level is not None:
            if isinstance(level, int):
                return level
            # Allow passing level names such as "INFO" / "info".
            return getattr(logging, str(level).upper(), level)
        return {2: logging.DEBUG, 1: logging.INFO}.get(ANYTOOL_DEBUG, logging.WARNING)

    @classmethod
    def _update_level(cls, level: int) -> None:
        """Apply *level* to registered loggers, the target logger and our handlers."""
        for lg in cls._registered.values():
            lg.setLevel(level)
            for h in lg.handlers:
                h.setLevel(level)
        # The target (e.g. the root logger) is usually not in _registered, so
        # it and the handlers attached to it must be updated explicitly.
        if cls._target is not None:
            cls._target.setLevel(level)
        for h in cls._owned_handlers:
            h.setLevel(level)


# Adjust debug level automatically according to the
# ``ANYTOOL_DEBUG`` (preferred) or legacy ``DEBUG`` environment variable.
# Because of the ``or``, an empty ANYTOOL_DEBUG falls through to DEBUG.
_env_debug = os.getenv("ANYTOOL_DEBUG") or os.getenv("DEBUG")
if _env_debug is not None:
    try:
        # Numeric form; set_debug() clamps the value into 0..2.
        Logger.set_debug(int(_env_debug))
    except ValueError:
        # Non-numeric form: "true"/"yes" (case-insensitive) select DEBUG (2),
        # any other text selects WARNING (0). "1" is listed as well but is
        # already handled by the int() branch above.
        Logger.set_debug(2 if _env_debug.strip().lower() in {"1", "true", "yes"} else 0)

# Initialize logger system, attach to root so all loggers inherit the configuration
# This ensures any logger obtained via Logger.get_logger() will work correctly
# NOTE: this runs at import time with the default log_to_file="auto", so
# importing this module sets up a log file handler.
Logger.configure(attach_to_root=True)

# Get anytool logger for internal logging
logger = Logger.get_logger()
logger.debug("AnyTool logging initialized")

================================================
FILE: anytool/utils/telemetry/__init__.py
================================================


================================================
FILE: anytool/utils/telemetry/events.py
================================================
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Any


class BaseTelemetryEvent(ABC):
    """Abstract contract shared by all telemetry events.

    Subclasses must provide both read-only properties below; the class
    cannot be instantiated until they do.
    """

    @property
    @abstractmethod
    def name(self) -> str:
        """Identifier under which the event is tracked."""

    @property
    @abstractmethod
    def properties(self) -> dict[str, Any]:
        """Key/value payload transmitted together with the event."""


@dataclass
class MCPAgentExecutionEvent(BaseTelemetryEvent):
    """Telemetry record describing one complete MCP agent execution."""

    # --- How the agent was invoked ---
    execution_method: str  # "run" or "astream"
    query: str  # The actual user query
    success: bool

    # --- Agent configuration ---
    model_provider: str
    model_name: str
    server_count: int
    server_identifiers: list[dict[str, str]]
    total_tools_available: int
    tools_available_names: list[str]
    max_steps_configured: int
    memory_enabled: bool
    use_server_manager: bool

    # --- Execution parameters ---
    max_steps_used: int | None
    manage_connector: bool
    external_history_used: bool

    # --- Execution results (optional) ---
    steps_taken: int | None = None
    tools_used_count: int | None = None
    tools_used_names: list[str] | None = None
    response: str | None = None  # The actual response
    execution_time_ms: int | None = None
    error_type: str | None = None

    # --- Context (optional) ---
    conversation_history_length: int | None = None

    @property
    def name(self) -> str:
        return "mcp_agent_execution"

    @property
    def properties(self) -> dict[str, Any]:
        """Return every field by name plus two derived length entries.

        ``query_length`` follows ``query`` and ``response_length`` follows
        ``response`` (None when the response is missing or empty). Optional
        fields are always present, even when their value is None.
        """
        reported_fields = (
            # Core execution info
            "execution_method",
            "query",
            "success",
            # Agent configuration
            "model_provider",
            "model_name",
            "server_count",
            "server_identifiers",
            "total_tools_available",
            "tools_available_names",
            "max_steps_configured",
            "memory_enabled",
            "use_server_manager",
            # Execution parameters
            "max_steps_used",
            "manage_connector",
            "external_history_used",
            # Execution results
            "steps_taken",
            "tools_used_count",
            "tools_used_names",
            "response",
            "execution_time_ms",
            "error_type",
            "conversation_history_length",
        )

        payload: dict[str, Any] = {}
        for field_name in reported_fields:
            payload[field_name] = getattr(self, field_name)
            # Derived entries are inserted right after their source field.
            if field_name == "query":
                payload["query_length"] = len(self.query)
            elif field_name == "response":
                payload["response_length"] = len(self.response) if self.response else None
        return payload

================================================
FILE: anytool/utils/telemetry/telemetry.py
================================================
import logging
import os
import platform
import uuid
from collections.abc import Callable
from functools import wraps
from pathlib import Path
from typing import Any

from posthog import Posthog
from scarf import ScarfEventLogger

from mcp_use.logging import MCP_USE_DEBUG
from mcp_use.telemetry.events import (
    BaseTelemetryEvent,
    MCPAgentExecutionEvent,
)
from mcp_use.telemetry.utils import get_package_version

logger = logging.getLogger(__name__)


def singleton(cls):
    """Class decorator that turns *cls* into a create-once factory.

    The returned callable instantiates *cls* with the arguments of its first
    call and hands back that same instance on every later call (arguments of
    later calls are ignored).
    """
    cached = None

    def wrapper(*args, **kwargs):
        nonlocal cached
        if cached is None:
            cached = cls(*args, **kwargs)
        return cached

    return wrapper

def requires_telemetry(func: Callable) -> Callable:
    """Method decorator: run *func* only when a telemetry client exists.

    If both ``self._posthog_client`` and ``self._scarf_client`` are falsy the
    wrapped method is skipped and None is returned.
    """

    @wraps(func)
    def wrapper(self, *args, **kwargs):
        if self._posthog_client or self._scarf_client:
            return func(self, *args, **kwargs)
        return None

    return wrapper


def get_cache_home() -> Path:
    """Return the per-user cache directory for the current platform.

    An absolute ``XDG_CACHE_HOME`` always wins. Otherwise:
    - Windows: ``LOCALAPPDATA``, else ``APPDATA``, else ``~/AppData/Local``
    - macOS (Darwin): ``~/Library/Caches``
    - anything else: ``~/.cache``
    """
    # XDG_CACHE_HOME for Linux and manually set envs (absolute paths only)
    xdg_override: str | None = os.getenv("XDG_CACHE_HOME")
    if xdg_override:
        candidate = Path(xdg_override)
        if candidate.is_absolute():
            return candidate

    os_name = platform.system()
    if os_name == "Darwin":  # macOS
        return Path.home() / "Library" / "Caches"
    if os_name != "Windows":  # Linux or other Unix
        return Path.home() / ".cache"

    windows_base = os.getenv("LOCALAPPDATA") or os.getenv("APPDATA")
    if windows_base:
        return Path(windows_base)
    return Path.home() / "AppData" / "Local"


@singleton
class Telemetry:
    """
    Service for capturing anonymized telemetry data via PostHog and Scarf.
    If the environment variable `MCP_USE_ANONYMIZED_TELEMETRY=false`, telemetry will be disabled.

    Methods decorated with ``requires_telemetry`` do nothing when neither
    client is available. Failures while sending are logged and never raised.
    """

    USER_ID_PATH = str(get_cache_home() / "mcp_use_3" / "telemetry_user_id")
    VERSION_DOWNLOAD_PATH = str(get_cache_home() / "mcp_use" / "download_version")
    PROJECT_API_KEY = "phc_lyTtbYwvkdSbrcMQNPiKiiRWrrM1seyKIMjycSvItEI"
    HOST = "https://eu.i.posthog.com"
    SCARF_GATEWAY_URL = "https://mcpuse.gateway.scarf.sh/events"
    UNKNOWN_USER_ID = "UNKNOWN_USER_ID"

    _curr_user_id = None

    def __init__(self):
        """Create the PostHog and Scarf clients unless telemetry is disabled.

        Each client is initialised independently; if one fails, a warning is
        logged and only that client is set to None.
        """
        telemetry_disabled = os.getenv("MCP_USE_ANONYMIZED_TELEMETRY", "true").lower() == "false"

        if telemetry_disabled:
            self._posthog_client = None
            self._scarf_client = None
            logger.debug("Telemetry disabled")
        else:
            logger.info("Anonymized telemetry enabled. Set MCP_USE_ANONYMIZED_TELEMETRY=false to disable.")

            # Initialize PostHog
            try:
                self._posthog_client = Posthog(
                    project_api_key=self.PROJECT_API_KEY,
                    host=self.HOST,
                    disable_geoip=False,
                    enable_exception_autocapture=True,
                )

                # Silence posthog's logging unless debug mode (level 2)
                if MCP_USE_DEBUG < 2:
                    posthog_logger = logging.getLogger("posthog")
                    posthog_logger.disabled = True

            except Exception as e:
                logger.warning(f"Failed to initialize PostHog telemetry: {e}")
                self._posthog_client = None

            # Initialize Scarf
            try:
                self._scarf_client = ScarfEventLogger(
                    endpoint_url=self.SCARF_GATEWAY_URL,
                    timeout=3.0,
                    verbose=MCP_USE_DEBUG >= 2,
                )

                # Silence scarf's logging unless debug mode (level 2)
                if MCP_USE_DEBUG < 2:
                    scarf_logger = logging.getLogger("scarf")
                    scarf_logger.disabled = True

            except Exception as e:
                logger.warning(f"Failed to initialize Scarf telemetry: {e}")
                self._scarf_client = None

    @staticmethod
    def _is_newer_version(current: str, saved: str) -> bool:
        """Return True when *current* denotes a later release than *saved*.

        The leading dotted numeric components are compared as integers, so
        "0.10.0" ranks above "0.9.0" (a plain string comparison gets this
        wrong). When either string has no leading numeric component (e.g.
        "unknown") or the numeric parts are identical, the result falls back
        to plain string comparison.
        """
        def release_key(version: str) -> tuple[int, ...]:
            key = []
            for piece in version.strip().split("."):
                digits = piece[: len(piece) - len(piece.lstrip("0123456789"))]
                if not digits:
                    break
                key.append(int(digits))
                if digits != piece:
                    # A suffix such as "rc1" ends the numeric release segment.
                    break
            return tuple(key)

        current_key = release_key(current)
        saved_key = release_key(saved)
        if current_key and saved_key and current_key != saved_key:
            return current_key > saved_key
        return current > saved

    @property
    def user_id(self) -> str:
        """Get or create a persistent anonymous user ID

        The ID is cached on the instance after the first access. If reading
        or creating the ID file fails, ``UNKNOWN_USER_ID`` is used instead.
        """
        if self._curr_user_id:
            return self._curr_user_id

        try:
            is_first_time = not os.path.exists(self.USER_ID_PATH)

            if is_first_time:
                logger.debug(f"Creating user ID path: {self.USER_ID_PATH}")
                os.makedirs(os.path.dirname(self.USER_ID_PATH), exist_ok=True)
                with open(self.USER_ID_PATH, "w") as f:
                    new_user_id = str(uuid.uuid4())
                    f.write(new_user_id)
                self._curr_user_id = new_user_id

                logger.debug(f"User ID path created: {self.USER_ID_PATH}")
            else:
                with open(self.USER_ID_PATH) as f:
                    self._curr_user_id = f.read().strip()

            # Always check for version-based download tracking.
            # _curr_user_id is already set at this point, so the nested
            # self.user_id access inside track_package_download returns early.
            self.track_package_download(
                {
                    "triggered_by": "user_id_property",
                }
            )
        except Exception as e:
            logger.debug(f"Failed to get/create user ID: {e}")
            self._curr_user_id = self.UNKNOWN_USER_ID

        return self._curr_user_id

    @requires_telemetry
    def capture(self, event: BaseTelemetryEvent) -> None:
        """Capture a telemetry event

        PostHog receives the full event properties plus the package version;
        Scarf receives only the package version, user ID and event name.
        """
        # Send to PostHog
        if self._posthog_client:
            try:
                # Add package version to all events
                properties = event.properties.copy()
                properties["mcp_use_version"] = get_package_version()

                self._posthog_client.capture(distinct_id=self.user_id, event=event.name, properties=properties)
            except Exception as e:
                logger.debug(f"Failed to track PostHog event {event.name}: {e}")

        # Send to Scarf
        if self._scarf_client:
            try:
                # Only simple scalar values are sent to Scarf; the event's
                # own properties are intentionally not included here.
                properties = {}
                properties["mcp_use_version"] = get_package_version()
                properties["user_id"] = self.user_id
                properties["event"] = event.name

                self._scarf_client.log_event(properties=properties)
            except Exception as e:
                logger.debug(f"Failed to track Scarf event {event.name}: {e}")

    @requires_telemetry
    def track_package_download(self, properties: dict[str, Any] | None = None) -> None:
        """Track package download event specifically for Scarf analytics

        An event is sent when no version has been recorded yet (first
        download) or when the installed version is newer than the recorded
        one; in both cases the recorded version file is updated.

        Args:
            properties: Extra properties merged into the event (not modified).
        """
        if self._scarf_client:
            try:
                current_version = get_package_version()
                should_track = False
                first_download = False

                # Check if version file exists
                if not os.path.exists(self.VERSION_DOWNLOAD_PATH):
                    # First download
                    should_track = True
                    first_download = True

                    # Create directory and save version
                    os.makedirs(os.path.dirname(self.VERSION_DOWNLOAD_PATH), exist_ok=True)
                    with open(self.VERSION_DOWNLOAD_PATH, "w") as f:
                        f.write(current_version)
                else:
                    # Read saved version
                    with open(self.VERSION_DOWNLOAD_PATH) as f:
                        saved_version = f.read().strip()

                    # Compare release numbers numerically rather than as raw
                    # strings, so that e.g. 0.10.0 counts as newer than 0.9.0.
                    if self._is_newer_version(current_version, saved_version):
                        should_track = True
                        first_download = False

                        # Update saved version
                        with open(self.VERSION_DOWNLOAD_PATH, "w") as f:
                            f.write(current_version)

                if should_track:
                    logger.debug(f"Tracking package download event with properties: {properties}")
                    # Add package version and user_id to event
                    event_properties = (properties or {}).copy()
                    event_properties["mcp_use_version"] = current_version
                    event_properties["user_id"] = self.user_id
                    event_properties["event"] = "package_download"
                    event_properties["first_download"] = first_download

                    # Caller-supplied properties are forwarded as-is.
                    self._scarf_client.log_event(properties=event_properties)
            except Exception as e:
                logger.debug(f"Failed to track Scarf package_download event: {e}")

    @requires_telemetry
    def track_agent_execution(
        self,
        execution_method: str,
        query: str,
        success: bool,
        model_provider: str,
        model_name: str,
        server_count: int,
        server_identifiers: list[dict[str, str]],
        total_tools_available: int,
        tools_available_names: list[str],
        max_steps_configured: int,
        memory_enabled: bool,
        use_server_manager: bool,
        max_steps_used: int | None,
        manage_connector: bool,
        external_history_used: bool,
        steps_taken: int | None = None,
        tools_used_count: int | None = None,
        tools_used_names: list[str] | None = None,
        response: str | None = None,
        execution_time_ms: int | None = None,
        error_type: str | None = None,
        conversation_history_length: int | None = None,
    ) -> None:
        """Track comprehensive agent execution

        Builds an ``MCPAgentExecutionEvent`` from the arguments (one field per
        parameter) and passes it to :meth:`capture`.
        """
        event = MCPAgentExecutionEvent(
            execution_method=execution_method,
            query=query,
            success=success,
            model_provider=model_provider,
            model_name=model_name,
            server_count=server_count,
            server_identifiers=server_identifiers,
            total_tools_available=total_tools_available,
            tools_available_names=tools_available_names,
            max_steps_configured=max_steps_configured,
            memory_enabled=memory_enabled,
            use_server_manager=use_server_manager,
            max_steps_used=max_steps_used,
            manage_connector=manage_connector,
            external_history_used=external_history_used,
            steps_taken=steps_taken,
            tools_used_count=tools_used_count,
            tools_used_names=tools_used_names,
            response=response,
            execution_time_ms=execution_time_ms,
            error_type=error_type,
            conversation_history_length=conversation_history_length,
        )
        self.capture(event)

    @requires_telemetry
    def flush(self) -> None:
        """Flush any queued telemetry events"""
        # Flush PostHog
        if self._posthog_client:
            try:
                self._posthog_client.flush()
                logger.debug("PostHog client telemetry queue flushed")
            except Exception as e:
                logger.debug(f"Failed to flush PostHog client: {e}")

        # Scarf events are sent immediately, no flush needed
        if self._scarf_client:
            logger.debug("Scarf telemetry events sent immediately (no flush needed)")

    @requires_telemetry
    def shutdown(self) -> None:
        """Shutdown telemetry clients and flush remaining events"""
        # Shutdown PostHog
        if self._posthog_client:
            try:
                self._posthog_client.shutdown()
                logger.debug("PostHog client shutdown successfully")
            except Exception as e:
                logger.debug(f"Error shutting down PostHog client: {e}")

        # Scarf doesn't require explicit shutdown
        if self._scarf_client:
            logger.debug("Scarf telemetry client shutdown (no action needed)")


================================================
FILE: anytool/utils/telemetry/utils.py
================================================
"""
Utility functions for extracting model information from LangChain LLMs.

This module provides utilities to extract provider and model information
from LangChain language models for telemetry purposes.
"""

import importlib.metadata

from langchain_core.language_models.base import BaseLanguageModel


def get_package_version() -> str:
    """Return the installed ``mcp-use`` distribution version.

    Returns:
        The version string, or "unknown" when the distribution is not installed.
    """
    try:
        installed = importlib.metadata.version("mcp-use")
    except importlib.metadata.PackageNotFoundError:
        installed = "unknown"
    return installed


def get_model_provider(llm: BaseLanguageModel) -> str:
    """Return the provider identifier of a LangChain LLM.

    Uses the object's ``_llm_type`` attribute when it exists; otherwise the
    lower-cased class name is returned.
    """
    class_name_fallback = llm.__class__.__name__.lower()
    return getattr(llm, "_llm_type", class_name_fallback)


def get_model_name(llm: BaseLanguageModel) -> str:
    """Extract the model name from LangChain LLM using BaseChatModel standards.

    Lookup order:
    1. ``llm._identifying_params`` (when it is a dict): the first of the keys
       "model", "model_name", "model_id", "deployment_name" with a non-None value.
    2. The ``model`` attribute, then the ``model_name`` attribute (non-None).
    3. The class name of *llm*.

    Returns:
        Always a ``str``; found values are converted with ``str()`` and None
        values are skipped so the declared return type holds in every branch.
    """
    # First try _identifying_params which may contain model info
    identifying_params = getattr(llm, "_identifying_params", None)
    if isinstance(identifying_params, dict):
        # Common keys that contain model names
        for key in ("model", "model_name", "model_id", "deployment_name"):
            value = identifying_params.get(key)
            if value is not None:
                return str(value)

    # Fallback to direct model attributes
    for attr in ("model", "model_name"):
        value = getattr(llm, attr, None)
        if value is not None:
            return str(value)

    return llm.__class__.__name__


def extract_model_info(llm: BaseLanguageModel) -> tuple[str, str]:
    """Collect provider and model name of a LangChain LLM in one call.

    Returns:
        Tuple of (provider, model_name), obtained from
        ``get_model_provider`` and ``get_model_name`` in that order.
    """
    provider = get_model_provider(llm)
    model_name = get_model_name(llm)
    return provider, model_name


================================================
FILE: anytool/utils/ui.py
================================================
"""
AnyTool Terminal UI System

Provides real-time CLI visualization for AnyTool execution flow.
Displays agent activities, grounding backends, and detailed logs.

Uses native ANSI colors and custom box drawing for a clean, lightweight interface.
"""

from typing import Optional, Dict, Any, List, Tuple
from datetime import datetime
from enum import Enum
import asyncio
import sys
import shutil

from anytool.utils.display import Box, BoxStyle, colorize


class AgentStatus(Enum):
    """Agent execution status

    Each member's value is the lowercase string form of its name.
    """
    IDLE = "idle"
    THINKING = "thinking"
    EXECUTING = "executing"
    WAITING = "waiting"


class AnyToolUI:
    """
    AnyTool Terminal UI

    Provides real-time visualization of:
    - Agent activities and status
    - Grounding backend operations
    - Execution logs
    - System metrics

    Design Philosophy:
    - Lightweight and fast (no heavy dependencies)
    - Clean ANSI-based rendering
    - Minimal CPU overhead
    - Easy to customize
    """

    def __init__(self, enable_live: bool = True, compact: bool = False,
                 refresh_interval: float = 2.0):
        """
        Initialize UI

        Args:
            enable_live: Whether to enable live display updates
            compact: Use compact layout (for smaller terminals)
            refresh_interval: Seconds the live update loop waits between
                two frames (defaults to the previous fixed value of 2.0)
        """
        self.enable_live = enable_live
        self.compact = compact
        self.refresh_interval = refresh_interval

        # Terminal dimensions
        self.term_width, self.term_height = self._get_terminal_size()

        # State tracking
        self.agent_status: Dict[str, AgentStatus] = {}
        self.agent_activities: Dict[str, List[str]] = {}
        self.grounding_operations: List[Dict[str, Any]] = []
        self.grounding_backends: List[Dict[str, Any]] = []  # Backend info (type, servers, etc.)
        self.log_buffer: List[Tuple[str, str, datetime]] = []  # (message, level, timestamp)

        # Metrics
        self.metrics: Dict[str, Any] = {
            "start_time": None,
            "iterations": 0,
            "completed_tasks": 0,
            "llm_calls": 0,
            "grounding_calls": 0,
        }

        # Live display state
        self._live_running = False
        self._live_task: Optional[asyncio.Task] = None
        self._last_render: List[str] = []

    def _get_terminal_size(self) -> Tuple[int, int]:
        """Return (columns, lines) of the terminal, falling back to 80x24."""
        try:
            size = shutil.get_terminal_size((80, 24))
            return size.columns, size.lines
        except Exception:
            # Best-effort fallback. ``Exception`` (not a bare ``except``) so
            # KeyboardInterrupt / SystemExit are never swallowed here.
            return 80, 24

    def _clear_screen(self):
        """Clear the screen and home the cursor (only when live mode is enabled)."""
        if self.enable_live:
            # Clear entire screen and move cursor to top-left
            sys.stdout.write('\033[2J\033[H')
            sys.stdout.flush()

    def _move_cursor_home(self):
        """Move cursor to home position"""
        sys.stdout.write('\033[H')
        sys.stdout.flush()

    def _hide_cursor(self):
        """Hide cursor"""
        sys.stdout.write('\033[?25l')
        sys.stdout.flush()

    def _show_cursor(self):
        """Show cursor"""
        sys.stdout.write('\033[?25h')
        sys.stdout.flush()

    # Banner and Startup
    def print_banner(self):
        """Print startup banner"""
        box = Box(width=70, style=BoxStyle.ROUNDED, color='c')

        print()
        print(box.top_line(indent=4))
        print(box.empty_line(indent=4))

        # Title
        title = colorize("AnyTool", 'c', bold=True)
        print(box.text_line(title, align='center', indent=4, text_color=''))

        # Subtitle
        subtitle = "Universal Tool-Use Layer for AI Agents"
        print(box.text_line(subtitle, align='center', indent=4, text_color='gr'))

        print(box.empty_line(indent=4))
        print(box.bottom_line(indent=4))
        print()

    def print_initialization(self, steps: List[Tuple[str, str]]):
        """
        Print initialization steps

        Args:
            steps: List of (component_name, status) tuples; a status equal to
                "ok" gets a check mark, anything else a cross
        """
        box = Box(width=70, style=BoxStyle.ROUNDED, color='bl')

        print(box.text_line("Initializing Components", align='center', indent=4, text_color='c'))
        print(box.separator_line(indent=4))

        for component, status in steps:
            icon = colorize("✓", 'g') if status == "ok" else colorize("✗", 'rd')
            line = f"{icon} {component}"
            print(box.text_line(line, indent=4))

        print(box.bottom_line(indent=4))
        print()

    async def start_live_display(self):
        """Start the live display: record the start time and spawn the update loop.

        Does nothing when live mode is disabled or the display is already running.
        """
        if not self.enable_live or self._live_running:
            return

        self._live_running = True
        self.metrics["start_time"] = datetime.now()
        self._clear_screen()
        self._hide_cursor()

        # Start update loop
        self._live_task = asyncio.create_task(self._live_update_loop())

    async def stop_live_display(self):
        """Stop the live display, cancel the update loop and restore the cursor."""
        if not self._live_running:
            return

        self._live_running = False

        if self._live_task:
            self._live_task.cancel()
            try:
                await self._live_task
            except asyncio.CancelledError:
                pass
            # Drop the finished task so it is not cancelled/awaited again.
            self._live_task = None

        self._show_cursor()
        print()  # Add newline after live display

    async def _live_update_loop(self):
        """Redraw the UI every ``refresh_interval`` seconds until stopped.

        The sleep sits outside the error handler on purpose: ``render`` is
        synchronous, so if it kept raising while the sleep was skipped the loop
        would never yield and would starve every other task on the event loop.
        """
        try:
            while self._live_running:
                try:
                    self.render()
                except Exception as e:
                    print(f"UI render error: {e}")
                await asyncio.sleep(self.refresh_interval)
        except asyncio.CancelledError:
            # Cancellation (see stop_live_display) simply ends the loop.
            pass

    def render(self):
        """Render the entire UI (header, agents, grounding, logs) to stdout.

        No-op unless live mode is enabled and the live display is running.
        """
        if not self.enable_live or not self._live_running:
            return

        lines = []

        # Header
        lines.extend(self._render_header())
        lines.append("")

        # Stack all panels vertically
        lines.extend(self._render_agents())
        lines.append("")
        lines.extend(self._render_grounding())
        lines.append("")
        lines.extend(self._render_logs())

        output = "\n".join(lines)

        # The frame is fully built before the screen is cleared, so an error
        # raised by one of the panels cannot leave a blank terminal behind.
        self._clear_screen()
        sys.stdout.write(output)
        sys.stdout.flush()

    def update_display(self):
        """Update display (alias for render())"""
        self.render()

    def _render_header(self) -> List[str]:
        """Build the status line (state, elapsed time, counters) and a divider."""
        lines = []

        # Calculate elapsed time
        elapsed = "0s"
        if self.metrics["start_time"]:
            delta = datetime.now() - self.metrics["start_time"]
            # total_seconds() rather than .seconds: the latter drops whole
            # days, so the timer would wrap around after 24 hours.
            total_seconds = max(0, int(delta.total_seconds()))
            minutes, seconds = divmod(total_seconds, 60)
            if minutes > 0:
                elapsed = f"{minutes}m{seconds}s"
            else:
                elapsed = f"{seconds}s"

        status_text = (
            f"▶ {colorize('RUNNING', 'g')} | "
            f"Time: {colorize(elapsed, 'c')} | "
            f"Iter: {colorize(str(self.metrics['iterations']), 'y')} | "
            f"Tasks: {colorize(str(self.metrics['completed_tasks']), 'g')} | "
            f"LLM: {colorize(str(self.metrics['llm_calls']), 'bl')} | "
            f"Grounding: {colorize(str(self.metrics['grounding_calls']), 'm')}"
        )

        lines.append("  " + status_text)
        lines.append("  " + "─" * 60)

        return lines

    def _render_agents(self) -> List[str]:
        """Build the agents panel: one line per agent with icon, name and last activity."""
        lines = []

        lines.append("  " + colorize("§ Agents", 'c', bold=True))

        # Status icon lookup (built once, not per agent)
        status_icons = {
            AgentStatus.IDLE: "○",
            AgentStatus.THINKING: "◐",
            AgentStatus.EXECUTING: "◉",
            AgentStatus.WAITING: "◷",
        }

        # Agent info
        agents = [
            ("GroundingAgent", 'c', self.agent_status.get("GroundingAgent", AgentStatus.IDLE)),
        ]

        for agent_name, color, status in agents:
            icon = status_icons.get(status, "○")

            # Recent activity
            activities = self.agent_activities.get(agent_name, [])
            activity = activities[-1][:40] if activities else "idle"

            # Pad from the visible name length. Padding the colorized string
            # would count any escape codes added by colorize() towards the
            # 20-column width and break the alignment.
            padding = " " * max(0, 20 - len(agent_name))

            # Format line
            line = f"    {colorize(icon, 'y')} {colorize(agent_name, color)}{padding} {activity}"
            lines.append(line)

        return lines

    def _render_grounding(self) -> List[str]:
        """Build the grounding panel: known backends plus the last three operations."""
        lines = []

        lines.append("  " + colorize("⊕ Grounding Backends", 'c', bold=True))

        # Backend type icon lookup (built once, not per backend)
        type_icons = {
            "gui": "■",
            "shell": "$",
            "mcp": "◆",
            "system": "●",
            "web": "◉",
        }

        # Show backend types and servers
        if self.grounding_backends:
            for backend_info in self.grounding_backends:
                backend_name = backend_info.get("name", "unknown")
                backend_type = backend_info.get("type", "unknown")
                servers = backend_info.get("servers", [])

                icon = type_icons.get(backend_type, "○")

                # Format backend line; only MCP backends list their servers
                if backend_type == "mcp" and servers:
                    servers_str = ", ".join([s[:15] for s in servers])
                    line = f"    {icon} {colorize(backend_name, 'y')} ({backend_type}): {colorize(servers_str, 'gr')}"
                else:
                    line = f"    {icon} {colorize(backend_name, 'y')} ({backend_type})"

                lines.append(line)

        # Show last 3 operations
        recent_ops = self.grounding_operations[-3:] if self.grounding_operations else []

        if recent_ops:
            lines.append("    " + colorize("Recent Operations:", 'gr'))
            for op in recent_ops:
                backend = op.get("backend", "unknown")
                action = op.get("action", "unknown")[:40]
                status = op.get("status", "pending")

                # Status icon
                if status == "success":
                    icon = colorize("✓", 'g')
                elif status == "pending":
                    icon = colorize("⏳", 'y')
                else:
                    icon = colorize("✗", 'rd')

                line = f"      {icon} {colorize(backend, 'bl')}: {action}"
                lines.append(line)

        return lines

    def _render_logs(self) -> List[str]:
        """Build the events panel from the five most recent log entries."""
        lines = []

        lines.append("  " + colorize("⊞ Recent Events", 'c', bold=True))

        # Show last 5 logs
        recent_logs = self.log_buffer[-5:] if self.log_buffer else []

        if recent_logs:
            for message, level, timestamp in recent_logs:
                time_str = timestamp.strftime("%H:%M:%S")

                # Truncate long messages
                msg_display = message[:55]

                log_line = f"    {colorize(time_str, 'gr')} | {msg_display}"
                lines.append(log_line)

        return lines

    def update_agent_status(self, agent_name: str, status: AgentStatus):
        """Update agent status"""
        self.agent_status[agent_name] = status

    def add_agent_activity(self, agent_name: str, activity: str):
        """Record an activity for an agent, keeping only the 10 most recent."""
        if agent_name not in self.agent_activities:
            self.agent_activities[agent_name] = []

        self.agent_activities[agent_name].append(activity)

        # Keep only last 10 activities
        if len(self.agent_activities[agent_name]) > 10:
            self.agent_activities[agent_name] = self.agent_activities[agent_name][-10:]

    def update_grounding_backends(self, backends: List[Dict[str, Any]]):
        """
        Update grounding backends information

        Args:
            backends: List of backend info dicts with keys:
                - name: backend name
                - type: backend type (gui, shell, mcp, system, web)
                - servers: list of server names (for mcp)
        """
        self.grounding_backends = backends

    def add_grounding_operation(self, backend: str, action: str, status: str = "pending"):
        """Append a grounding operation record and bump the grounding call counter."""
        self.grounding_operations.append({
            "backend": backend,
            "action": action,
            "status": status,
            "timestamp": datetime.now(),
        })

        self.metrics["grounding_calls"] += 1

    def add_log(self, message: str, level: str = "info"):
        """Append a timestamped log entry, keeping only the 100 most recent."""
        self.log_buffer.append((message, level, datetime.now()))

        # Keep only last 100 logs
        if len(self.log_buffer) > 100:
            self.log_buffer = self.log_buffer[-100:]

    def update_metrics(self, **kwargs):
        """Update metrics"""
        self.metrics.update(kwargs)

    def print_summary(self, result: Dict[str, Any]):
        """Print the execution summary box.

        Args:
            result: Result dict; the keys read are ``status``,
                ``execution_time``, ``iterations``, ``completed_tasks`` and,
                optionally, ``evaluation_results``
        """
        box = Box(width=70, style=BoxStyle.ROUNDED, color='c')

        print()
        print(box.text_line(colorize("◈ Execution Summary", 'c', bold=True), align='center', indent=4, text_color=''))
        print(box.separator_line(indent=4))

        # Status
        status = result.get("status", "unknown")
        status_display = {
            "completed": colorize("COMPLETED", 'g', bold=True),
            "timeout": colorize("TIMEOUT", 'y', bold=True),
            "error": colorize("ERROR", 'rd', bold=True),
        }
        status_text = status_display.get(status, status)

        # Formatted outside the f-strings below: reusing the same quote
        # character inside a nested f-string is a SyntaxError before Python
        # 3.12. ``or 0`` also covers an explicit None value.
        execution_time = result.get("execution_time", 0) or 0
        time_text = colorize(f"{execution_time:.2f}s", 'c')
        iterations_text = colorize(str(result.get("iterations", 0)), 'y')
        tasks_text = colorize(str(result.get("completed_tasks", 0)), 'g')

        print(box.text_line(f"  Status:          {status_text}", indent=4, text_color=''))
        print(box.text_line(f"  Execution Time:  {time_text}", indent=4, text_color=''))
        print(box.text_line(f"  Iterations:      {iterations_text}", indent=4, text_color=''))
        print(box.text_line(f"  Completed Tasks: {tasks_text}", indent=4, text_color=''))

        evaluation_results = result.get("evaluation_results")
        if evaluation_results:
            evaluations_text = colorize(str(len(evaluation_results)), 'bl')
            print(box.text_line(f"  Evaluations:     {evaluations_text}", indent=4, text_color=''))

        print(box.bottom_line(indent=4))
        print()


def create_ui(enable_live: bool = True, compact: bool = False) -> AnyToolUI:
    """
    Factory helper that builds an AnyToolUI

    Args:
        enable_live: Whether to enable live display updates
        compact: Use compact layout for smaller terminals

    Returns:
        A new AnyToolUI configured with the given options
    """
    options = {"enable_live": enable_live, "compact": compact}
    return AnyToolUI(**options)

================================================
FILE: anytool/utils/ui_integration.py
================================================
"""
AnyTool UI Integration

Integrates the UI system with AnyTool core components.
Provides hooks and callbacks to update UI in real-time.
"""

import asyncio
from typing import Optional

from anytool.utils.ui import AnyToolUI, AgentStatus
from anytool.utils.logging import Logger

logger = Logger.get_logger(__name__)


class UIIntegration:
    """
    UI Integration for AnyTool

    Connects AnyTool components with the UI system to provide real-time
    visualization of agent activities and execution flow.
    """

    def __init__(self, ui: AnyToolUI):
        """
        Initialize UI integration

        Args:
            ui: AnyToolUI instance
        """
        self.ui = ui
        self._update_task: Optional[asyncio.Task] = None
        self._running = False

        # Tracked components
        self._llm_client = None
        self._grounding_client = None

    def attach_llm_client(self, llm_client):
        """
        Attach LLM client

        The reference is only stored; no method of this class reads it.

        Args:
            llm_client: LLMClient instance
        """
        self._llm_client = llm_client
        logger.debug("UI attached to LLMClient")

    def attach_grounding_client(self, grounding_client):
        """
        Attach grounding client

        Its ``list_providers()`` result is used by the periodic UI update to
        fill the grounding backends panel.

        Args:
            grounding_client: GroundingClient instance
        """
        self._grounding_client = grounding_client
        logger.debug("UI attached to GroundingClient")

    async def start_monitoring(self, poll_interval: float = 0.5):
        """
        Start monitoring and updating UI

        Args:
            poll_interval: Update interval in seconds
        """
        if self._running:
            logger.warning("UI monitoring already running")
            return

        self._running = True

        # Immediately update UI once before starting the loop
        try:
            await self._update_ui()
        except BaseException:
            # No task was started, so do not stay flagged as running;
            # otherwise every later start_monitoring() call would be refused.
            self._running = False
            raise

        self._update_task = asyncio.create_task(
            self._monitor_loop(poll_interval)
        )
        logger.info("UI monitoring started")

    async def stop_monitoring(self):
        """Stop monitoring and cancel the background update task."""
        if not self._running:
            return

        self._running = False

        if self._update_task:
            self._update_task.cancel()
            try:
                await self._update_task
            except asyncio.CancelledError:
                pass
            # Drop the finished task so it is not cancelled/awaited again.
            self._update_task = None

        logger.info("UI monitoring stopped")

    async def _monitor_loop(self, poll_interval: float):
        """
        Main monitoring loop

        The sleep sits outside the error handler on purpose: ``_update_ui``
        never suspends, so if it kept raising while the sleep was skipped the
        loop would never yield and would starve the rest of the event loop.

        Args:
            poll_interval: Update interval in seconds
        """
        try:
            while self._running:
                try:
                    await self._update_ui()
                except Exception as e:
                    logger.error(f"UI update error: {e}", exc_info=True)
                await asyncio.sleep(poll_interval)
        except asyncio.CancelledError:
            # Cancellation (see stop_monitoring) simply ends the loop.
            pass

    async def _update_ui(self):
        """Push the current grounding backend list to the UI and redraw it."""
        # Update grounding backends info
        if self._grounding_client:
            backends = []
            try:
                # Get list of providers
                providers = self._grounding_client.list_providers()

                for backend_type, provider in providers.items():
                    backend_name = backend_type.value if hasattr(backend_type, 'value') else str(backend_type)

                    backend_info = {
                        "name": backend_name,
                        "type": backend_name,  # gui, shell, mcp, system, web
                        "servers": []
                    }

                    # For MCP provider, get server names
                    if backend_name == "mcp":
                        try:
                            # Try to get MCP sessions from provider
                            if hasattr(provider, '_sessions'):
                                backend_info["servers"] = list(provider._sessions.keys())
                        except Exception:
                            # Best effort: the server list is cosmetic, so a
                            # provider without usable sessions shows none.
                            pass

                    backends.append(backend_info)

                self.ui.update_grounding_backends(backends)
            except Exception as e:
                logger.debug(f"Failed to update grounding backends: {e}")

        # Refresh display
        self.ui.update_display()

    # Event handlers - to be called by agents

    def on_agent_start(self, agent_name: str, activity: str):
        """
        Called when agent starts an activity

        Args:
            agent_name: Agent name
            activity: Activity description
        """
        self.ui.update_agent_status(agent_name, AgentStatus.EXECUTING)
        self.ui.add_agent_activity(agent_name, activity)
        self.ui.add_log(f"{agent_name}: {activity}", level="info")

    def on_agent_thinking(self, agent_name: str):
        """
        Called when agent is thinking

        Args:
            agent_name: Agent name
        """
        self.ui.update_agent_status(agent_name, AgentStatus.THINKING)

    def on_agent_complete(self, agent_name: str, result: str = ""):
        """
        Called when agent completes an activity

        Args:
            agent_name: Agent name
            result: Result description; logged only when non-empty
        """
        self.ui.update_agent_status(agent_name, AgentStatus.IDLE)
        if result:
            self.ui.add_log(f"{agent_name}: {result}", level="success")

    def on_llm_call(self, model: str, prompt_length: int):
        """
        Called when LLM is called

        Args:
            model: Model name
            prompt_length: Prompt length
        """
        self.ui.update_metrics(
            llm_calls=self.ui.metrics.get("llm_calls", 0) + 1
        )
        self.ui.add_log(f"LLM call: {model} (prompt: {prompt_length} chars)", level="debug")

    def on_grounding_call(self, backend: str, action: str):
        """
        Called when grounding backend is called

        Args:
            backend: Backend name
            action: Action description
        """
        self.ui.add_grounding_operation(backend, action, status="pending")
        self.ui.add_log(f"Grounding [{backend}]: {action}", level="info")

    def on_grounding_complete(self, backend: str, action: str, success: bool):
        """
        Called when grounding operation completes

        Args:
            backend: Backend name
            action: Action description
            success: Whether operation succeeded
        """
        status = "success" if success else "error"

        # Resolve the most recent still-pending operation with the same
        # backend and action.
        for op in reversed(self.ui.grounding_operations):
            if op["backend"] == backend and op["action"] == action and op["status"] == "pending":
                op["status"] = status
                break

        level = "success" if success else "error"
        result = "✓" if success else "✗"
        self.ui.add_log(f"Grounding [{backend}]: {action} {result}", level=level)

    def on_iteration(self, iteration: int):
        """
        Called on each iteration

        Args:
            iteration: Iteration number
        """
        self.ui.update_metrics(iterations=iteration)

    def on_error(self, message: str):
        """
        Called when an error occurs

        Args:
            message: Error message
        """
        self.ui.add_log(f"ERROR: {message}", level="error")


class UILoggingHandler:
    """
    Forwards log records to the UI log panel
    """

    # Logging level names mapped to the level labels passed to the UI.
    _LEVEL_LABELS = {
        "DEBUG": "debug",
        "INFO": "info",
        "WARNING": "warning",
        "ERROR": "error",
        "CRITICAL": "error",
    }

    # Lower-case fragments marking messages that are too noisy to display.
    _NOISY_FRAGMENTS = ("processing card", "workflow poll")

    def __init__(self, ui: AnyToolUI):
        """
        Initialize logging handler

        Args:
            ui: AnyToolUI instance that receives the log messages
        """
        self.ui = ui

    def emit(self, record):
        """
        Forward one log record to the UI unless its message is noisy

        Args:
            record: Log record; ``levelname`` and ``getMessage()`` are read
        """
        # Unknown level names are shown as "info".
        label = self._LEVEL_LABELS.get(record.levelname, "info")
        text = record.getMessage()

        # Filter out noisy logs (case-insensitive substring match)
        lowered = text.lower()
        for fragment in self._NOISY_FRAGMENTS:
            if fragment in lowered:
                return

        self.ui.add_log(text, level=label)


def create_integration(ui: AnyToolUI) -> UIIntegration:
    """
    Factory helper that wraps a UI in a UIIntegration

    Args:
        ui: AnyToolUI instance the integration will drive

    Returns:
        A new UIIntegration bound to ``ui``
    """
    integration = UIIntegration(ui)
    return integration

================================================
FILE: pyproject.toml
================================================
[build-system]
requires = ["setuptools>=68.0", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "anytool"
version = "0.1.0"
description = "AnyTool: Universal Tool-Use Layer for AI Agents"
readme = "README.md"
requires-python = ">=3.10"
license = {text = "MIT"}
authors = [
    {name = "lingruixu@HKUDS", email = "lingruixu.db@gmail.com"}
]

dependencies = [
    "litellm>=1.70.0",
    "python-dotenv>=1.0.0",
    "openai>=1.0.0",
    "jsonschema>=4.25.0",
    "mcp>=1.0.0",
    "anthropic>=0.71.0",
    "pillow>=12.0.0",
    "flask>=3.1.0",
    "pyautogui>=0.9.54",
    "pydantic>=2.12.0",
    "requests>=2.32.0",
]

[project.optional-dependencies]
# The platform extras carry PEP 508 environment markers so that an extra
# requested on a different operating system (for example through `all`)
# skips the packages that target another platform.
macos = [
    "pyobjc-core>=12.0; sys_platform == 'darwin'",
    "pyobjc-framework-cocoa>=12.0; sys_platform == 'darwin'",
    "pyobjc-framework-quartz>=12.0; sys_platform == 'darwin'",
    "atomacos>=3.2.0; sys_platform == 'darwin'",
]

linux = [
    "python-xlib>=0.33; sys_platform == 'linux'",
    "pyatspi>=2.38.0; sys_platform == 'linux'",
    "numpy>=1.24.0; sys_platform == 'linux'",
]

windows = [
    "pywinauto>=0.6.8; sys_platform == 'win32'",
    "pywin32>=306; sys_platform == 'win32'",
    "PyGetWindow>=0.0.9; sys_platform == 'win32'",
]

dev = [
    "pytest>=7.0.0",
    "pytest-asyncio>=0.21.0",
    "black>=23.0.0",
    "flake8>=6.0.0",
    "mypy>=1.0.0",
]

# Relies on the markers above to stay installable on a single platform.
all = [
    "anytool[macos,linux,windows,dev]",
]

[project.urls]
Repository = "https://github.com/HKUDS/AnyTool"
"Bug Tracker" = "https://github.com/HKUDS/AnyTool/issues"

[project.scripts]
anytool = "anytool.__main__:run_main"
anytool-server = "anytool.local_server.main:main"

[tool.setuptools]
packages = {find = {where = ["."], include = ["anytool*"]}}

[tool.setuptools.package-data]
anytool = [
    "config/*.json",
    "config/*.json.example",
    "local_server/config.json",
    "local_server/README.md",
]


================================================
FILE: requirements.txt
================================================
# AnyTool core dependencies
litellm>=1.70.0
python-dotenv>=1.0.0
openai>=1.0.0
jsonschema>=4.25.0
mcp>=1.0.0
anthropic>=0.71.0
pillow>=12.0.0
colorama

# Local server dependencies (cross-platform)
flask>=3.1.0
pyautogui>=0.9.54
pydantic>=2.12.0
requests>=2.32.0

# # macOS-specific dependencies (local server)
# pyobjc-core>=12.0; sys_platform == 'darwin'
# pyobjc-framework-cocoa>=12.0; sys_platform == 'darwin'
# pyobjc-framework-quartz>=12.0; sys_platform == 'darwin'
# atomacos>=3.2.0; sys_platform == 'darwin'

# # Linux-specific dependencies (local server)
# python-xlib>=0.33; sys_platform == 'linux'
# pyatspi>=2.38.0; sys_platform == 'linux'
# numpy>=1.24.0; sys_platform == 'linux'

# # Windows-specific dependencies (local server)
# pywinauto>=0.6.8; sys_platform == 'win32'
# pywin32>=306; sys_platform == 'win32'
# PyGetWindow>=0.0.9; sys_platform == 'win32'