Full Code of HKUDS/OpenPhone for AI

main aff6dea330e6 cached
200 files
57.2 MB
1.2M tokens
2015 symbols
1 requests
Download .txt
Showing preview only (5,016K chars total). Download the full file or copy to clipboard to get everything.
Repository: HKUDS/OpenPhone
Branch: main
Commit: aff6dea330e6
Files: 200
Total size: 57.2 MB

Directory structure:
gitextract_hys_i7ka/

├── .gitignore
├── Communication.md
├── LICENSE
├── PhoneClaw/
│   ├── README.md
│   ├── __init__.py
│   ├── actions.py
│   ├── agent.py
│   ├── connection.py
│   ├── controller.py
│   ├── embeddings.py
│   ├── evaluator.py
│   ├── executor.py
│   ├── experience.py
│   ├── hierarchy.py
│   ├── keepalive.py
│   ├── labeling.py
│   ├── learn.py
│   ├── loop.py
│   ├── memory.py
│   ├── planner.py
│   ├── prompts.py
│   ├── recorder.py
│   ├── run_phoneclaw.py
│   ├── screenshot.py
│   └── state.py
├── README.md
├── adb_client.py
├── agent/
│   ├── __init__.py
│   ├── llm/
│   │   ├── glm4.py
│   │   └── qwen_llm_model.py
│   ├── mllm/
│   │   ├── claude_model.py
│   │   └── qwen_model.py
│   ├── model.py
│   └── utils.py
├── app_data/
│   └── README.md
├── configs/
│   ├── example_xml_cloud_hyper.yaml
│   ├── glm-4-plus-mac-XML.yaml
│   ├── gpt-4o-linux-XML.yaml
│   └── gpt-4o-mac-SoM.yaml
├── docs/
│   ├── modify_androidlab.md
│   ├── new_apps.md
│   ├── prepare_for_linux.md
│   └── prepare_for_mac.md
├── eval.py
├── evaluation/
│   ├── auto_test.py
│   ├── config/
│   │   ├── bluecoins.yaml
│   │   ├── calendar.yaml
│   │   ├── cantook.yaml
│   │   ├── chrome.yaml
│   │   ├── clock.yaml
│   │   ├── contacts.yaml
│   │   ├── gmail.yaml
│   │   ├── map.yaml
│   │   ├── pimusic.yaml
│   │   ├── reddit.yaml
│   │   ├── setting.yaml
│   │   ├── tiktok.yaml
│   │   └── zoom.yaml
│   ├── configs.py
│   ├── definition.py
│   ├── docker_utils.py
│   ├── evaluation.py
│   ├── parallel.py
│   ├── task.py
│   ├── tasks/
│   │   ├── __init__.py
│   │   ├── bluecoins/
│   │   │   ├── __init__.py
│   │   │   └── bluecoins.py
│   │   ├── calendar/
│   │   │   ├── __init__.py
│   │   │   └── calendar.py
│   │   ├── cantook/
│   │   │   ├── __init__.py
│   │   │   └── cantook.py
│   │   ├── chrome/
│   │   │   ├── __init__.py
│   │   │   └── chrome.py
│   │   ├── clock/
│   │   │   ├── __init__.py
│   │   │   └── clock.py
│   │   ├── contacts/
│   │   │   ├── __init__.py
│   │   │   └── contacts.py
│   │   ├── gmail/
│   │   │   ├── __init__.py
│   │   │   └── gmail.py
│   │   ├── human_ground_turth/
│   │   │   └── ground_truth_length.json
│   │   ├── llm_evaluator.py
│   │   ├── map_me/
│   │   │   ├── __init__.py
│   │   │   └── map.py
│   │   ├── pimusic/
│   │   │   ├── __init__.py
│   │   │   └── pimusic.py
│   │   ├── reddit/
│   │   │   ├── __init__.py
│   │   │   └── reddit.py
│   │   ├── setting/
│   │   │   ├── __init__.py
│   │   │   └── setting.py
│   │   ├── tiktok/
│   │   │   ├── __init__.py
│   │   │   └── tiktok.py
│   │   └── zoom/
│   │       ├── __init__.py
│   │       └── zoom.py
│   └── utils.py
├── generate_result.py
├── ios_agent/
│   ├── README.md
│   ├── __init__.py
│   ├── actions.py
│   ├── application/
│   │   └── mail/
│   │       ├── __init__.py
│   │       ├── pipeline.py
│   │       ├── rag_system.py
│   │       └── run_mail_and_rag.sh
│   ├── connection.py
│   ├── controller.py
│   ├── executor.py
│   ├── hierarchy.py
│   ├── labeling.py
│   ├── prompts.py
│   ├── recorder.py
│   ├── run_ios_agent.py
│   ├── screenshot.py
│   └── task.py
├── model_training/
│   ├── R1-V/
│   │   └── src/
│   │       ├── distill_r1/
│   │       │   ├── README.md
│   │       │   ├── create_hf_dataset.py
│   │       │   ├── filter_r1.py
│   │       │   ├── generate_scene_qa_pairs.ipynb
│   │       │   ├── prompt.py
│   │       │   └── query_r1.py
│   │       ├── eval/
│   │       │   ├── logs/
│   │       │   │   ├── counting_results_superclevr_200_qwen2vl_2b_instruct_grpo100_legacy.json
│   │       │   │   ├── counting_results_superclevr_200_qwen2vl_2b_instruct_legacy.json
│   │       │   │   ├── geoqa_test_qwen2vl_7b_grpo_2epochs_legacy.json
│   │       │   │   └── geoqa_test_qwen2vl_7b_instruct_legacy.json
│   │       │   ├── prompts/
│   │       │   │   ├── geoqa_test_prompts.jsonl
│   │       │   │   └── superclevr_test200_counting_problems.jsonl
│   │       │   ├── test_qwen2vl_counting_superclevr.py
│   │       │   ├── test_qwen2vl_geoqa.py
│   │       │   └── test_qwen2vl_geoqa_multigpu.py
│   │       ├── r1-v/
│   │       │   ├── .gitignore
│   │       │   ├── LICENSE
│   │       │   ├── Makefile
│   │       │   ├── configs/
│   │       │   │   ├── ddp.yaml
│   │       │   │   ├── qwen2vl_sft_config.yaml
│   │       │   │   ├── zero2.yaml
│   │       │   │   └── zero3.yaml
│   │       │   ├── local_scripts/
│   │       │   │   ├── create_vision_cot_data.py
│   │       │   │   ├── lmms_eval_qwen2vl.sh
│   │       │   │   ├── prepare_hf_data.py
│   │       │   │   ├── train_aria_moe.sh
│   │       │   │   ├── train_qwen2_vl.sh
│   │       │   │   ├── zero1_no_optimizer.json
│   │       │   │   ├── zero2.json
│   │       │   │   ├── zero2_fix.json
│   │       │   │   ├── zero3.json
│   │       │   │   ├── zero3.yaml
│   │       │   │   ├── zero3_offload.json
│   │       │   │   └── zero3_offload_fix.json
│   │       │   ├── run_grpo.sh
│   │       │   ├── setup.cfg
│   │       │   ├── setup.py
│   │       │   └── src/
│   │       │       └── open_r1/
│   │       │           ├── __init__.py
│   │       │           ├── evaluate.py
│   │       │           ├── generate.py
│   │       │           ├── grpo.py
│   │       │           ├── grpo_agent.py
│   │       │           ├── sft.py
│   │       │           └── trainer/
│   │       │               ├── __init__.py
│   │       │               ├── grpo_trainer.py
│   │       │               ├── vllm_grpo_trainer.py
│   │       │               └── vllm_grpo_trainer_modified.py
│   │       ├── requirements.txt
│   │       └── scripts/
│   │           ├── run_grpo_clevr.sh
│   │           ├── run_grpo_vllm.sh
│   │           ├── run_grpo_vllm_qwen25vl.sh
│   │           ├── run_grpo_vllm_qwen25vl_agent.sh
│   │           ├── run_sft_clevr.sh
│   │           └── test_grpo_geoqa_multigpu.sh
│   ├── README.md
│   └── llama_factory_scripts/
│       ├── deepspeed_config_zero2.json
│       ├── deepspeed_config_zero3.json
│       └── full_tuning_setting_3B_vl_fix.sh
├── page_executor/
│   ├── __init__.py
│   ├── simple_vision_executor.py
│   ├── text_executor.py
│   └── utils.py
├── prepare_data/
│   ├── README.md
│   ├── rl/
│   │   └── convert_to_hf_vl.py
│   └── visual_model_data/
│       ├── alpaca_format_o1_data_visual_cot.json
│       ├── data_maker.py
│       ├── o1_data_visual_cot_all.json
│       ├── o1_data_visual_cot_pimusic.json
│       └── sft_data_maker.py
├── recorder/
│   ├── __init__.py
│   └── json_recoder.py
├── requirements.txt
├── templates/
│   ├── __init__.py
│   ├── android_screenshot_template.py
│   ├── packages.py
│   ├── seeact_screenshot_prompts.py
│   └── text_only_mobile.py
├── test_script/
│   ├── all_test_cloud_v1_hyper.sh
│   └── all_test_cloud_v1_hyper_add.sh
├── tools/
│   ├── check_result_multiprocess.py
│   ├── delete_blank.py
│   ├── modify_mobile_to_avd.py
│   └── modify_mobile_to_docker.py
├── utils_mobile/
│   ├── and_controller.py
│   ├── specialCheck.py
│   ├── utils.py
│   └── xml_tool.py
└── vllm_script/
    └── run_server_visual_cot.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
.DS_Store


================================================
FILE: Communication.md
================================================
We provide QR codes for joining the HKUDS discussion groups on WeChat and Feishu.

You can join by scanning the QR codes below:

<img src="https://github.com/HKUDS/.github/blob/main/profile/QR.png" alt="WeChat QR Code" width="400"/>



================================================
FILE: LICENSE
================================================
MIT License

Copyright (c) 2024 xuyifan-0731
Copyright (c) 2025 yangqin-jiang

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


================================================
FILE: PhoneClaw/README.md
================================================
# PhoneClaw — iOS Ralph Loop GUI Agent

An autonomous iOS GUI automation agent built on the **Ralph Loop** methodology:  
**EXECUTE → EVALUATE → FIX → REPEAT**, repeating until every subtask passes its success criteria.

Device control is provided by WebDriverAgent (WDA). Key features include structured LLM-driven planning, per-subtask evaluation with automatic retries, natural-language answer extraction, a persistent interactive daemon mode, and a **two-layer self-learning memory** that makes the agent progressively smarter across sessions.

---

## Architecture

```
run_phoneclaw.py
    │
    ├─ ScreenKeepalive      ← background: keep device screen on
    ├─ UserMemory           ← persistent user profile (facts, habits, history)
    ├─ ExperienceLog        ← persistent execution know-how (what worked / failed)
    ├─ TaskPlanner          ← VLM: task → subtask list with success criteria
    ├─ StateManager         ← filesystem persistence (phoneclaw_state.json)
    │
    └─ RalphLoop (loop.py)
          │
          │  for each subtask:
          │
          ├─ EXPERIENCE HINTS ─ ExperienceLog.get_hints_for(app, subtask)
          │                      injected into Executor system prompt
          │
          ├─ EXECUTE ─── IOSExecutor(code_snippet)
          │                  ├─ VLM outputs normalised relative coords, e.g. tap(0.50, 0.90)
          │                  ├─ Executor converts [0,1] → physical pixels → WDA logical coords
          │                  └─ IOSActionHandler → WDA HTTP → iOS device
          │
          ├─ EVALUATE ── SubTaskEvaluator(screenshot, criteria)
          │                  └─ VLM: screenshot + criteria → {passed, reason}
          │
          ├─ PASS ──────── advance to next subtask, persist state
          │
          └─ FAIL ──────── increment fix_retries
                           retries < max → FIX (inject fail reason + action history)
                                         → EXECUTE again
                           retries ≥ max → mark failed, skip / abort
          │
          └─ FINAL ANSWER ── VLM reads current screen + original question → answer string
          │
          └─ LEARN ────────── ExperienceLog.extract_and_record(trace)
                               UserMemory.extract_insights(task, answer)
```

---

## Self-Learning Memory

PhoneClaw accumulates two complementary memory stores that persist across sessions:

### UserMemory — who the user is

Stored in `PhoneClaw/data/user_profile.json`.

| Category | Examples |
|---|---|
| Profile | Inferred name, location, language |
| App usage | Meituan used 8×, WeChat used 5× |
| Task history | Last 200 tasks with answers and status |
| Insights | "User is located in Hangzhou", "Frequently orders Heytea delivery", "Meituan account ********" |

**How it helps the Planner:** A `## User Profile` block is injected into every Planner prompt, so the agent can make smarter subtask plans (e.g. knowing the user's city improves location-sensitive searches).

**Memory-first retrieval:** Before touching the device, the agent checks whether the profile already contains a confident answer to the question. If yes, the answer is returned immediately with zero device interactions.

```
[PhoneClaw] Task> What is the name of my Meituan account?
[Memory] Checking profile for cached answer...
[Memory] Answer found in profile — skipping device interaction.
═══════════════════════════════════════
[PhoneClaw] ANSWER  (from memory)
═══════════════════════════════════════
Your account name is ********
```

### ExperienceLog — how to do things

Stored in `PhoneClaw/data/experience_log.json`.

| Lesson type | Example |
|---|---|
| `successful_navigation` | "Meituan orders tab coordinate approx. (0.62, 0.94)" |
| `failed_approach` | "Tapping (0.5, 0.5) on Meituan home screen triggers an ad popup" |
| `ui_knowledge` | "WeChat search bar is at the top, y≈0.07" |
| `timing` | "Meituan order list takes approx. 3 seconds to load" |

**How it helps the Executor:** Before every Executor VLM call, relevant lessons for the current app are injected into the system prompt, so the model avoids previously-failed actions and reuses previously-successful navigation paths.

**Reinforcement:** When a lesson is confirmed by a later task, its `reinforced` counter increments and its `confidence` may be upgraded (`low → medium → high`).

**Semantic deduplication:** New lessons and insights are compared to existing ones using OpenAI-compatible text embeddings (cosine similarity, threshold 0.88). A Jaccard similarity fallback (threshold 0.50) is used when no embedding API key is available.

**Automatic compaction:** When an app accumulates ≥ 20 lessons, a VLM-driven compaction pass merges near-duplicate entries, removes low-value items, and generalises specific coordinates — targeting ≤ 8 high-quality lessons per app.

---

## Complete Interactive-Mode Flow

```
Start session
    │
    ├─ Load UserMemory         (profile + history from data/user_profile.json)
    ├─ Load ExperienceLog      (lessons from data/experience_log.json)
    ├─ Start ScreenKeepalive   (device stays awake)
    │
    │  User types task
    │      │
    │      ▼
    ├─ [1] Memory-first query  ─── answered from profile? ──► return answer, done
    │      │ not found
    │      ▼
    ├─ [2] Plan  (Planner + user context injected from UserMemory)
    │      │
    │      ▼
    ├─ [3] Ralph Loop  (for each subtask)
    │        ├─ inject ExperienceLog hints into Executor prompt
    │        └─ EXECUTE → EVALUATE → FIX → REPEAT
    │      │
    │      ▼
    ├─ [4] Final Answer  (VLM reads screen → natural-language answer)
    │      │
    │      ▼
    ├─ [5] Record task  (UserMemory.record_task)
    ├─ [6] Extract insights  (VLM → new user facts → UserMemory)
    └─ [7] Extract lessons   (VLM → new app lessons → ExperienceLog)
              └─ compact_if_needed()  (auto-compact when lessons ≥ 20)
```

---

## Coordinate System

VLM outputs **normalised relative coordinates** in `[0.0, 1.0]`:

```
(0.0, 0.0) ─────────────── (1.0, 0.0)   top
     │                           │
     │       (0.5, 0.5)          │   centre
     │                           │
(0.0, 1.0) ─────────────── (1.0, 1.0)   bottom
```

| Screen area | x range | y range |
|---|---|---|
| Status bar | any | 0.02 – 0.06 |
| Top navigation | any | 0.06 – 0.12 |
| Centre | ~0.50 | ~0.50 |
| Bottom tab bar | any | 0.90 – 0.96 |

The Executor converts relative coords → physical pixels → WDA logical coords internally. The VLM never needs to know the device resolution.

---

## Directory Structure

```
PhoneClaw/
├── run_phoneclaw.py     # CLI entry point (single-task + interactive daemon)
│
├── loop.py              # Ralph Loop orchestrator (EXECUTE → EVALUATE → FIX)
├── planner.py           # VLM task decomposition → subtask list
├── evaluator.py         # VLM screenshot evaluation → pass/fail
├── state.py             # Filesystem state persistence (phoneclaw_state.json)
├── prompts.py           # All VLM prompt templates
├── agent.py             # OpenRouterAgent (OpenRouter API)
├── keepalive.py         # Screen keepalive (idleTimerDisabled / touch fallback)
│
├── memory.py            # UserMemory: user profile + task history + insights
├── experience.py        # ExperienceLog: app-specific execution lessons
├── embeddings.py        # Semantic deduplication (embedding cosine / Jaccard)
├── learn.py             # DemoRecorder: learning mode (record user demos)
│
├── actions.py           # WDA HTTP action primitives + iOS bundle ID map
├── connection.py        # WDA session management
├── controller.py        # IOSController (Android-Lab compatible interface)
├── executor.py          # IOSExecutor: coord conversion + action dispatch
├── hierarchy.py         # XML page source → IOSElement list
├── labeling.py          # Draw bounding boxes on screenshots
├── screenshot.py        # Screenshot capture via WDA / idevicescreenshot
├── recorder.py          # Per-step JSONL trace logging (PhoneClawRecorder)
│
└── data/                # Persistent data (auto-created on first run)
    ├── user_profile.json
    ├── experience_log.json
    └── demos/           # Learning-mode demo recordings
```

Runtime log directories (created on each task run):

```
phoneclaw_logs/<task_id>/
├── phoneclaw_state.json  # Subtask list, progress, fix counts
├── traces/trace.jsonl    # Per-step trace: screenshots, VLM responses, eval results
├── screenshots/          # Raw screenshots per round
└── xml/                  # iOS page source XML per round
```

---

## Requirements

- **iOS device** with **WebDriverAgent** running
- Python packages: `requests`, `Pillow`, `opencv-python`, `openai`, `backoff`, `lxml`
- Optional: `libimobiledevice` (`idevicescreenshot`) for screenshot fallback

---

## Quick Start

### 1. Start WebDriverAgent on the device

Launch WebDriverAgent on the device (e.g. via Xcode or `xcodebuild test`), then forward its port to localhost:

```bash
iproxy 8100 8100
```

### 2. Configure the VLM backend

**Option A — OpenRouter (recommended)**

```bash
export OPENROUTER_API_KEY="sk-or-v1-..."
```

**Option B — Local / self-hosted model**

```bash
export API_BASE="http://localhost:8002/v1"
export MODEL_NAME="Qwen/Qwen2.5-VL-7B-Instruct"
```

Or copy `.env.example` to `.env` and edit it.

### 3a. Single-task mode

```bash
cd OpenPhone  # repository root

python PhoneClaw/run_phoneclaw.py \
    --task "Open Meituan and check what the most recent food delivery order was" \
    --openrouter \
    --model-name "z-ai/glm-4.6v"
```

Example output:

```
[Memory] Checking profile for cached answer...
[Memory] Not in profile — will use device.

[PhoneClaw] Planning subtasks...
[Planner] Decomposed task into 3 subtask(s).
  #1: Call launch("Meituan") to open Meituan
  #2: Navigate to the orders page
  #3: Read the most recent order and call finish() with the answer

... (execution) ...

[Experience] Extracting lessons from task trace...
[Experience] new: [Meituan] Meituan orders tab coordinate approx. (0.62, 0.94)

════════════════════════════════════════════════════
[PhoneClaw] ANSWER
════════════════════════════════════════════════════
The most recent order is from Heytea (Hangzhou Wanxiang Center):
Hot Roasted Brown Sugar Boba Milk Tea ¥21.9, placed on 2026-02-19 12:21.
════════════════════════════════════════════════════
```

Subsequent identical questions are answered from memory with no device interaction:

```
[Memory] Answer found in profile — skipping device interaction.
```

### 3b. Interactive / daemon mode

Connect once, accept tasks from stdin indefinitely. The device screen stays on automatically.

```bash
python PhoneClaw/run_phoneclaw.py \
    --interactive \
    --openrouter \
    --model-name "z-ai/glm-4.6v"
```

```
════════════════════════════════════════════════════
[PhoneClaw] Interactive mode — device connected.
[Memory] Profile: PhoneClaw/data/user_profile.json
[Memory] Sessions: 4  |  Tasks: 12 completed / 1 failed  |  Insights: 8
[Experience] Log: PhoneClaw/data/experience_log.json  |  Lessons: 23  |  Tasks: 11
[PhoneClaw] Commands: 'memory' — profile  |  'experience' — lessons  |  'quit' — exit
════════════════════════════════════════════════════

[PhoneClaw] Task> Open WeChat and view the 5 most recent conversations
... (executes) ...

[PhoneClaw] Task> What is my Meituan account name?
[Memory] Answer found in profile — skipping device interaction.
Your Meituan account name is Alice123

[PhoneClaw] Task> memory
  ══════════════════════════════════════════════════════════
  User Profile  —  PhoneClaw/data/user_profile.json
  ══════════════════════════════════════════════════════════
  Sessions     : 4
  Tasks total  : 13  (✓ 12  ✗ 1)
  Insights     : 8
  Name         : Alice123
  Location     : Hangzhou

  App usage:
    Meituan              8×  (last: 2026-03-09)
    WeChat               5×  (last: 2026-03-09)

  Insights:
    • User's Meituan account name is Alice123
    • User frequently orders Heytea delivery near Hangzhou Wanxiang Center
    • User typically orders food via Meituan at lunchtime

[PhoneClaw] Task> experience
  ══════════════════════════════════════════════════════════
  Experience Log  —  PhoneClaw/data/experience_log.json
  ══════════════════════════════════════════════════════════
  Lessons: 23  |  Tasks processed: 11

  [Meituan]
    ✓[H×4] View Meituan order history: tap the 'Orders' tab at the bottom, coord approx. (0.62, 0.94)
    ✗[H×2] Tapping (0.5, 0.5) on Meituan home triggers a promotional popup, not the orders page
    ℹ[M×1] Meituan bottom tabs: Home x≈0.12 / Nearby x≈0.38 / Orders x≈0.62 / Mine x≈0.88

  [WeChat]
    ✓[H×3] WeChat chat list is visible immediately after launch; no extra navigation needed
    ℹ[M×2] WeChat search bar is at the top, coord approx. (0.5, 0.07)

[PhoneClaw] Task> compact
[PhoneClaw] Running full experience compaction (may take a minute)...

[PhoneClaw] Task> quit
[PhoneClaw] Goodbye.
```

**Special commands in interactive mode:**

| Command | Aliases | Description |
|---|---|---|
| `memory` | `profile` | Display user profile summary |
| `experience` | `exp`, `lessons` | Display experience log by app |
| `compact` | — | Trigger manual compaction of experience log |
| `quit` | `exit`, `q` | Exit cleanly |

- **Ctrl+C inside a task** — interrupts that task only; ready for the next
- **Ctrl+C at the prompt / `quit`** — exits cleanly
- Each task creates its own log directory under `./phoneclaw_logs/`

### 3c. Learning / demonstration mode

Record your own manual device operations so PhoneClaw can learn from them.

**Prerequisites:** Enable *Settings → Accessibility → Touch → Show Touches* on the device so tap positions can be detected from screenshots.

```bash
python PhoneClaw/run_phoneclaw.py \
    --learn \
    --learn-app "Xiaohongshu" \
    --learn-describe "browsing the discovery feed" \
    --openrouter \
    --model-name "z-ai/glm-4.6v"
```

PhoneClaw captures screenshots at ~8 fps, detects tap positions via OpenCV `HoughCircles` (falls back to pixel-diff centroid), annotates each frame, and then calls the VLM to extract reusable lessons that are added directly to the ExperienceLog.

Annotated frames are saved to `PhoneClaw/data/demos/<app>_<timestamp>/`.

---

## OpenRouter Configuration

`agent.py` provides `OpenRouterAgent`:

- Standard `image_url` data-URI format (compatible with all OpenRouter vision models)
- Required `HTTP-Referer` and `X-Title` headers
- Auto-resizes screenshots to fit model context limits
- Exponential backoff, up to 5 retries on transient API errors

### Separate executor and evaluator models

```bash
python PhoneClaw/run_phoneclaw.py \
    --task "Send a message to Alice on WeChat" \
    --openrouter \
    --model-name "z-ai/glm-4.6v" \
    --eval-model-name "openai/gpt-4o"
```

---

## CLI Reference

### Mode arguments

| Argument | Default | Description |
|---|---|---|
| `--task TEXT` | — | Task description (required in single-task mode) |
| `--interactive` | off | Daemon mode: connect once, accept tasks indefinitely |
| `--learn` | off | Learning mode: record demo and extract lessons |

### Loop / execution arguments

| Argument | Default | Description |
|---|---|---|
| `--wda-url URL` | `$WDA_URL` / `http://localhost:8100` | WebDriverAgent base URL |
| `--max-rounds N` | `100` | Global cap on total action rounds |
| `--max-fix-retries N` | `3` | Max fix attempts per failing subtask |
| `--no-skip-failed` | off | Abort entire task on subtask failure |
| `--request-interval S` | `2.0` | Seconds between action rounds |

### Logging / resume arguments

| Argument | Default | Description |
|---|---|---|
| `--task-dir PATH` | auto-generated | Override log output directory |
| `--resume` | off | Resume from saved state in `--task-dir` |

### Memory arguments

| Argument | Env var | Default | Description |
|---|---|---|---|
| `--memory-path PATH` | `PHONECLAW_MEMORY` | `PhoneClaw/data/user_profile.json` | User profile JSON path |
| `--no-memory` | — | off | Disable memory recording for this run |

### Experience arguments

| Argument | Env var | Default | Description |
|---|---|---|---|
| `--experience-path PATH` | `PHONECLAW_EXPERIENCE` | `PhoneClaw/data/experience_log.json` | Experience log JSON path |
| `--no-experience` | — | off | Disable experience recording / injection |

### Screen keepalive

| Argument | Env var | Default | Description |
|---|---|---|---|
| `--keepalive-interval S` | `KEEPALIVE_INTERVAL` | `30` | Heartbeat / fallback-tap interval (seconds). Always active in `--interactive`. Set `0` to disable. |

### OpenRouter arguments

| Argument | Env var | Default | Description |
|---|---|---|---|
| `--openrouter` | `OPENROUTER_API_KEY` (auto-detect) | off | Use OpenRouter backend |
| `--openrouter-api-key KEY` | `OPENROUTER_API_KEY` | — | API key |
| `--model-name SLUG` | `OPENROUTER_MODEL` | `z-ai/glm-4.6v` | Executor model |
| `--eval-model-name SLUG` | `EVAL_OPENROUTER_MODEL` | same as `--model-name` | Evaluator model |
| `--openrouter-base-url URL` | — | `https://openrouter.ai/api/v1` | API endpoint |
| `--openrouter-site-url URL` | `OPENROUTER_SITE_URL` | — | HTTP-Referer header |
| `--openrouter-app-title STR` | `OPENROUTER_APP_TITLE` | `PhoneClaw` | X-Title header |

### Local VLM arguments

| Argument | Env var | Default | Description |
|---|---|---|---|
| `--api-base URL` | `API_BASE` | `http://localhost:8002/v1` | Executor endpoint |
| `--model-name NAME` | `MODEL_NAME` | `Qwen/Qwen2.5-VL-7B-Instruct` | Executor model |
| `--api-key KEY` | `API_KEY` | `EMPTY` | Executor API key |
| `--agent-type TYPE` | `AGENT_TYPE` | `OpenAIAgent` | `OpenAIAgent` or `QwenVLAgent` |
| `--eval-api-base URL` | `EVAL_API_BASE` | same as `--api-base` | Evaluator endpoint |
| `--eval-model-name NAME` | `EVAL_MODEL_NAME` | same as `--model-name` | Evaluator model |

### Learning-mode arguments

| Argument | Default | Description |
|---|---|---|
| `--learn-app NAME` | — | App name to record (required with `--learn`) |
| `--learn-describe TEXT` | — | Optional description of what you are demonstrating |
| `--learn-duration S` | `60` | Max recording duration in seconds |
| `--learn-poll MS` | `125` | Screenshot polling interval (milliseconds) |
| `--learn-threshold N` | `30` | Pixel-diff threshold for change detection |
| `--learn-dir PATH` | `PhoneClaw/data/demos/` | Override demo output directory |
| `--no-analyse` | off | Record frames only; skip VLM analysis |

---

## Screen Keepalive

`keepalive.py` prevents the device screen from sleeping using a two-tier strategy:

**Primary — `idleTimerDisabled` (no UI interaction)**  
On `start()`, PhoneClaw sends `POST /wda/settings {"settings": {"idleTimerDisabled": true}}` to the WDA session. This disables iOS's auto-lock at the system level for the lifetime of the session, with zero UI side-effects. A lightweight `GET /status` heartbeat thread keeps the WDA HTTP session alive. On `stop()`, `idleTimerDisabled` is reset to `false`.

**Fallback — periodic centre tap**  
Older or custom WDA builds may not support the `idleTimerDisabled` setting. In that case, PhoneClaw falls back to a periodic synthetic tap at screen centre (0.50, 0.50) via WDA W3C pointer actions.

---

## Supported Apps (via `launch()`)

`launch("AppName")` opens apps directly by bundle ID — faster and more reliable than tapping the home screen icon. Both English and Chinese app names are resolved via bundle ID, so `launch("Xiaohongshu")` and its Chinese alias both refer to the same app.

| Category | Supported apps |
|---|---|
| System | Settings, Safari, Calendar, Messages, Mail, Photos, Camera, Clock, Maps, Music, App Store, Notes, Reminders, Weather, Calculator, Contacts, FaceTime, Phone |
| Social | WeChat, QQ, Weibo, Feishu / Lark |
| Shopping | Meituan, Taobao, JD, Pinduoduo, Xiaohongshu |
| Travel | Didi, Ctrip |
| Finance | Alipay |
| Video | Douyin, Bilibili, iQIYI, Youku, Tencent Video |
| Music | NetEase Music, QQ Music |
| Google | Gmail, Google Maps, Google Chrome, YouTube |
| Other | Zhihu, Baidu Maps, Gaode Maps |

---

## Output Format

### Per-step trace entry

```json
{
  "subtask_idx": 1,
  "subtask_instruction": "Navigate to the orders page",
  "subtask_criteria": "Order list is visible with past orders",
  "image": "screenshots/screenshot-2-before.png",
  "response": "<CALLED_FUNCTION>tap(0.62, 0.94)</CALLED_FUNCTION>...",
  "code_snippet": "tap(0.62, 0.94)",
  "eval_result": {"passed": true, "reason": "Order list is visible"},
  "fix_attempt": 0
}
```

### Task completion entry

```json
{
  "type": "task_complete",
  "all_passed": true,
  "summary": "Task: ... Progress: 3/3 passed ...",
  "final_answer": "The most recent order is from Heytea (Hangzhou Wanxiang Center): Hot Roasted Brown Sugar Boba Milk Tea ¥21.9"
}
```


================================================
FILE: PhoneClaw/__init__.py
================================================
"""PhoneClaw: iOS GUI Agent based on Ralph Loop (EXECUTE → EVALUATE → FIX → REPEAT)."""


================================================
FILE: PhoneClaw/actions.py
================================================
"""Action execution for iOS devices via WebDriverAgent."""

import time
from typing import Optional, Tuple

# iOS app bundle IDs
# Add entries here to extend launch() support.
# Find bundle IDs via: ideviceinstaller -l  or  GET /wda/apps/list
APP_PACKAGES_IOS = {
    # Keys are the exact (case-sensitive) names accepted by launch();
    # values are iOS bundle identifiers. English and Chinese aliases map
    # to the same bundle ID.
    # ── Apple system apps ──────────────────────────────────────────────
    "Safari":           "com.apple.mobilesafari",
    "Settings":         "com.apple.Preferences",
    "Messages":         "com.apple.MobileSMS",
    "Mail":             "com.apple.mobilemail",
    "Photos":           "com.apple.mobileslideshow",
    "Camera":           "com.apple.camera",
    "Clock":            "com.apple.mobiletimer",
    "Calendar":         "com.apple.mobilecal",
    "Maps":             "com.apple.Maps",
    "Music":            "com.apple.Music",
    "App Store":        "com.apple.AppStore",
    "Notes":            "com.apple.mobilenotes",
    "Reminders":        "com.apple.reminders",
    "Weather":          "com.apple.weather",
    "Calculator":       "com.apple.calculator",
    "Contacts":         "com.apple.MobileAddressBook",
    "FaceTime":         "com.apple.facetime",
    "Phone":            "com.apple.mobilephone",
    "Health":           "com.apple.Health",
    "Wallet":           "com.apple.Passbook",
    "Files":            "com.apple.DocumentsApp",
    "Podcasts":         "com.apple.podcasts",
    "Shortcuts":        "com.apple.shortcuts",
    "Translate":        "com.apple.Translate",
    "Find My":          "com.apple.findmy",

    # ── Social / communication ─────────────────────────────────────────
    "WeChat":           "com.tencent.xin",       # NOT com.tencent.xinWeChat
    "QQ":               "com.tencent.mqq",
    "Weibo":            "com.sina.weibo",
    "Feishu":           "com.bytedance.feishu",
    "Lark":             "com.bytedance.lark",
    "DingTalk":         "com.laiwang.DingTalk",
    "钉钉":             "com.laiwang.DingTalk",

    # ── Shopping / delivery ────────────────────────────────────────────
    # com.meituan.imeituan is the current "美团" app (food delivery + all
    # services). The old super-app com.sankuai.meituan is rarely installed.
    "Meituan":          "com.meituan.imeituan",
    "美团":             "com.meituan.imeituan",
    "Meituan Waimai":   "com.meituan.imeituan",
    "美团外卖":         "com.meituan.imeituan",
    "Taobao":           "com.taobao.taobao4iphone",
    "淘宝":             "com.taobao.taobao4iphone",
    "JD":               "com.jingdong.app.mall",
    "京东":             "com.jingdong.app.mall",
    "Pinduoduo":        "com.xunmeng.pinduoduo",
    "拼多多":           "com.xunmeng.pinduoduo",
    "Xiaohongshu":      "com.xingin.discover",
    "小红书":           "com.xingin.discover",
    "Eleme":            "me.ele.ios",
    "饿了么":           "me.ele.ios",

    # ── Finance / payment ─────────────────────────────────────────────
    "Alipay":           "com.alipay.iphoneclient",
    "支付宝":           "com.alipay.iphoneclient",

    # ── Travel / maps ──────────────────────────────────────────────────
    "Didi":             "com.xiaojukeji.didi.passenger.activity",
    "滴滴":             "com.xiaojukeji.didi.passenger.activity",
    "Ctrip":            "com.ctrip.inner.wireless",
    "携程":             "com.ctrip.inner.wireless",
    "Gaode Maps":       "com.autonavi.amap",
    "高德地图":         "com.autonavi.amap",
    "Baidu Maps":       "com.baidu.map",
    "百度地图":         "com.baidu.map",

    # ── Video / streaming ─────────────────────────────────────────────
    "Douyin":           "com.ss.iphone.ugc.Aweme",
    "抖音":             "com.ss.iphone.ugc.Aweme",
    "Bilibili":         "tv.danmaku.bilianime",
    "哔哩哔哩":         "tv.danmaku.bilianime",
    "iQIYI":            "com.qiyi.iphone",
    "爱奇艺":           "com.qiyi.iphone",
    "Youku":            "com.youku.YouKu",
    "优酷":             "com.youku.YouKu",
    "Tencent Video":    "com.tencent.now",
    "腾讯视频":         "com.tencent.now",

    # ── Music ──────────────────────────────────────────────────────────
    "NetEase Music":    "com.netease.cloudmusic",
    "网易云音乐":       "com.netease.cloudmusic",
    "QQ Music":         "com.tencent.qqmusic",
    "QQ音乐":           "com.tencent.qqmusic",

    # ── Knowledge / tools ─────────────────────────────────────────────
    "Zhihu":            "com.zhihu.ios",
    "知乎":             "com.zhihu.ios",
    "Baidu":            "com.baidu.BaiduMobile",
    "百度":             "com.baidu.BaiduMobile",

    # ── Google apps ───────────────────────────────────────────────────
    "Gmail":            "com.google.Gmail",         # capital G
    "Google Maps":      "com.google.Maps",
    "Google Chrome":    "com.google.chrome.ios",
    "Chrome":           "com.google.chrome.ios",
    "YouTube":          "com.google.ios.youtube",
}

SCALE_FACTOR = 3  # 3 for most modern iPhone


def _physical_to_logical(x: int, y: int) -> Tuple[int, int]:
    """Convert physical coordinates (screenshot) to logical coordinates (WDA)."""
    return int(x / SCALE_FACTOR), int(y / SCALE_FACTOR)


def _logical_to_physical(x: int, y: int) -> Tuple[int, int]:
    """Convert logical coordinates (WDA) to physical coordinates (screenshot)."""
    return int(x * SCALE_FACTOR), int(y * SCALE_FACTOR)


def _get_wda_session_url(wda_url: str, session_id: Optional[str], endpoint: str) -> str:
    """Get the correct WDA URL for a session endpoint."""
    base = wda_url.rstrip("/")
    if session_id:
        return f"{base}/session/{session_id}/{endpoint}"
    else:
        return f"{base}/{endpoint}"


class IOSActionHandler:
    """Executes touch / keyboard / app actions on an iOS device via WebDriverAgent.

    All gesture methods accept PHYSICAL pixel coordinates (as seen in
    screenshots); they are converted to logical WDA points by dividing by
    SCALE_FACTOR before the request is sent.

    Args:
        wda_url: Base URL of the WebDriverAgent server.
        session_id: Optional WDA session id. When set, session-scoped
            endpoints are used; otherwise the sessionless form.
    """

    def __init__(
        self,
        wda_url: str = "http://localhost:8100",
        session_id: Optional[str] = None,
    ):
        self.wda_url = wda_url
        self.session_id = session_id

    # ------------------------------------------------------------------
    # Internal helpers (shared request / payload patterns)
    # ------------------------------------------------------------------

    @staticmethod
    def _pointer_payload(x: int, y: int, gesture: list) -> dict:
        """Build a W3C pointer-actions payload for a single touch finger.

        The finger first moves to (x, y) — converted from physical pixels
        to logical points — then performs the down/pause/up steps given in
        `gesture`.
        """
        return {
            "actions": [
                {
                    "type": "pointer",
                    "id": "finger1",
                    "parameters": {"pointerType": "touch"},
                    "actions": [
                        {
                            "type": "pointerMove",
                            "duration": 0,
                            "x": x / SCALE_FACTOR,
                            "y": y / SCALE_FACTOR,
                        },
                        *gesture,
                    ],
                }
            ]
        }

    def _post_and_settle(
        self,
        url: str,
        payload: Optional[dict],
        timeout: float,
        error_label: str,
        delay: float = 0.0,
    ) -> bool:
        """POST `payload` to `url`, sleep `delay` seconds, return HTTP success.

        Shared pattern for the simple WDA endpoints: any exception is
        reported as "Error {error_label}: ..." and mapped to False, and the
        settle delay is skipped when the request itself fails.
        """
        try:
            import requests  # lazy import keeps the module importable without requests
            response = requests.post(url, json=payload, timeout=timeout, verify=False)
            time.sleep(delay)
            return response.status_code in (200, 201)
        except Exception as e:
            print(f"Error {error_label}: {e}")
            return False

    # ------------------------------------------------------------------
    # Gestures
    # ------------------------------------------------------------------

    def tap(self, x: int, y: int, delay: float = 1.0) -> bool:
        """Tap at the specified physical coordinates."""
        url = _get_wda_session_url(self.wda_url, self.session_id, "actions")
        payload = self._pointer_payload(x, y, [
            {"type": "pointerDown", "button": 0},
            {"type": "pause", "duration": 100},
            {"type": "pointerUp", "button": 0},
        ])
        return self._post_and_settle(url, payload, timeout=15, error_label="tapping", delay=delay)

    def double_tap(self, x: int, y: int, delay: float = 1.0) -> bool:
        """Double tap at the specified physical coordinates."""
        url = _get_wda_session_url(self.wda_url, self.session_id, "actions")
        payload = self._pointer_payload(x, y, [
            {"type": "pointerDown", "button": 0},
            {"type": "pause", "duration": 100},
            {"type": "pointerUp", "button": 0},
            {"type": "pause", "duration": 100},
            {"type": "pointerDown", "button": 0},
            {"type": "pause", "duration": 100},
            {"type": "pointerUp", "button": 0},
        ])
        return self._post_and_settle(
            url, payload, timeout=10, error_label="double tapping", delay=delay
        )

    def long_press(self, x: int, y: int, duration: float = 3.0, delay: float = 1.0) -> bool:
        """Long press at the specified physical coordinates for `duration` seconds."""
        url = _get_wda_session_url(self.wda_url, self.session_id, "actions")
        payload = self._pointer_payload(x, y, [
            {"type": "pointerDown", "button": 0},
            {"type": "pause", "duration": int(duration * 1000)},  # W3C pause is in ms
            {"type": "pointerUp", "button": 0},
        ])
        return self._post_and_settle(
            url, payload, timeout=int(duration + 10), error_label="long pressing", delay=delay
        )

    def swipe(
        self,
        start_x: int,
        start_y: int,
        end_x: int,
        end_y: int,
        duration: Optional[float] = None,
        delay: float = 1.0,
    ) -> bool:
        """Swipe from start to end physical coordinates.

        When `duration` is omitted it is derived from the squared swipe
        distance and clamped to the [0.3, 2.0] second range.
        """
        if duration is None:
            dist_sq = (start_x - end_x) ** 2 + (start_y - end_y) ** 2
            duration = max(0.3, min(dist_sq / 1000000, 2.0))

        url = _get_wda_session_url(self.wda_url, self.session_id, "wda/dragfromtoforduration")
        payload = {
            "fromX": start_x / SCALE_FACTOR,
            "fromY": start_y / SCALE_FACTOR,
            "toX": end_x / SCALE_FACTOR,
            "toY": end_y / SCALE_FACTOR,
            "duration": duration,
        }
        return self._post_and_settle(
            url, payload, timeout=int(duration + 10), error_label="swiping", delay=delay
        )

    def back(self, delay: float = 1.0) -> bool:
        """Navigate back (iOS has no back button; swipe in from the left edge)."""
        # get_screen_size() already returns logical points, so the payload
        # below intentionally skips the SCALE_FACTOR division.
        screen_width, screen_height = self.get_screen_size()
        mid_y = screen_height // 2
        url = _get_wda_session_url(self.wda_url, self.session_id, "wda/dragfromtoforduration")
        payload = {
            "fromX": 0,
            "fromY": mid_y,
            "toX": screen_width // 3,
            "toY": mid_y,
            "duration": 0.3,
        }
        return self._post_and_settle(
            url, payload, timeout=10, error_label="performing back gesture", delay=delay
        )

    def home(self, delay: float = 1.0) -> bool:
        """Press the home button (sessionless /wda/homescreen endpoint)."""
        url = f"{self.wda_url.rstrip('/')}/wda/homescreen"
        return self._post_and_settle(url, None, timeout=10, error_label="pressing home", delay=delay)

    # ------------------------------------------------------------------
    # Apps
    # ------------------------------------------------------------------

    def launch_app(self, app_name: str, delay: float = 2.0) -> bool:
        """
        Launch (or bring to foreground) an app by name via WDA.

        Uses /wda/apps/activate rather than /wda/apps/launch:
        - activate  → iOS system-level "open application" (works for ALL installed apps)
        - launch    → XCTest XCUIApplication cold-start (fails for third-party apps on
                      real devices with "FBSApplicationLibrary returned nil" error)

        Returns False when `app_name` is not in APP_PACKAGES_IOS or the app
        is not installed on the device.
        """
        if app_name not in APP_PACKAGES_IOS:
            print(f"App '{app_name}' not found in APP_PACKAGES_IOS")
            return False

        try:
            import requests
            bundle_id = APP_PACKAGES_IOS[app_name]
            url = _get_wda_session_url(self.wda_url, self.session_id, "wda/apps/activate")

            response = requests.post(
                url, json={"bundleId": bundle_id}, timeout=15, verify=False
            )

            time.sleep(delay)
            # activate returns null value on success (status 200)
            if response.status_code in (200, 201):
                value = response.json().get("value")
                if isinstance(value, dict) and "error" in value:
                    msg = value.get("message", "")
                    if "NotFound" in msg or "returned nil" in msg:
                        print(f"[launch] '{app_name}' not installed on this device.")
                    else:
                        print(f"[launch] activate error: {msg[:120]}")
                    return False
                return True
            elif response.status_code == 400:
                # HTTP 400 also means the app is not installed on this device
                print(f"[launch] '{app_name}' ({bundle_id}) is not installed on this device.")
                return False
            else:
                print(f"[launch] Unexpected HTTP {response.status_code} for '{app_name}'")
                return False
        except Exception as e:
            print(f"Error launching app: {e}")
            return False

    # ------------------------------------------------------------------
    # Keyboard / text
    # ------------------------------------------------------------------

    def type_text(self, text: str, frequency: int = 60) -> bool:
        """Type text into the currently focused input field.

        Args:
            text: Characters to type (sent one-by-one via /wda/keys).
            frequency: Typing-speed hint passed through to WDA.
        """
        url = _get_wda_session_url(self.wda_url, self.session_id, "wda/keys")
        payload = {"value": list(text), "frequency": frequency}
        return self._post_and_settle(url, payload, timeout=30, error_label="typing text")

    def clear_text(self) -> bool:
        """Clear text in the currently focused input field.

        Looks up the active element via /element/active and issues a W3C
        `clear` on it. Returns False when there is no active element.
        """
        try:
            import requests
            url = _get_wda_session_url(self.wda_url, self.session_id, "element/active")

            response = requests.get(url, timeout=10, verify=False)

            if response.status_code == 200:
                # The element id may be under the legacy "ELEMENT" key or the
                # W3C element-identifier key, depending on the WDA build.
                value = response.json().get("value", {})
                element_id = value.get("ELEMENT") or value.get("element-6066-11e4-a52e-4f735466cecf")

                if element_id:
                    clear_url = _get_wda_session_url(
                        self.wda_url, self.session_id, f"element/{element_id}/clear"
                    )
                    response = requests.post(clear_url, timeout=10, verify=False)
                    return response.status_code in (200, 201)

            return False
        except Exception as e:
            print(f"Error clearing text: {e}")
            return False

    def hide_keyboard(self) -> bool:
        """Hide the on-screen keyboard (sessionless /wda/keyboard/dismiss)."""
        url = f"{self.wda_url.rstrip('/')}/wda/keyboard/dismiss"
        return self._post_and_settle(url, None, timeout=10, error_label="hiding keyboard")

    # ------------------------------------------------------------------
    # Introspection
    # ------------------------------------------------------------------

    def get_current_app(self) -> str:
        """Return the friendly name of the foreground app.

        Reverse-maps the active bundle id through APP_PACKAGES_IOS; unknown
        bundle ids and request failures both report as "System Home".
        """
        try:
            import requests
            response = requests.get(
                f"{self.wda_url.rstrip('/')}/wda/activeAppInfo", timeout=5, verify=False
            )

            if response.status_code == 200:
                bundle_id = response.json().get("value", {}).get("bundleId", "")

                if bundle_id:
                    for app_name, package in APP_PACKAGES_IOS.items():
                        if package == bundle_id:
                            return app_name

                return "System Home"

        except Exception as e:
            print(f"Error getting current app: {e}")

        return "System Home"

    def get_screen_size(self) -> Tuple[int, int]:
        """Get the screen dimensions in logical points.

        Falls back to (375, 812) when the request fails — presumably an
        iPhone X-class default; confirm against the target devices.
        """
        try:
            import requests
            url = _get_wda_session_url(self.wda_url, self.session_id, "window/size")

            response = requests.get(url, timeout=5, verify=False)

            if response.status_code == 200:
                value = response.json().get("value", {})
                return value.get("width", 375), value.get("height", 812)

        except Exception as e:
            print(f"Error getting screen size: {e}")

        return 375, 812


================================================
FILE: PhoneClaw/agent.py
================================================
"""OpenRouter VLM agent for PhoneClaw.

OpenRouter (https://openrouter.ai) provides unified access to many VLMs
(GPT-4o, Claude, Gemini, Qwen-VL, etc.) through an OpenAI-compatible API.

Key differences from the base OpenAIAgent in Android-Lab:
  - Base URL: https://openrouter.ai/api/v1
  - Auth header: Authorization: Bearer <OPENROUTER_API_KEY>
  - Extra recommended headers: HTTP-Referer, X-Title
  - Image format: standard OpenAI image_url (data URI), NOT the non-standard
    "type": "image" format used by QwenVLAgent/OpenAIAgent in Android-Lab
"""

import base64
import io
from typing import List, Dict, Any, Optional

import backoff
from openai import OpenAI
from PIL import Image


# Base URL of OpenRouter's OpenAI-compatible API (see module docstring).
OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"


def _handle_backoff(details):
    args_str = str(details['args'])[:500]
    print(f"[OpenRouterAgent] Backing off {details['wait']:.1f}s after {details['tries']} tries. "
          f"Args: {args_str}")


def _handle_giveup(details):
    print(f"[OpenRouterAgent] Giving up after {details['tries']} tries.")


class OpenRouterAgent:
    """
    VLM agent that calls models via OpenRouter's API.

    Supports any multimodal model available on OpenRouter, e.g.:
      - openai/gpt-4o
      - anthropic/claude-3.5-sonnet
      - google/gemini-2.0-flash-exp
      - z-ai/glm-4.6v
      - meta-llama/llama-3.2-90b-vision-instruct

    Image format uses the standard OpenAI image_url (data URI) which all
    OpenRouter vision models understand.
    """

    def __init__(
        self,
        api_key: str,
        model_name: str,
        api_base: str = OPENROUTER_BASE_URL,
        max_new_tokens: int = 4096,
        temperature: float = 0.0,
        top_p: float = 0.9,
        site_url: str = "None",
        app_title: str = "PhoneClaw",
        max_image_width: int = 1280,
        max_image_height: int = 2800,
        **kwargs,
    ):
        """
        Args:
            api_key: OpenRouter API key (from https://openrouter.ai/keys).
            model_name: OpenRouter model identifier, e.g. "openai/gpt-4o".
            api_base: API base URL (default: https://openrouter.ai/api/v1).
            max_new_tokens: Maximum tokens to generate.
            temperature: Sampling temperature (0 = deterministic).
            top_p: Nucleus sampling parameter.
            site_url: HTTP-Referer header value (recommended by OpenRouter).
                Sent verbatim, so the literal default string "None" is used
                unless a real URL is supplied.
            app_title: X-Title header value (shown in OpenRouter dashboard).
            max_image_width: Images wider than this will be resized before upload.
            max_image_height: Images taller than this will be resized before upload.
            **kwargs: Ignored; accepted so extra config keys don't raise.
        """
        self.model_name = model_name
        self.max_new_tokens = max_new_tokens
        self.temperature = temperature
        self.top_p = top_p
        self.max_image_width = max_image_width
        self.max_image_height = max_image_height
        self.name = "OpenRouterAgent"

        self.client = OpenAI(
            api_key=api_key,
            base_url=api_base,
            default_headers={
                "HTTP-Referer": site_url,
                "X-Title": app_title,
            },
        )

    # ------------------------------------------------------------------
    # Core API call (with exponential backoff)
    # ------------------------------------------------------------------

    @backoff.on_exception(
        backoff.expo,
        Exception,
        on_backoff=_handle_backoff,
        on_giveup=_handle_giveup,
        max_tries=5,
    )
    def act(self, messages: List[Dict[str, Any]]) -> str:
        """
        Send messages to the model and return the response text.

        Retries with exponential backoff (up to 5 tries) on any exception.

        Args:
            messages: List of OpenAI-format chat messages.

        Returns:
            Model response as a string.
        """
        response = self.client.chat.completions.create(
            model=self.model_name,
            messages=messages,
            max_tokens=self.max_new_tokens,
            temperature=self.temperature,
            top_p=self.top_p,
        )
        content = response.choices[0].message.content
        # Debug trace of the raw model output.
        print("-------------------------------")
        print(content)
        print("-------------------------------")
        return content

    # ------------------------------------------------------------------
    # Message construction
    # ------------------------------------------------------------------

    def prompt_to_message_visual(
        self,
        prompt: str,
        img: str,
        max_width: Optional[int] = None,
        max_height: Optional[int] = None,
    ) -> List[Dict[str, Any]]:
        """
        Build an OpenAI-compatible user message with text + image.

        Uses the standard image_url format (data URI) accepted by all
        OpenRouter vision models. The image is downscaled (aspect-ratio
        preserved) when it exceeds the configured maximum dimensions.

        Args:
            prompt: Text prompt.
            img: Path to the screenshot image file.
            max_width: Override instance max_image_width.
            max_height: Override instance max_image_height.

        Returns:
            List containing one user message dict (image first, then text).
        """
        max_w = max_width or self.max_image_width
        max_h = max_height or self.max_image_height

        # Use a context manager so the underlying file handle is closed
        # promptly instead of waiting for garbage collection.
        with Image.open(img) as raw:
            img_obj = raw.convert("RGB")
        orig_w, orig_h = img_obj.size

        if orig_w > max_w or orig_h > max_h:
            # Single ratio keeps the aspect ratio while fitting both limits.
            ratio = min(max_w / orig_w, max_h / orig_h)
            img_obj = img_obj.resize(
                (int(orig_w * ratio), int(orig_h * ratio)),
                Image.Resampling.LANCZOS,
            )

        buf = io.BytesIO()
        img_obj.save(buf, format="PNG")
        b64 = base64.b64encode(buf.getvalue()).decode("utf-8")

        # Standard OpenAI / OpenRouter image_url format
        content = [
            {
                "type": "image_url",
                "image_url": {
                    "url": f"data:image/png;base64,{b64}",
                },
            },
            {
                "type": "text",
                "text": prompt,
            },
        ]

        return [{"role": "user", "content": content}]

    def prompt_to_message_text(self, prompt: str) -> Dict[str, Any]:
        """Build a plain text user message (no image)."""
        return {"role": "user", "content": prompt}


================================================
FILE: PhoneClaw/connection.py
================================================
"""iOS device connection management via WebDriverAgent."""

import subprocess
import time
from dataclasses import dataclass
from enum import Enum
from typing import Optional


class ConnectionType(Enum):
    """Type of iOS connection."""
    USB = "usb"          # device attached over USB
    NETWORK = "network"  # device reached over the network (heuristic, see list_devices)


@dataclass
class DeviceInfo:
    """Information about a connected iOS device."""
    device_id: str                       # UDID as reported by idevice_id
    status: str                          # currently always "connected"
    connection_type: ConnectionType      # USB or NETWORK (heuristic)
    model: Optional[str] = None          # ideviceinfo ProductType, e.g. "iPhone14,2"
    ios_version: Optional[str] = None    # ideviceinfo ProductVersion
    device_name: Optional[str] = None    # ideviceinfo DeviceName (user-visible)


class IOSConnection:
    """
    Manages connections to iOS devices via libimobiledevice and WebDriverAgent.

    Requires:
        - libimobiledevice (idevice_id, ideviceinfo)
        - WebDriverAgent running on the iOS device
    """

    def __init__(self, wda_url: str = "http://localhost:8100"):
        self.wda_url = wda_url.rstrip("/")
        self.session_id: Optional[str] = None

    def list_devices(self) -> list[DeviceInfo]:
        """List all connected iOS devices."""
        try:
            proc = subprocess.run(
                ["idevice_id", "-ln"],
                capture_output=True,
                text=True,
                timeout=5,
            )

            found: list[DeviceInfo] = []
            for raw_line in proc.stdout.strip().split("\n"):
                udid = raw_line.strip()
                if not udid:
                    continue

                # Heuristic: network-paired UDIDs are long and contain dashes.
                looks_networked = "-" in udid and len(udid) > 40
                conn_type = ConnectionType.NETWORK if looks_networked else ConnectionType.USB

                details = self._get_device_details(udid)
                found.append(
                    DeviceInfo(
                        device_id=udid,
                        status="connected",
                        connection_type=conn_type,
                        model=details.get("model"),
                        ios_version=details.get("ios_version"),
                        device_name=details.get("name"),
                    )
                )

            return found

        except FileNotFoundError:
            print(
                "Error: idevice_id not found. Install libimobiledevice: brew install libimobiledevice"
            )
            return []
        except Exception as e:
            print(f"Error listing devices: {e}")
            return []

    def _get_device_details(self, udid: str) -> dict[str, str]:
        """Get detailed information about a specific device."""
        # ideviceinfo keys mapped to our normalized field names.
        key_map = {
            "ProductType": "model",
            "ProductVersion": "ios_version",
            "DeviceName": "name",
        }
        try:
            proc = subprocess.run(
                ["ideviceinfo", "-u", udid],
                capture_output=True,
                text=True,
                timeout=5,
            )

            details: dict[str, str] = {}
            for raw_line in proc.stdout.split("\n"):
                if ": " not in raw_line:
                    continue
                key, _, value = raw_line.partition(": ")
                mapped = key_map.get(key.strip())
                if mapped:
                    details[mapped] = value.strip()

            return details

        except Exception:
            return {}

    def is_connected(self, device_id: Optional[str] = None) -> bool:
        """Check if a device is connected (any device when `device_id` is None)."""
        devices = self.list_devices()
        if device_id is None:
            return bool(devices)
        return any(d.device_id == device_id for d in devices)

    def is_wda_ready(self, timeout: int = 2) -> bool:
        """Check if WebDriverAgent is running and accessible."""
        try:
            import requests
            resp = requests.get(
                f"{self.wda_url}/status", timeout=timeout, verify=False
            )
            return resp.status_code == 200
        except ImportError:
            print("Error: requests library not found. Install it: pip install requests")
            return False
        except Exception:
            return False

    def start_wda_session(self) -> tuple[bool, str]:
        """Start a new WebDriverAgent session; returns (ok, session-id-or-error)."""
        try:
            import requests
            resp = requests.post(
                f"{self.wda_url}/session",
                json={"capabilities": {}},
                timeout=30,
                verify=False,
            )

            if resp.status_code not in (200, 201):
                return False, f"Failed to start session: {resp.text}"

            body = resp.json()
            # Some WDA builds put the id at top level, others under "value".
            sid = body.get("sessionId") or body.get("value", {}).get("sessionId")
            if sid:
                self.session_id = sid
            return True, sid or "session_started"

        except ImportError:
            return (
                False,
                "requests library not found. Install it: pip install requests",
            )
        except Exception as e:
            return False, f"Error starting WDA session: {e}"

    def get_wda_status(self) -> dict:
        """Get WebDriverAgent status information (empty dict on failure)."""
        try:
            import requests
            resp = requests.get(f"{self.wda_url}/status", timeout=5, verify=False)
            return resp.json() if resp.status_code == 200 else {}
        except Exception:
            return {}


================================================
FILE: PhoneClaw/controller.py
================================================
"""iOS Controller - adapts iOS device control to Android-Lab's controller interface."""

import os
import time
from typing import Tuple, Optional

from PhoneClaw.actions import IOSActionHandler, _physical_to_logical, SCALE_FACTOR
from PhoneClaw.screenshot import get_screenshot, save_screenshot, Screenshot
from PhoneClaw.hierarchy import get_page_source


class IOSController:
    """
    Adapter that exposes Android-Lab's AndroidController interface for an
    iOS device driven through WebDriverAgent.

    All device interaction is delegated to IOSActionHandler so the same
    agent code can run unchanged against iOS.
    """

    def __init__(self, wda_url: str = "http://localhost:8100", session_id: Optional[str] = None):
        self.action_handler = IOSActionHandler(wda_url=wda_url, session_id=session_id)
        self.wda_url = wda_url
        self.session_id = session_id
        # Logical (point) screen dimensions, queried once at construction.
        self.width, self.height = self.get_device_size()
        self.viewport_size = (self.width, self.height)
        # Default screenshot dir; overridden per-task by run_phoneclaw.py.
        # Directory is NOT pre-created here to avoid leaving empty folders.
        self.screenshot_dir = "./phoneclaw_screenshots"

    def get_device_size(self) -> Tuple[int, int]:
        """Return the device's logical screen size."""
        return self.action_handler.get_screen_size()

    def get_current_activity(self) -> str:
        """Return the current app name (iOS stand-in for an Android activity)."""
        return self.action_handler.get_current_app()

    def get_current_app(self) -> str:
        """Compatibility alias for get_current_activity()."""
        return self.get_current_activity()

    def tap(self, x: int, y: int) -> bool:
        """Tap at the given coordinates."""
        return self.action_handler.tap(x, y)

    def text(self, input_str: str) -> bool:
        """Clear the focused field, type *input_str*, then hide the keyboard.

        Short sleeps give the UI time to settle between steps; the return
        value reflects only the type_text call.
        """
        handler = self.action_handler
        handler.clear_text()
        time.sleep(0.5)
        typed_ok = handler.type_text(input_str)
        time.sleep(0.5)
        handler.hide_keyboard()
        return typed_ok

    def long_press(self, x: int, y: int, duration: int = 3000) -> bool:
        """Long-press at (x, y); *duration* is in milliseconds."""
        return self.action_handler.long_press(x, y, duration=duration / 1000.0)

    def swipe(self, x: int, y: int, direction: str, dist: str = "medium", quick: bool = False) -> bool:
        """
        Swipe from (x, y) in the named direction.

        *dist* selects the travel fraction ("short"/"medium"/"long");
        unrecognised directions fall back to a downward swipe.  *quick*
        is accepted for interface compatibility but unused here.
        """
        start_lx, start_ly = _physical_to_logical(x, y)
        fraction = {"short": 0.3, "medium": 0.5, "long": 0.7}.get(dist, 0.5)

        if direction == "up":
            end_lx = start_lx
            end_ly = max(0, int(start_ly - self.height * fraction))
        elif direction == "left":
            end_lx = max(0, int(start_lx - self.width * fraction))
            end_ly = start_ly
        elif direction == "right":
            end_lx = min(self.width, int(start_lx + self.width * fraction))
            end_ly = start_ly
        else:  # "down" and any unrecognised direction
            end_lx = start_lx
            end_ly = min(self.height, int(start_ly + self.height * fraction))

        end_x = int(end_lx * SCALE_FACTOR)
        end_y = int(end_ly * SCALE_FACTOR)
        return self.action_handler.swipe(x, y, end_x, end_y)

    def back(self) -> bool:
        """Navigate back (left-edge swipe on iOS)."""
        return self.action_handler.back()

    def home(self) -> bool:
        """Press the home button."""
        return self.action_handler.home()

    def enter(self) -> bool:
        """Press Enter (implemented by hiding the keyboard on iOS)."""
        return self.action_handler.hide_keyboard()

    def launch_app(self, app_name: str) -> bool:
        """Launch an app by its display name."""
        return self.action_handler.launch_app(app_name)

    def save_screenshot(self, file_path: str) -> bool:
        """Capture the current screen and write it to *file_path*."""
        shot = get_screenshot(wda_url=self.wda_url, session_id=self.session_id)
        return save_screenshot(shot, file_path)

    def get_screenshot(self) -> Screenshot:
        """Capture and return the current screenshot."""
        return get_screenshot(wda_url=self.wda_url, session_id=self.session_id)

    def get_xml(self, prefix: str = "", save_dir: str = "") -> str:
        """
        Fetch the iOS page source (XML hierarchy).

        Compatible with Android-Lab's get_xml interface: returns "SUCCESS"
        when the hierarchy was fetched (optionally saving it to
        ``save_dir/<prefix>.xml``), or an "ERROR: ..." string otherwise.
        A failed file write is only warned about, not treated as an error.
        """
        try:
            page_xml = get_page_source(
                wda_url=self.wda_url,
                session_id=self.session_id,
                timeout=15
            )
            if not page_xml:
                return "ERROR: Failed to get page source (returned None)"

            if save_dir:
                os.makedirs(save_dir, exist_ok=True)
                xml_path = os.path.join(save_dir, f"{prefix}.xml")
                try:
                    with open(xml_path, 'w', encoding='utf-8') as f:
                        f.write(page_xml)
                except Exception as e:
                    print(f"Warning: Failed to save XML to {xml_path}: {e}")

            return "SUCCESS"
        except Exception as e:
            print(f"Error getting XML: {e}")
            return f"ERROR: {e}"


================================================
FILE: PhoneClaw/embeddings.py
================================================
"""Embedding utilities for semantic deduplication in PhoneClaw.

Uses an OpenAI-compatible embeddings API to compute dense vector representations
of text, enabling semantic similarity checks that catch paraphrases and
near-duplicates that exact string matching would miss.

API key priority (first found wins):
  1. OPENAI_API_KEY  → uses https://api.openai.com/v1
  2. OPENROUTER_API_KEY → uses https://openrouter.ai/api/v1
  3. Neither available → falls back to normalised-token Jaccard similarity

Embeddings for known texts are cached in-process to avoid redundant API calls.
"""

from __future__ import annotations

import math
import os
import re
import unicodedata
from typing import Optional

# Cosine similarity above this threshold → treat as semantic duplicate.
# 0.88 works well for short factual phrases in Chinese / English.
SIMILARITY_THRESHOLD = 0.88

# Jaccard similarity threshold used when the embedding API is unavailable.
JACCARD_THRESHOLD = 0.50

# Model used for embedding.  text-embedding-3-small is cheap, fast, and
# works with both OpenAI and OpenRouter.
EMBED_MODEL = "text-embedding-3-small"

# ---------------------------------------------------------------------------
# Module-level lazy state
# ---------------------------------------------------------------------------
_client = None          # openai.OpenAI instance (or None)
_cache: dict[str, list[float]] = {}   # in-process cache: text → vector


# ---------------------------------------------------------------------------
# Private helpers
# ---------------------------------------------------------------------------

def _get_client():
    """Return a lazily created OpenAI-compatible client, or None.

    The instance is memoised in the module-level ``_client``.  Key
    preference order: OPENAI_API_KEY (api.openai.com) first, then
    OPENROUTER_API_KEY (openrouter.ai).  Returns None when the ``openai``
    package is missing or no key is configured.
    """
    global _client
    if _client is not None:
        return _client

    try:
        import openai
    except ImportError:
        return None

    key = os.getenv("OPENAI_API_KEY", "").strip()
    if key:
        _client = openai.OpenAI(api_key=key)
        return _client

    key = os.getenv("OPENROUTER_API_KEY", "").strip()
    if key:
        _client = openai.OpenAI(
            api_key=key,
            base_url="https://openrouter.ai/api/v1",
        )
    return _client


def _normalise(text: str) -> str:
    """Lower-case, Unicode-normalise, and strip punctuation."""
    text = unicodedata.normalize("NFC", text).lower()
    text = re.sub(r"[\s\W_]+", " ", text)
    return text.strip()


def _jaccard_tokens(a: str, b: str) -> float:
    """Token-level Jaccard similarity between two strings.

    Both strings are normalised before tokenising.  Two empty token sets
    count as identical (1.0); exactly one empty set scores 0.0.
    """
    tokens_a = set(_normalise(a).split())
    tokens_b = set(_normalise(b).split())
    if not (tokens_a or tokens_b):
        return 1.0
    if not (tokens_a and tokens_b):
        return 0.0
    return len(tokens_a & tokens_b) / len(tokens_a | tokens_b)


def _cosine(v1: list[float], v2: list[float]) -> float:
    """Cosine similarity between two equal-length vectors."""
    dot = sum(a * b for a, b in zip(v1, v2))
    norm1 = math.sqrt(sum(a * a for a in v1))
    norm2 = math.sqrt(sum(b * b for b in v2))
    if norm1 == 0.0 or norm2 == 0.0:
        return 0.0
    return dot / (norm1 * norm2)


# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------

def embed(text: str) -> Optional[list[float]]:
    """Return the embedding vector for *text*, or None when unavailable.

    Vectors are memoised in the module-level ``_cache`` so each distinct
    text is embedded at most once per interpreter lifetime.  Returns None
    for blank input, a missing client, or any API failure.
    """
    text = text.strip()
    if not text:
        return None

    cached = _cache.get(text)
    if cached is not None:
        return cached

    client = _get_client()
    if client is None:
        return None

    try:
        response = client.embeddings.create(model=EMBED_MODEL, input=text)
        vector: list[float] = response.data[0].embedding
    except Exception:
        return None

    _cache[text] = vector
    return vector


def similarity(text_a: str, text_b: str) -> float:
    """Return a [0, 1] similarity score between two texts.

    Uses cosine similarity over embeddings when both texts embed
    successfully, otherwise token-level Jaccard as the fallback.
    """
    vec_a = embed(text_a)
    vec_b = embed(text_b)
    if vec_a is None or vec_b is None:
        return _jaccard_tokens(text_a, text_b)
    return _cosine(vec_a, vec_b)


def is_semantic_duplicate(
    text: str,
    candidates: list[str],
    threshold: float = SIMILARITY_THRESHOLD,
) -> int:
    """Find the candidate that is semantically equivalent to *text*.

    Uses embedding cosine similarity when the API is reachable, otherwise
    falls back to normalised-token Jaccard with the lower JACCARD_THRESHOLD.

    Returns:
        Index (>= 0) of the best-matching candidate above the active
        threshold, or -1 when nothing matches.
    """
    text = text.strip()
    if not text:
        return -1

    # Probe once: if *text* embeds successfully, the per-candidate
    # comparisons below can rely on cosine similarity (and the vector is
    # already cached for reuse).
    have_embeddings = embed(text) is not None
    cutoff = threshold if have_embeddings else JACCARD_THRESHOLD

    best_index = -1
    # Seed just below the cutoff so a candidate must (approximately) reach it.
    best_score = cutoff - 1e-6

    for idx, candidate in enumerate(candidates):
        if have_embeddings:
            score = similarity(text, candidate)
        else:
            score = _jaccard_tokens(text, candidate)

        if score > best_score:
            best_score = score
            best_index = idx

    return best_index


================================================
FILE: PhoneClaw/evaluator.py
================================================
"""Evaluator for PhoneClaw Ralph Loop.

Uses a VLM to determine whether the current screen satisfies a subtask's
success criteria.  Returns a structured pass/fail result with a reason.
"""

import json
import re
from typing import Optional, Dict, Any

from PhoneClaw.prompts import EVALUATOR_SYSTEM_PROMPT, EVALUATOR_USER_TEMPLATE


class EvalResult:
    """Structured pass/fail verdict produced by the Evaluator."""

    def __init__(self, passed: bool, reason: str):
        self.passed = passed
        self.reason = reason

    def to_dict(self) -> Dict[str, Any]:
        """Serialise the result as a plain dict."""
        return dict(passed=self.passed, reason=self.reason)

    def __repr__(self) -> str:
        label = "PASS" if self.passed else "FAIL"
        return f"EvalResult({label}: {self.reason})"


class SubTaskEvaluator:
    """
    Evaluates whether a screenshot satisfies a subtask's success criterion.

    The agent object must implement:
        agent.prompt_to_message_visual(prompt: str, image_path: str) -> list[dict]
        agent.act(messages: list[dict]) -> str
    """

    def __init__(self, agent, max_retries: int = 2):
        """
        Args:
            agent: VLM agent instance (e.g., OpenAIAgent from Android-Lab).
            max_retries: How many times to retry if JSON parsing fails.
        """
        self.agent = agent
        self.max_retries = max_retries

    def evaluate(
        self,
        screenshot_path: str,
        success_criteria: str,
    ) -> EvalResult:
        """
        Evaluate whether the screenshot satisfies the success criterion.

        Args:
            screenshot_path: Path to the current screenshot (labeled or plain).
            success_criteria: The success criterion text for the current subtask.

        Returns:
            EvalResult with passed (bool) and reason (str).  Defaults to a
            conservative FAIL when all attempts error out.
        """
        if not screenshot_path:
            return EvalResult(passed=False, reason="No screenshot available for evaluation.")

        user_prompt = EVALUATOR_USER_TEMPLATE.format(success_criteria=success_criteria)

        system_msg = {"role": "system", "content": EVALUATOR_SYSTEM_PROMPT}

        last_error = None
        for attempt in range(1, self.max_retries + 1):
            try:
                user_messages = self.agent.prompt_to_message_visual(user_prompt, screenshot_path)
                messages = [system_msg, *user_messages]
                response = self.agent.act(messages)

                # Covers both VLM call failures and malformed JSON replies.
                return self._parse_response(response)

            except Exception as e:
                last_error = str(e)
                print(f"[Evaluator] Attempt {attempt} failed: {e}")

        # Fallback: conservative fail
        print(f"[Evaluator] All {self.max_retries} attempts failed. Defaulting to FAIL.")
        return EvalResult(
            passed=False,
            reason=f"Evaluator failed to produce a valid response. Last error: {last_error}"
        )

    def _parse_response(self, response: str) -> EvalResult:
        """
        Parse the LLM response into an EvalResult.

        Handles:
        - Clean JSON object
        - JSON wrapped in markdown code fences
        - JSON embedded in prose
        - Non-string "reason" values (null, numbers), coerced to str

        Raises:
            ValueError / json.JSONDecodeError when no usable JSON object
            with a 'passed' field can be extracted.
        """
        text = response.strip()

        # Strip markdown code fences
        text = re.sub(r'^```(?:json)?\s*', '', text, flags=re.MULTILINE)
        text = re.sub(r'\s*```\s*$', '', text, flags=re.MULTILINE)
        text = text.strip()

        # Try to extract a JSON object even if surrounded by prose
        obj_match = re.search(r'\{.*\}', text, re.DOTALL)
        if obj_match:
            text = obj_match.group(0)

        data = json.loads(text)

        if not isinstance(data, dict):
            raise ValueError(f"Expected JSON object, got {type(data).__name__}")

        passed = data.get("passed")
        if passed is None:
            raise ValueError("JSON object missing 'passed' field")

        # Normalize: accept string "true"/"false" as well as booleans
        if isinstance(passed, str):
            passed = passed.lower() in ("true", "1", "yes")
        else:
            passed = bool(passed)

        # BUGFIX: previously `.strip()` was called on data.get("reason", "")
        # directly, so a JSON null or numeric reason raised AttributeError
        # and burned a retry.  Coerce non-strings to str instead.
        raw_reason = data.get("reason")
        reason = "" if raw_reason is None else str(raw_reason).strip()
        if not reason:
            reason = "No reason provided."

        return EvalResult(passed=passed, reason=reason)


================================================
FILE: PhoneClaw/executor.py
================================================
"""iOS executor for PhoneClaw - adapts iOS actions to Android-Lab interface."""

import os
import time
from typing import Optional, List

from PhoneClaw.actions import IOSActionHandler, _physical_to_logical, _logical_to_physical
from PhoneClaw.screenshot import get_screenshot, save_screenshot, Screenshot
from PhoneClaw.hierarchy import IOSElement, get_page_source, get_ios_elements


class IOSExecutor:
    """
    iOS executor that adapts iOS device control to Android-Lab's executor interface.
    """

    def __init__(self, wda_url: str = "http://localhost:8100", session_id: Optional[str] = None):
        """Bind the executor to a WebDriverAgent endpoint and reset state."""
        self.wda_url = wda_url
        self.session_id = session_id
        self.action_handler = IOSActionHandler(wda_url=wda_url, session_id=session_id)

        # Per-step state populated as the agent runs.
        self.current_screenshot: Optional[Screenshot] = None
        self.current_screenshot_path: Optional[str] = None
        self.current_return = None
        self.elem_list: List[IOSElement] = []

        # Subtask-completion flag toggled by finish() / reset_finish().
        self.is_finish = False
        self.finish_message: Optional[str] = None

        # Physical screen size, resolved lazily by _get_screen_physical_size().
        self._phys_w: Optional[int] = None
        self._phys_h: Optional[int] = None

    def get_screenshot(self) -> Screenshot:
        """Capture a fresh screenshot, remember it on the instance, and return it."""
        shot = get_screenshot(
            wda_url=self.wda_url,
            session_id=self.session_id,
        )
        self.current_screenshot = shot
        return shot

    # ------------------------------------------------------------------
    # Coordinate helpers
    # ------------------------------------------------------------------

    def _get_screen_physical_size(self) -> tuple[int, int]:
        """
        Return the physical (pixel) screen dimensions, resolving them once
        and caching the result on the instance.

        Resolution order:
          1. previously cached values;
          2. dimensions of the most recently saved screenshot file — the
             most accurate source, since it handles devices whose scale
             factor differs from SCALE_FACTOR (e.g. the iPhone SE's 2x);
          3. WDA logical size multiplied by SCALE_FACTOR.
        """
        if self._phys_w and self._phys_h:
            return self._phys_w, self._phys_h

        # Try reading dimensions from the most recent screenshot file.
        path = self.current_screenshot_path
        if path and os.path.exists(path):
            try:
                import cv2
                image = cv2.imread(path)
            except Exception:
                image = None
            if image is not None:
                self._phys_h, self._phys_w = image.shape[:2]
                return self._phys_w, self._phys_h

        # Fallback: WDA logical size scaled up.
        from PhoneClaw.actions import SCALE_FACTOR
        logical_w, logical_h = self.action_handler.get_screen_size()
        self._phys_w = logical_w * SCALE_FACTOR
        self._phys_h = logical_h * SCALE_FACTOR
        return self._phys_w, self._phys_h

    def _rel_to_physical(self, rx: float, ry: float) -> tuple[int, int]:
        """
        Map normalized [0, 1] coordinates to physical pixel coordinates.

        (0.0, 0.0) is the top-left corner and (1.0, 1.0) the bottom-right;
        inputs are clamped into [0, 1], scaled by the physical screen size,
        and rounded to the nearest pixel.
        """
        def clamp01(value: float) -> float:
            return min(1.0, max(0.0, float(value)))

        width, height = self._get_screen_physical_size()
        return round(clamp01(rx) * width), round(clamp01(ry) * height)

    def tap(self, x: int, y: int) -> dict:
        """Tap at physical pixel coordinates (x, y) and record the action."""
        self.action_handler.tap(x, y)
        self.current_return = {
            "operation": "do",
            "action": "Tap",
            "kwargs": {"element": [x, y]},
        }
        return self.current_return

    def text(self, input_str: str) -> dict:
        """
        Type *input_str* into the currently focused field.

        Clears any existing text first, then types, then dismisses the
        keyboard, pausing briefly between steps so the UI can settle.
        """
        handler = self.action_handler
        handler.clear_text()
        time.sleep(0.5)
        handler.type_text(input_str)
        time.sleep(0.5)
        handler.hide_keyboard()
        time.sleep(0.5)
        self.current_return = {
            "operation": "do",
            "action": "Type",
            "kwargs": {"text": input_str},
        }
        return self.current_return

    def type(self, input_str: str) -> dict:
        """Alias for text(), kept for interface compatibility."""
        return self.text(input_str)

    def long_press(self, x: int, y: int) -> dict:
        """Long-press at physical pixel coordinates (x, y) and record the action."""
        self.action_handler.long_press(x, y)
        self.current_return = {
            "operation": "do",
            "action": "Long Press",
            "kwargs": {"element": [x, y]},
        }
        return self.current_return

    def swipe(self, x: int, y: int, direction: str, dist: str = "medium") -> dict:
        """
        Swipe from physical (x, y) in a named direction (legacy do() interface).

        *dist* selects the travel fraction ("short"/"medium"/"long", default
        medium); any unrecognised direction falls back to a downward swipe.
        End points are clamped to the logical screen bounds on the axis of
        travel before converting back to physical pixels.
        """
        logical_w, logical_h = self.action_handler.get_screen_size()
        start_lx, start_ly = _physical_to_logical(x, y)
        fraction = {"short": 0.3, "medium": 0.5, "long": 0.7}.get(dist, 0.5)

        if direction == "up":
            end_lx = start_lx
            end_ly = max(0, int(start_ly - logical_h * fraction))
        elif direction == "left":
            end_lx = max(0, int(start_lx - logical_w * fraction))
            end_ly = start_ly
        elif direction == "right":
            end_lx = min(logical_w, int(start_lx + logical_w * fraction))
            end_ly = start_ly
        else:  # "down" and any unrecognised direction
            end_lx = start_lx
            end_ly = min(logical_h, int(start_ly + logical_h * fraction))

        end_x, end_y = _logical_to_physical(end_lx, end_ly)

        self.action_handler.swipe(x, y, end_x, end_y)
        self.current_return = {
            "operation": "do",
            "action": "Swipe",
            "kwargs": {
                "element": [x, y],
                "direction": direction,
                "dist": dist,
            },
        }
        return self.current_return

    def swipe_coords(self, x1: int, y1: int, x2: int, y2: int) -> dict:
        """
        Swipe between two explicit physical-pixel points.

        This is the primary swipe entry point for the coordinate-based
        agent; the physical-to-logical conversion happens inside
        action_handler.swipe().
        """
        self.action_handler.swipe(x1, y1, x2, y2)
        self.current_return = {
            "operation": "do",
            "action": "Swipe",
            "kwargs": {"from": [x1, y1], "to": [x2, y2]},
        }
        return self.current_return

    def back(self) -> dict:
        """Navigate back via the iOS left-edge swipe gesture."""
        self.action_handler.back()
        self.current_return = {"operation": "do", "action": "Back", "kwargs": {}}
        return self.current_return

    def home(self) -> dict:
        """Press the home button and record the action."""
        self.action_handler.home()
        self.current_return = {"operation": "do", "action": "Home", "kwargs": {}}
        return self.current_return

    def wait(self, interval: int = 5) -> dict:
        """Sleep for *interval* seconds; out-of-range values fall back to 5."""
        if not 0 <= interval <= 10:
            interval = 5
        time.sleep(interval)
        self.current_return = {
            "operation": "do",
            "action": "Wait",
            "kwargs": {"interval": interval},
        }
        return self.current_return

    def enter(self) -> dict:
        """Emulate pressing Enter; on iOS this dismisses the keyboard."""
        self.action_handler.hide_keyboard()
        self.current_return = {"operation": "do", "action": "Enter", "kwargs": {}}
        return self.current_return

    def launch(self, app_name: str) -> dict:
        """Launch the app called *app_name* and record the action."""
        self.action_handler.launch_app(app_name)
        self.current_return = {
            "operation": "do",
            "action": "Launch",
            "kwargs": {"app_name": app_name},
        }
        return self.current_return

    def finish(self, message: Optional[str] = None) -> dict:
        """Mark the current subtask as finished, optionally recording a message.

        Note that a falsy *message* (None or "") leaves any previously set
        finish_message untouched, mirroring the original behaviour.
        """
        self.is_finish = True
        if message:
            self.finish_message = message
        self.current_return = {
            "operation": "finish",
            "action": "finish",
            "kwargs": {"message": message},
        }
        return self.current_return

    def reset_finish(self):
        """Clear the finish flag and message ahead of the next subtask."""
        self.is_finish = False
        self.finish_message = None

    def get_current_app(self) -> str:
        """Return the name of the app currently in the foreground."""
        return self.action_handler.get_current_app()

    def get_screen_size(self) -> tuple[int, int]:
        """Return the logical (point) screen dimensions reported by WDA."""
        return self.action_handler.get_screen_size()

    def set_elem_list(self, xml_path_or_string: str):
        """
        Populate self.elem_list from an iOS XML hierarchy.

        Accepts either a path to an XML file or the raw XML string itself;
        anything that is not an existing file path is treated as raw XML.
        """
        if os.path.exists(xml_path_or_string):
            with open(xml_path_or_string, 'r', encoding='utf-8') as f:
                source = f.read()
        else:
            source = xml_path_or_string

        self.elem_list = get_ios_elements(source)

    def tap_by_index(self, index: int) -> dict:
        """Tap the centre of the element at 1-based *index* in self.elem_list.

        Raises ValueError when the element list is empty (also recording an
        error action) and AssertionError when *index* is out of range.
        """
        if not self.elem_list:
            error_msg = (
                "Element list is empty. Please ensure XML is parsed and set_elem_list() is called."
            )
            print(f"Error: {error_msg}")
            self.current_return = {
                "operation": "error",
                "action": "Tap",
                "kwargs": {"index": index, "error": error_msg},
            }
            raise ValueError(error_msg)
        assert 0 < index <= len(self.elem_list), f"Tap Index {index} out of range (available: 1-{len(self.elem_list)})"

        top_left, bottom_right = self.elem_list[index - 1].bbox
        centre_lx = (top_left[0] + bottom_right[0]) // 2
        centre_ly = (top_left[1] + bottom_right[1]) // 2
        return self.tap(*_logical_to_physical(centre_lx, centre_ly))

    def long_press_by_index(self, index: int) -> dict:
        """Long-press the centre of the element at 1-based *index*."""
        if not self.elem_list:
            raise ValueError("Element list is empty. Please ensure XML is parsed and set_elem_list() is called.")
        assert 0 < index <= len(self.elem_list), f"Long Press Index {index} out of range (available: 1-{len(self.elem_list)})"

        top_left, bottom_right = self.elem_list[index - 1].bbox
        centre_lx = (top_left[0] + bottom_right[0]) // 2
        centre_ly = (top_left[1] + bottom_right[1]) // 2
        return self.long_press(*_logical_to_physical(centre_lx, centre_ly))

    def swipe_by_index(self, index: int, direction: str, dist: str = "medium") -> dict:
        """Swipe starting from the centre of the element at 1-based *index*."""
        if not self.elem_list:
            raise ValueError("Element list is empty. Please ensure XML is parsed and set_elem_list() is called.")
        assert 0 < index <= len(self.elem_list), f"Swipe Index {index} out of range (available: 1-{len(self.elem_list)})"

        top_left, bottom_right = self.elem_list[index - 1].bbox
        centre_lx = (top_left[0] + bottom_right[0]) // 2
        centre_ly = (top_left[1] + bottom_right[1]) // 2
        px, py = _logical_to_physical(centre_lx, centre_ly)
        return self.swipe(px, py, direction, dist)

    def __call__(self, code_snippet: str):
        """
        Execute a coordinate-based code snippet from the VLM.

        The VLM outputs normalized relative coordinates in [0, 1].
        This method converts them to physical pixels before dispatching.

        Supported calls:
            tap(rx, ry)                      - tap at relative position (rx, ry)
            long_press(rx, ry)               - long press at relative position
            swipe(rx1, ry1, rx2, ry2)        - swipe from rel (rx1,ry1) to (rx2,ry2)
            type("text")  /  text("text")    - type text
            back()
            home()
            wait(seconds)
            finish("message")

        All rx/ry values are floats in [0, 1]:
            (0.0, 0.0) = top-left corner
            (1.0, 1.0) = bottom-right corner

        Returns:
            The action-record dict stored in self.current_return (an "error"
            record when the snippet raised, a "skip" record when it was empty).
        """
        import re

        if not code_snippet:
            print("Warning: code_snippet is empty or None, skipping execution")
            self.current_return = {
                "operation": "skip",
                "action": "skip",
                "kwargs": {"reason": "Empty code snippet"}
            }
            return self.current_return

        # --- Relative-coordinate wrappers ---
        # These accept [0,1] floats from the VLM and convert to physical pixels
        # before forwarding to the underlying executor methods.

        def _tap(rx, ry):
            px, py = self._rel_to_physical(rx, ry)
            print(f"[Exec] tap(rel=({rx:.3f},{ry:.3f}) → phys=({px},{py}))")
            return self.tap(px, py)

        def _long_press(rx, ry):
            px, py = self._rel_to_physical(rx, ry)
            print(f"[Exec] long_press(rel=({rx:.3f},{ry:.3f}) → phys=({px},{py}))")
            return self.long_press(px, py)

        def _swipe(rx1, ry1, rx2, ry2):
            px1, py1 = self._rel_to_physical(rx1, ry1)
            px2, py2 = self._rel_to_physical(rx2, ry2)
            print(f"[Exec] swipe(rel=({rx1:.3f},{ry1:.3f})→({rx2:.3f},{ry2:.3f})"
                  f" → phys=({px1},{py1})→({px2},{py2}))")
            return self.swipe_coords(px1, py1, px2, py2)

        # Whitelist of names the snippet may call; passed as exec()'s locals.
        local_context = {
            'tap':        _tap,
            'long_press': _long_press,
            'swipe':      _swipe,
            'type':       self.text,
            'text':       self.text,
            'back':       self.back,
            'home':       self.home,
            'wait':       self.wait,
            'finish':     self.finish,
            'launch':     self.launch,
        }

        # Strip accidental leading zeros from integer literals (e.g. 01 → 1) that
        # would be Python SyntaxErrors.
        # IMPORTANT: use a negative lookbehind (?<!\.) so that decimal fractions
        # such as 0.095 or 0.06 are NOT touched.  Without it the word boundary
        # between the decimal point and the digit would cause:
        #   0.095 → 0.95   (0.095 interpreted as "09" with leading zero stripped)
        #   0.06  → 0.6    (same issue)
        # NOTE(review): the substitution runs over the whole snippet, so a
        # zero-padded number inside a quoted string (e.g. type("room 007"))
        # would also be rewritten — worth fixing if typed text can contain
        # zero-padded digits.
        code_snippet = re.sub(r'(?<!\.)\b0+(\d)', r'\1', code_snippet)

        try:
            # SECURITY NOTE: exec() runs model-generated code.  The empty
            # globals dict plus the curated local_context restrict the names
            # the snippet can reach directly, but builtins are injected
            # automatically, so this is only a light sandbox — run snippets
            # from trusted models only.
            exec(code_snippet, {}, local_context)
        except Exception as e:
            print(f"Error executing code snippet '{code_snippet}': {e}")
            import traceback
            traceback.print_exc()
            self.current_return = {
                "operation": "error",
                "action": "error",
                "kwargs": {"error": str(e), "code": code_snippet}
            }

        return self.current_return

    def do(self, action=None, element=None, **kwargs):
        """Execute an action - compatible with Android-Lab's do() interface.

        Args:
            action:  One of the supported action names ("Tap", "Type", "Swipe",
                     "Enter", "Home", "Back", "Long Press", "Wait", "Launch",
                     "Call_API").
            element: Target as either a 4-item bbox [x1, y1, x2, y2] (its
                     midpoint is used) or a 2-item point [x, y]. Optional for
                     "Swipe" (defaults to the screen centre); unused by the
                     key/global actions.
            **kwargs: Per-action extras: text= (Type); direction=, dist=
                     (Swipe); interval= (Wait); app= or app_name= (Launch);
                     instruction=, with_screen_info= (Call_API).

        Returns:
            Whatever the underlying primitive returns; for "Call_API" a dict
            describing the request (also stored in self.current_return).

        Raises:
            AssertionError: unknown action or a missing required kwarg.
            ValueError: element is not a valid bbox/point list.
            NotImplementedError: defensive guard; unreachable because the
                whitelist assert above covers every branch.
        """
        assert action in [
            "Tap", "Type", "Swipe", "Enter", "Home", "Back", "Long Press", "Wait", "Launch", "Call_API"
        ], f"Unsupported Action: {action}"

        def _center(el, action_name):
            # Shared element resolution for Tap / Swipe / Long Press:
            # 4-item bbox -> midpoint, 2-item list -> the point itself.
            # The action name is threaded through so each caller keeps its
            # original error message.
            if isinstance(el, list) and len(el) == 4:
                return (el[0] + el[2]) / 2, (el[1] + el[3]) / 2
            if isinstance(el, list) and len(el) == 2:
                return el[0], el[1]
            raise ValueError(f"Invalid element format for {action_name}")

        if action == "Tap":
            center_x, center_y = _center(element, "Tap")
            return self.tap(int(center_x), int(center_y))

        elif action == "Type":
            assert "text" in kwargs, "text is required for Type action"
            return self.text(kwargs["text"])

        elif action == "Swipe":
            assert "direction" in kwargs, "direction is required for Swipe action"
            if element is None:
                # No target given: swipe from the middle of the screen,
                # converting logical to physical coordinates.
                screen_width_logical, screen_height_logical = self.get_screen_size()
                center_x, center_y = _logical_to_physical(
                    screen_width_logical // 2,
                    screen_height_logical // 2
                )
            else:
                center_x, center_y = _center(element, "Swipe")
            dist = kwargs.get("dist", "medium")
            return self.swipe(int(center_x), int(center_y), kwargs["direction"], dist)

        elif action == "Enter":
            return self.enter()

        elif action == "Home":
            return self.home()

        elif action == "Back":
            return self.back()

        elif action == "Long Press":
            center_x, center_y = _center(element, "Long Press")
            return self.long_press(int(center_x), int(center_y))

        elif action == "Wait":
            return self.wait(kwargs.get("interval", 5))

        elif action == "Launch":
            assert "app" in kwargs or "app_name" in kwargs, "app or app_name is required for Launch action"
            app_name = kwargs.get("app") or kwargs.get("app_name")
            return self.launch(app_name)

        elif action == "Call_API":
            # No device interaction: record the request for the caller to handle.
            self.current_return = {
                "operation": "do",
                "action": "Call_API",
                "kwargs": {
                    "instruction": kwargs.get("instruction", ""),
                    "with_screen_info": kwargs.get("with_screen_info", True)
                }
            }
            return self.current_return

        else:
            raise NotImplementedError(f"Action {action} not implemented")

    def update_screenshot(self, prefix=None, suffix=None):
        """Capture a fresh screenshot; persist it when screenshot_dir is set.

        The saved file name is "screenshot[-prefix]-<timestamp>[-suffix].png";
        the saved path is also recorded on self.current_screenshot_path and
        self.current_screenshot.  Returns the raw screenshot either way.
        """
        shot = self.get_screenshot()

        if hasattr(self, 'screenshot_dir'):
            stamp = time.time()
            # Assemble the optional prefix/suffix around the timestamp.
            name_parts = ["screenshot"]
            if prefix is not None:
                name_parts.append(str(prefix))
            name_parts.append(str(stamp))
            if suffix is not None:
                name_parts.append(str(suffix))
            shot_path = f"{self.screenshot_dir}/{'-'.join(name_parts)}.png"

            os.makedirs(os.path.dirname(shot_path), exist_ok=True)
            save_screenshot(shot, shot_path)
            self.current_screenshot_path = shot_path
            self.current_screenshot = shot_path

        return shot


================================================
FILE: PhoneClaw/experience.py
================================================
"""Experience log for PhoneClaw — procedural memory.

Persists a JSON file (default: PhoneClaw/data/experience_log.json, see
DEFAULT_LOG_PATH) that accumulates
execution know-how across all tasks:

  lessons       – app-specific and general-purpose lessons derived from task
                  traces (successful paths, failed approaches, UI knowledge)
  reinforcement – existing lessons gain confidence each time they are re-confirmed

The experience log is used in two ways:

  1. BEFORE execution  – get_hints_for(app, subtask) injects relevant lessons
     into the Executor system prompt so the VLM avoids previously-failed
     approaches and prefers previously-successful ones.

  2. AFTER execution   – extract_and_record(task, subtask_logs, ...) calls the
     VLM to derive structured lessons from the completed task trace and stores
     them for future runs.

Lesson types
------------
  successful_navigation  – a confirmed sequence or coordinate that reaches a goal
  failed_approach        – a coordinate / action that was tried and failed
  ui_knowledge           – layout fact about an app's UI (tab positions, etc.)
  timing                 – when to wait, how long animations take, etc.
  general                – cross-app advice
"""

from __future__ import annotations

import json
import re
from datetime import datetime
from pathlib import Path
from typing import Optional

# Default on-disk location of the experience log (inside the package's data/ dir)
DEFAULT_LOG_PATH = Path(__file__).parent / "data" / "experience_log.json"
# Current JSON layout version; logs with an older version are migrated on load
SCHEMA_VERSION = 1

# Maximum number of lessons to retain (lowest-confidence, least-reinforced
# entries are pruned first — see ExperienceLog.add_lesson)
MAX_LESSONS = 500

# Confidence tiers and their sort weights (higher = shown first)
CONFIDENCE_WEIGHT = {"high": 3, "medium": 2, "low": 1}

# Auto-compaction threshold: compact an app's lessons when its count reaches this
COMPACT_THRESHOLD = 20

# Target lesson count per app after compaction (roughly 1/3 of threshold)
COMPACT_TARGET = 8


# ---------------------------------------------------------------------------
# ExperienceLog
# ---------------------------------------------------------------------------

class ExperienceLog:
    """
    Persistent store of execution lessons for the PhoneClaw agent.

    Typical usage::

        exp = ExperienceLog()

        # before executing a subtask
        hints = exp.get_hints_for("Meituan", "Navigate to orders page")

        # after a complete task
        exp.extract_and_record(task, subtask_logs, final_answer, agent)
    """

    def __init__(self, log_path: Optional[str] = None):
        # Fall back to the package-local default when no explicit path given.
        self.path = Path(log_path) if log_path else DEFAULT_LOG_PATH
        self.path.parent.mkdir(parents=True, exist_ok=True)
        self.data = self._load()

    # ------------------------------------------------------------------
    # Persistence
    # ------------------------------------------------------------------

    def _load(self) -> dict:
        """Load the JSON log from disk, migrating old schemas; start fresh on any error."""
        if self.path.exists():
            try:
                with open(self.path, encoding="utf-8") as f:
                    raw = json.load(f)
                if raw.get("schema_version", 0) < SCHEMA_VERSION:
                    raw = self._migrate(raw)
                return raw
            except Exception as exc:
                # Corrupt/unreadable log is non-fatal: warn and rebuild.
                print(f"[Experience] Warning: could not load log ({exc}). Starting fresh.")
        return self._empty_log()

    def save(self) -> None:
        """Write the log back to disk, refreshing the last_updated stamp."""
        self.data["last_updated"] = datetime.now().isoformat()
        with open(self.path, "w", encoding="utf-8") as f:
            json.dump(self.data, f, ensure_ascii=False, indent=2)

    def _empty_log(self) -> dict:
        """Return a brand-new, empty log structure at the current schema version."""
        return {
            "schema_version": SCHEMA_VERSION,
            "created_at": datetime.now().isoformat(),
            "last_updated": datetime.now().isoformat(),
            "stats": {
                "total_lessons": 0,
                "tasks_processed": 0,
                "compactions": 0,
            },
            "compaction_history": [],
            "lessons": [],
        }

    def _migrate(self, old: dict) -> dict:
        """Carry forward known top-level keys from an older-schema log."""
        fresh = self._empty_log()
        for k in fresh:
            if k in old:
                fresh[k] = old[k]
        fresh["schema_version"] = SCHEMA_VERSION
        return fresh

    # ------------------------------------------------------------------
    # Adding lessons
    # ------------------------------------------------------------------

    def add_lesson(
        self,
        app: str,
        lesson_type: str,
        description: str,
        source_task: str,
        confidence: str = "medium",
    ) -> bool:
        """Store a single lesson.

        Performs semantic deduplication scoped to the same app: uses
        embedding cosine similarity (falls back to token-level Jaccard when
        the embedding API is unavailable).  If a semantically equivalent
        lesson already exists its ``reinforced`` counter is incremented and
        confidence may be upgraded; no new entry is created.

        Returns:
            True if new lesson was created; False if an existing one was reinforced.
        """
        from PhoneClaw.embeddings import is_semantic_duplicate

        description = description.strip()
        lessons: list = self.data["lessons"]

        # Restrict comparison to same-app lessons (bundle-ID-aware)
        same_app = [l for l in lessons if self._app_matches(app, l.get("app"))]
        same_app_texts = [l["description"] for l in same_app]

        dup_idx = is_semantic_duplicate(description, same_app_texts)
        if dup_idx >= 0:
            # Reinforce the existing lesson instead of duplicating it.
            existing = same_app[dup_idx]
            existing["reinforced"] = existing.get("reinforced", 1) + 1
            if (
                CONFIDENCE_WEIGHT.get(confidence, 0)
                > CONFIDENCE_WEIGHT.get(existing["confidence"], 0)
            ):
                existing["confidence"] = confidence
            existing["last_seen"] = datetime.now().isoformat()
            self.save()
            return False

        # New lesson
        lessons.append({
            "id": self.data["stats"]["total_lessons"] + 1,
            "app": app,
            "lesson_type": lesson_type,
            "description": description,
            "source_task": source_task[:120],  # cap stored task text
            "confidence": confidence,
            "reinforced": 1,
            "timestamp": datetime.now().isoformat(),
            "last_seen": datetime.now().isoformat(),
        })
        self.data["stats"]["total_lessons"] += 1

        # Prune the weakest lessons (lowest confidence, then least reinforced)
        # when over the cap.
        if len(self.data["lessons"]) > MAX_LESSONS:
            self.data["lessons"].sort(
                key=lambda x: (
                    CONFIDENCE_WEIGHT.get(x["confidence"], 1),
                    x.get("reinforced", 1),
                )
            )
            self.data["lessons"] = self.data["lessons"][-(MAX_LESSONS):]

        self.save()
        return True

    # ------------------------------------------------------------------
    # Hint injection for the Executor
    # ------------------------------------------------------------------

    @staticmethod
    def _app_matches(query: Optional[str], lesson_app: Optional[str]) -> bool:
        """Return True when *query* and *lesson_app* refer to the same iOS app.

        Matching is attempted in two ways:
        1. Case-insensitive name equality  (fast, no imports)
        2. Same bundle ID via APP_PACKAGES_IOS  (handles Chinese ↔ English
           aliases, e.g. "Xiaohongshu" == "小红书" because both map to
           "com.xingin.discover")
        """
        if not query or not lesson_app:
            return False
        if query.lower() == lesson_app.lower():
            return True
        try:
            from PhoneClaw.actions import APP_PACKAGES_IOS
            query_bundle = APP_PACKAGES_IOS.get(query, "")
            lesson_bundle = APP_PACKAGES_IOS.get(lesson_app, "")
            if query_bundle and lesson_bundle and query_bundle == lesson_bundle:
                return True
        except Exception:
            # Bundle table unavailable — name equality above is the only check.
            pass
        return False

    def get_hints_for(
        self,
        app_name: Optional[str],
        subtask_instruction: str,
        max_hints: int = 8,
    ) -> str:
        """
        Return a formatted ``## Experience Notes`` block to append to the
        Executor system prompt.

        Selects lessons that are relevant to *app_name* and/or the keywords
        in *subtask_instruction*.  Returns an empty string when there are no
        applicable lessons.

        App matching is bundle-ID-aware: English and Chinese aliases for the
        same app are treated as identical (e.g. "Xiaohongshu" matches lessons
        stored under "小红书").
        """
        if not self.data["lessons"]:
            return ""

        # (score, lesson) pairs — scored below, sorted, then truncated.
        candidates: list[tuple[int, dict]] = []

        # Keywords from the subtask for lightweight relevance filtering
        keywords = set(re.findall(r'\w+', subtask_instruction.lower()))

        for lesson in self.data["lessons"]:
            score = 0

            # App match — uses bundle-ID comparison to handle Chinese/English aliases
            if self._app_matches(app_name, lesson.get("app")):
                score += 4
            elif lesson.get("app") in ("", "general", None):
                score += 1

            # Keyword overlap with lesson description
            lesson_words = set(re.findall(r'\w+', lesson["description"].lower()))
            overlap = len(keywords & lesson_words)
            score += overlap

            # Confidence and reinforcement boost
            score += CONFIDENCE_WEIGHT.get(lesson["confidence"], 1)
            score += min(lesson.get("reinforced", 1) - 1, 3)  # cap bonus at 3

            if score >= 4:
                candidates.append((score, lesson))

        if not candidates:
            return ""

        # Sort by score descending, take top N
        candidates.sort(key=lambda x: x[0], reverse=True)
        top = [item for _, item in candidates[:max_hints]]

        lines = ["\n## Experience Notes (from past executions — use these to avoid repeating known mistakes)"]

        # Group by lesson type for readability
        successes = [l for l in top if l["lesson_type"] == "successful_navigation"]
        failures  = [l for l in top if l["lesson_type"] == "failed_approach"]
        ui_facts  = [l for l in top if l["lesson_type"] == "ui_knowledge"]
        others    = [l for l in top if l["lesson_type"] not in (
            "successful_navigation", "failed_approach", "ui_knowledge"
        )]

        if successes:
            lines.append("✓ What has worked before:")
            for l in successes:
                conf = f"[{l['confidence']}, confirmed {l['reinforced']}×]"
                lines.append(f"  • {l['description']}  {conf}")

        if failures:
            lines.append("✗ What has FAILED before — do NOT repeat:")
            for l in failures:
                conf = f"[seen {l['reinforced']}×]"
                lines.append(f"  • {l['description']}  {conf}")

        if ui_facts:
            lines.append("ℹ UI knowledge:")
            for l in ui_facts:
                lines.append(f"  • {l['description']}")

        for l in others:
            lines.append(f"  • {l['description']}")

        return "\n".join(lines) + "\n"

    # ------------------------------------------------------------------
    # VLM-powered lesson extraction
    # ------------------------------------------------------------------

    def extract_and_record(
        self,
        task: str,
        subtask_logs: list[dict],
        final_answer: Optional[str],
        agent,
    ) -> list[str]:
        """
        Ask the VLM to derive structured lessons from the task execution trace,
        then store each lesson.

        Args:
            task:          The original task instruction.
            subtask_logs:  List of per-subtask dicts built by RalphLoop (see loop.py).
            final_answer:  Final answer / outcome, if any.
            agent:         VLM agent with act(messages) -> str.

        Returns:
            List of lesson description strings that were recorded this call
            (both newly created lessons and reinforcements of existing ones).
        """
        from PhoneClaw.prompts import (
            EXPERIENCE_EXTRACT_SYSTEM_PROMPT,
            EXPERIENCE_EXTRACT_USER_TEMPLATE,
        )

        trace_summary = self._build_trace_summary(task, subtask_logs, final_answer)
        if not trace_summary.strip():
            return []

        user_content = EXPERIENCE_EXTRACT_USER_TEMPLATE.format(
            trace_summary=trace_summary,
        )

        messages = [
            {"role": "system", "content": EXPERIENCE_EXTRACT_SYSTEM_PROMPT},
            {"role": "user", "content": user_content},
        ]

        try:
            response = agent.act(messages)
            raw_lessons = self._parse_lessons_response(response)
        except Exception as exc:
            # Extraction is best-effort: a failed VLM call never breaks the run.
            print(f"[Experience] Could not extract lessons: {exc}")
            return []

        self.data["stats"]["tasks_processed"] += 1

        added: list[str] = []
        for item in raw_lessons:
            app       = item.get("app", "general") or "general"
            ltype     = item.get("lesson_type", "general")
            desc      = str(item.get("description", "")).strip()
            conf      = item.get("confidence", "medium")

            # Skip empty/trivially short descriptions.
            if not desc or len(desc) < 8:
                continue

            # Normalise lesson type
            if ltype not in (
                "successful_navigation", "failed_approach",
                "ui_knowledge", "timing", "general"
            ):
                ltype = "general"

            is_new = self.add_lesson(
                app=app,
                lesson_type=ltype,
                description=desc,
                source_task=task,
                confidence=conf,
            )
            added.append(desc)
            status = "new" if is_new else "reinforced"
            print(f"[Experience] {status}: [{app}] {desc[:80]}")

        # Auto-compact any app that has accumulated too many lessons
        self.compact_if_needed(agent)

        return added

    def _build_trace_summary(
        self,
        task: str,
        subtask_logs: list[dict],
        final_answer: Optional[str],
    ) -> str:
        """Format the execution trace into readable text for the VLM."""
        lines = [f"Task: {task}\n"]

        for entry in subtask_logs:
            status = "PASSED" if entry.get("passed") else "FAILED"
            app_tag = f" [app: {entry['app']}]" if entry.get("app") else ""
            lines.append(f"Subtask{app_tag}: {entry['instruction']}")
            lines.append(f"  Outcome: {status}")

            actions = entry.get("actions", [])
            if actions:
                for act in actions:
                    result_icon = "✓" if act.get("passed") else "✗"
                    reason = act.get("reason", "")[:100]
                    lines.append(f"    {result_icon} {act['action']}  → {reason}")
            lines.append("")

        if final_answer:
            lines.append(f"Final answer: {final_answer}")

        return "\n".join(lines)

    def _parse_lessons_response(self, response: str) -> list[dict]:
        """Parse the VLM's JSON array response into lesson dicts.

        Extracts the outermost ``[...]`` span and keeps only dict items.
        Returns [] for anything unparseable (including a non-string response).
        """
        try:
            start = response.index("[")
            end = response.rindex("]") + 1
            items = json.loads(response[start:end])
            if isinstance(items, list):
                return [i for i in items if isinstance(i, dict)]
        # AttributeError/TypeError cover a None or non-string response from
        # the VLM wrapper; treat it the same as malformed JSON.
        except (ValueError, json.JSONDecodeError, AttributeError, TypeError):
            pass
        return []

    # ------------------------------------------------------------------
    # Compaction: consolidate redundant lessons via VLM
    # ------------------------------------------------------------------

    def compact_if_needed(
        self,
        agent,
        threshold: int = COMPACT_THRESHOLD,
        target: int = COMPACT_TARGET,
    ) -> list[str]:
        """Check each app's lesson count and compact any that exceed *threshold*.

        Compaction calls the VLM to merge near-duplicate lessons, remove
        low-value entries (e.g. individual keystrokes), and generalise
        coordinates — reducing storage and improving hint quality.

        Args:
            agent:     VLM agent with act(messages) -> str.
            threshold: Compact an app when it has at least this many lessons.
            target:    Desired lesson count per app after compaction.

        Returns:
            List of app names that were compacted.
        """
        # Count per-app lessons
        app_counts: dict[str, int] = {}
        for lesson in self.data["lessons"]:
            app = lesson.get("app") or "general"
            app_counts[app] = app_counts.get(app, 0) + 1

        compacted: list[str] = []
        for app, count in app_counts.items():
            if count >= threshold:
                print(
                    f"[Experience] '{app}' has {count} lessons "
                    f"(threshold={threshold}) — compacting..."
                )
                n_before, n_after = self.compact_app_lessons(
                    app_name=app, agent=agent, target=target
                )
                if n_after < n_before:
                    compacted.append(app)
                    print(
                        f"[Experience] '{app}' compacted: "
                        f"{n_before} → {n_after} lessons"
                    )
                else:
                    print(
                        f"[Experience] '{app}' compaction returned no improvement."
                    )

        return compacted

    def compact_app_lessons(
        self,
        app_name: str,
        agent,
        target: int = COMPACT_TARGET,
    ) -> tuple[int, int]:
        """Use the VLM to consolidate all lessons for *app_name* into a
        compact, high-quality set.

        The raw lessons are replaced in-place with the consolidated output.
        A record is appended to ``compaction_history``.

        Args:
            app_name: Name of the app whose lessons to compact.
            agent:    VLM agent with act(messages) -> str.
            target:   Desired lesson count after compaction.

        Returns:
            (n_before, n_after) lesson counts.
        """
        from PhoneClaw.prompts import (
            EXPERIENCE_COMPACT_SYSTEM_PROMPT,
            EXPERIENCE_COMPACT_USER_TEMPLATE,
        )

        app_lessons = [
            l for l in self.data["lessons"]
            if self._app_matches(app_name, l.get("app"))
        ]
        other_lessons = [
            l for l in self.data["lessons"]
            if not self._app_matches(app_name, l.get("app"))
        ]

        n_before = len(app_lessons)
        if n_before == 0:
            return 0, 0

        # Build a compact representation to send to VLM (omit internal fields)
        lessons_for_vlm = [
            {
                "description": l["description"],
                "lesson_type": l["lesson_type"],
                "confidence": l["confidence"],
                "reinforced": l.get("reinforced", 1),
            }
            for l in app_lessons
        ]

        system_content = EXPERIENCE_COMPACT_SYSTEM_PROMPT.replace(
            "{target_count}", str(target)
        )
        user_content = EXPERIENCE_COMPACT_USER_TEMPLATE.format(
            app_name=app_name,
            lesson_count=n_before,
            target_count=target,
            lessons_json=json.dumps(lessons_for_vlm, ensure_ascii=False, indent=2),
        )

        messages = [
            {"role": "system", "content": system_content},
            {"role": "user", "content": user_content},
        ]

        try:
            response = agent.act(messages)
            compact_items = self._parse_lessons_response(response)
        except Exception as exc:
            print(f"[Experience] Compaction VLM call failed for '{app_name}': {exc}")
            return n_before, n_before

        if not compact_items:
            print(f"[Experience] VLM returned no items for '{app_name}' — keeping original.")
            return n_before, n_before

        now = datetime.now().isoformat()
        new_lessons: list[dict] = []
        for item in compact_items:
            desc = str(item.get("description", "")).strip()
            if not desc:
                continue
            # The VLM may emit a non-numeric "reinforced" value; never let
            # that abort the whole compaction.
            try:
                reinforced = max(1, int(item.get("reinforced", 1)))
            except (TypeError, ValueError):
                reinforced = 1
            new_lessons.append({
                "id": self.data["stats"]["total_lessons"] + len(new_lessons) + 1,
                "app": app_name,
                "lesson_type": item.get("lesson_type", "general"),
                "description": desc,
                "source_task": "compaction",
                "confidence": item.get("confidence", "medium"),
                "reinforced": reinforced,
                "timestamp": now,
                "last_seen": now,
                "compacted": True,
            })

        # Replace app lessons with compacted set
        self.data["lessons"] = other_lessons + new_lessons

        # Update stats
        self.data["stats"].setdefault("compactions", 0)
        self.data["stats"]["compactions"] += 1
        self.data["stats"]["total_lessons"] = len(self.data["lessons"])

        # Record history entry
        self.data.setdefault("compaction_history", []).append({
            "app": app_name,
            "before": n_before,
            "after": len(new_lessons),
            "timestamp": now,
        })

        self.save()
        return n_before, len(new_lessons)

    def compact_all(self, agent, target: int = COMPACT_TARGET) -> dict[str, tuple[int, int]]:
        """Compact lessons for ALL apps regardless of lesson count.

        Useful for a one-off cleanup of an existing log that has accumulated
        many redundant entries.

        Returns:
            Dict mapping app_name → (n_before, n_after).
        """
        apps = list({
            (l.get("app") or "general")
            for l in self.data["lessons"]
        })
        results: dict[str, tuple[int, int]] = {}
        for app in apps:
            print(f"[Experience] Compacting all lessons for '{app}'...")
            results[app] = self.compact_app_lessons(app, agent, target=target)
        return results

    # ------------------------------------------------------------------
    # Display helpers
    # ------------------------------------------------------------------

    def summary_banner(self) -> str:
        """One-line status string for startup logging."""
        stats = self.data["stats"]
        return (
            f"[Experience] Log: {self.path}  |  "
            f"Lessons: {stats['total_lessons']}  |  "
            f"Tasks processed: {stats['tasks_processed']}"
        )

    def get_lessons_for_app(self, app_name: str) -> list[dict]:
        """Return all lessons for a given app, sorted by confidence.

        App matching is bundle-ID-aware so that English and Chinese aliases
        (e.g. "Xiaohongshu" and "小红书") return the same set of lessons.
        """
        return sorted(
            [l for l in self.data["lessons"] if self._app_matches(app_name, l.get("app"))],
            key=lambda x: (
                CONFIDENCE_WEIGHT.get(x["confidence"], 1),
                x.get("reinforced", 1),
            ),
            reverse=True,
        )

    def __repr__(self) -> str:
        stats = self.data["stats"]
        return (
            f"ExperienceLog(lessons={stats['total_lessons']}, "
            f"tasks={stats['tasks_processed']}, path={self.path})"
        )


================================================
FILE: PhoneClaw/hierarchy.py
================================================
"""iOS UI Hierarchy - get and parse iOS page source for element labeling."""

import xml.etree.ElementTree as ET
from typing import List, Optional, Tuple
from dataclasses import dataclass


@dataclass
class IOSElement:
    """Represents an iOS UI element.

    A plain record produced while parsing the WDA page-source XML; carries
    the element's position, interactivity class, and accessibility metadata.
    """
    uid: str  # element identifier string (presumably assigned by the parser — see get_element_id)
    bbox: Tuple[Tuple[int, int], Tuple[int, int]]  # ((x1, y1), (x2, y2))
    attrib: str  # "clickable" or "focusable"
    element_type: str  # XCUIElementTypeButton, etc.
    name: Optional[str] = None        # XML 'name' attribute, when present
    label: Optional[str] = None       # XML 'label' attribute, when present
    identifier: Optional[str] = None  # XML accessibility identifier, when present


def get_page_source(
    wda_url: str = "http://localhost:8100",
    session_id: Optional[str] = None,
    timeout: int = 10
) -> Optional[str]:
    """
    Get iOS page source (XML hierarchy) via WebDriverAgent.

    Tries the session-scoped endpoint first (when a session_id is given),
    then the sessionless /source endpoint.  The response payload is unwrapped
    from the several JSON shapes WDA variants return, escape sequences are
    normalised, and the XML is sanity-checked (but returned even when the
    parse check fails).

    Args:
        wda_url: WebDriverAgent URL.
        session_id: Optional WDA session ID.
        timeout: Request timeout in seconds.

    Returns:
        XML string of the page source, or None if failed.
    """
    try:
        import requests

        # Endpoint candidates, most specific first.
        urls_to_try = []
        if session_id:
            urls_to_try.append(f"{wda_url.rstrip('/')}/session/{session_id}/source")
        urls_to_try.append(f"{wda_url.rstrip('/')}/source")

        last_error = None
        for url in urls_to_try:
            try:
                # NOTE(review): verify=False disables TLS certificate checks —
                # presumably to tolerate WDA's self-signed/local endpoint; confirm.
                response = requests.get(url, timeout=timeout, verify=False)

                if response.status_code == 200:
                    try:
                        data = response.json()
                        source = None

                        # WDA responses nest the XML differently across versions:
                        # {"value": "<xml>"}, {"value": {"source": ...}},
                        # {"source": ...}, or a bare string.  Probe each shape
                        # in order; the order matters, so do not reshuffle.
                        if isinstance(data, dict):
                            source = data.get("value")

                            if isinstance(source, dict):
                                source = source.get("source") or source.get("value")

                            if source is None:
                                source = data.get("source")

                            if source is None and isinstance(data.get("value"), dict):
                                source = data.get("value", {}).get("source")

                            if isinstance(source, dict):
                                source = source.get("source") or source.get("value")
                        else:
                            source = str(data) if data else None

                        if source and isinstance(source, str) and len(source.strip()) > 0:
                            source = source.strip()
                            # Strip one level of surrounding quotes (double-encoded JSON).
                            if (source.startswith('"') and source.endswith('"')) or \
                               (source.startswith("'") and source.endswith("'")):
                                source = source[1:-1]

                            # Turn literal backslash escapes into real characters.
                            source = source.replace('\\n', '\n').replace('\\t', '\t').replace('\\r', '\r')
                            source = source.replace('\\"', '"').replace("\\'", "'")

                            source_stripped = source.strip()
                            if source_stripped.startswith('<') or '<?xml' in source_stripped[:100]:
                                # Best-effort XML validation — a parse failure is
                                # only a warning, the raw source is still returned.
                                try:
                                    ET.fromstring(source_stripped)
                                    return source
                                except ET.ParseError:
                                    print(f"Warning: XML from {url} may have parsing issues, returning anyway")
                                    return source
                            else:
                                return source
                        else:
                            print(f"Warning: Empty or invalid page source from {url}")

                    except ValueError:
                        # Body was not JSON: accept raw XML text directly.
                        if response.text and len(response.text.strip()) > 0:
                            text = response.text.strip()
                            if text.startswith('<') or '<?xml' in text[:100]:
                                return text

                elif response.status_code == 404:
                    # Endpoint not supported by this WDA build — try the next one.
                    continue
                elif response.status_code == 500:
                    last_error = f"Server error (500) from {url}"
                    continue
                else:
                    last_error = f"HTTP {response.status_code} from {url}"
                    continue

            except Exception as e:
                last_error = f"Error getting page source from {url}: {e}"
                continue

        if last_error:
            print(f"Failed to get page source. Last error: {last_error}")
        return None

    except ImportError:
        print("Error: requests library required. Install: pip install requests")
        return None
    except Exception as e:
        print(f"Error getting page source: {e}")
        return None


def parse_bounds(bounds_str: str) -> Optional[Tuple[Tuple[int, int], Tuple[int, int]]]:
    """
    Parse bounds string from iOS XML.

    iOS bounds format: "{{x, y}, {width, height}}" or "x,y,width,height".

    Args:
        bounds_str: Raw bounds attribute value; may be empty.

    Returns:
        ((x1, y1), (x2, y2)) — top-left and bottom-right corners in logical
        points — or None when the string is empty or cannot be parsed.
    """
    if not bounds_str:
        return None

    try:
        # Apple's "{{x, y}, {w, h}}" form: stripping every brace leaves a
        # plain comma-separated list identical to the "x,y,w,h" form, so a
        # single parsing path below handles both formats.
        if "{{" in bounds_str:
            bounds_str = bounds_str.replace("{", "").replace("}", "")

        parts = bounds_str.split(",")
        if len(parts) < 4:
            return None

        # int(float(...)) tolerates fractional point values like "1.5".
        x, y, width, height = (int(float(p.strip())) for p in parts[:4])
        return ((x, y), (x + width, y + height))
    except Exception as e:
        print(f"Error parsing bounds '{bounds_str}': {e}")
        return None


def get_element_bounds(element: ET.Element) -> Optional[Tuple[Tuple[int, int], Tuple[int, int]]]:
    """Get bounds from an iOS XML element."""
    bounds_str = element.get('bounds', '')
    if bounds_str:
        bbox = parse_bounds(bounds_str)
        if bbox:
            return bbox

    try:
        x_str = element.get('x', '')
        y_str = element.get('y', '')
        width_str = element.get('width', '')
        height_str = element.get('height', '')

        if x_str and y_str and width_str and height_str:
            x = int(float(x_str))
            y = int(float(y_str))
            width = int(float(width_str))
            height = int(float(height_str))
            return ((x, y), (x + width, y + height))
    except (ValueError, TypeError):
        pass

    return None


def get_element_id(element: ET.Element) -> str:
    """Generate a unique ID for an iOS element.

    Prefers a name/identifier/label based ID; falls back to the element
    type plus its width and height when no identifier is available.
    """
    etype = element.tag if hasattr(element, 'tag') else element.get('type', 'Unknown')

    bbox = get_element_bounds(element)
    if bbox:
        (x1, y1), (x2, y2) = bbox
        elem_w, elem_h = x2 - x1, y2 - y1
    else:
        elem_w = elem_h = 0

    identifier = element.get('name') or element.get('identifier') or element.get('label', '')

    if identifier:
        # Sanitise spaces/colons so the ID is a single safe token.
        return f"{etype}_{identifier.replace(' ', '_').replace(':', '_')}"
    return f"{etype}_{elem_w}_{elem_h}"


def is_interactive_element(element: ET.Element) -> bool:
    """Check if an iOS element is interactive (clickable/focusable).

    An element qualifies when its type matches one of the known interactive
    XCUIElementType names, it is enabled and visible, and it has a non-empty
    on-screen bounding box.
    """
    interactive_types = (
        'XCUIElementTypeButton',
        'XCUIElementTypeCell',
        'XCUIElementTypeTextField',
        'XCUIElementTypeSecureTextField',
        'XCUIElementTypeSearchField',
        'XCUIElementTypeSlider',
        'XCUIElementTypeSwitch',
        'XCUIElementTypeTab',
        'XCUIElementTypeLink',
        'XCUIElementTypeImage',
        'XCUIElementTypeIcon',
        'XCUIElementTypeStaticText',
    )

    etype = element.tag if hasattr(element, 'tag') else element.get('type', '')

    # Guard clauses: bail out as soon as any requirement fails.
    if not any(name in etype for name in interactive_types):
        return False
    if element.get('enabled', 'true') == 'false':
        return False
    if element.get('visible', 'true') == 'false':
        return False

    bbox = get_element_bounds(element)
    if not bbox:
        return False

    (x1, y1), (x2, y2) = bbox
    if x2 - x1 <= 0 or y2 - y1 <= 0:
        return False
    # Reject degenerate all-zero bounds.
    if (x1, y1, x2, y2) == (0, 0, 0, 0):
        return False

    return True


def traverse_ios_tree(
    xml_string: str,
    elem_list: List[IOSElement],
    attrib: str = "clickable",
    add_index: bool = False
):
    """Traverse iOS XML tree and extract interactive elements.

    Args:
        xml_string: Raw iOS page-source XML.
        elem_list: Output list; new IOSElement entries are appended in place.
            A candidate whose centre lies within 5 px of an already-collected
            element is skipped as a duplicate.
        attrib: Attribute tag ("clickable"/"focusable") stored on each element.
        add_index: When True, append the node's 'index' attribute to the uid.
    """
    try:
        root = ET.fromstring(xml_string)
    except Exception:
        # Unparseable XML (including ET.ParseError) — nothing to extract.
        return

    def _centre(bbox):
        """Integer centre point of a ((x1, y1), (x2, y2)) box."""
        return ((bbox[0][0] + bbox[1][0]) // 2, (bbox[0][1] + bbox[1][1]) // 2)

    # NOTE: the original passed the full ancestor path down the recursion but
    # only ever read the direct parent — so we track just the parent node.
    def traverse(node, parent=None):
        if is_interactive_element(node):
            bbox = get_element_bounds(node)

            if bbox:
                cx, cy = _centre(bbox)

                # Skip near-duplicates (centre within 5 px of a kept element).
                duplicate = any(
                    ((cx - ex) ** 2 + (cy - ey) ** 2) ** 0.5 <= 5
                    for ex, ey in (_centre(e.bbox) for e in elem_list)
                )

                if not duplicate:
                    elem_id = get_element_id(node)

                    # Prefix with the parent's ID to disambiguate siblings.
                    if parent is not None:
                        elem_id = f"{get_element_id(parent)}_{elem_id}"

                    if add_index:
                        elem_id += f"_{node.get('index', '0')}"

                    elem_list.append(IOSElement(
                        uid=elem_id,
                        bbox=bbox,
                        attrib=attrib,
                        element_type=node.tag if hasattr(node, 'tag') else node.get('type', ''),
                        name=node.get('name'),
                        label=node.get('label'),
                        identifier=node.get('identifier')
                    ))

        for child in node:
            traverse(child, node)

    traverse(root)


def get_ios_elements(xml_string: str) -> List[IOSElement]:
    """Extract interactive elements from iOS XML.

    Collects clickable and focusable elements separately, then merges them,
    dropping any focusable element whose centre is within 10 px of a
    clickable one (treated as the same widget).
    """
    if not xml_string or not xml_string.strip():
        return []

    clickable: List[IOSElement] = []
    focusable: List[IOSElement] = []

    traverse_ios_tree(xml_string, clickable, "clickable", True)
    traverse_ios_tree(xml_string, focusable, "focusable", True)

    def centre(box):
        return ((box[0][0] + box[1][0]) // 2, (box[0][1] + box[1][1]) // 2)

    merged = list(clickable)
    for focus_elem in focusable:
        fx, fy = centre(focus_elem.bbox)
        overlaps = any(
            ((fx - cx) ** 2 + (fy - cy) ** 2) ** 0.5 <= 10
            for cx, cy in (centre(click_elem.bbox) for click_elem in clickable)
        )
        if not overlaps:
            merged.append(focus_elem)

    return merged


================================================
FILE: PhoneClaw/keepalive.py
================================================
"""Screen keepalive for PhoneClaw.

Strategy
--------
Primary — ``idleTimerDisabled`` (no touch required)
    WDA exposes ``POST /wda/settings`` which maps directly to Apple's
    ``[[UIApplication sharedApplication] setIdleTimerDisabled:YES]``.
    Setting this flag tells iOS to never engage the auto-lock idle timer for
    the duration of the WDA session, without touching any UI element at all.

    A background thread runs ``GET /status`` every *interval* seconds purely
    to keep the WDA HTTP session from timing out (some proxies/firewalls drop
    idle TCP connections after 30–60 s).

Fallback — periodic touch (when ``idleTimerDisabled`` is not supported)
    Older / custom WDA builds may not expose the ``idleTimerDisabled``
    setting.  In that case the keepalive falls back to sending a synthetic
    touch event every *interval* seconds via the W3C Actions API.

    Touch target: the **horizontal centre, vertical middle** of the screen
    (x = 50 %, y = 50 %).  The middle of the screen is the one area that
    is most reliably neutral across all apps — it avoids:
    - Status bar (scroll-to-top trigger)
    - Dynamic Island / notch (live-activity expansion)
    - Bottom home-indicator strip (may be interactive in some apps)
    - Left/right edges (back-swipe gesture zones)
    The downside is that in rare cases it may tap on a button in the current
    app; this is acceptable because the fallback is only used when the cleaner
    API is unavailable.

Usage::

    keepalive = ScreenKeepalive(wda_url="http://localhost:8100",
                                session_id="...",
                                interval=25)
    keepalive.start()
    ...                 # runs in background while your main code executes
    keepalive.stop()    # or just let the process exit (daemon thread auto-dies)
"""

import threading
from typing import Optional

import requests


class ScreenKeepalive:
    """
    Prevents the iOS device screen from sleeping during an interactive session.

    On ``start()``:
      1. Sends ``POST /wda/settings {"settings": {"idleTimerDisabled": true}}``
         to disable iOS auto-lock at the system level — no touches needed.
      2. Starts a lightweight daemon thread that pings ``GET /status`` every
         *interval* seconds to keep the WDA HTTP session alive.

    On ``stop()``:
      1. Re-enables the idle timer via ``idleTimerDisabled: false``.
      2. Stops the background thread.

    If the ``idleTimerDisabled`` setting is not supported by the WDA build, a
    warning is printed and the keepalive falls back to a periodic synthetic
    touch event (see module docstring).
    """

    def __init__(
        self,
        wda_url: str = "http://localhost:8100",
        session_id: Optional[str] = None,
        interval: float = 25.0,
        verbose: bool = True,
    ):
        """
        Args:
            wda_url:    WebDriverAgent base URL.
            session_id: WDA session ID (required).
            interval:   Seconds between WDA heartbeat pings / fallback taps.
                        Should be shorter than the device auto-lock timeout.
            verbose:    Print a brief log line on start/stop.
        """
        self.wda_url = wda_url.rstrip("/")
        self.session_id = session_id
        self.interval = interval
        self.verbose = verbose

        self._stop = threading.Event()
        self._thread: Optional[threading.Thread] = None
        # True when idleTimerDisabled was successfully set — so we know to
        # clear it on stop().
        self._idle_timer_disabled = False
        # True when the primary API is unavailable and we use touch fallback.
        self._using_touch_fallback = False
        # Cached logical screen size, filled lazily by _get_logical_size().
        # Declared here (rather than via a hasattr check later) so the
        # instance attribute set is complete after construction.
        self._logi_w: Optional[int] = None
        self._logi_h: Optional[int] = None

    # ------------------------------------------------------------------
    # Public interface
    # ------------------------------------------------------------------

    def start(self) -> None:
        """Enable screen keepalive and start the background heartbeat thread."""
        # Idempotent: ignore repeated start() while the thread is running.
        if self._thread and self._thread.is_alive():
            return

        self._idle_timer_disabled = self._set_idle_timer_disabled(True)

        if self._idle_timer_disabled:
            if self.verbose:
                print(
                    f"[Keepalive] idleTimerDisabled=true — screen will stay on "
                    f"without touch events.  Heartbeat every {self.interval}s."
                )
            loop_target = self._heartbeat_loop
        else:
            # idleTimerDisabled not supported by this WDA build — fall back to
            # a periodic touch event.
            self._using_touch_fallback = True
            if self.verbose:
                print(
                    f"[Keepalive] idleTimerDisabled not supported — falling back "
                    f"to touch keepalive every {self.interval}s."
                )
            loop_target = self._touch_loop

        self._stop.clear()
        self._thread = threading.Thread(
            target=loop_target,
            name="ScreenKeepalive",
            daemon=True,
        )
        self._thread.start()

    def stop(self) -> None:
        """Stop keepalive and restore the idle timer."""
        self._stop.set()
        if self._thread and self._thread.is_alive():
            # One full interval plus slack lets an in-flight request finish.
            self._thread.join(timeout=self.interval + 2)

        if self._idle_timer_disabled:
            self._set_idle_timer_disabled(False)
            self._idle_timer_disabled = False

        if self.verbose:
            print("[Keepalive] Stopped.")

    # ------------------------------------------------------------------
    # Primary: system-level idle timer control
    # ------------------------------------------------------------------

    def _set_idle_timer_disabled(self, disabled: bool) -> bool:
        """
        Call ``POST /wda/settings`` to enable or disable the iOS idle timer.

        Returns True on success, False if the setting is unsupported or the
        request fails.
        """
        url = f"{self.wda_url}/wda/settings"
        try:
            r = requests.post(
                url,
                json={"settings": {"idleTimerDisabled": disabled}},
                timeout=8,
            )
            if r.ok:
                return True
            # WDA returns 400/500 when the setting is unknown
            return False
        except Exception:
            return False

    # ------------------------------------------------------------------
    # Primary loop: lightweight WDA heartbeat (no UI interaction)
    # ------------------------------------------------------------------

    def _heartbeat_loop(self) -> None:
        """Ping ``GET /status`` to keep the WDA TCP session alive."""
        # Event.wait doubles as the sleep and the stop signal: it returns
        # True (exit loop) as soon as stop() sets the event.
        while not self._stop.wait(self.interval):
            try:
                requests.get(f"{self.wda_url}/status", timeout=8)
            except Exception as exc:
                if self.verbose:
                    print(f"[Keepalive] Heartbeat warning: {exc}")

    # ------------------------------------------------------------------
    # Fallback loop: synthetic touch event
    # ------------------------------------------------------------------

    def _touch_loop(self) -> None:
        """Send a synthetic touch to the screen centre every *interval* seconds."""
        while not self._stop.wait(self.interval):
            try:
                self._touch_centre()
            except Exception as exc:
                if self.verbose:
                    print(f"[Keepalive] Touch warning: {exc}")

    def _touch_centre(self) -> None:
        """
        Send a single synthetic tap to the centre of the screen (50 %, 50 %).

        The screen centre is chosen as the least-disruptive touch position
        available as a fallback: it avoids the status bar (scroll-to-top),
        Dynamic Island / notch, home-indicator strip, and edge-swipe zones.
        """
        w, h = self._get_logical_size()
        x = w // 2
        y = h // 2

        # W3C Actions API: move, press, brief pause, release.
        url = f"{self.wda_url}/session/{self.session_id}/actions"
        payload = {
            "actions": [
                {
                    "type": "pointer",
                    "id": "keepalive_finger",
                    "parameters": {"pointerType": "touch"},
                    "actions": [
                        {"type": "pointerMove", "duration": 0, "x": x, "y": y},
                        {"type": "pointerDown", "button": 0},
                        {"type": "pause",       "duration": 50},
                        {"type": "pointerUp",   "button": 0},
                    ],
                }
            ]
        }
        r = requests.post(url, json=payload, timeout=8)
        if self.verbose:
            status = "ok" if r.ok else f"HTTP {r.status_code}"
            print(f"[Keepalive] Touch centre ({x}, {y}) — {status}")

    def _get_logical_size(self) -> tuple[int, int]:
        """Return logical screen dimensions, queried once and cached."""
        if self._logi_w and self._logi_h:
            return self._logi_w, self._logi_h

        try:
            url = f"{self.wda_url}/session/{self.session_id}/window/size"
            r = requests.get(url, timeout=5)
            if r.ok:
                v = r.json().get("value", {})
                self._logi_w = int(v.get("width", 393))
                self._logi_h = int(v.get("height", 852))
                return self._logi_w, self._logi_h
        except Exception:
            pass

        # Network/parse failure — fall back to iPhone 14 Pro logical size.
        self._logi_w, self._logi_h = 393, 852
        return self._logi_w, self._logi_h


================================================
FILE: PhoneClaw/labeling.py
================================================
"""iOS Screenshot Labeling - draw bounding boxes and labels on screenshots."""

import cv2
import os
from typing import List, Optional
from PhoneClaw.hierarchy import IOSElement

try:
    import pyshine as ps
except ImportError:
    try:
        import puttext as ps
    except ImportError:
        ps = None

IOS_SCALE_FACTOR = 3


def _get_scale_factor(img_path: str) -> float:
    """
    Calculate scale factor between logical coordinates and physical screenshot.

    Returns:
        Scale factor (typically 3.0 for modern iPhones).
    """
    try:
        img = cv2.imread(img_path)
        if img is None:
            return IOS_SCALE_FACTOR

        width = img.shape[1]

        # Narrow images are already at logical resolution.
        if width < 1100:
            return 1.0

        # Match common iPhone logical widths: 393/390 pt @3x, 375 pt @2x.
        if abs(width / 3 - 393) < 10 or abs(width / 3 - 390) < 10:
            return 3.0
        if abs(width / 2 - 375) < 10:
            return 2.0
        return width / 375.0
    except Exception:
        return IOS_SCALE_FACTOR


def draw_bbox_multi_ios(
    img_path: str,
    output_path: str,
    elem_list: List[IOSElement],
    record_mode: bool = False,
    dark_mode: bool = False,
    scale_factor: Optional[float] = None
):
    """
    Draw bounding boxes and labels on iOS screenshot.

    Args:
        img_path: Path to input screenshot.
        output_path: Path to save labeled screenshot.
        elem_list: List of IOSElement objects (bboxes in logical coordinates).
        record_mode: Whether to use record mode coloring.
        dark_mode: Whether to use dark mode colors.
        scale_factor: Optional scale factor to convert logical to physical coordinates.

    Returns:
        The labeled image array, or None when the input image is missing or
        unreadable. Labels are numbered 1..len(elem_list); an element with an
        invalid bbox keeps its number but is not drawn.
    """
    if not os.path.exists(img_path):
        print(f"Error: Image file not found: {img_path}")
        return None

    imgcv = cv2.imread(img_path)
    if imgcv is None:
        print(f"Error: Failed to read image: {img_path}")
        return None

    if scale_factor is None:
        scale_factor = _get_scale_factor(img_path)

    # enumerate keeps the label numbering identical to the original manual
    # counter: every element consumes a number, drawable or not.
    for count, elem in enumerate(elem_list, start=1):
        try:
            rect = _scaled_rect(elem, scale_factor)
            if rect is None:
                continue
            left, top, right, bottom = rect

            label = str(count)

            if record_mode:
                # Colour-code by interaction attribute in record mode.
                if elem.attrib == "clickable":
                    color = (250, 0, 0)
                elif elem.attrib == "focusable":
                    color = (0, 0, 250)
                else:
                    color = (0, 250, 0)

                if ps:
                    imgcv = ps.putBText(
                        imgcv, label,
                        text_offset_x=(left + right) // 2 + 10,
                        text_offset_y=(top + bottom) // 2 + 10,
                        vspace=10, hspace=10, font_scale=1, thickness=2,
                        background_RGB=color, text_RGB=(255, 250, 250), alpha=0.5
                    )
                else:
                    # pyshine unavailable — plain rectangle + text fallback.
                    cv2.rectangle(imgcv, (left, top), (right, bottom), color, 2)
                    cv2.putText(imgcv, label, ((left + right) // 2, (top + bottom) // 2),
                               cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 250, 250), 2)
            else:
                text_color = (10, 10, 10) if dark_mode else (255, 250, 250)
                bg_color = (255, 250, 250) if dark_mode else (10, 10, 10)

                if ps:
                    imgcv = ps.putBText(
                        imgcv, label,
                        text_offset_x=(left + right) // 2 + 10,
                        text_offset_y=(top + bottom) // 2 + 10,
                        vspace=10, hspace=10, font_scale=2, thickness=2,
                        background_RGB=bg_color, text_RGB=text_color, alpha=0.5
                    )
                else:
                    cv2.rectangle(imgcv, (left, top), (right, bottom), (0, 255, 0), 2)
                    cv2.putText(imgcv, label, ((left + right) // 2, (top + bottom) // 2),
                               cv2.FONT_HERSHEY_SIMPLEX, 2, text_color, 2)
        except Exception as e:
            print(f"ERROR: An exception occurs while labeling the image\n{e}")

    os.makedirs(os.path.dirname(os.path.abspath(output_path)), exist_ok=True)
    cv2.imwrite(output_path, imgcv)
    return imgcv


def _scaled_rect(elem, scale_factor):
    """Validate elem.bbox and scale it to pixel coordinates.

    Returns (left, top, right, bottom) ints, or None when the bbox is
    missing, malformed, out of sane range, or degenerate (zero/negative
    width or height).
    """
    bbox = elem.bbox
    if not bbox or not isinstance(bbox, (tuple, list)) or len(bbox) < 2:
        return None

    top_left, bottom_right = bbox[0], bbox[1]
    for corner in (top_left, bottom_right):
        if not corner or not isinstance(corner, (tuple, list)) or len(corner) < 2:
            return None

    left = int(top_left[0] * scale_factor)
    top = int(top_left[1] * scale_factor)
    right = int(bottom_right[0] * scale_factor)
    bottom = int(bottom_right[1] * scale_factor)

    # Guard against corrupt bounds producing absurd coordinates.
    if any(not (-1000000 < coord < 1000000) for coord in (left, top, right, bottom)):
        return None
    if right <= left or bottom <= top:
        return None

    return left, top, right, bottom


================================================
FILE: PhoneClaw/learn.py
================================================
"""Learning mode for PhoneClaw — record human demonstrations and extract lessons.

How it works
------------
1. DemoRecorder polls the device screen at ~8 fps using WDA.
2. Each frame pair is compared.  When a significant change is detected:
   a. The diff image is analysed with OpenCV HoughCircles to locate the iOS
      "Show Touches" indicator (a semi-transparent circle that appears at the
      tap point when Settings → Developer → Show Touches is enabled).
   b. If no circle is found the centroid of the largest changed region is used
      as a fallback estimate.
3. Each changed frame plus its estimated tap coordinate is saved to disk.
4. After recording ends the VLM is called for each frame to extract reusable
   navigation lessons which are stored in the ExperienceLog.

Prerequisites for best results
-------------------------------
Enable "Show Touches" on the iOS device before starting the demo:

  iOS 16+:  Settings → Privacy & Security → Developer Mode → Show Touches
  Older:    Settings → Accessibility → Touch → Show Touches

With "Show Touches" active, every tap leaves a white-circle overlay visible
in WDA screenshots.  HoughCircles detects the circle and returns the exact
tap centre.  Without it the module still works but falls back to the centre
of the changed screen region (less precise for large animations).
"""

from __future__ import annotations

import base64
import io
import json
import threading
import time
from dataclasses import dataclass
from pathlib import Path
from typing import Optional

import cv2
import numpy as np
from PIL import Image, ImageDraw

from PhoneClaw.screenshot import get_screenshot

DEFAULT_DEMO_BASE = Path(__file__).parent / "data" / "demos"

# ---------------------------------------------------------------------------
# HoughCircles parameters — tuned for typical iOS Retina screenshots.
# The touch indicator is a semi-transparent circle roughly 25–40 logical pts
# in diameter; at 3× scale that is ~75–120 px, but WDA returns screenshots
# at logical resolution (typically 390–430 pt wide), so effective radii are
# 12–50 px.
# ---------------------------------------------------------------------------
_HOUGH_DP = 1.5         # inverse ratio of accumulator resolution
_HOUGH_MIN_DIST = 40    # minimum distance between detected circle centres
_HOUGH_PARAM1 = 60      # Canny edge upper threshold applied to diff image
_HOUGH_PARAM2 = 16      # accumulator threshold — lower = more permissive
_HOUGH_MIN_R = 12       # minimum radius in px
_HOUGH_MAX_R = 55       # maximum radius in px

# Pixel intensity threshold for the abs-diff map (0-255).
_DIFF_PIXEL_THRESHOLD = 20

# Default minimum fraction of pixels that must change to count as an event.
_CHANGE_THRESHOLD_DEFAULT = 0.003   # 0.3 %


# ---------------------------------------------------------------------------
# Frame data
# ---------------------------------------------------------------------------

@dataclass
class DemoFrame:
    """One recorded screen-change event.

    Produced by DemoRecorder when consecutive polled screenshots differ by
    more than the configured change threshold. Tap coordinates are stored
    normalised so they remain valid regardless of screenshot resolution.
    """
    idx: int                     # sequential frame number within the demo
    timestamp: float             # time the change event was captured
    screenshot_b64: str     # annotated screenshot (tap marked with red circle)
    width: int              # screenshot width in pixels
    height: int             # screenshot height in pixels
    tap_x_rel: Optional[float]   # normalised [0, 1], None if not detected
    tap_y_rel: Optional[float]   # normalised [0, 1], None if not detected
    detection_method: str        # "hough_circles" | "diff_centroid" | "none"
    change_pct: float            # fraction of pixels that changed [0, 1]


# ---------------------------------------------------------------------------
# Touch indicator detection
# ---------------------------------------------------------------------------

def _b64_to_gray(b64: str) -> np.ndarray:
    """Decode a base-64 PNG and return a grayscale numpy array.

    Raises:
        ValueError: if the bytes do not decode to a valid image.
    """
    raw = np.frombuffer(base64.b64decode(b64), dtype=np.uint8)
    decoded = cv2.imdecode(raw, cv2.IMREAD_COLOR)
    if decoded is None:
        raise ValueError("Could not decode screenshot image")
    return cv2.cvtColor(decoded, cv2.COLOR_BGR2GRAY)


def _compute_diff(
    gray_before: np.ndarray, gray_after: np.ndarray
) -> tuple[np.ndarray, float]:
    """Return (abs_diff_image, fraction_of_changed_pixels)."""
    abs_diff = cv2.absdiff(gray_before, gray_after)
    # A pixel counts as changed when its abs-diff exceeds the fixed threshold.
    _, changed_mask = cv2.threshold(
        abs_diff, _DIFF_PIXEL_THRESHOLD, 255, cv2.THRESH_BINARY
    )
    changed_fraction = float(np.count_nonzero(changed_mask)) / changed_mask.size
    return abs_diff, changed_fraction


def _detect_tap(
    diff: np.ndarray,
    img_w: int,
    img_h: int,
) -> tuple[Optional[float], Optional[float], str]:
    """Estimate the tap position from an abs-diff frame.

    Method 1 (preferred): HoughCircles on a Gaussian-blurred diff image.
        The "Show Touches" overlay manifests as a roughly circular bright
        region in the diff map.  When detected we return the circle centre.

    Method 2 (fallback): Centroid of the largest connected changed component.
        Works even without Show Touches, but is less precise when a large
        area of the UI changes (e.g. a page transition animation).

    Returns:
        (rel_x, rel_y, method_name) — coordinates in [0, 1] relative to the
        image dimensions, or (None, None, "none") if localisation failed.
    """
    # ── Method 1: HoughCircles on diff ────────────────────────────────────
    blurred = cv2.GaussianBlur(diff, (5, 5), 1.5)
    found = cv2.HoughCircles(
        blurred,
        cv2.HOUGH_GRADIENT,
        dp=_HOUGH_DP,
        minDist=_HOUGH_MIN_DIST,
        param1=_HOUGH_PARAM1,
        param2=_HOUGH_PARAM2,
        minRadius=_HOUGH_MIN_R,
        maxRadius=_HOUGH_MAX_R,
    )

    if found is not None:
        # The largest-radius candidate is most likely the finger-down
        # indicator rather than UI noise; circles are (x, y, radius) rows.
        cx, cy, _radius = max(found[0], key=lambda circle: circle[2])
        return float(cx) / img_w, float(cy) / img_h, "hough_circles"

    # ── Method 2: Centroid of largest changed region ───────────────────────
    _, mask = cv2.threshold(diff, _DIFF_PIXEL_THRESHOLD, 255, cv2.THRESH_BINARY)

    n_labels, _label_img, stats, centroids = cv2.connectedComponentsWithStats(
        mask, connectivity=8
    )
    if n_labels <= 1:
        # Only the background component — nothing changed enough to locate.
        return None, None, "none"

    # Label 0 is background; pick the largest foreground component.
    largest = 1 + int(np.argmax(stats[1:, cv2.CC_STAT_AREA]))
    cx, cy = centroids[largest]
    return float(cx) / img_w, float(cy) / img_h, "diff_centroid"


# ---------------------------------------------------------------------------
# DemoRecorder
# ---------------------------------------------------------------------------

class DemoRecorder:
    """Records a human demonstration on the device and extracts lessons.

    A daemon thread polls WDA screenshots every ``poll_interval`` seconds.
    When the fraction of changed pixels between consecutive grayscale frames
    reaches ``change_threshold``, the frame is treated as an interaction
    event: the tap position is localised from the diff image, the screenshot
    is annotated and written to ``demo_dir``, and a ``DemoFrame`` is appended
    to ``frames``.  ``analyze_and_learn()`` later sends each frame to the VLM
    and records the extracted lessons in the optional experience log.

    Typical usage::

        recorder = DemoRecorder(
            wda_url="http://localhost:8100",
            session_id="abc123",
            app_name="美团",
            task_description="查看历史订单",
            experience=exp_log,
        )
        recorder.start()
        input("\\nPerform the demo on the device, then press Enter to stop...\\n")
        recorder.stop()
        lessons = recorder.analyze_and_learn(agent=exec_agent)
        print(recorder.summary())
    """

    def __init__(
        self,
        wda_url: str,
        session_id: str,
        app_name: str,
        task_description: str,
        demo_dir: Optional[Path] = None,
        poll_interval: float = 0.12,               # seconds between polls (~8 fps)
        change_threshold: float = _CHANGE_THRESHOLD_DEFAULT,
        experience=None,                            # Optional[ExperienceLog]
    ):
        """
        Args:
            wda_url: Base URL of the WebDriverAgent server.
            session_id: Active WDA session used for screenshot capture.
            app_name: App being demonstrated (also used in the output dir name).
            task_description: What the human is demonstrating; sent to the VLM.
            demo_dir: Output directory for frames; defaults to
                      DEFAULT_DEMO_BASE/<sanitised app>_<unix timestamp>.
            poll_interval: Seconds between screenshot polls.
            change_threshold: Fraction of changed pixels (0-1) that counts as
                              an interaction event.
            experience: Optional ExperienceLog receiving extracted lessons.
        """
        self.wda_url = wda_url
        self.session_id = session_id
        self.app_name = app_name
        self.task_description = task_description
        self.poll_interval = poll_interval
        self.change_threshold = change_threshold
        self.experience = experience

        # Unique, filesystem-safe output directory per recording session.
        ts = int(time.time())
        safe_app = app_name.replace(" ", "_").replace("/", "_")[:20]
        self.demo_dir: Path = demo_dir or (DEFAULT_DEMO_BASE / f"{safe_app}_{ts}")
        self.demo_dir.mkdir(parents=True, exist_ok=True)

        # Captured event frames, in chronological order.
        self.frames: list[DemoFrame] = []
        # Set to signal the polling thread to exit.
        self._stop_event = threading.Event()
        # Background polling thread; created in start().
        self._thread: Optional[threading.Thread] = None
        # Previous grayscale frame used for diffing; None until the first poll.
        self._prev_gray: Optional[np.ndarray] = None
        # Monotonic counter used for DemoFrame.idx and frame filenames.
        self._frame_idx: int = 0

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------

    def start(self) -> None:
        """Start background polling in a daemon thread."""
        self._stop_event.clear()
        self._thread = threading.Thread(
            target=self._poll_loop, name="demo-recorder", daemon=True
        )
        self._thread.start()
        print(
            f"\n[Learn] Recording started"
            f"\n        App  : {self.app_name}"
            f"\n        Task : {self.task_description}"
            f"\n        Dir  : {self.demo_dir}"
            f"\n"
            f"\n[Learn] TIP — enable 'Show Touches' on the device for precise"
            f"\n        tap detection: Settings → Developer → Show Touches\n"
        )

    def stop(self) -> None:
        """Stop polling and wait for the background thread to exit."""
        self._stop_event.set()
        if self._thread and self._thread.is_alive():
            # Bounded join so a stuck WDA request cannot hang shutdown forever.
            self._thread.join(timeout=5)
        print(
            f"\n[Learn] Recording stopped — {len(self.frames)} event(s) captured."
        )

    # ------------------------------------------------------------------
    # Background polling loop
    # ------------------------------------------------------------------

    def _poll_loop(self) -> None:
        """Grab screenshots continuously and emit events on changes.

        Runs until ``_stop_event`` is set.  Each iteration: fetch a
        screenshot, convert to grayscale, diff against the previous frame,
        and call ``_on_event`` when the change fraction crosses the
        threshold.  All errors are swallowed so transient device hiccups
        never kill the recording.
        """
        while not self._stop_event.is_set():
            try:
                shot = get_screenshot(
                    wda_url=self.wda_url,
                    session_id=self.session_id,
                )
                try:
                    gray = _b64_to_gray(shot.base64_data)
                except Exception:
                    # Undecodable screenshot — skip this poll cycle entirely.
                    time.sleep(self.poll_interval)
                    continue

                if self._prev_gray is not None:
                    diff, change_pct = _compute_diff(self._prev_gray, gray)
                    if change_pct >= self.change_threshold:
                        tap_x, tap_y, method = _detect_tap(
                            diff, shot.width, shot.height
                        )
                        self._on_event(
                            b64=shot.base64_data,
                            width=shot.width,
                            height=shot.height,
                            tap_x=tap_x,
                            tap_y=tap_y,
                            method=method,
                            change_pct=change_pct,
                        )

                self._prev_gray = gray

            except Exception:
                pass  # transient WDA errors — keep polling

            time.sleep(self.poll_interval)

    def _on_event(
        self,
        b64: str,
        width: int,
        height: int,
        tap_x: Optional[float],
        tap_y: Optional[float],
        method: str,
        change_pct: float,
    ) -> None:
        """Handle a detected screen-change event.

        Annotates the screenshot with a tap marker (when a tap position was
        localised), appends a DemoFrame to ``self.frames``, and persists the
        frame PNG to ``self.demo_dir`` immediately.
        """
        self._frame_idx += 1
        idx = self._frame_idx

        # Draw a red circle marker at the detected tap position
        annotated = (
            _annotate_tap(b64, tap_x, tap_y, width, height)
            if tap_x is not None
            else b64
        )

        frame = DemoFrame(
            idx=idx,
            timestamp=time.time(),
            screenshot_b64=annotated,
            width=width,
            height=height,
            tap_x_rel=tap_x,
            tap_y_rel=tap_y,
            detection_method=method,
            change_pct=change_pct,
        )
        self.frames.append(frame)

        # Persist frame to disk immediately
        _save_b64_png(annotated, self.demo_dir / f"frame_{idx:04d}.png")

        pct_str = f"{change_pct * 100:.1f}%"
        coord_str = (
            f"tap≈({tap_x:.3f}, {tap_y:.3f}) [{method}]"
            if tap_x is not None
            else "tap: not detected"
        )
        print(f"[Learn] Frame {idx:3d}  Δ{pct_str:>6}  {coord_str}")

    # ------------------------------------------------------------------
    # VLM analysis
    # ------------------------------------------------------------------

    def analyze_and_learn(self, agent) -> list[str]:
        """Send each recorded frame to the VLM and store the extracted lessons.

        For each frame the VLM receives:
          - The annotated screenshot (tap marker drawn in red)
          - The detected tap coordinate (normalised %)
          - App name, task description, step number, change magnitude

        Args:
            agent: VLM agent exposing ``act(messages) -> str``.

        Returns a flat list of lesson descriptions that were added to the
        ExperienceLog (empty list when no experience object is provided).
        """
        if not self.frames:
            print("[Learn] No frames to analyse.")
            return []

        # NOTE(review): function-scope import — presumably avoids a circular
        # import with PhoneClaw.prompts; confirm before moving to module level.
        from PhoneClaw.prompts import (
            DEMO_ANALYSIS_SYSTEM_PROMPT,
            DEMO_ANALYSIS_USER_TEMPLATE,
        )

        all_lessons: list[str] = []
        total = len(self.frames)
        print(f"\n[Learn] Analysing {total} frame(s) with VLM...")

        for frame in self.frames:
            # Format coordinates as percentages; "?" when not localised.
            tap_x_pct = (
                f"{frame.tap_x_rel * 100:.1f}" if frame.tap_x_rel is not None else "?"
            )
            tap_y_pct = (
                f"{frame.tap_y_rel * 100:.1f}" if frame.tap_y_rel is not None else "?"
            )

            # Tell the VLM how reliable the tap coordinate is.
            if frame.detection_method == "hough_circles":
                detection_note = (
                    " (precise — detected via Show Touches indicator)"
                )
            elif frame.detection_method == "diff_centroid":
                detection_note = (
                    " (approximate — estimated from changed region centroid;"
                    " enable Show Touches for better accuracy)"
                )
            else:
                detection_note = (
                    " (unknown — large UI transition, tap position not localised)"
                )

            user_text = DEMO_ANALYSIS_USER_TEMPLATE.format(
                app_name=self.app_name,
                task_description=self.task_description,
                step_num=frame.idx,
                total_steps=total,
                tap_x_pct=tap_x_pct,
                tap_y_pct=tap_y_pct,
                detection_note=detection_note,
                change_pct=round(frame.change_pct * 100, 1),
            )

            messages = [
                {"role": "system", "content": DEMO_ANALYSIS_SYSTEM_PROMPT},
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": user_text},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": (
                                    f"data:image/png;base64,{frame.screenshot_b64}"
                                )
                            },
                        },
                    ],
                },
            ]

            try:
                raw = agent.act(messages)
                lessons = _parse_lessons(raw)
            except Exception as exc:
                # One bad VLM call should not abort the remaining frames.
                print(f"[Learn]   Frame {frame.idx}: VLM error — {exc}")
                continue

            added = 0
            for lesson in lessons:
                # NOTE(review): assumes "description" values are strings — a
                # JSON null here would raise AttributeError on .strip();
                # verify the VLM output schema.
                desc = lesson.get("description", "").strip()
                if not desc:
                    continue
                all_lessons.append(desc)
                if self.experience is not None:
                    stored = self.experience.add_lesson(
                        app=lesson.get("app") or self.app_name,
                        lesson_type=lesson.get("lesson_type", "ui_knowledge"),
                        description=desc,
                        source_task=self.task_description,
                        confidence=lesson.get("confidence", "medium"),
                    )
                    if stored:
                        added += 1

            print(
                f"[Learn]   Frame {frame.idx}: "
                f"{len(lessons)} lesson(s) extracted, {added} new."
            )

        if self.experience is not None:
            self.experience.save()
            # Auto-compact if the demo pushed any app over the threshold
            self.experience.compact_if_needed(agent)

        # Persist a summary JSON alongside the frames
        _save_summary(self.demo_dir / "demo_summary.json", self, all_lessons)

        print(
            f"\n[Learn] Done — {len(all_lessons)} lesson(s) extracted and "
            f"saved to ExperienceLog."
        )
        return all_lessons

    # ------------------------------------------------------------------
    # Info
    # ------------------------------------------------------------------

    def summary(self) -> str:
        """Return a human-readable multi-line summary of the recording."""
        detected = sum(1 for f in self.frames if f.tap_x_rel is not None)
        # Count frames per detection method for the report.
        method_counts: dict[str, int] = {}
        for f in self.frames:
            method_counts[f.detection_method] = (
                method_counts.get(f.detection_method, 0) + 1
            )
        method_str = "  ".join(
            f"{m}: {n}" for m, n in sorted(method_counts.items())
        )
        return (
            f"\nDemo recording summary:\n"
            f"  App              : {self.app_name}\n"
            f"  Task             : {self.task_description}\n"
            f"  Frames captured  : {len(self.frames)}\n"
            f"  Tap detected     : {detected}/{len(self.frames)}\n"
            f"  Detection methods: {method_str or 'n/a'}\n"
            f"  Output dir       : {self.demo_dir}\n"
        )


# ---------------------------------------------------------------------------
# Private helpers
# ---------------------------------------------------------------------------

def _annotate_tap(
    b64: str,
    rel_x: float,
    rel_y: float,
    width: int,
    height: int,
    radius: int = 22,
) -> str:
    """Overlay a red circle on the screenshot at the detected tap location.

    Returns the annotated screenshot as base-64 PNG, or the original on error.
    """
    try:
        data = base64.b64decode(b64)
        img = Image.open(io.BytesIO(data)).convert("RGBA")
        overlay = Image.new("RGBA", img.size, (0, 0, 0, 0))
        draw = ImageDraw.Draw(overlay)
        px = int(rel_x * width)
        py = int(rel_y * height)
        # Outer ring
        draw.ellipse(
            [px - radius, py - radius, px + radius, py + radius],
            outline=(255, 50, 50, 230),
            width=4,
        )
        # Centre dot
        dot_r = 6
        draw.ellipse(
            [px - dot_r, py - dot_r, px + dot_r, py + dot_r],
            fill=(255, 50, 50, 200),
        )
        annotated = Image.alpha_composite(img, overlay).convert("RGB")
        buf = io.BytesIO()
        annotated.save(buf, format="PNG")
        return base64.b64encode(buf.getvalue()).decode()
    except Exception:
        return b64


def _save_b64_png(b64: str, path: Path) -> None:
    try:
        path.write_bytes(base64.b64decode(b64))
    except Exception:
        pass


def _parse_lessons(raw: str) -> list[dict]:
    """Extract a JSON array from a raw VLM response string."""
    raw = raw.strip()
    start = raw.find("[")
    end = raw.rfind("]")
    if start == -1 or end == -1 or end <= start:
        return []
    try:
        return json.loads(raw[start: end + 1])
    except json.JSONDecodeError:
        return []


def _save_summary(path: Path, recorder: DemoRecorder, lessons: list[str]) -> None:
    """Write a JSON summary of the demo session next to the saved frames.

    The summary records the app/task context, per-frame tap detections,
    and every lesson description that was extracted by the VLM.
    """
    # Per-frame detection metadata (screenshots themselves live on disk).
    frame_rows = [
        {
            "idx": f.idx,
            "tap_x_rel": f.tap_x_rel,
            "tap_y_rel": f.tap_y_rel,
            "detection_method": f.detection_method,
            "change_pct": round(f.change_pct * 100, 2),
        }
        for f in recorder.frames
    ]
    payload = {
        "app": recorder.app_name,
        "task": recorder.task_description,
        "recorded_at": time.strftime("%Y-%m-%dT%H:%M:%S"),
        "frames_total": len(recorder.frames),
        "lessons_extracted": len(lessons),
        "lessons": lessons,
        "frames": frame_rows,
    }
    path.write_text(
        json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8"
    )


================================================
FILE: PhoneClaw/loop.py
================================================
"""Ralph Loop orchestrator for PhoneClaw.

Implements the core EXECUTE → EVALUATE → FIX → REPEAT cycle:

  Outer loop: iterate over subtasks (advance on PASS, retry on FAIL)
  Inner loop: execute one action, evaluate, fix if needed

Supports:
  - Filesystem-based state persistence (resume after interruption)
  - Per-subtask fix retry limit
  - Global round count cap
  - Structured logging via PhoneClawRecorder
"""

import re
import sys
import time
from pathlib import Path
from typing import Optional

# Allow running from the Android-Lab root
sys.path.insert(0, str(Path(__file__).parent.parent))

from PhoneClaw.state import TaskState, StateManager, SubTask
from PhoneClaw.recorder import PhoneClawRecorder
from PhoneClaw.prompts import (
    EXECUTOR_SYSTEM_PROMPT,
    EXECUTOR_FIX_CONTEXT_TEMPLATE,
    FINAL_ANSWER_SYSTEM_PROMPT,
    FINAL_ANSWER_USER_TEMPLATE,
)

# Android-Lab utility for extracting code snippets from VLM responses
try:
    from evaluation.definition import get_code_snippet_cot_v3
except ImportError:
    # Standalone fallback used when the Android-Lab package is unavailable.
    def get_code_snippet_cot_v3(response: str) -> Optional[str]:
        """Fallback: extract text between <CALLED_FUNCTION>…</CALLED_FUNCTION>."""
        found = re.search(
            r'<CALLED_FUNCTION>\s*(.*?)\s*</CALLED_FUNCTION>', response, re.DOTALL
        )
        if found is None:
            return None
        return found.group(1).strip()


class RalphLoop:
    """
    Ralph Loop controller.

    Drives the EXECUTE → EVALUATE → FIX → REPEAT cycle over a TaskState:
    the outer loop iterates subtasks (advancing on PASS), the inner loop
    (``_execute_subtask``) runs one action per round and retries with fix
    hints on FAIL, bounded by ``max_fix_retries`` per subtask and
    ``max_rounds`` globally.

    Usage::

        loop = RalphLoop(
            controller=controller,
            executor=executor,
            agent=agent,
            evaluator=evaluator,
            recorder=recorder,
            state_manager=state_manager,
            max_rounds=100,
            max_fix_retries=3,
            request_interval=2.0,
        )
        loop.run(state)
    """

    def __init__(
        self,
        controller,
        executor,
        agent,
        evaluator,
        recorder: PhoneClawRecorder,
        state_manager: StateManager,
        max_rounds: int = 100,
        max_fix_retries: int = 3,
        request_interval: float = 2.0,
        skip_failed_subtasks: bool = True,
        experience=None,
    ):
        """
        Args:
            controller: IOSController for WDA device control.
            executor: IOSExecutor for action dispatch.
            agent: VLM agent with act() and prompt_to_message_visual() methods.
            evaluator: SubTaskEvaluator instance.
            recorder: PhoneClawRecorder for trace logging.
            state_manager: StateManager for filesystem persistence.
            max_rounds: Global cap on total action rounds across all subtasks.
            max_fix_retries: Max fix attempts per subtask before giving up and advancing.
            request_interval: Seconds to sleep between action rounds.
            skip_failed_subtasks: If True, advance to next subtask after max_fix_retries.
                                   If False, abort the entire task.
            experience: Optional ExperienceLog instance.  When provided, relevant
                        lessons are injected into every Executor prompt, and new
                        lessons are extracted after the task completes.
        """
        self.controller = controller
        self.executor = executor
        self.agent = agent
        self.evaluator = evaluator
        self.recorder = recorder
        self.state_manager = state_manager
        self.max_rounds = max_rounds
        self.max_fix_retries = max_fix_retries
        self.request_interval = request_interval
        self.skip_failed_subtasks = skip_failed_subtasks
        self.experience = experience

        # Set by run(); callers (e.g. run_phoneclaw) can read this after run() returns
        self.last_final_answer: Optional[str] = None

        # Tracks which app is currently in the foreground (inferred from launch() calls)
        self._current_app: Optional[str] = None
        # Per-subtask execution log built during run(); used for experience extraction
        self._subtask_logs: list[dict] = []

    # ------------------------------------------------------------------
    # Main entry point
    # ------------------------------------------------------------------

    def run(self, state: TaskState) -> TaskState:
        """
        Execute the Ralph Loop until all subtasks pass or limits are reached.

        Args:
            state: Initial (or resumed) TaskState.

        Returns:
            Final TaskState after the loop completes.
        """
        # Reset per-task state
        self._current_app = None
        self._subtask_logs = []

        print(f"\n{'='*60}")
        print(f"[RalphLoop] Starting task: {state.task_instruction}")
        print(f"[RalphLoop] Subtasks: {len(state.subtasks)}, Max rounds: {self.max_rounds}")
        print(f"{'='*60}\n")

        # Outer loop: one pass per subtask attempt; _execute_subtask runs the
        # inner EXECUTE → EVALUATE → FIX cycle and advances state on success.
        while not state.is_complete and state.round_count < self.max_rounds:
            subtask = state.current_subtask
            if subtask is None:
                break

            print(f"\n[RalphLoop] --- Subtask #{subtask.id}: {subtask.instruction} ---")
            print(f"[RalphLoop]     Criteria: {subtask.success_criteria}")

            # Update recorder with current subtask context
            self.recorder.set_current_subtask(
                idx=state.current_subtask_idx,
                instruction=subtask.instruction,
                criteria=subtask.success_criteria,
            )

            # Reset executor's finish flag for this subtask
            self.executor.reset_finish()

            # --- EXECUTE phase ---
            fix_hint: Optional[str] = None
            advanced = self._execute_subtask(state, subtask, fix_hint)

            if advanced:
                # Successfully advanced: persist state and continue outer loop
                self.state_manager.save(state)
                continue

            # If we reach here the subtask either ran out of fix retries or was skipped
            self.state_manager.save(state)

            # _execute_subtask sets status="failed" when skip_failed_subtasks
            # is False; in that case abort the whole task.
            if state.status == "failed":
                break

        # Mark task as completed if all subtasks done
        if state.is_complete and state.status == "running":
            state.status = "completed"
        elif state.round_count >= self.max_rounds and not state.is_complete:
            state.status = "failed"
            print(f"\n[RalphLoop] Max rounds ({self.max_rounds}) reached. Task incomplete.")

        # --- FINAL ANSWER: extract the answer to the user's question ---
        final_answer: Optional[str] = None
        if state.status == "completed":
            final_answer = self._generate_final_answer(state)

        # Expose for callers (e.g. run_phoneclaw memory recording)
        self.last_final_answer = final_answer

        # --- EXPERIENCE: extract lessons from this task's trace ---
        if self.experience is not None and self._subtask_logs:
            print("\n[Experience] Extracting lessons from task trace...")
            self.experience.extract_and_record(
                task=state.task_instruction,
                subtask_logs=self._subtask_logs,
                final_answer=final_answer,
                agent=self.agent,
            )

        self.state_manager.save(state)
        self.recorder.log_task_complete(
            all_passed=state.status == "completed",
            summary=state.summary(),
            final_answer=final_answer,
        )

        print(f"\n{'='*60}")
        print(f"[RalphLoop] Task finished. Status: {state.status}")
        print(state.summary())
        if final_answer:
            print(f"\n{'='*60}")
            print("[PhoneClaw] ANSWER")
            print(f"{'='*60}")
            print(final_answer)
            print(f"{'='*60}\n")
        print(f"{'='*60}\n")

        return state

    # ------------------------------------------------------------------
    # Final answer extraction (runs once after all subtasks complete)
    # ------------------------------------------------------------------

    def _generate_final_answer(self, state: TaskState) -> Optional[str]:
        """
        After all subtasks pass, take a fresh screenshot and ask the VLM to
        directly answer the user's original question based on what is on screen.

        Also uses any finish() message stored by the executor as a fallback.

        Returns:
            The answer string, or None if extraction failed.
        """
        # If the executor already recorded an explicit finish() answer, use it
        # as a first-pass hint (we still run the VLM for a clean natural-language answer)
        finish_hint = getattr(self.executor, "finish_message", None)

        try:
            # Take a fresh screenshot for the final answer step
            self.executor.update_screenshot(prefix="final_answer")
            screenshot = self.executor.current_screenshot_path

            user_content = FINAL_ANSWER_USER_TEMPLATE.format(
                task_instruction=state.task_instruction,
            )

            # If the executor captured information via finish(), include it as context
            if finish_hint:
                user_content += (
                    f"\n\nNote: the agent's last action reported: \"{finish_hint}\""
                )

            system_msg = {"role": "system", "content": FINAL_ANSWER_SYSTEM_PROMPT}
            user_messages = self.agent.prompt_to_message_visual(user_content, screenshot)
            messages = [system_msg, *user_messages]

            print("\n[RalphLoop] Generating final answer...")
            answer = self.agent.act(messages)
            return answer.strip() if answer else None

        except Exception as e:
            import traceback
            traceback.print_exc()
            print(f"[RalphLoop] Warning: could not generate final answer: {e}")
            return finish_hint  # fall back to executor's finish() message

    # ------------------------------------------------------------------
    # Subtask execution loop
    # ------------------------------------------------------------------

    def _execute_subtask(
        self,
        state: TaskState,
        subtask: SubTask,
        initial_fix_hint: Optional[str],
    ) -> bool:
        """
        Inner EXECUTE → EVALUATE → FIX → REPEAT loop for a single subtask.

        Args:
            state: Shared TaskState; round_count is incremented per action.
            subtask: The subtask being executed; fix_retries is mutated here.
            initial_fix_hint: Optional evaluator feedback carried in from a
                              previous attempt (currently always None in run()).

        Returns:
            True if the subtask passed and state was advanced.
            False if max retries exceeded or task aborted.
        """
        fix_hint = initial_fix_hint

        # Accumulate (action, eval_reason) pairs for every failed attempt so the
        # VLM can see exactly what it already tried and avoid repeating itself.
        failed_actions: list[str] = []

        # Detect current app from launch() call in the subtask instruction
        app_match = re.search(r'launch\("([^"]+)"\)', subtask.instruction)
        if app_match:
            self._current_app = app_match.group(1)

        # Per-subtask execution log entry for experience extraction
        subtask_entry: dict = {
            "instruction": subtask.instruction,
            "app": self._current_app,
            "passed": False,
            "actions": [],  # list of {action, passed, reason}
        }

        # Each subtask gets its own action rounds, bounded by global max_rounds
        while state.round_count < self.max_rounds:
            state.round_count += 1
            print(f"\n[RalphLoop] Round {state.round_count} | Subtask #{subtask.id} | "
                  f"Fix attempt {subtask.fix_retries}/{self.max_fix_retries}")

            # --- EXECUTE: one action step ---
            rsp, exe_res, code_snippet, prompt_his = self._run_action_step(
                state, subtask, fix_hint, failed_actions
            )

            # Brief pause to let the UI settle
            time.sleep(self.request_interval)

            # --- EVALUATE ---
            # Take a fresh screenshot after the action for evaluation
            self.executor.update_screenshot(prefix=f"eval_{state.round_count}")
            eval_screenshot = (
                self.executor.current_screenshot_path
                or self.recorder.current_screenshot_path
            )

            eval_result = self.evaluator.evaluate(
                screenshot_path=eval_screenshot,
                success_criteria=subtask.success_criteria,
            )

            print(f"[Evaluator] Result: {'PASS' if eval_result.passed else 'FAIL'} — {eval_result.reason}")

            # Record this action in the subtask log
            subtask_entry["actions"].append({
                "action": code_snippet or "(no action extracted)",
                "passed": eval_result.passed or self.executor.is_finish,
                "reason": eval_result.reason[:120],
            })

            # Log the step with eval result
            self.recorder.update_after_cot(
                exe_res=exe_res,
                response=rsp,
                prompt_his=prompt_his,
                code_snippet=code_snippet,
                eval_result=eval_result.to_dict(),
                fix_attempt=subtask.fix_retries,
            )
            self.recorder.turn_number += 1

            # --- PASS: advance to next subtask ---
            # An explicit finish() call from the agent also counts as success.
            if eval_result.passed or self.executor.is_finish:
                reason = eval_result.reason if eval_result.passed else "Agent called finish()"
                subtask_entry["passed"] = True
                self._subtask_logs.append(subtask_entry)
                state.mark_current_passed(reason)
                self.recorder.log_subtask_result(
                    subtask_idx=state.current_subtask_idx,
                    subtask={"instruction": subtask.instruction, "success_criteria": subtask.success_criteria},
                    passed=True,
                    reason=reason,
                )
                state.advance()
                print(f"[RalphLoop] Subtask #{subtask.id} PASSED. Advancing.")
                return True

            # --- FAIL: record what failed, check retry budget ---

            # Build a concise failure entry: "action → short reason"
            action_label = code_snippet or "(no action extracted)"
            reason_short = eval_result.reason[:100]
            repeat_warning = ""

            # Detect repeated identical action — make it explicit in the log
            if failed_actions and action_label != "(no action extracted)":
                prev_actions = [e.split(" →")[0].strip() for e in failed_actions]
                repeat_count = prev_actions.count(action_label)
                if repeat_count >= 1:
                    repeat_warning = f"  ⚠ REPEATED {repeat_count + 1}×"
                    print(f"[RalphLoop] Warning: identical action '{action_label}' "
                          f"has been tried {repeat_count + 1} time(s) and keeps failing.")

            failed_actions.append(f"  {action_label} → \"{reason_short}\"{repeat_warning}")

            subtask.fix_retries += 1

            if subtask.fix_retries > self.max_fix_retries:
                # Retry budget exhausted: either skip this subtask or abort
                # the whole task depending on skip_failed_subtasks.
                subtask_entry["passed"] = False
                self._subtask_logs.append(subtask_entry)
                state.mark_current_failed(eval_result.reason)
                self.recorder.log_subtask_result(
                    subtask_idx=state.current_subtask_idx,
                    subtask={"instruction": subtask.instruction, "success_criteria": subtask.success_criteria},
                    passed=False,
                    reason=eval_result.reason,
                )
                print(f"[RalphLoop] Subtask #{subtask.id} FAILED after {self.max_fix_retries} retries.")

                if self.skip_failed_subtasks:
                    print(f"[RalphLoop] Skipping to next subtask.")
                    state.advance()
                    return False
                else:
                    print(f"[RalphLoop] Aborting task.")
                    state.status = "failed"
                    return False

            # --- FIX: pass latest evaluator reason forward ---
            fix_hint = eval_result.reason
            print(f"[RalphLoop] FIX attempt {subtask.fix_retries}/{self.max_fix_retries}. "
                  f"Hint: {fix_hint[:80]}...")

        # Global round cap reached inside inner loop
        return False

    # ------------------------------------------------------------------
    # Single action step
    # ------------------------------------------------------------------

    def _run_action_step(
        self,
        state: TaskState,
        subtask: SubTask,
        fix_hint: Optional[str],
        failed_actions: Optional[list] = None,
    ):
        """
        Capture screenshot, call VLM, execute action, update recorder.

        Args:
            state: Shared TaskState (provides the overall task instruction).
            subtask: The subtask whose instruction drives the prompt.
            fix_hint: Latest evaluator failure reason, if retrying.
            failed_actions: List of strings describing previously failed actions
                            for this subtask (injected into the fix context so the
                            VLM knows exactly what NOT to repeat).

        Returns:
            (response, exe_res, code_snippet, prompt_his)
        """
        # Capture screenshot + XML (no labeled overlay — agent uses raw coordinates)
        self.recorder.update_before(
            controller=self.controller,
            need_screenshot=True,
            need_labeled=False,
        )

        image_path = self.recorder.current_screenshot_path

        # Build executor system prompt with current subtask context
        fix_context = ""
        if fix_hint and subtask.fix_retries > 0:
            summary = (
                "\n".join(failed_actions)
                if failed_actions
                else "  (none recorded)"
            )
            fix_context = EXECUTOR_FIX_CONTEXT_TEMPLATE.format(
                fix_attempt=subtask.fix_retries,
                fail_reason=fix_hint,
                success_criteria=subtask.success_criteria,
                failed_actions_summary=summary,
            )

        # Inject relevant past-execution hints from the experience log
        experience_notes = ""
        if self.experience is not None:
            experience_notes = self.experience.get_hints_for(
                app_name=self._current_app,
                subtask_instruction=subtask.instruction,
            )

        system_content = (
            EXECUTOR_SYSTEM_PROMPT.format(
                subtask_instruction=subtask.instruction,
                overall_task=state.task_instruction,
            )
            + fix_context
            + experience_notes
        )

        system_msg = {"role": "system", "content": system_content}

        # Build user message with history + current screenshot
        # (only the last 4 history entries are included to bound prompt size)
        history_tail = self.recorder.history[-4:] if self.recorder.history else []
        history_text = "\n".join(history_tail) if history_tail else "[]"
        user_text = (
            f"Current subtask: {subtask.instruction}\n"
            f"History:\n{history_text}\n"
            f"Current screen: <image>"
        )

        try:
            user_messages = self.agent.prompt_to_message_visual(user_text, image_path)
            messages = [system_msg, *user_messages]
            rsp = self.agent.act(messages)
        except Exception as e:
            import traceback
            traceback.print_exc()
            rsp = f"Error calling agent: {e}"

        # Extract and execute the code snippet
        code_snippet = get_code_snippet_cot_v3(rsp)

        if code_snippet:
            try:
                exe_res = self.executor(code_snippet)
            except Exception as e:
                print(f"[RalphLoop] Error executing code snippet: {e}")
                exe_res = {"operation": "error", "action": "error", "kwargs": {"error": str(e)}}
        else:
            print("[RalphLoop] Warning: Could not extract code snippet from response.")
            exe_res = {"operation": "skip", "action": "skip", "kwargs": {"reason": "No code snippet"}}

        # Extract state assessment for history
        pattern = r'<STATE_ASSESSMENT>\s*(.*?)\s*</STATE_ASSESSMENT>'
        match = re.search(pattern, rsp, re.DOTALL)
        prompt_his = match.group(1) if match else None

        return rsp, exe_res, code_snippet, prompt_his


================================================
FILE: PhoneClaw/memory.py
================================================
"""User memory / profile for PhoneClaw interactive mode.

Persists a JSON file at PhoneClaw/data/user_profile.json that accumulates
knowledge across sessions:

  - Task history  – what was done, what answers were found
  - User profile  – inferred name, location, language
  - App statistics – how often each app is used
  - Insights      – facts/habits extracted by LLM from completed tasks
  - Patterns      – frequently-used task types

The profile is loaded once at session start and injected into the Planner
prompt so the LLM can make more informed decisions (e.g. knows the user's
city, preferred apps, or past answers to similar questions).

After every completed task the module calls the VLM to extract any new
user insights and stores them in the profile.
"""

from __future__ import annotations

import json
import re
from datetime import datetime
from pathlib import Path
from typing import Optional, TYPE_CHECKING

if TYPE_CHECKING:
    pass

# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------

# Directory and default on-disk location of the persisted JSON profile.
DEFAULT_PROFILE_DIR = Path(__file__).parent / "data"
DEFAULT_PROFILE_PATH = DEFAULT_PROFILE_DIR / "user_profile.json"
# Bump this when the profile layout changes; UserMemory._load() migrates
# any file whose stored schema_version is lower.
SCHEMA_VERSION = 1

# Maximum number of task-history entries kept in the file
MAX_TASK_HISTORY = 200
# Maximum number of insight entries kept
MAX_INSIGHTS = 100


# ---------------------------------------------------------------------------
# UserMemory
# ---------------------------------------------------------------------------

class UserMemory:
    """
    Persistent user profile and task history.

    Typical usage::

        memory = UserMemory()               # load or create profile
        memory.start_session()              # increment session counter

        # … before planning …
        context = memory.get_planner_context()   # inject into planner prompt

        # … after task …
        task_id = memory.record_task(...)
        memory.extract_insights(task, answer, task_id, agent)
    """

    def __init__(self, profile_path: Optional[str] = None):
        self.path = Path(profile_path) if profile_path else DEFAULT_PROFILE_PATH
        self.path.parent.mkdir(parents=True, exist_ok=True)
        self.data = self._load()

    # ------------------------------------------------------------------
    # Load / save
    # ------------------------------------------------------------------

    def _load(self) -> dict:
        """Load the profile from disk; migrate old schemas; fall back to empty."""
        if self.path.exists():
            try:
                with open(self.path, encoding="utf-8") as f:
                    raw = json.load(f)
                # Migrate older schema if needed
                if raw.get("schema_version", 0) < SCHEMA_VERSION:
                    raw = self._migrate(raw)
                return raw
            except Exception as exc:
                print(f"[Memory] Warning: could not load profile ({exc}). Starting fresh.")
        return self._empty_profile()

    def save(self) -> None:
        """Persist the profile atomically and stamp ``last_updated``."""
        self.data["last_updated"] = datetime.now().isoformat()
        # Write to a sibling temp file and swap it in with Path.replace()
        # (atomic on POSIX and Windows), so a crash mid-write cannot leave
        # a truncated/corrupt JSON profile as the only copy.
        tmp_path = self.path.with_suffix(self.path.suffix + ".tmp")
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(self.data, f, ensure_ascii=False, indent=2)
        tmp_path.replace(self.path)

    def _migrate(self, old: dict) -> dict:
        """Best-effort migration from older schema versions."""
        fresh = self._empty_profile()
        # Copy over any keys that still exist in the new schema.  Dict
        # sections are merged over the fresh defaults so that sub-keys
        # introduced in newer schemas (e.g. a new "stats" counter) keep
        # their default value instead of going missing and raising
        # KeyError later.
        for k, default in fresh.items():
            if k in old:
                if isinstance(default, dict) and isinstance(old[k], dict):
                    merged = dict(default)
                    merged.update(old[k])
                    fresh[k] = merged
                else:
                    fresh[k] = old[k]
        fresh["schema_version"] = SCHEMA_VERSION
        return fresh

    def _empty_profile(self) -> dict:
        """Return a fresh profile dict with all sections at their defaults."""
        return {
            "schema_version": SCHEMA_VERSION,
            "created_at": datetime.now().isoformat(),
            "last_updated": datetime.now().isoformat(),
            "stats": {
                "total_sessions": 0,
                "total_tasks": 0,
                "completed_tasks": 0,
                "failed_tasks": 0,
            },
            "profile": {
                "inferred_name": None,
                "inferred_location": None,
                "primary_language": "zh-CN",
                "timezone_hint": None,
                "notes": [],
            },
            "app_usage": {},
            "task_history": [],
            "insights": [],
            "frequent_patterns": {},
        }

    # ------------------------------------------------------------------
    # Session management
    # ------------------------------------------------------------------

    def start_session(self) -> None:
        """Call once at the very start of an interactive session."""
        self.data["stats"]["total_sessions"] += 1
        self.save()

    # ------------------------------------------------------------------
    # Task recording
    # ------------------------------------------------------------------

    def record_task(
        self,
        task: str,
        status: str,
        final_answer: Optional[str],
        apps_used: list[str],
        rounds: int,
        duration_seconds: float,
    ) -> int:
        """
        Append a completed task to history and update counters.

        Returns:
            task_id — 1-based sequential ID for this task entry.
        """
        stats = self.data["stats"]
        stats["total_tasks"] += 1
        if status == "completed":
            stats["completed_tasks"] += 1
        else:
            stats["failed_tasks"] += 1

        # Update per-app usage counters
        now = datetime.now().isoformat()
        for app in apps_used:
            entry = self.data["app_usage"].setdefault(app, {"count": 0, "last_used": None})
            entry["count"] += 1
            entry["last_used"] = now

        history: list = self.data["task_history"]
        # Derive the next ID from the last stored entry rather than from
        # len(history): once the history has been trimmed to
        # MAX_TASK_HISTORY, len(history) stops growing and every new task
        # would get the same duplicate ID (breaking insight source links).
        task_id = (history[-1]["id"] + 1) if history else 1
        history.append({
            "id": task_id,
            "timestamp": now,
            "task": task,
            "status": status,
            "final_answer": final_answer,
            "apps_used": apps_used,
            "rounds": rounds,
            "duration_seconds": round(duration_seconds, 1),
        })

        # Trim to keep file size manageable
        if len(history) > MAX_TASK_HISTORY:
            self.data["task_history"] = history[-MAX_TASK_HISTORY:]

        self.save()
        return task_id

    def add_insight(
        self,
        text: str,
        source_task_id: int,
        confidence: str = "medium",
    ) -> bool:
        """Store a single insight string.

        Performs semantic deduplication via embedding cosine similarity
        (falls back to token-level Jaccard when the embedding API is
        unavailable).  If a semantically equivalent insight already exists
        its ``reinforced`` counter is incremented and its confidence may be
        upgraded; no new entry is created.

        Returns:
            True if the insight was new and added; False if it was a duplicate.
        """
        from PhoneClaw.embeddings import is_semantic_duplicate

        text = text.strip()
        if not text:
            return False

        existing = self.data["insights"]
        existing_texts = [i["text"] for i in existing]

        dup_idx = is_semantic_duplicate(text, existing_texts)
        if dup_idx >= 0:
            # Duplicate: reinforce the existing entry instead of appending.
            entry = existing[dup_idx]
            entry["reinforced"] = entry.get("reinforced", 1) + 1
            # Only ever upgrade confidence, never downgrade it.
            _conf_weight = {"high": 3, "medium": 2, "low": 1}
            if _conf_weight.get(confidence, 0) > _conf_weight.get(
                entry.get("confidence", "medium"), 0
            ):
                entry["confidence"] = confidence
            entry["last_seen"] = datetime.now().isoformat()
            self.save()
            return False

        self.data["insights"].append({
            "text": text,
            "confidence": confidence,
            "source_task_id": source_task_id,
            "timestamp": datetime.now().isoformat(),
            "reinforced": 1,
        })

        if len(self.data["insights"]) > MAX_INSIGHTS:
            self.data["insights"] = self.data["insights"][-MAX_INSIGHTS:]

        self.save()
        return True

    # ------------------------------------------------------------------
    # LLM-powered insight extraction
    # ------------------------------------------------------------------

    def extract_insights(
        self,
        task: str,
        final_answer: Optional[str],
        task_id: int,
        agent,
    ) -> list[str]:
        """
        Ask the VLM to extract user-relevant facts from a completed task.

        The agent must implement: agent.act(messages: list[dict]) -> str

        Returns:
            List of new insight strings that were added to the profile.
        """
        from PhoneClaw.prompts import MEMORY_EXTRACT_SYSTEM_PROMPT, MEMORY_EXTRACT_USER_TEMPLATE

        context_parts = [f"Task: {task}"]
        if final_answer:
            context_parts.append(f"Result/Answer: {final_answer}")
        task_context = "\n".join(context_parts)

        # Tell the model what we already know so it does not re-extract
        # duplicates of existing facts.
        existing_summary = self._existing_profile_summary()

        user_content = MEMORY_EXTRACT_USER_TEMPLATE.format(
            task_context=task_context,
            existing_profile=existing_summary,
        )

        messages = [
            {"role": "system", "content": MEMORY_EXTRACT_SYSTEM_PROMPT},
            {"role": "user", "content": user_content},
        ]

        try:
            response = agent.act(messages)
            raw_insights = self._parse_insights(response)

            added: list[str] = []
            for text in raw_insights:
                if self.add_insight(text, source_task_id=task_id):
                    added.append(text)

            if added:
                print(f"[Memory] +{len(added)} new insight(s) extracted.")
                for ins in added:
                    print(f"  • {ins}")
            return added

        except Exception as exc:
            # Insight extraction is best-effort; never let it break the
            # main task flow.
            print(f"[Memory] Could not extract insights: {exc}")
            return []

    # ------------------------------------------------------------------
    # Memory-first retrieval
    # ------------------------------------------------------------------

    def query(self, question: str, agent) -> tuple[bool, Optional[str]]:
        """
        Check whether the user's profile / task history already contains
        a confident answer to *question*.  If yes, return it immediately
        so the caller can skip device interaction entirely.

        The agent must implement: agent.act(messages: list[dict]) -> str

        Returns:
            (can_answer, answer)
            can_answer – True if the memory contains a reliable answer
            answer     – the answer string, or None when can_answer is False
        """
        profile_text = self._build_full_profile_text()
        if not profile_text:
            # Empty profile: nothing to answer from; skip the LLM call.
            return False, None

        from PhoneClaw.prompts import (
            MEMORY_QUERY_SYSTEM_PROMPT,
            MEMORY_QUERY_USER_TEMPLATE,
        )

        user_content = MEMORY_QUERY_USER_TEMPLATE.format(
            question=question,
            profile=profile_text,
        )

        messages = [
            {"role": "system", "content": MEMORY_QUERY_SYSTEM_PROMPT},
            {"role": "user", "content": user_content},
        ]

        try:
            response = agent.act(messages)
            can_answer, answer = self._parse_query_response(response)
            return can_answer, answer
        except Exception as exc:
            print(f"[Memory] Query error: {exc}")
            return False, None

    def _parse_query_response(self, response: str) -> tuple[bool, Optional[str]]:
        """Parse the VLM's JSON response for a memory query."""
        # Try strict JSON parse on the outermost {...} span.
        try:
            start = response.index("{")
            end = response.rindex("}") + 1
            data = json.loads(response[start:end])
            can_answer = bool(data.get("can_answer", False))
            answer = data.get("answer") or None
            if can_answer and answer:
                return True, str(answer).strip()
            return False, None
        except (ValueError, json.JSONDecodeError):
            pass

        # Fallback: if the model returned NOT_FOUND as plain text, treat as miss
        if "NOT_FOUND" in response or "无法回答" in response or "not found" in response.lower():
            return False, None

        # If there is a non-trivial response that doesn't look like a refusal,
        # treat it as a direct answer (last-resort heuristic)
        stripped = response.strip()
        if len(stripped) > 10:
            return True, stripped

        return False, None

    def _build_full_profile_text(self) -> str:
        """
        Build a comprehensive text dump of everything in the profile.

        This is what the VLM reads when deciding whether the memory can
        answer a question without touching the device.
        """
        parts: list[str] = []

        p = self.data["profile"]
        if p.get("inferred_name"):
            parts.append(f"User name: {p['inferred_name']}")
        if p.get("inferred_location"):
            parts.append(f"Location: {p['inferred_location']}")
        if p.get("primary_language"):
            parts.append(f"Language preference: {p['primary_language']}")
        if p.get("notes"):
            parts.append("Notes: " + "; ".join(p["notes"]))

        # All insights (the richest source)
        if self.data["insights"]:
            parts.append("\n## User Facts & Insights")
            for ins in self.data["insights"]:
                parts.append(f"  - {ins['text']}")

        # Task history with recorded answers (most recent 50 only).
        answered = [t for t in self.data["task_history"] if t.get("final_answer")]
        if answered:
            parts.append("\n## Past Task Answers")
            for t in answered[-50:]:
                ts = t["timestamp"][:10]
                parts.append(f"  [{ts}] Q: {t['task']}")
                parts.append(f"         A: {t['final_answer']}")

        # App usage stats (useful for "which apps do I use most?" type queries)
        if self.data["app_usage"]:
            top_apps = sorted(
                self.data["app_usage"].items(),
                key=lambda x: x[1]["count"],
                reverse=True,
            )[:10]
            parts.append("\n## App Usage")
            for app, v in top_apps:
                parts.append(f"  - {app}: {v['count']} times")

        return "\n".join(parts) if parts else ""

    def _parse_insights(self, response: str) -> list[str]:
        """Parse the VLM response into a list of insight strings."""
        # Try JSON array first
        try:
            start = response.index("[")
            end = response.rindex("]") + 1
            items = json.loads(response[start:end])
            return [str(item).strip() for item in items if str(item).strip()]
        except (ValueError, json.JSONDecodeError):
            pass

        # Fallback: extract bullet / numbered list lines; the length guard
        # drops list markers, headers, and other trivial lines.
        lines = []
        for line in response.splitlines():
            stripped = re.sub(r"^[\s\-\*\d\.\)]+", "", line).strip()
            if len(stripped) > 8:
                lines.append(stripped)
        return lines[:10]

    def _existing_profile_summary(self) -> str:
        """Compact summary of already-known facts (helps LLM avoid duplicates)."""
        p = self.data["profile"]
        parts: list[str] = []

        if p.get("inferred_name"):
            parts.append(f"User name: {p['inferred_name']}")
        if p.get("inferred_location"):
            parts.append(f"Location: {p['inferred_location']}")
        if p.get("notes"):
            parts.append("Profile notes: " + "; ".join(p["notes"][:5]))

        top_apps = sorted(
            self.data["app_usage"].items(),
            key=lambda x: x[1]["count"],
            reverse=True,
        )[:6]
        if top_apps:
            parts.append(
                "Frequent apps: "
                + ", ".join(f"{a}({v['count']}×)" for a, v in top_apps)
            )

        if self.data["insights"]:
            recent = self.data["insights"][-6:]
            parts.append(
                "Recent insights:\n"
                + "\n".join(f"  - {i['text']}" for i in recent)
            )

        return "\n".join(parts) if parts else "(none yet)"

    # ------------------------------------------------------------------
    # Planner context injection
    # ------------------------------------------------------------------

    def get_planner_context(self) -> str:
        """
        Return a formatted string to inject into the Planner system prompt.

        Provides background about the user so the planner can produce
        better-informed subtask lists.

        Returns empty string if the profile has no useful information yet.
        """
        p = self.data["profile"]
        lines: list[str] = []

        if p.get("inferred_name"):
            lines.append(f"- Name / handle: {p['inferred_name']}")
        if p.get("inferred_location"):
            lines.append(f"- Location: {p['inferred_location']}")
        if p.get("primary_language"):
            lines.append(f"- Primary language: {p['primary_language']}")

        top_apps = sorted(
            self.data["app_usage"].items(),
            key=lambda x: x[1]["count"],
            reverse=True,
        )[:6]
        if top_apps:
            app_str = ", ".join(f"{a} ({v['count']}×)" for a, v in top_apps)
            lines.append(f"- Frequently used apps: {app_str}")

        recent_insights = self.data["insights"][-10:]
        if recent_insights:
            lines.append("- Known facts about this user:")
            for ins in recent_insights:
                lines.append(f"  • {ins['text']}")

        recent_tasks = self.data["task_history"][-4:]
        if recent_tasks:
            lines.append("- Recent tasks (for context):")
            for t in recent_tasks:
                icon = "✓" if t["status"] == "completed" else "✗"
                answer_hint = f" → {t['final_answer'][:60]}" if t.get("final_answer") else ""
                lines.append(f"  {icon} {t['task'][:80]}{answer_hint}")

        if not lines:
            return ""

        return "## User Profile (from memory)\n" + "\n".join(lines)

    # ------------------------------------------------------------------
    # Display helpers
    # ------------------------------------------------------------------

    def session_banner(self) -> str:
        """Brief banner shown at the start of an interactive session."""
        stats = self.data["stats"]
        p = self.data["profile"]

        parts = [
            f"[Memory] Profile: {self.path}",
            (
                f"[Memory] Sessions: {stats['total_sessions']}  |  "
                f"Tasks: {stats['completed_tasks']} completed / "
                f"{stats['failed_tasks']} failed  |  "
                f"Insights: {len(self.data['insights'])}"
            ),
        ]

        if p.get("inferred_name"):
            parts.append(f"[Memory] User: {p['inferred_name']}")
        if p.get("inferred_location"):
            parts.append(f"[Memory] Location: {p['inferred_location']}")

        return "\n".join(parts)

    def get_profile_path(self) -> str:
        """Return the profile file location as a plain string."""
        return str(self.path)

    def __repr__(self) -> str:
        stats = self.data["stats"]
        return (
            f"UserMemory(tasks={stats['total_tasks']}, "
            f"insights={len(self.data['insights'])}, "
            f"path={self.path})"
        )


================================================
FILE: PhoneClaw/planner.py
================================================
"""Task Planner for PhoneClaw.

Uses an LLM to decompose a high-level task description into an ordered list of
atomic subtasks, each with a visually-verifiable success criterion.
"""

import json
import re
from typing import Li
Download .txt
gitextract_hys_i7ka/

├── .gitignore
├── Communication.md
├── LICENSE
├── PhoneClaw/
│   ├── README.md
│   ├── __init__.py
│   ├── actions.py
│   ├── agent.py
│   ├── connection.py
│   ├── controller.py
│   ├── embeddings.py
│   ├── evaluator.py
│   ├── executor.py
│   ├── experience.py
│   ├── hierarchy.py
│   ├── keepalive.py
│   ├── labeling.py
│   ├── learn.py
│   ├── loop.py
│   ├── memory.py
│   ├── planner.py
│   ├── prompts.py
│   ├── recorder.py
│   ├── run_phoneclaw.py
│   ├── screenshot.py
│   └── state.py
├── README.md
├── adb_client.py
├── agent/
│   ├── __init__.py
│   ├── llm/
│   │   ├── glm4.py
│   │   └── qwen_llm_model.py
│   ├── mllm/
│   │   ├── claude_model.py
│   │   └── qwen_model.py
│   ├── model.py
│   └── utils.py
├── app_data/
│   └── README.md
├── configs/
│   ├── example_xml_cloud_hyper.yaml
│   ├── glm-4-plus-mac-XML.yaml
│   ├── gpt-4o-linux-XML.yaml
│   └── gpt-4o-mac-SoM.yaml
├── docs/
│   ├── modify_androidlab.md
│   ├── new_apps.md
│   ├── prepare_for_linux.md
│   └── prepare_for_mac.md
├── eval.py
├── evaluation/
│   ├── auto_test.py
│   ├── config/
│   │   ├── bluecoins.yaml
│   │   ├── calendar.yaml
│   │   ├── cantook.yaml
│   │   ├── chrome.yaml
│   │   ├── clock.yaml
│   │   ├── contacts.yaml
│   │   ├── gmail.yaml
│   │   ├── map.yaml
│   │   ├── pimusic.yaml
│   │   ├── reddit.yaml
│   │   ├── setting.yaml
│   │   ├── tiktok.yaml
│   │   └── zoom.yaml
│   ├── configs.py
│   ├── definition.py
│   ├── docker_utils.py
│   ├── evaluation.py
│   ├── parallel.py
│   ├── task.py
│   ├── tasks/
│   │   ├── __init__.py
│   │   ├── bluecoins/
│   │   │   ├── __init__.py
│   │   │   └── bluecoins.py
│   │   ├── calendar/
│   │   │   ├── __init__.py
│   │   │   └── calendar.py
│   │   ├── cantook/
│   │   │   ├── __init__.py
│   │   │   └── cantook.py
│   │   ├── chrome/
│   │   │   ├── __init__.py
│   │   │   └── chrome.py
│   │   ├── clock/
│   │   │   ├── __init__.py
│   │   │   └── clock.py
│   │   ├── contacts/
│   │   │   ├── __init__.py
│   │   │   └── contacts.py
│   │   ├── gmail/
│   │   │   ├── __init__.py
│   │   │   └── gmail.py
│   │   ├── human_ground_turth/
│   │   │   └── ground_truth_length.json
│   │   ├── llm_evaluator.py
│   │   ├── map_me/
│   │   │   ├── __init__.py
│   │   │   └── map.py
│   │   ├── pimusic/
│   │   │   ├── __init__.py
│   │   │   └── pimusic.py
│   │   ├── reddit/
│   │   │   ├── __init__.py
│   │   │   └── reddit.py
│   │   ├── setting/
│   │   │   ├── __init__.py
│   │   │   └── setting.py
│   │   ├── tiktok/
│   │   │   ├── __init__.py
│   │   │   └── tiktok.py
│   │   └── zoom/
│   │       ├── __init__.py
│   │       └── zoom.py
│   └── utils.py
├── generate_result.py
├── ios_agent/
│   ├── README.md
│   ├── __init__.py
│   ├── actions.py
│   ├── application/
│   │   └── mail/
│   │       ├── __init__.py
│   │       ├── pipeline.py
│   │       ├── rag_system.py
│   │       └── run_mail_and_rag.sh
│   ├── connection.py
│   ├── controller.py
│   ├── executor.py
│   ├── hierarchy.py
│   ├── labeling.py
│   ├── prompts.py
│   ├── recorder.py
│   ├── run_ios_agent.py
│   ├── screenshot.py
│   └── task.py
├── model_training/
│   ├── R1-V/
│   │   └── src/
│   │       ├── distill_r1/
│   │       │   ├── README.md
│   │       │   ├── create_hf_dataset.py
│   │       │   ├── filter_r1.py
│   │       │   ├── generate_scene_qa_pairs.ipynb
│   │       │   ├── prompt.py
│   │       │   └── query_r1.py
│   │       ├── eval/
│   │       │   ├── logs/
│   │       │   │   ├── counting_results_superclevr_200_qwen2vl_2b_instruct_grpo100_legacy.json
│   │       │   │   ├── counting_results_superclevr_200_qwen2vl_2b_instruct_legacy.json
│   │       │   │   ├── geoqa_test_qwen2vl_7b_grpo_2epochs_legacy.json
│   │       │   │   └── geoqa_test_qwen2vl_7b_instruct_legacy.json
│   │       │   ├── prompts/
│   │       │   │   ├── geoqa_test_prompts.jsonl
│   │       │   │   └── superclevr_test200_counting_problems.jsonl
│   │       │   ├── test_qwen2vl_counting_superclevr.py
│   │       │   ├── test_qwen2vl_geoqa.py
│   │       │   └── test_qwen2vl_geoqa_multigpu.py
│   │       ├── r1-v/
│   │       │   ├── .gitignore
│   │       │   ├── LICENSE
│   │       │   ├── Makefile
│   │       │   ├── configs/
│   │       │   │   ├── ddp.yaml
│   │       │   │   ├── qwen2vl_sft_config.yaml
│   │       │   │   ├── zero2.yaml
│   │       │   │   └── zero3.yaml
│   │       │   ├── local_scripts/
│   │       │   │   ├── create_vision_cot_data.py
│   │       │   │   ├── lmms_eval_qwen2vl.sh
│   │       │   │   ├── prepare_hf_data.py
│   │       │   │   ├── train_aria_moe.sh
│   │       │   │   ├── train_qwen2_vl.sh
│   │       │   │   ├── zero1_no_optimizer.json
│   │       │   │   ├── zero2.json
│   │       │   │   ├── zero2_fix.json
│   │       │   │   ├── zero3.json
│   │       │   │   ├── zero3.yaml
│   │       │   │   ├── zero3_offload.json
│   │       │   │   └── zero3_offload_fix.json
│   │       │   ├── run_grpo.sh
│   │       │   ├── setup.cfg
│   │       │   ├── setup.py
│   │       │   └── src/
│   │       │       └── open_r1/
│   │       │           ├── __init__.py
│   │       │           ├── evaluate.py
│   │       │           ├── generate.py
│   │       │           ├── grpo.py
│   │       │           ├── grpo_agent.py
│   │       │           ├── sft.py
│   │       │           └── trainer/
│   │       │               ├── __init__.py
│   │       │               ├── grpo_trainer.py
│   │       │               ├── vllm_grpo_trainer.py
│   │       │               └── vllm_grpo_trainer_modified.py
│   │       ├── requirements.txt
│   │       └── scripts/
│   │           ├── run_grpo_clevr.sh
│   │           ├── run_grpo_vllm.sh
│   │           ├── run_grpo_vllm_qwen25vl.sh
│   │           ├── run_grpo_vllm_qwen25vl_agent.sh
│   │           ├── run_sft_clevr.sh
│   │           └── test_grpo_geoqa_multigpu.sh
│   ├── README.md
│   └── llama_factory_scripts/
│       ├── deepspeed_config_zero2.json
│       ├── deepspeed_config_zero3.json
│       └── full_tuning_setting_3B_vl_fix.sh
├── page_executor/
│   ├── __init__.py
│   ├── simple_vision_executor.py
│   ├── text_executor.py
│   └── utils.py
├── prepare_data/
│   ├── README.md
│   ├── rl/
│   │   └── convert_to_hf_vl.py
│   └── visual_model_data/
│       ├── alpaca_format_o1_data_visual_cot.json
│       ├── data_maker.py
│       ├── o1_data_visual_cot_all.json
│       ├── o1_data_visual_cot_pimusic.json
│       └── sft_data_maker.py
├── recorder/
│   ├── __init__.py
│   └── json_recoder.py
├── requirements.txt
├── templates/
│   ├── __init__.py
│   ├── android_screenshot_template.py
│   ├── packages.py
│   ├── seeact_screenshot_prompts.py
│   └── text_only_mobile.py
├── test_script/
│   ├── all_test_cloud_v1_hyper.sh
│   └── all_test_cloud_v1_hyper_add.sh
├── tools/
│   ├── check_result_multiprocess.py
│   ├── delete_blank.py
│   ├── modify_mobile_to_avd.py
│   └── modify_mobile_to_docker.py
├── utils_mobile/
│   ├── and_controller.py
│   ├── specialCheck.py
│   ├── utils.py
│   └── xml_tool.py
└── vllm_script/
    └── run_server_visual_cot.py
Download .txt
SYMBOL INDEX (2015 symbols across 95 files)

FILE: PhoneClaw/actions.py
  function _physical_to_logical (line 113) | def _physical_to_logical(x: int, y: int) -> Tuple[int, int]:
  function _logical_to_physical (line 118) | def _logical_to_physical(x: int, y: int) -> Tuple[int, int]:
  function _get_wda_session_url (line 123) | def _get_wda_session_url(wda_url: str, session_id: Optional[str], endpoi...
  class IOSActionHandler (line 132) | class IOSActionHandler:
    method __init__ (line 135) | def __init__(
    method tap (line 143) | def tap(self, x: int, y: int, delay: float = 1.0) -> bool:
    method double_tap (line 172) | def double_tap(self, x: int, y: int, delay: float = 1.0) -> bool:
    method long_press (line 205) | def long_press(self, x: int, y: int, duration: float = 3.0, delay: flo...
    method swipe (line 235) | def swipe(
    method back (line 270) | def back(self, delay: float = 1.0) -> bool:
    method home (line 298) | def home(self, delay: float = 1.0) -> bool:
    method launch_app (line 310) | def launch_app(self, app_name: str, delay: float = 2.0) -> bool:
    method type_text (line 356) | def type_text(self, text: str, frequency: int = 60) -> bool:
    method clear_text (line 371) | def clear_text(self) -> bool:
    method hide_keyboard (line 393) | def hide_keyboard(self) -> bool:
    method get_current_app (line 404) | def get_current_app(self) -> str:
    method get_screen_size (line 429) | def get_screen_size(self) -> tuple[int, int]:

FILE: PhoneClaw/agent.py
  function _handle_backoff (line 26) | def _handle_backoff(details):
  function _handle_giveup (line 32) | def _handle_giveup(details):
  class OpenRouterAgent (line 36) | class OpenRouterAgent:
    method __init__ (line 51) | def __init__(
    method act (line 106) | def act(self, messages: List[Dict[str, Any]]) -> str:
    method prompt_to_message_visual (line 133) | def prompt_to_message_visual(
    method prompt_to_message_text (line 188) | def prompt_to_message_text(self, prompt: str) -> Dict[str, Any]:

FILE: PhoneClaw/connection.py
  class ConnectionType (line 10) | class ConnectionType(Enum):
  class DeviceInfo (line 17) | class DeviceInfo:
  class IOSConnection (line 27) | class IOSConnection:
    method __init__ (line 36) | def __init__(self, wda_url: str = "http://localhost:8100"):
    method list_devices (line 40) | def list_devices(self) -> list[DeviceInfo]:
    method _get_device_details (line 86) | def _get_device_details(self, udid: str) -> dict[str, str]:
    method is_connected (line 115) | def is_connected(self, device_id: Optional[str] = None) -> bool:
    method is_wda_ready (line 124) | def is_wda_ready(self, timeout: int = 2) -> bool:
    method start_wda_session (line 138) | def start_wda_session(self) -> tuple[bool, str]:
    method get_wda_status (line 166) | def get_wda_status(self) -> dict:

FILE: PhoneClaw/controller.py
  class IOSController (line 12) | class IOSController:
    method __init__ (line 20) | def __init__(self, wda_url: str = "http://localhost:8100", session_id:...
    method get_device_size (line 30) | def get_device_size(self) -> Tuple[int, int]:
    method get_current_activity (line 34) | def get_current_activity(self) -> str:
    method get_current_app (line 38) | def get_current_app(self) -> str:
    method tap (line 42) | def tap(self, x: int, y: int) -> bool:
    method text (line 46) | def text(self, input_str: str) -> bool:
    method long_press (line 55) | def long_press(self, x: int, y: int, duration: int = 3000) -> bool:
    method swipe (line 59) | def swipe(self, x: int, y: int, direction: str, dist: str = "medium", ...
    method back (line 84) | def back(self) -> bool:
    method home (line 88) | def home(self) -> bool:
    method enter (line 92) | def enter(self) -> bool:
    method launch_app (line 96) | def launch_app(self, app_name: str) -> bool:
    method save_screenshot (line 100) | def save_screenshot(self, file_path: str) -> bool:
    method get_screenshot (line 105) | def get_screenshot(self) -> Screenshot:
    method get_xml (line 109) | def get_xml(self, prefix: str = "", save_dir: str = "") -> str:

FILE: PhoneClaw/embeddings.py
  function _get_client (line 45) | def _get_client():
  function _normalise (line 69) | def _normalise(text: str) -> str:
  function _jaccard_tokens (line 76) | def _jaccard_tokens(a: str, b: str) -> float:
  function _cosine (line 87) | def _cosine(v1: list[float], v2: list[float]) -> float:
  function embed (line 101) | def embed(text: str) -> Optional[list[float]]:
  function similarity (line 127) | def similarity(text_a: str, text_b: str) -> float:
  function is_semantic_duplicate (line 139) | def is_semantic_duplicate(

FILE: PhoneClaw/evaluator.py
  class EvalResult (line 14) | class EvalResult:
    method __init__ (line 17) | def __init__(self, passed: bool, reason: str):
    method to_dict (line 21) | def to_dict(self) -> Dict[str, Any]:
    method __repr__ (line 24) | def __repr__(self) -> str:
  class SubTaskEvaluator (line 29) | class SubTaskEvaluator:
    method __init__ (line 38) | def __init__(self, agent, max_retries: int = 2):
    method evaluate (line 47) | def evaluate(
    method _parse_response (line 90) | def _parse_response(self, response: str) -> EvalResult:

FILE: PhoneClaw/executor.py
  class IOSExecutor (line 12) | class IOSExecutor:
    method __init__ (line 17) | def __init__(self, wda_url: str = "http://localhost:8100", session_id:...
    method get_screenshot (line 31) | def get_screenshot(self) -> Screenshot:
    method _get_screen_physical_size (line 43) | def _get_screen_physical_size(self) -> tuple[int, int]:
    method _rel_to_physical (line 74) | def _rel_to_physical(self, rx: float, ry: float) -> tuple[int, int]:
    method tap (line 87) | def tap(self, x: int, y: int) -> dict:
    method text (line 97) | def text(self, input_str: str) -> dict:
    method type (line 112) | def type(self, input_str: str) -> dict:
    method long_press (line 116) | def long_press(self, x: int, y: int) -> dict:
    method swipe (line 126) | def swipe(self, x: int, y: int, direction: str, dist: str = "medium") ...
    method swipe_coords (line 164) | def swipe_coords(self, x1: int, y1: int, x2: int, y2: int) -> dict:
    method back (line 180) | def back(self) -> dict:
    method home (line 190) | def home(self) -> dict:
    method wait (line 200) | def wait(self, interval: int = 5) -> dict:
    method enter (line 212) | def enter(self) -> dict:
    method launch (line 222) | def launch(self, app_name: str) -> dict:
    method finish (line 232) | def finish(self, message: Optional[str] = None) -> dict:
    method reset_finish (line 244) | def reset_finish(self):
    method get_current_app (line 249) | def get_current_app(self) -> str:
    method get_screen_size (line 253) | def get_screen_size(self) -> tuple[int, int]:
    method set_elem_list (line 257) | def set_elem_list(self, xml_path_or_string: str):
    method tap_by_index (line 267) | def tap_by_index(self, index: int) -> dict:
    method long_press_by_index (line 288) | def long_press_by_index(self, index: int) -> dict:
    method swipe_by_index (line 300) | def swipe_by_index(self, index: int, direction: str, dist: str = "medi...
    method __call__ (line 312) | def __call__(self, code_snippet: str):
    method do (line 401) | def do(self, action=None, element=None, **kwargs):
    method update_screenshot (line 483) | def update_screenshot(self, prefix=None, suffix=None):

FILE: PhoneClaw/experience.py
  class ExperienceLog (line 57) | class ExperienceLog:
    method __init__ (line 72) | def __init__(self, log_path: Optional[str] = None):
    method _load (line 81) | def _load(self) -> dict:
    method save (line 93) | def save(self) -> None:
    method _empty_log (line 98) | def _empty_log(self) -> dict:
    method _migrate (line 112) | def _migrate(self, old: dict) -> dict:
    method add_lesson (line 124) | def add_lesson(
    method _app_matches (line 197) | def _app_matches(query: Optional[str], lesson_app: Optional[str]) -> b...
    method get_hints_for (line 220) | def get_hints_for(
    method extract_and_record (line 310) | def extract_and_record(
    method _build_trace_summary (line 390) | def _build_trace_summary(
    method _parse_lessons_response (line 418) | def _parse_lessons_response(self, response: str) -> list[dict]:
    method compact_if_needed (line 434) | def compact_if_needed(
    method compact_app_lessons (line 483) | def compact_app_lessons(
    method compact_all (line 596) | def compact_all(self, agent, target: int = COMPACT_TARGET) -> dict[str...
    method summary_banner (line 619) | def summary_banner(self) -> str:
    method get_lessons_for_app (line 627) | def get_lessons_for_app(self, app_name: str) -> list[dict]:
    method __repr__ (line 642) | def __repr__(self) -> str:

FILE: PhoneClaw/hierarchy.py
  class IOSElement (line 9) | class IOSElement:
  function get_page_source (line 20) | def get_page_source(
  function parse_bounds (line 124) | def parse_bounds(bounds_str: str) -> Optional[Tuple[Tuple[int, int], Tup...
  function get_element_bounds (line 158) | def get_element_bounds(element: ET.Element) -> Optional[Tuple[Tuple[int,...
  function get_element_id (line 184) | def get_element_id(element: ET.Element) -> str:
  function is_interactive_element (line 205) | def is_interactive_element(element: ET.Element) -> bool:
  function traverse_ios_tree (line 255) | def traverse_ios_tree(
  function get_ios_elements (line 317) | def get_ios_elements(xml_string: str) -> List[IOSElement]:

FILE: PhoneClaw/keepalive.py
  class ScreenKeepalive (line 47) | class ScreenKeepalive:
    method __init__ (line 66) | def __init__(
    method start (line 98) | def start(self) -> None:
    method stop (line 131) | def stop(self) -> None:
    method _set_idle_timer_disabled (line 148) | def _set_idle_timer_disabled(self, disabled: bool) -> bool:
    method _heartbeat_loop (line 173) | def _heartbeat_loop(self) -> None:
    method _touch_loop (line 186) | def _touch_loop(self) -> None:
    method _touch_centre (line 195) | def _touch_centre(self) -> None:
    method _get_logical_size (line 228) | def _get_logical_size(self) -> tuple[int, int]:

FILE: PhoneClaw/labeling.py
  function _get_scale_factor (line 19) | def _get_scale_factor(img_path: str) -> float:
  function draw_bbox_multi_ios (line 48) | def draw_bbox_multi_ios(

FILE: PhoneClaw/learn.py
  class DemoFrame (line 74) | class DemoFrame:
  function _b64_to_gray (line 91) | def _b64_to_gray(b64: str) -> np.ndarray:
  function _compute_diff (line 101) | def _compute_diff(
  function _detect_tap (line 111) | def _detect_tap(
  class DemoRecorder (line 168) | class DemoRecorder:
    method __init__ (line 187) | def __init__(
    method start (line 221) | def start(self) -> None:
    method stop (line 238) | def stop(self) -> None:
    method _poll_loop (line 251) | def _poll_loop(self) -> None:
    method _on_event (line 288) | def _on_event(
    method analyze_and_learn (line 337) | def analyze_and_learn(self, agent) -> list[str]:
    method summary (line 459) | def summary(self) -> str:
  function _annotate_tap (line 484) | def _annotate_tap(
  function _save_b64_png (line 523) | def _save_b64_png(b64: str, path: Path) -> None:
  function _parse_lessons (line 530) | def _parse_lessons(raw: str) -> list[dict]:
  function _save_summary (line 543) | def _save_summary(path: Path, recorder: DemoRecorder, lessons: list[str]...

FILE: PhoneClaw/loop.py
  function get_code_snippet_cot_v3 (line 37) | def get_code_snippet_cot_v3(response: str) -> Optional[str]:
  class RalphLoop (line 43) | class RalphLoop:
    method __init__ (line 63) | def __init__(
    method run (line 118) | def run(self, state: TaskState) -> TaskState:
    method _generate_final_answer (line 219) | def _generate_final_answer(self, state: TaskState) -> Optional[str]:
    method _execute_subtask (line 266) | def _execute_subtask(
    method _run_action_step (line 414) | def _run_action_step(

FILE: PhoneClaw/memory.py
  class UserMemory (line 49) | class UserMemory:
    method __init__ (line 66) | def __init__(self, profile_path: Optional[str] = None):
    method _load (line 75) | def _load(self) -> dict:
    method save (line 88) | def save(self) -> None:
    method _migrate (line 93) | def _migrate(self, old: dict) -> dict:
    method _empty_profile (line 103) | def _empty_profile(self) -> dict:
    method start_session (line 131) | def start_session(self) -> None:
    method record_task (line 140) | def record_task(
    method add_insight (line 189) | def add_insight(
    method extract_insights (line 246) | def extract_insights(
    method query (line 303) | def query(self, question: str, agent) -> tuple[bool, Optional[str]]:
    method _parse_query_response (line 343) | def _parse_query_response(self, response: str) -> tuple[bool, Optional...
    method _build_full_profile_text (line 370) | def _build_full_profile_text(self) -> str:
    method _parse_insights (line 417) | def _parse_insights(self, response: str) -> list[str]:
    method _existing_profile_summary (line 436) | def _existing_profile_summary(self) -> str:
    method get_planner_context (line 472) | def get_planner_context(self) -> str:
    method session_banner (line 523) | def session_banner(self) -> str:
    method get_profile_path (line 545) | def get_profile_path(self) -> str:
    method __repr__ (line 548) | def __repr__(self) -> str:

FILE: PhoneClaw/planner.py
  class TaskPlanner (line 15) | class TaskPlanner:
    method __init__ (line 24) | def __init__(self, agent, max_retries: int = 3):
    method plan (line 33) | def plan(self, task: str, user_context: str = "") -> List[SubTask]:
    method _parse_response (line 92) | def _parse_response(self, response: str) -> List[SubTask]:

FILE: PhoneClaw/recorder.py
  class PhoneClawRecorder (line 13) | class PhoneClawRecorder:
    method __init__ (line 23) | def __init__(self, task_id: str, instruction: str, page_executor, conf...
    method set_current_subtask (line 69) | def set_current_subtask(self, idx: int, instruction: str, criteria: str):
    method update_before (line 75) | def update_before(self, controller, need_screenshot: bool = False, nee...
    method update_after_cot (line 159) | def update_after_cot(
    method log_subtask_result (line 195) | def log_subtask_result(self, subtask_idx: int, subtask: dict, passed: ...
    method log_task_complete (line 211) | def log_task_complete(self, all_passed: bool, summary: str, final_answ...
    method get_latest_xml (line 228) | def get_latest_xml(self) -> str:
    method _save_trace (line 240) | def _save_trace(self):

FILE: PhoneClaw/run_phoneclaw.py
  class MobileClawConfig (line 71) | class MobileClawConfig:
    method __init__ (line 74) | def __init__(self, task_dir: str):
  function _build_agent (line 83) | def _build_agent(
  function _run_single_task (line 119) | def _run_single_task(
  function _run_interactive_loop (line 282) | def _run_interactive_loop(
  function _print_memory_summary (line 414) | def _print_memory_summary(memory: UserMemory) -> None:
  function _print_experience_summary (line 462) | def _print_experience_summary(experience: ExperienceLog) -> None:
  function _run_learn_mode (line 510) | def _run_learn_mode(
  function main (line 586) | def main():

FILE: PhoneClaw/screenshot.py
  class Screenshot (line 16) | class Screenshot:
  function get_screenshot (line 24) | def get_screenshot(
  function _get_screenshot_wda (line 53) | def _get_screenshot_wda(
  function _get_screenshot_idevice (line 86) | def _get_screenshot_idevice(
  function _create_fallback_screenshot (line 128) | def _create_fallback_screenshot(is_sensitive: bool) -> Screenshot:
  function save_screenshot (line 145) | def save_screenshot(screenshot: Screenshot, file_path: str) -> bool:
  function get_screenshot_png (line 157) | def get_screenshot_png(

FILE: PhoneClaw/state.py
  class SubTask (line 16) | class SubTask:
  class TaskState (line 28) | class TaskState:
    method current_subtask (line 42) | def current_subtask(self) -> Optional[SubTask]:
    method is_complete (line 49) | def is_complete(self) -> bool:
    method advance (line 53) | def advance(self):
    method mark_current_passed (line 58) | def mark_current_passed(self, reason: str):
    method mark_current_failed (line 67) | def mark_current_failed(self, reason: str):
    method increment_fix_retries (line 76) | def increment_fix_retries(self):
    method summary (line 83) | def summary(self) -> str:
  class StateManager (line 107) | class StateManager:
    method __init__ (line 116) | def __init__(self, state_dir: str):
    method _subtask_to_dict (line 127) | def _subtask_to_dict(self, st: SubTask) -> Dict[str, Any]:
    method _subtask_from_dict (line 130) | def _subtask_from_dict(self, d: Dict[str, Any]) -> SubTask:
    method _state_to_dict (line 133) | def _state_to_dict(self, state: TaskState) -> Dict[str, Any]:
    method _state_from_dict (line 146) | def _state_from_dict(self, d: Dict[str, Any]) -> TaskState:
    method save (line 161) | def save(self, state: TaskState):
    method load (line 167) | def load(self) -> Optional[TaskState]:
    method exists (line 179) | def exists(self) -> bool:
    method create (line 183) | def create(self, task_id: str, task_instruction: str, subtasks: List[S...

FILE: adb_client.py
  function list_all_devices (line 8) | def list_all_devices():
  function get_adb_device_name (line 20) | def get_adb_device_name(avd_name=None):
  class Config (line 34) | class Config:
  class EmulatorController (line 38) | class EmulatorController:
    method __init__ (line 39) | def __init__(self):
    method execute_adb (line 45) | def execute_adb(self, adb_command):
    method start_emulator (line 58) | def start_emulator(self, avd_name):
    method stop_emulator (line 104) | def stop_emulator(self, avd_name):
  function start (line 130) | def start():
  function stop (line 140) | def stop():
  function execute (line 150) | def execute():

FILE: agent/__init__.py
  function get_agent (line 19) | def get_agent(agent_module: str, **kwargs) -> Agent:

FILE: agent/llm/glm4.py
  class GLM4Agent (line 6) | class GLM4Agent(OpenAIAgent):
    method __init__ (line 7) | def __init__(
    method act (line 31) | def act(self, messages: List[Dict[str, Any]]) -> str:

FILE: agent/llm/qwen_llm_model.py
  class QwenLLMAgent (line 4) | class QwenLLMAgent(OpenAIAgent):
    method __init__ (line 5) | def __init__(

FILE: agent/mllm/claude_model.py
  class Claude_official (line 7) | class Claude_official(OpenAIAgent):
    method __init__ (line 8) | def __init__(
    method act (line 29) | def act(self, messages: List[Dict[str, Any]]) -> str:
    method format_message (line 59) | def format_message(self, messages: List[Dict[str, Any]]):

FILE: agent/mllm/qwen_model.py
  class QwenAgent (line 8) | class QwenAgent(OpenAIAgent):
    method __init__ (line 9) | def __init__(
    method act (line 30) | def act(self, messages: List[Dict[str, Any]]) -> str:
    method format_message (line 45) | def format_message(self, message):
    method prompt_to_message (line 50) | def prompt_to_message(self, prompt, images):

FILE: agent/model.py
  function handle_giveup (line 14) | def handle_giveup(details):
  function handle_backoff (line 20) | def handle_backoff(details):
  class Agent (line 29) | class Agent:
    method act (line 37) | def act(self, messages: List[Dict[str, Any]]) -> str:
    method prompt_to_message (line 40) | def prompt_to_message(self, prompt, images):
    method system_prompt (line 43) | def system_prompt(self, instruction) -> str:
  class OpenAIAgent (line 46) | class OpenAIAgent(Agent):
    method __init__ (line 47) | def __init__(
    method act (line 73) | def act(self, messages: List[Dict[str, Any]]) -> str:
    method prompt_to_message (line 85) | def prompt_to_message(self, prompt, images):
    method prompt_to_message_cloud (line 108) | def prompt_to_message_cloud(self, prompt, images):
    method prompt_to_message_text (line 131) | def prompt_to_message_text(self, prompt):
    method prompt_to_message_visual (line 138) | def prompt_to_message_visual(self, prompt,img, max_width=1000, max_hei...
    method system_prompt (line 165) | def system_prompt(self, instruction) -> str:
  class HTTPAgent (line 169) | class HTTPAgent(Agent):
    method __init__ (line 170) | def __init__(
    method update_messages (line 182) | def update_messages(self, body: Dict[str, Any], messages: List[Dict[st...
    method act (line 191) | def act(self, messages: List[Dict[str, Any]]):

FILE: agent/utils.py
  function encode_image (line 5) | def encode_image(image_path):
  function image_to_base64 (line 10) | def image_to_base64(image_path):
  function replace_image_url (line 15) | def replace_image_url(messages, throw_details=False, keep_path=False):

FILE: evaluation/auto_test.py
  class Instance (line 14) | class Instance():
    method __init__ (line 15) | def __init__(self, config, idx = 0):
    method initialize_worker (line 26) | def initialize_worker(self):
    method initialize_single_task (line 32) | def initialize_single_task(self, config = None):
    method stop_single_task (line 76) | def stop_single_task(self):
    method __del__ (line 96) | def __del__(self):
  class Docker_Instance (line 111) | class Docker_Instance(Instance):
    method __init__ (line 112) | def __init__(self, config, idx = 0):
    method initialize_worker (line 119) | def initialize_worker(self, config):
    method initialize_single_task (line 128) | def initialize_single_task(self,config):
    method stop_single_task (line 153) | def stop_single_task(self):
    method __del__ (line 159) | def __del__(self):
  class AutoTest (line 167) | class AutoTest():
    method __init__ (line 168) | def __init__(self, config: TaskConfig) -> None:
    method prepare_for_task (line 171) | def prepare_for_task(self):
    method start_emulator (line 184) | def start_emulator(self, instance):
    method run_serial (line 201) | def run_serial(self, tasks):
    method run_task (line 209) | def run_task(self, task_dict, instance):
    method get_agent (line 268) | def get_agent(self):
    method get_executor (line 271) | def get_executor(self):
  class TextOnlyMobileTask_AutoTest (line 275) | class TextOnlyMobileTask_AutoTest(AutoTest):
    method get_agent (line 276) | def get_agent(self):
    method get_executor (line 281) | def get_executor(self):
  class ScreenshotMobileTask_AutoTest (line 285) | class ScreenshotMobileTask_AutoTest(TextOnlyMobileTask_AutoTest):
    method get_agent (line 286) | def get_agent(self):
    method get_executor (line 291) | def get_executor(self):
  class ScreenshotMobileTask_AutoTest_for_show (line 295) | class ScreenshotMobileTask_AutoTest_for_show(ScreenshotMobileTask_AutoTe...
    method start_emulator_cmd (line 296) | def start_emulator_cmd(self, avd_name):
    method stop_emulator (line 320) | def stop_emulator(self, instance):
  class CogAgentTask_AutoTest (line 325) | class CogAgentTask_AutoTest(TextOnlyMobileTask_AutoTest):
    method get_agent (line 326) | def get_agent(self):
    method get_executor (line 331) | def get_executor(self):
  class ScreenSeeActTask_AutoTest (line 335) | class ScreenSeeActTask_AutoTest(TextOnlyMobileTask_AutoTest):
    method get_agent (line 336) | def get_agent(self):
  class ScreenReactTask_AutoTest (line 342) | class ScreenReactTask_AutoTest(TextOnlyMobileTask_AutoTest):
    method get_agent (line 343) | def get_agent(self):
    method get_executor (line 348) | def get_executor(self):
  class ScreenReactTask_AutoTest_Cloud_hyper (line 351) | class ScreenReactTask_AutoTest_Cloud_hyper(TextOnlyMobileTask_AutoTest):
    method get_agent (line 352) | def get_agent(self):
    method get_executor (line 357) | def get_executor(self):
  class TextOnlyReactTask_AutoTest (line 360) | class TextOnlyReactTask_AutoTest(TextOnlyMobileTask_AutoTest):
    method get_agent (line 361) | def get_agent(self):
  class TextOnlyFineTuneTask_AutoTest (line 367) | class TextOnlyFineTuneTask_AutoTest(TextOnlyMobileTask_AutoTest):
    method get_agent (line 368) | def get_agent(self):
  class TextOnlyFineTuneTask_long_AutoTest (line 374) | class TextOnlyFineTuneTask_long_AutoTest(TextOnlyMobileTask_AutoTest):
    method get_agent (line 375) | def get_agent(self):

FILE: evaluation/configs.py
  class AppConfig (line 9) | class AppConfig:
    method __init__ (line 10) | def __init__(self, file_path, output_dir=None):
    method load_params (line 20) | def load_params(self):
    method get_tasks (line 52) | def get_tasks(self):
    method get_metrics (line 57) | def get_metrics(self):
  class AppConfig_Sample (line 61) | class AppConfig_Sample:
    method __init__ (line 62) | def __init__(self, file_path, output_dir=None):
    method load_params (line 69) | def load_params(self):
    method get_tasks (line 87) | def get_tasks(self):
  class TaskConfig (line 94) | class TaskConfig:
    method subdir_config (line 111) | def subdir_config(self, subdir: str):
    method add_config (line 117) | def add_config(self, config):

FILE: evaluation/definition.py
  function encode_image (line 10) | def encode_image(image_path):
  function get_code_snippet_cot (line 15) | def get_code_snippet_cot(text):
  function handle_backoff (line 57) | def handle_backoff(details):
  function handle_giveup (line 61) | def handle_giveup(details):
  function detect_answer (line 67) | def detect_answer(question: str, model_answer: str, standard_answer: str...
  function detect_answer_test (line 82) | def detect_answer_test(args):
  function get_completion_glm (line 111) | def get_completion_glm(prompt, glm4_key):
  function get_completion_gpt (line 126) | def get_completion_gpt(prompt, model_name):
  function get_mobile_device (line 141) | def get_mobile_device():
  function get_mobile_device_and_name (line 164) | def get_mobile_device_and_name():

FILE: evaluation/docker_utils.py
  function run_docker_command (line 8) | def run_docker_command(command):
  function create_docker_container (line 14) | def create_docker_container(docker_image_name, docker_port, docker_local...
  function execute_command_in_container (line 29) | def execute_command_in_container(container_id, command):
  function remove_docker_container (line 39) | def remove_docker_container(container_id):
  function cp_docker (line 51) | def cp_docker(local_path, docker_path, container_id, local_to_docker=True):
  function send_post_request (line 70) | def send_post_request(url, headers, data, max_attempts=10, retry_interva...
  function start_avd (line 87) | def start_avd(port, avd_name):
  function execute_adb_command (line 95) | def execute_adb_command(port, command):
  function stop_avd (line 103) | def stop_avd(port, avd_name):

FILE: evaluation/evaluation.py
  class AutoTask (line 11) | class AutoTask():
    method __init__ (line 12) | def __init__(self, instruction, controller, page_executor, agent, reco...
    method set_system_prompt (line 29) | def set_system_prompt(self, instruction):
    method run_step (line 35) | def run_step(self, round_count):
  class TextOnlyTask (line 53) | class TextOnlyTask(AutoTask):
    method set_system_prompt (line 54) | def set_system_prompt(self, instruction):
  class ScreenshotTask (line 60) | class ScreenshotTask(TextOnlyTask):
    method run_step (line 61) | def run_step(self, round_count):
    method parse_starter_response (line 168) | def parse_starter_response(self, response):
  class CogAgentTask (line 193) | class CogAgentTask(TextOnlyTask):
    method run_step (line 194) | def run_step(self, round_count):
    method set_system_prompt (line 212) | def set_system_prompt(self, instruction):
  class ScreenshotReactTask (line 219) | class ScreenshotReactTask(ScreenshotTask):
    method set_system_prompt (line 220) | def set_system_prompt(self, instruction):
  class ScreenshotReactTask_Cloud_hyper (line 226) | class ScreenshotReactTask_Cloud_hyper(ScreenshotTask):
    method set_system_prompt (line 227) | def set_system_prompt(self, instruction):
  class ScreenSeeActTask (line 234) | class ScreenSeeActTask(TextOnlyTask):
    method set_system_prompt (line 236) | def set_system_prompt(self, instruction):
    method run_step (line 244) | def run_step(self, round_count):
  class TextOnlyReactTask (line 300) | class TextOnlyReactTask(TextOnlyTask):
    method set_system_prompt (line 301) | def set_system_prompt(self, instruction):
  class TextOnlyFineTuneTask (line 308) | class TextOnlyFineTuneTask(TextOnlyTask):
    method set_system_prompt (line 309) | def set_system_prompt(self, instruction):
    method run_step (line 315) | def run_step(self, round_count):
  class TextOnlyFineTuneTask_long (line 332) | class TextOnlyFineTuneTask_long(TextOnlyFineTuneTask):
    method set_system_prompt (line 333) | def set_system_prompt(self, instruction):

FILE: evaluation/parallel.py
  function task_done_callback (line 6) | def task_done_callback(future, docker_instance, free_dockers):
  function parallel_worker (line 10) | def parallel_worker(class_, config, parallel, tasks):

FILE: evaluation/task.py
  function dump_xml (line 24) | def dump_xml(xml_path):
  function calculate_partial_acc (line 31) | def calculate_partial_acc(dict):
  function compute_image_similarity (line 44) | def compute_image_similarity(image_paths):
  class Evaluation_Task (line 75) | class Evaluation_Task(Generic[T_INPUT, T_OUTPUT, T_TARGET]):
    method __init__ (line 76) | def __init__(self, config, traces, args, detail=False):
    method evaluate (line 92) | def evaluate(self, max_workers: int = 4) -> Dict[str, Any]:
    method _evaluate_single_task (line 108) | def _evaluate_single_task(self, task) -> None:
    method evaluate_old (line 190) | def evaluate_old(self) -> Dict[str, Any]:
    method add_metrics (line 260) | def add_metrics(self, task, traces, all_images, final_result):
    method save_single (line 282) | def save_single(self, task, result):
    method print_metric (line 298) | def print_metric(self):
  class SingleTask (line 327) | class SingleTask():
    method __init__ (line 328) | def __init__(self, args):
    method check_answer (line 333) | def check_answer(self, line):
    method judge_page (line 353) | def judge_page(self, xml_compressed_tree):
    method judge (line 356) | def judge(self, xml_compressed_tree, line):
    method save_answer (line 359) | def save_answer(self, answer):

FILE: evaluation/tasks/bluecoins/bluecoins.py
  function extract_bills_NewEditBK (line 16) | def extract_bills_NewEditBK(xml_compressed_tree) -> Dict:
  class SingleTask_bluecoins_1 (line 67) | class SingleTask_bluecoins_1(SingleTask):
    method judge_page (line 69) | def judge_page(self, line):
    method judge (line 74) | def judge(self, xml_compressed_tree, line):
  class SingleTask_bluecoins_LLM_1 (line 87) | class SingleTask_bluecoins_LLM_1(SingleTask):
    method __init__ (line 88) | def __init__(self, args):
    method judge_page (line 95) | def judge_page(self, line):
    method judge (line 100) | def judge(self, xml_compressed_tree, line):
  class SingleTask_bluecoins_2 (line 124) | class SingleTask_bluecoins_2(SingleTask):
    method judge_page (line 126) | def judge_page(self, line):
    method judge (line 131) | def judge(self, xml_compressed_tree, line):
  class SingleTask_bluecoins_LLM_2 (line 144) | class SingleTask_bluecoins_LLM_2(SingleTask):
    method __init__ (line 145) | def __init__(self, args):
    method judge_page (line 152) | def judge_page(self, line):
    method judge (line 157) | def judge(self, xml_compressed_tree, line):
  class SingleTask_bluecoins_3 (line 179) | class SingleTask_bluecoins_3(SingleTask):
    method judge_page (line 181) | def judge_page(self, line):
    method judge (line 186) | def judge(self, xml_compressed_tree, line):
  class SingleTask_bluecoins_LLM_3 (line 199) | class SingleTask_bluecoins_LLM_3(SingleTask):
    method __init__ (line 200) | def __init__(self, args):
    method judge_page (line 207) | def judge_page(self, line):
    method judge (line 212) | def judge(self, xml_compressed_tree, line):
  class SingleTask_bluecoins_4 (line 234) | class SingleTask_bluecoins_4(SingleTask):
    method judge_page (line 236) | def judge_page(self, line):
    method judge (line 241) | def judge(self, xml_compressed_tree, line):
  class SingleTask_bluecoins_LLM_4 (line 254) | class SingleTask_bluecoins_LLM_4(SingleTask):
    method __init__ (line 255) | def __init__(self, args):
    method judge_page (line 262) | def judge_page(self, line):
    method judge (line 267) | def judge(self, xml_compressed_tree, line):
  class SingleTask_bluecoins_5 (line 289) | class SingleTask_bluecoins_5(SingleTask):
    method judge_page (line 291) | def judge_page(self, line):
    method judge (line 296) | def judge(self, xml_compressed_tree, line):
  class SingleTask_bluecoins_LLM_5 (line 309) | class SingleTask_bluecoins_LLM_5(SingleTask):
    method __init__ (line 310) | def __init__(self, args):
    method judge_page (line 317) | def judge_page(self, line):
    method judge (line 322) | def judge(self, xml_compressed_tree, line):
  class SingleTask_bluecoins_6 (line 344) | class SingleTask_bluecoins_6(SingleTask):
    method judge_page (line 346) | def judge_page(self, xml_compressed_tree):
    method judge (line 352) | def judge(self, xml_compressed_tree, line):
  class SingleTask_bluecoins_LLM_6 (line 373) | class SingleTask_bluecoins_LLM_6(SingleTask):
    method __init__ (line 374) | def __init__(self, args):
    method judge_page (line 382) | def judge_page(self, line):
    method _get_screenshot_path (line 387) | def _get_screenshot_path(self, line):
    method judge (line 395) | def judge(self, xml_compressed_tree, line):
  class SingleTask_bluecoins_7 (line 434) | class SingleTask_bluecoins_7(SingleTask):
    method judge_page (line 436) | def judge_page(self, xml_compressed_tree):
    method judge (line 442) | def judge(self, xml_compressed_tree, line):
  class SingleTask_bluecoins_LLM_7 (line 467) | class SingleTask_bluecoins_LLM_7(SingleTask):
    method __init__ (line 468) | def __init__(self, args):
    method judge_page (line 477) | def judge_page(self, line):
    method _get_screenshot_path (line 482) | def _get_screenshot_path(self, line):
    method judge (line 490) | def judge(self, xml_compressed_tree, line):
  class SingleTask_bluecoins_8 (line 534) | class SingleTask_bluecoins_8(SingleTask):
    method judge_page (line 536) | def judge_page(self, xml_compressed_tree):
    method judge (line 542) | def judge(self, xml_compressed_tree, line):
  class SingleTask_bluecoins_LLM_8 (line 567) | class SingleTask_bluecoins_LLM_8(SingleTask):
    method __init__ (line 568) | def __init__(self, args):
    method judge_page (line 577) | def judge_page(self, line):
    method _get_screenshot_path (line 582) | def _get_screenshot_path(self, line):
    method judge (line 590) | def judge(self, xml_compressed_tree, line):
  class SingleTask_bluecoins_9 (line 634) | class SingleTask_bluecoins_9(SingleTask):
    method judge_page (line 636) | def judge_page(self, xml_compressed_tree):
    method judge (line 642) | def judge(self, xml_compressed_tree, line):
  class SingleTask_bluecoins_LLM_9 (line 671) | class SingleTask_bluecoins_LLM_9(SingleTask):
    method __init__ (line 672) | def __init__(self, args):
    method judge_page (line 682) | def judge_page(self, line):
    method _get_screenshot_path (line 687) | def _get_screenshot_path(self, line):
    method judge (line 695) | def judge(self, xml_compressed_tree, line):
  class SingleTask_bluecoins_10 (line 745) | class SingleTask_bluecoins_10(SingleTask):
    method judge_page (line 747) | def judge_page(self, xml_compressed_tree):
    method judge (line 753) | def judge(self, xml_compressed_tree, line):
  class SingleTask_bluecoins_LLM_10 (line 782) | class SingleTask_bluecoins_LLM_10(SingleTask):
    method __init__ (line 783) | def __init__(self, args):
    method judge_page (line 793) | def judge_page(self, line):
    method _get_screenshot_path (line 798) | def _get_screenshot_path(self, line):
    method judge (line 806) | def judge(self, xml_compressed_tree, line):
  class SingleTask_bluecoins_11 (line 856) | class SingleTask_bluecoins_11(SingleTask):
    method judge_page (line 859) | def judge_page(self, xml_compressed_tree):
    method judge (line 865) | def judge(self, xml_compressed_tree, line):
  class SingleTask_bluecoins_LLM_11 (line 893) | class SingleTask_bluecoins_LLM_11(SingleTask):
    method __init__ (line 894) | def __init__(self, args):
    method judge_page (line 907) | def judge_page(self, line):
    method _get_screenshot_path (line 912) | def _get_screenshot_path(self, line):
    method judge (line 920) | def judge(self, xml_compressed_tree, line):
  class SingleTask_bluecoins_12 (line 965) | class SingleTask_bluecoins_12(SingleTask):
    method judge_page (line 968) | def judge_page(self, xml_compressed_tree):
    method judge (line 974) | def judge(self, xml_compressed_tree, line):
  class SingleTask_bluecoins_LLM_12 (line 1002) | class SingleTask_bluecoins_LLM_12(SingleTask):
    method __init__ (line 1003) | def __init__(self, args):
    method judge_page (line 1016) | def judge_page(self, line):
    method _get_screenshot_path (line 1021) | def _get_screenshot_path(self, line):
    method judge (line 1029) | def judge(self, xml_compressed_tree, line):
  class SingleTask_bluecoins_13 (line 1074) | class SingleTask_bluecoins_13(SingleTask):
    method judge_page (line 1077) | def judge_page(self, xml_compressed_tree):
    method judge (line 1083) | def judge(self, xml_compressed_tree, line):
  class SingleTask_bluecoins_LLM_13 (line 1126) | class SingleTask_bluecoins_LLM_13(SingleTask):
    method __init__ (line 1127) | def __init__(self, args):
    method judge_page (line 1142) | def judge_page(self, line):
    method _get_screenshot_path (line 1147) | def _get_screenshot_path(self, line):
    method judge (line 1155) | def judge(self, xml_compressed_tree, line):
  class SingleTask_bluecoins_14 (line 1219) | class SingleTask_bluecoins_14(SingleTask):
    method judge_page (line 1222) | def judge_page(self, xml_compressed_tree):
    method judge (line 1228) | def judge(self, xml_compressed_tree, line):
  class SingleTask_bluecoins_LLM_14 (line 1276) | class SingleTask_bluecoins_LLM_14(SingleTask):
    method __init__ (line 1277) | def __init__(self, args):
    method judge_page (line 1293) | def judge_page(self, line):
    method _get_screenshot_path (line 1298) | def _get_screenshot_path(self, line):
    method judge (line 1306) | def judge(self, xml_compressed_tree, line):
  class SingleTask_bluecoins_15 (line 1376) | class SingleTask_bluecoins_15(SingleTask):
    method judge_page (line 1379) | def judge_page(self, xml_compressed_tree):
    method judge (line 1385) | def judge(self, xml_compressed_tree, line):
  class SingleTask_bluecoins_LLM_15 (line 1417) | class SingleTask_bluecoins_LLM_15(SingleTask):
    method __init__ (line 1418) | def __init__(self, args):
    method judge_page (line 1432) | def judge_page(self, line):
    method _get_screenshot_path (line 1437) | def _get_screenshot_path(self, line):
    method judge (line 1445) | def judge(self, xml_compressed_tree, line):

FILE: evaluation/tasks/calendar/calendar.py
  class SingleTask_calendar_1 (line 12) | class SingleTask_calendar_1(SingleTask):
    method judge_page (line 14) | def judge_page(self, xml_compressed_tree):
    method judge (line 20) | def judge(self, xml_compressed_tree, line):
  class SingleTask_calendar_2 (line 34) | class SingleTask_calendar_2(SingleTask):
    method judge_page (line 36) | def judge_page(self, xml_compressed_tree):
    method judge (line 42) | def judge(self, xml_compressed_tree, line):
  class SingleTask_calendar_3 (line 56) | class SingleTask_calendar_3(SingleTask):
    method judge_page (line 58) | def judge_page(self, xml_compressed_tree):
    method judge (line 64) | def judge(self, xml_compressed_tree, line):
  class SingleTask_calendar_4 (line 78) | class SingleTask_calendar_4(SingleTask):
    method judge_page (line 80) | def judge_page(self, xml_compressed_tree):
    method judge (line 86) | def judge(self, xml_compressed_tree, line):
  class SingleTask_calendar_5 (line 100) | class SingleTask_calendar_5(SingleTask):
    method judge_page (line 102) | def judge_page(self, xml_compressed_tree):
    method judge (line 108) | def judge(self, xml_compressed_tree, line):
  class SingleTask_calendar_6 (line 122) | class SingleTask_calendar_6(SingleTask):
    method judge_page (line 124) | def judge_page(self, xml_compressed_tree):
    method judge (line 130) | def judge(self, xml_compressed_tree, line):
  class SingleTask_calendar_7 (line 154) | class SingleTask_calendar_7(SingleTask):
    method judge_page (line 156) | def judge_page(self, xml_compressed_tree):
    method judge (line 162) | def judge(self, xml_compressed_tree, line):
  class SingleTask_calendar_8 (line 186) | class SingleTask_calendar_8(SingleTask):
    method judge_page (line 188) | def judge_page(self, xml_compressed_tree):
    method judge (line 194) | def judge(self, xml_compressed_tree, line):
  class SingleTask_calendar_9 (line 218) | class SingleTask_calendar_9(SingleTask):
    method judge_page (line 220) | def judge_page(self, xml_compressed_tree):
    method judge (line 226) | def judge(self, xml_compressed_tree, line):
  class SingleTask_calendar_10 (line 253) | class SingleTask_calendar_10(SingleTask):
    method judge_page (line 255) | def judge_page(self, xml_compressed_tree):
    method judge (line 261) | def judge(self, xml_compressed_tree, line):
  class SingleTask_calendar_11 (line 267) | class SingleTask_calendar_11(SingleTask):
    method judge_page (line 269) | def judge_page(self, xml_compressed_tree):
    method judge (line 275) | def judge(self, xml_compressed_tree, line):
  class SingleTask_calendar_12 (line 287) | class SingleTask_calendar_12(SingleTask):
    method judge_page (line 289) | def judge_page(self, xml_compressed_tree):
    method judge (line 295) | def judge(self, xml_compressed_tree, line):
  class SingleTask_calendar_13 (line 311) | class SingleTask_calendar_13(SingleTask):
    method judge_page (line 313) | def judge_page(self, xml_compressed_tree):
    method judge (line 319) | def judge(self, xml_compressed_tree, line):
  class SingleTask_calendar_14 (line 325) | class SingleTask_calendar_14(SingleTask):
    method judge_page (line 327) | def judge_page(self, xml_compressed_tree):
    method judge (line 333) | def judge(self, xml_compressed_tree, line):
  class SingleTask_calendar_LLM_1 (line 344) | class SingleTask_calendar_LLM_1(SingleTask):
    method __init__ (line 345) | def __init__(self, args):
    method judge_page (line 353) | def judge_page(self, line):
    method _get_screenshot_path (line 358) | def _get_screenshot_path(self, line):
    method judge (line 366) | def judge(self, xml_compressed_tree, line):
  class SingleTask_calendar_LLM_2 (line 397) | class SingleTask_calendar_LLM_2(SingleTask):
    method __init__ (line 398) | def __init__(self, args):
    method judge_page (line 407) | def judge_page(self, line):
    method _get_screenshot_path (line 412) | def _get_screenshot_path(self, line):
    method judge (line 420) | def judge(self, xml_compressed_tree, line):
  class SingleTask_calendar_LLM_3 (line 451) | class SingleTask_calendar_LLM_3(SingleTask):
    method __init__ (line 452) | def __init__(self, args):
    method judge_page (line 461) | def judge_page(self, line):
    method _get_screenshot_path (line 466) | def _get_screenshot_path(self, line):
    method judge (line 474) | def judge(self, xml_compressed_tree, line):
  class SingleTask_calendar_LLM_4 (line 505) | class SingleTask_calendar_LLM_4(SingleTask):
    method __init__ (line 506) | def __init__(self, args):
    method judge_page (line 514) | def judge_page(self, line):
    method _get_screenshot_path (line 519) | def _get_screenshot_path(self, line):
    method judge (line 527) | def judge(self, xml_compressed_tree, line):
  class SingleTask_calendar_LLM_9 (line 558) | class SingleTask_calendar_LLM_9(SingleTask):
    method __init__ (line 559) | def __init__(self, args):
    method judge_page (line 567) | def judge_page(self, line):
    method _get_screenshot_path (line 572) | def _get_screenshot_path(self, line):
    method judge (line 580) | def judge(self, xml_compressed_tree, line):
  class SingleTask_calendar_LLM_10 (line 609) | class SingleTask_calendar_LLM_10(SingleTask):
    method __init__ (line 610) | def __init__(self, args):
    method judge_page (line 617) | def judge_page(self, line):
    method _get_screenshot_path (line 622) | def _get_screenshot_path(self, line):
    method judge (line 630) | def judge(self, xml_compressed_tree, line):
  class SingleTask_calendar_LLM_11 (line 651) | class SingleTask_calendar_LLM_11(SingleTask):
    method __init__ (line 652) | def __init__(self, args):
    method judge_page (line 660) | def judge_page(self, line):
    method _get_screenshot_path (line 665) | def _get_screenshot_path(self, line):
    method judge (line 673) | def judge(self, xml_compressed_tree, line):
  class SingleTask_calendar_LLM_12 (line 703) | class SingleTask_calendar_LLM_12(SingleTask):
    method __init__ (line 704) | def __init__(self, args):
    method judge_page (line 712) | def judge_page(self, line):
    method _get_screenshot_path (line 717) | def _get_screenshot_path(self, line):
    method judge (line 725) | def judge(self, xml_compressed_tree, line):
  class SingleTask_calendar_LLM_13 (line 759) | class SingleTask_calendar_LLM_13(SingleTask):
    method __init__ (line 760) | def __init__(self, args):
    method judge_page (line 767) | def judge_page(self, line):
    method _get_screenshot_path (line 772) | def _get_screenshot_path(self, line):
    method judge (line 780) | def judge(self, xml_compressed_tree, line):
  class SingleTask_calendar_LLM_14 (line 801) | class SingleTask_calendar_LLM_14(SingleTask):
    method __init__ (line 802) | def __init__(self, args):
    method judge_page (line 810) | def judge_page(self, line):
    method _get_screenshot_path (line 815) | def _get_screenshot_path(self, line):
    method judge (line 823) | def judge(self, xml_compressed_tree, line):
  class SingleTask_calendar_LLM_5 (line 853) | class SingleTask_calendar_LLM_5(SingleTask):
    method __init__ (line 854) | def __init__(self, args):
    method judge_page (line 862) | def judge_page(self, line):
    method _get_screenshot_path (line 867) | def _get_screenshot_path(self, line):
    method judge (line 875) | def judge(self, xml_compressed_tree, line):
  class SingleTask_calendar_LLM_6 (line 906) | class SingleTask_calendar_LLM_6(SingleTask):
    method __init__ (line 907) | def __init__(self, args):
    method judge_page (line 915) | def judge_page(self, line):
    method _get_screenshot_path (line 920) | def _get_screenshot_path(self, line):
    method judge (line 928) | def judge(self, xml_compressed_tree, line):
  class SingleTask_calendar_LLM_7 (line 957) | class SingleTask_calendar_LLM_7(SingleTask):
    method __init__ (line 958) | def __init__(self, args):
    method judge_page (line 966) | def judge_page(self, line):
    method _get_screenshot_path (line 971) | def _get_screenshot_path(self, line):
    method judge (line 979) | def judge(self, xml_compressed_tree, line):
  class SingleTask_calendar_LLM_8 (line 1008) | class SingleTask_calendar_LLM_8(SingleTask):
    method __init__ (line 1009) | def __init__(self, args):
    method judge_page (line 1017) | def judge_page(self, line):
    method _get_screenshot_path (line 1022) | def _get_screenshot_path(self, line):
    method judge (line 1030) | def judge(self, xml_compressed_tree, line):

FILE: evaluation/tasks/cantook/cantook.py
  function extract_books_info (line 14) | def extract_books_info(xml_compressed_tree):
  function check_selected (line 24) | def check_selected(xml_compressed_tree, key_filter):
  class SingleTask_cantook_1 (line 42) | class SingleTask_cantook_1(SingleTask):
    method judge_page (line 44) | def judge_page(self, line):
    method judge (line 49) | def judge(self, xml_compressed_tree, line):
  class SingleTask_cantook_2 (line 62) | class SingleTask_cantook_2(SingleTask):
    method judge_page (line 64) | def judge_page(self, line):
    method judge (line 69) | def judge(self, xml_compressed_tree, line):
  class SingleTask_cantook_3 (line 82) | class SingleTask_cantook_3(SingleTask):
    method judge_page (line 84) | def judge_page(self, line):
    method judge (line 89) | def judge(self, xml_compressed_tree, line):
  class SingleTask_cantook_4 (line 102) | class SingleTask_cantook_4(SingleTask):
    method judge_page (line 104) | def judge_page(self, line):
    method judge (line 109) | def judge(self, xml_compressed_tree, line):
  class SingleTask_cantook_5 (line 122) | class SingleTask_cantook_5(SingleTask):
    method judge_page (line 124) | def judge_page(self, line):
    method judge (line 129) | def judge(self, xml_compressed_tree, line):
  class SingleTask_cantook_6 (line 142) | class SingleTask_cantook_6(SingleTask):
    method judge_page (line 144) | def judge_page(self, xml_compressed_tree):
    method judge (line 151) | def judge(self, xml_compressed_tree, line):
  class SingleTask_cantook_7 (line 169) | class SingleTask_cantook_7(SingleTask):
    method judge_page (line 171) | def judge_page(self, xml_compressed_tree):
    method judge (line 179) | def judge(self, xml_compressed_tree, line):
  class SingleTask_cantook_8 (line 197) | class SingleTask_cantook_8(SingleTask):
    method judge_page (line 199) | def judge_page(self, xml_compressed_tree):
    method judge (line 206) | def judge(self, xml_compressed_tree, line):
  class SingleTask_cantook_9 (line 228) | class SingleTask_cantook_9(SingleTask):
    method judge_page (line 230) | def judge_page(self, xml_compressed_tree):
    method judge (line 237) | def judge(self, xml_compressed_tree, line):
  class SingleTask_cantook_10 (line 259) | class SingleTask_cantook_10(SingleTask):
    method judge_page (line 261) | def judge_page(self, xml_compressed_tree):
    method judge (line 268) | def judge(self, xml_compressed_tree, line):
  class SingleTask_cantook_11 (line 289) | class SingleTask_cantook_11(SingleTask):
    method judge_page (line 292) | def judge_page(self, xml_compressed_tree):
    method judge (line 299) | def judge(self, xml_compressed_tree, line):
  class SingleTask_cantook_12 (line 325) | class SingleTask_cantook_12(SingleTask):
    method judge_page (line 327) | def judge_page(self, xml_compressed_tree):
    method judge (line 330) | def judge(self, xml_compressed_tree, line):
  class SingleTask_cantook_LLM_1 (line 352) | class SingleTask_cantook_LLM_1(SingleTask):
    method __init__ (line 353) | def __init__(self, args):
    method judge_page (line 360) | def judge_page(self, line):
    method judge (line 365) | def judge(self, xml_compressed_tree, line):
  class SingleTask_cantook_LLM_2 (line 387) | class SingleTask_cantook_LLM_2(SingleTask):
    method __init__ (line 388) | def __init__(self, args):
    method judge_page (line 395) | def judge_page(self, line):
    method judge (line 400) | def judge(self, xml_compressed_tree, line):
  class SingleTask_cantook_LLM_3 (line 422) | class SingleTask_cantook_LLM_3(SingleTask):
    method __init__ (line 423) | def __init__(self, args):
    method judge_page (line 430) | def judge_page(self, line):
    method judge (line 435) | def judge(self, xml_compressed_tree, line):
  class SingleTask_cantook_LLM_4 (line 457) | class SingleTask_cantook_LLM_4(SingleTask):
    method __init__ (line 458) | def __init__(self, args):
    method judge_page (line 465) | def judge_page(self, line):
    method judge (line 470) | def judge(self, xml_compressed_tree, line):
  class SingleTask_cantook_LLM_5 (line 492) | class SingleTask_cantook_LLM_5(SingleTask):
    method __init__ (line 493) | def __init__(self, args):
    method judge_page (line 500) | def judge_page(self, line):
    method judge (line 505) | def judge(self, xml_compressed_tree, line):
  class SingleTask_cantook_LLM_6 (line 527) | class SingleTask_cantook_LLM_6(SingleTask):
    method __init__ (line 528) | def __init__(self, args):
    method judge_page (line 539) | def judge_page(self, line):
    method _get_screenshot_path (line 544) | def _get_screenshot_path(self, line):
    method judge (line 552) | def judge(self, xml_compressed_tree, line):
  class SingleTask_cantook_LLM_7 (line 588) | class SingleTask_cantook_LLM_7(SingleTask):
    method __init__ (line 589) | def __init__(self, args):
    method judge_page (line 600) | def judge_page(self, line):
    method _get_screenshot_path (line 605) | def _get_screenshot_path(self, line):
    method judge (line 613) | def judge(self, xml_compressed_tree, line):
  class SingleTask_cantook_LLM_8 (line 649) | class SingleTask_cantook_LLM_8(SingleTask):
    method __init__ (line 650) | def __init__(self, args):
    method judge_page (line 662) | def judge_page(self, line):
    method _get_screenshot_path (line 667) | def _get_screenshot_path(self, line):
    method judge (line 675) | def judge(self, xml_compressed_tree, line):
  class SingleTask_cantook_LLM_9 (line 716) | class SingleTask_cantook_LLM_9(SingleTask):
    method __init__ (line 717) | def __init__(self, args):
    method judge_page (line 729) | def judge_page(self, line):
    method _get_screenshot_path (line 734) | def _get_screenshot_path(self, line):
    method judge (line 742) | def judge(self, xml_compressed_tree, line):
  class SingleTask_cantook_LLM_10 (line 783) | class SingleTask_cantook_LLM_10(SingleTask):
    method __init__ (line 784) | def __init__(self, args):
    method judge_page (line 795) | def judge_page(self, line):
    method _get_screenshot_path (line 800) | def _get_screenshot_path(self, line):
    method judge (line 808) | def judge(self, xml_compressed_tree, line):
  class SingleTask_cantook_LLM_11 (line 844) | class SingleTask_cantook_LLM_11(SingleTask):
    method __init__ (line 845) | def __init__(self, args):
    method judge_page (line 859) | def judge_page(self, line):
    method _get_screenshot_path (line 864) | def _get_screenshot_path(self, line):
    method judge (line 872) | def judge(self, xml_compressed_tree, line):
  class SingleTask_cantook_LLM_12 (line 919) | class SingleTask_cantook_LLM_12(SingleTask):
    method __init__ (line 920) | def __init__(self, args):
    method judge_page (line 932) | def judge_page(self, line):
    method _get_screenshot_path (line 937) | def _get_screenshot_path(self, line):
    method judge (line 945) | def judge(self, xml_compressed_tree, line):

FILE: evaluation/tasks/chrome/chrome.py
  class SingleTask_Chrome_LLM_1 (line 10) | class SingleTask_Chrome_LLM_1(SingleTask):
    method __init__ (line 11) | def __init__(self, args):
    method judge_page (line 24) | def judge_page(self, line):
    method _get_text_content (line 29) | def _get_text_content(self, line):
    method judge (line 34) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Chrome_LLM_2 (line 48) | class SingleTask_Chrome_LLM_2(SingleTask):
    method __init__ (line 49) | def __init__(self, args):
    method judge_page (line 57) | def judge_page(self, line):
    method _get_screenshot_path (line 62) | def _get_screenshot_path(self, line):
    method judge (line 70) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Chrome_LLM_3 (line 84) | class SingleTask_Chrome_LLM_3(SingleTask):
    method __init__ (line 85) | def __init__(self, args):
    method judge_page (line 94) | def judge_page(self, line):
    method _get_text_content (line 99) | def _get_text_content(self, line):
    method judge (line 104) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Chrome_LLM_4 (line 118) | class SingleTask_Chrome_LLM_4(SingleTask):
    method __init__ (line 119) | def __init__(self, args):
    method judge_page (line 128) | def judge_page(self, line):
    method _get_screenshot_path (line 133) | def _get_screenshot_path(self, line):
    method judge (line 141) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Chrome_LLM_5 (line 155) | class SingleTask_Chrome_LLM_5(SingleTask):
    method __init__ (line 156) | def __init__(self, args):
    method judge_page (line 164) | def judge_page(self, line):
    method _get_screenshot_path (line 169) | def _get_screenshot_path(self, line):
    method judge (line 177) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Chrome_LLM_6 (line 191) | class SingleTask_Chrome_LLM_6(SingleTask):
    method __init__ (line 192) | def __init__(self, args):
    method judge_page (line 200) | def judge_page(self, line):
    method _get_screenshot_path (line 205) | def _get_screenshot_path(self, line):
    method judge (line 213) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Chrome_LLM_7 (line 227) | class SingleTask_Chrome_LLM_7(SingleTask):
    method __init__ (line 228) | def __init__(self, args):
    method judge_page (line 237) | def judge_page(self, line):
    method _get_screenshot_path (line 242) | def _get_screenshot_path(self, line):
    method judge (line 250) | def judge(self, xml_compressed_tree, line):

FILE: evaluation/tasks/clock/clock.py
  function extract_alarms (line 14) | def extract_alarms(data):
  class SingleTask_Clock_General (line 106) | class SingleTask_Clock_General(SingleTask):
    method split_string (line 107) | def split_string(self, str, splitter):
  class SingleTask_Clock_1 (line 114) | class SingleTask_Clock_1(SingleTask_Clock_General):
    method judge (line 116) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_2 (line 161) | class SingleTask_Clock_2(SingleTask_Clock_General):
    method judge_page (line 163) | def judge_page(self, xml_compressed_tree):
    method judge (line 167) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_3 (line 211) | class SingleTask_Clock_3(SingleTask_Clock_General):
    method judge (line 213) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_4 (line 250) | class SingleTask_Clock_4(SingleTask_Clock_General):
    method judge (line 252) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_5 (line 288) | class SingleTask_Clock_5(SingleTask_Clock_General):
    method judge (line 290) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_6 (line 326) | class SingleTask_Clock_6(SingleTask_Clock_General):
    method judge (line 328) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_7 (line 372) | class SingleTask_Clock_7(SingleTask_Clock_General):
    method judge (line 374) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_8 (line 399) | class SingleTask_Clock_8(SingleTask_Clock_General):
    method get_time (line 401) | def get_time(self, str):
    method judge (line 410) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_9 (line 436) | class SingleTask_Clock_9(SingleTask_Clock_General):
    method judge (line 438) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_10 (line 463) | class SingleTask_Clock_10(SingleTask_Clock_General):
    method judge (line 465) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_11 (line 479) | class SingleTask_Clock_11(SingleTask_Clock_General):
    method judge (line 481) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_12 (line 495) | class SingleTask_Clock_12(SingleTask_Clock_General):
    method judge (line 497) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_13 (line 511) | class SingleTask_Clock_13(SingleTask_Clock_General):
    method judge (line 513) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_14 (line 527) | class SingleTask_Clock_14(SingleTask_Clock_General):
    method judge (line 529) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_15 (line 543) | class SingleTask_Clock_15(SingleTask_Clock_General):
    method judge (line 545) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_16 (line 562) | class SingleTask_Clock_16(SingleTask_Clock_General):
    method judge (line 564) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_17 (line 578) | class SingleTask_Clock_17(SingleTask_Clock_General):
    method judge (line 580) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_18 (line 601) | class SingleTask_Clock_18(SingleTask_Clock_General):
    method judge (line 603) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_19 (line 627) | class SingleTask_Clock_19(SingleTask_Clock_General):
    method judge (line 629) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_20 (line 665) | class SingleTask_Clock_20(SingleTask_Clock_General):
    method judge (line 667) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_21 (line 688) | class SingleTask_Clock_21(SingleTask_Clock_General):
    method judge_page (line 690) | def judge_page(self, xml_compressed_tree):
    method judge (line 697) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_22 (line 716) | class SingleTask_Clock_22(SingleTask_Clock_General):
    method judge (line 718) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_23 (line 732) | class SingleTask_Clock_23(SingleTask_Clock_General):
    method judge (line 734) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_24 (line 753) | class SingleTask_Clock_24(SingleTask_Clock_General):
    method judge (line 755) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_25 (line 775) | class SingleTask_Clock_25(SingleTask_Clock_General):
    method judge (line 777) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_26 (line 794) | class SingleTask_Clock_26(SingleTask_Clock_General):
    method judge (line 796) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_27 (line 818) | class SingleTask_Clock_27(SingleTask_Clock_General):
    method judge (line 820) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_LLM_1 (line 841) | class SingleTask_Clock_LLM_1(SingleTask):
    method __init__ (line 842) | def __init__(self, args):
    method judge_page (line 851) | def judge_page(self, line):
    method _get_screenshot_path (line 856) | def _get_screenshot_path(self, line):
    method judge (line 864) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_LLM_2 (line 916) | class SingleTask_Clock_LLM_2(SingleTask):
    method __init__ (line 917) | def __init__(self, args):
    method judge_page (line 927) | def judge_page(self, line):
    method _get_screenshot_path (line 932) | def _get_screenshot_path(self, line):
    method judge (line 940) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_LLM_3 (line 1002) | class SingleTask_Clock_LLM_3(SingleTask):
    method __init__ (line 1003) | def __init__(self, args):
    method judge_page (line 1012) | def judge_page(self, line):
    method _get_screenshot_path (line 1017) | def _get_screenshot_path(self, line):
    method judge (line 1025) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_LLM_4 (line 1079) | class SingleTask_Clock_LLM_4(SingleTask):
    method __init__ (line 1080) | def __init__(self, args):
    method judge_page (line 1089) | def judge_page(self, line):
    method _get_screenshot_path (line 1094) | def _get_screenshot_path(self, line):
    method judge (line 1102) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_LLM_5 (line 1156) | class SingleTask_Clock_LLM_5(SingleTask):
    method __init__ (line 1157) | def __init__(self, args):
    method judge_page (line 1166) | def judge_page(self, line):
    method _get_screenshot_path (line 1171) | def _get_screenshot_path(self, line):
    method judge (line 1179) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_LLM_6 (line 1233) | class SingleTask_Clock_LLM_6(SingleTask):
    method __init__ (line 1234) | def __init__(self, args):
    method judge_page (line 1244) | def judge_page(self, line):
    method _get_screenshot_path (line 1249) | def _get_screenshot_path(self, line):
    method judge (line 1257) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_LLM_7 (line 1319) | class SingleTask_Clock_LLM_7(SingleTask):
    method __init__ (line 1320) | def __init__(self, args):
    method judge_page (line 1327) | def judge_page(self, line):
    method _get_screenshot_path (line 1332) | def _get_screenshot_path(self, line):
    method judge (line 1340) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_LLM_8 (line 1380) | class SingleTask_Clock_LLM_8(SingleTask):
    method __init__ (line 1381) | def __init__(self, args):
    method judge_page (line 1388) | def judge_page(self, line):
    method _get_screenshot_path (line 1393) | def _get_screenshot_path(self, line):
    method judge (line 1401) | def judge(self, xml_compressed_tree, line):
    method get_time (line 1442) | def get_time(self, str):
  class SingleTask_Clock_LLM_9 (line 1453) | class SingleTask_Clock_LLM_9(SingleTask):
    method __init__ (line 1454) | def __init__(self, args):
    method judge_page (line 1461) | def judge_page(self, line):
    method _get_screenshot_path (line 1466) | def _get_screenshot_path(self, line):
    method judge (line 1474) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_LLM_10 (line 1515) | class SingleTask_Clock_LLM_10(SingleTask):
    method __init__ (line 1516) | def __init__(self, args):
    method judge_page (line 1523) | def judge_page(self, line):
    method judge (line 1528) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_LLM_11 (line 1550) | class SingleTask_Clock_LLM_11(SingleTask):
    method __init__ (line 1551) | def __init__(self, args):
    method judge_page (line 1558) | def judge_page(self, line):
    method judge (line 1563) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_LLM_12 (line 1585) | class SingleTask_Clock_LLM_12(SingleTask):
    method __init__ (line 1586) | def __init__(self, args):
    method judge_page (line 1593) | def judge_page(self, line):
    method judge (line 1598) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_LLM_13 (line 1620) | class SingleTask_Clock_LLM_13(SingleTask):
    method __init__ (line 1621) | def __init__(self, args):
    method judge_page (line 1628) | def judge_page(self, line):
    method judge (line 1633) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_LLM_14 (line 1655) | class SingleTask_Clock_LLM_14(SingleTask):
    method __init__ (line 1656) | def __init__(self, args):
    method judge_page (line 1663) | def judge_page(self, line):
    method judge (line 1668) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_LLM_15 (line 1690) | class SingleTask_Clock_LLM_15(SingleTask):
    method __init__ (line 1691) | def __init__(self, args):
    method judge_page (line 1699) | def judge_page(self, line):
    method _get_screenshot_path (line 1704) | def _get_screenshot_path(self, line):
    method judge (line 1712) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_LLM_16 (line 1747) | class SingleTask_Clock_LLM_16(SingleTask):
    method __init__ (line 1748) | def __init__(self, args):
    method judge_page (line 1755) | def judge_page(self, line):
    method judge (line 1760) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_LLM_17 (line 1782) | class SingleTask_Clock_LLM_17(SingleTask):
    method __init__ (line 1783) | def __init__(self, args):
    method judge_page (line 1790) | def judge_page(self, line):
    method _get_screenshot_path (line 1795) | def _get_screenshot_path(self, line):
    method judge (line 1803) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_LLM_18 (line 1841) | class SingleTask_Clock_LLM_18(SingleTask):
    method __init__ (line 1842) | def __init__(self, args):
    method judge_page (line 1851) | def judge_page(self, line):
    method _get_screenshot_path (line 1856) | def _get_screenshot_path(self, line):
    method judge (line 1864) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_LLM_19 (line 1908) | class SingleTask_Clock_LLM_19(SingleTask):
    method __init__ (line 1909) | def __init__(self, args):
    method judge_page (line 1917) | def judge_page(self, line):
    method _get_screenshot_path (line 1922) | def _get_screenshot_path(self, line):
    method judge (line 1930) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_LLM_20 (line 1984) | class SingleTask_Clock_LLM_20(SingleTask):
    method __init__ (line 1985) | def __init__(self, args):
    method judge_page (line 1992) | def judge_page(self, line):
    method _get_screenshot_path (line 1997) | def _get_screenshot_path(self, line):
    method judge (line 2005) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_LLM_21 (line 2042) | class SingleTask_Clock_LLM_21(SingleTask):
    method __init__ (line 2043) | def __init__(self, args):
    method judge_page (line 2050) | def judge_page(self, xml_compressed_tree):
    method _get_screenshot_path (line 2057) | def _get_screenshot_path(self, line):
    method judge (line 2065) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_LLM_22 (line 2100) | class SingleTask_Clock_LLM_22(SingleTask):
    method __init__ (line 2101) | def __init__(self, args):
    method judge_page (line 2108) | def judge_page(self, line):
    method _get_screenshot_path (line 2113) | def _get_screenshot_path(self, line):
    method judge (line 2121) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_LLM_23 (line 2151) | class SingleTask_Clock_LLM_23(SingleTask):
    method __init__ (line 2152) | def __init__(self, args):
    method judge_page (line 2159) | def judge_page(self, line):
    method _get_screenshot_path (line 2164) | def _get_screenshot_path(self, line):
    method judge (line 2172) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_LLM_24 (line 2207) | class SingleTask_Clock_LLM_24(SingleTask):
    method __init__ (line 2208) | def __init__(self, args):
    method judge_page (line 2215) | def judge_page(self, line):
    method _get_screenshot_path (line 2220) | def _get_screenshot_path(self, line):
    method judge (line 2228) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_LLM_25 (line 2264) | class SingleTask_Clock_LLM_25(SingleTask):
    method __init__ (line 2265) | def __init__(self, args):
    method judge_page (line 2272) | def judge_page(self, line):
    method _get_screenshot_path (line 2277) | def _get_screenshot_path(self, line):
    method judge (line 2285) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_LLM_26 (line 2319) | class SingleTask_Clock_LLM_26(SingleTask):
    method __init__ (line 2320) | def __init__(self, args):
    method judge_page (line 2327) | def judge_page(self, line):
    method _get_screenshot_path (line 2332) | def _get_screenshot_path(self, line):
    method judge (line 2340) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Clock_LLM_27 (line 2378) | class SingleTask_Clock_LLM_27(SingleTask):
    method __init__ (line 2379) | def __init__(self, args):
    method judge_page (line 2386) | def judge_page(self, line):
    method _get_screenshot_path (line 2391) | def _get_screenshot_path(self, line):
    method judge (line 2399) | def judge(self, xml_compressed_tree, line):

FILE: evaluation/tasks/contacts/contacts.py
  class SingleTask_Contacts_1 (line 11) | class SingleTask_Contacts_1(SingleTask):
    method judge_page (line 13) | def judge_page(self, xml_compressed_tree):
    method judge (line 21) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Contacts_2 (line 46) | class SingleTask_Contacts_2(SingleTask):
    method judge_page (line 48) | def judge_page(self, xml_compressed_tree):
    method judge (line 55) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Contacts_3 (line 77) | class SingleTask_Contacts_3(SingleTask):
    method judge_page (line 79) | def judge_page(self, xml_compressed_tree):
    method judge (line 86) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Contacts_4 (line 108) | class SingleTask_Contacts_4(SingleTask):
    method judge_page (line 110) | def judge_page(self, xml_compressed_tree):
    method judge (line 117) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Contacts_5 (line 136) | class SingleTask_Contacts_5(SingleTask):
    method judge_page (line 138) | def judge_page(self, xml_compressed_tree):
    method judge (line 145) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Contacts_6 (line 177) | class SingleTask_Contacts_6(SingleTask):
    method judge_page (line 179) | def judge_page(self, xml_compressed_tree):
    method judge (line 186) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Contacts_7 (line 200) | class SingleTask_Contacts_7(SingleTask):
    method judge_page (line 202) | def judge_page(self, xml_compressed_tree):
    method judge (line 209) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Contacts_8 (line 228) | class SingleTask_Contacts_8(SingleTask):
    method judge_page (line 230) | def judge_page(self, xml_compressed_tree):
    method judge (line 237) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Contacts_9 (line 251) | class SingleTask_Contacts_9(SingleTask):
    method judge_page (line 253) | def judge_page(self, xml_compressed_tree):
    method judge (line 258) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Contacts_10 (line 278) | class SingleTask_Contacts_10(SingleTask):
    method judge_page (line 280) | def judge_page(self, xml_compressed_tree):
    method judge (line 287) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Contacts_11 (line 294) | class SingleTask_Contacts_11(SingleTask):
    method judge_page (line 296) | def judge_page(self, xml_compressed_tree):
    method judge (line 301) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Contacts_12 (line 310) | class SingleTask_Contacts_12(SingleTask):
    method judge (line 312) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Contacts_13 (line 325) | class SingleTask_Contacts_13(SingleTask):
    method judge (line 327) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Contacts_14 (line 340) | class SingleTask_Contacts_14(SingleTask):
    method judge (line 342) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Contacts_15 (line 355) | class SingleTask_Contacts_15(SingleTask):
    method judge (line 357) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Contacts_16 (line 370) | class SingleTask_Contacts_16(SingleTask):
    method judge_page (line 372) | def judge_page(self, xml_compressed_tree):
    method judge (line 377) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Contacts_LLM_1 (line 387) | class SingleTask_Contacts_LLM_1(SingleTask):
    method __init__ (line 388) | def __init__(self, args):
    method judge_page (line 396) | def judge_page(self, line):
    method _get_screenshot_path (line 401) | def _get_screenshot_path(self, line):
    method judge (line 409) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Contacts_LLM_2 (line 431) | class SingleTask_Contacts_LLM_2(SingleTask):
    method __init__ (line 432) | def __init__(self, args):
    method judge_page (line 440) | def judge_page(self, line):
    method _get_screenshot_path (line 445) | def _get_screenshot_path(self, line):
    method judge (line 453) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Contacts_LLM_3 (line 476) | class SingleTask_Contacts_LLM_3(SingleTask):
    method __init__ (line 477) | def __init__(self, args):
    method judge_page (line 485) | def judge_page(self, line):
    method _get_screenshot_path (line 490) | def _get_screenshot_path(self, line):
    method judge (line 498) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Contacts_LLM_4 (line 521) | class SingleTask_Contacts_LLM_4(SingleTask):
    method __init__ (line 522) | def __init__(self, args):
    method judge_page (line 529) | def judge_page(self, line):
    method _get_screenshot_path (line 534) | def _get_screenshot_path(self, line):
    method judge (line 542) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Contacts_LLM_5 (line 564) | class SingleTask_Contacts_LLM_5(SingleTask):
    method __init__ (line 565) | def __init__(self, args):
    method judge_page (line 573) | def judge_page(self, line):
    method _get_screenshot_path (line 578) | def _get_screenshot_path(self, line):
    method judge (line 586) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Contacts_LLM_6 (line 609) | class SingleTask_Contacts_LLM_6(SingleTask):
    method __init__ (line 610) | def __init__(self, args):
    method judge_page (line 617) | def judge_page(self, line):
    method _get_screenshot_path (line 622) | def _get_screenshot_path(self, line):
    method judge (line 630) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Contacts_LLM_7 (line 652) | class SingleTask_Contacts_LLM_7(SingleTask):
    method __init__ (line 653) | def __init__(self, args):
    method judge_page (line 660) | def judge_page(self, line):
    method _get_screenshot_path (line 665) | def _get_screenshot_path(self, line):
    method judge (line 673) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Contacts_LLM_8 (line 695) | class SingleTask_Contacts_LLM_8(SingleTask):
    method __init__ (line 696) | def __init__(self, args):
    method judge_page (line 703) | def judge_page(self, line):
    method _get_screenshot_path (line 708) | def _get_screenshot_path(self, line):
    method judge (line 716) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Contacts_LLM_9 (line 738) | class SingleTask_Contacts_LLM_9(SingleTask):
    method __init__ (line 739) | def __init__(self, args):
    method judge_page (line 747) | def judge_page(self, line):
    method _get_screenshot_path (line 752) | def _get_screenshot_path(self, line):
    method judge (line 760) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Contacts_LLM_10 (line 783) | class SingleTask_Contacts_LLM_10(SingleTask):
    method __init__ (line 784) | def __init__(self, args):
    method judge_page (line 791) | def judge_page(self, line):
    method _get_screenshot_path (line 796) | def _get_screenshot_path(self, line):
    method judge (line 804) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Contacts_LLM_11 (line 826) | class SingleTask_Contacts_LLM_11(SingleTask):
    method __init__ (line 827) | def __init__(self, args):
    method judge_page (line 834) | def judge_page(self, line):
    method _get_screenshot_path (line 839) | def _get_screenshot_path(self, line):
    method judge (line 847) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Contacts_LLM_12 (line 869) | class SingleTask_Contacts_LLM_12(SingleTask):
    method __init__ (line 870) | def __init__(self, args):
    method judge_page (line 877) | def judge_page(self, line):
    method judge (line 882) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Contacts_LLM_13 (line 903) | class SingleTask_Contacts_LLM_13(SingleTask):
    method __init__ (line 904) | def __init__(self, args):
    method judge_page (line 911) | def judge_page(self, line):
    method judge (line 916) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Contacts_LLM_14 (line 937) | class SingleTask_Contacts_LLM_14(SingleTask):
    method __init__ (line 938) | def __init__(self, args):
    method judge_page (line 945) | def judge_page(self, line):
    method judge (line 950) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Contacts_LLM_15 (line 971) | class SingleTask_Contacts_LLM_15(SingleTask):
    method __init__ (line 972) | def __init__(self, args):
    method judge_page (line 979) | def judge_page(self, line):
    method judge (line 984) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Contacts_LLM_16 (line 1005) | class SingleTask_Contacts_LLM_16(SingleTask):
    method __init__ (line 1006) | def __init__(self, args):
    method judge_page (line 1013) | def judge_page(self, line):
    method _get_screenshot_path (line 1018) | def _get_screenshot_path(self, line):
    method judge (line 1026) | def judge(self, xml_compressed_tree, line):

FILE: evaluation/tasks/gmail/gmail.py
  class SingleTask_Gmail_LLM_1 (line 11) | class SingleTask_Gmail_LLM_1(SingleTask):
    method __init__ (line 12) | def __init__(self, args):
    method judge_page (line 22) | def judge_page(self, line):
    method _get_screenshot_path (line 27) | def _get_screenshot_path(self, line):
    method judge (line 35) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Gmail_LLM_2 (line 58) | class SingleTask_Gmail_LLM_2(SingleTask):
    method __init__ (line 59) | def __init__(self, args):
    method judge_page (line 68) | def judge_page(self, line):
    method _get_screenshot_path (line 73) | def _get_screenshot_path(self, line):
    method judge (line 81) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Gmail_LLM_3 (line 103) | class SingleTask_Gmail_LLM_3(SingleTask):
    method __init__ (line 104) | def __init__(self, args):
    method judge_page (line 112) | def judge_page(self, line):
    method _get_text_content (line 117) | def _get_text_content(self, line):
    method judge (line 122) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Gmail_LLM_4 (line 142) | class SingleTask_Gmail_LLM_4(SingleTask):
    method __init__ (line 143) | def __init__(self, args):
    method judge_page (line 153) | def judge_page(self, line):
    method _get_screenshot_path (line 158) | def _get_screenshot_path(self, line):
    method judge (line 166) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Gmail_LLM_5 (line 186) | class SingleTask_Gmail_LLM_5(SingleTask):
    method __init__ (line 187) | def __init__(self, args):
    method judge_page (line 195) | def judge_page(self, line):
    method _get_screenshot_path (line 200) | def _get_screenshot_path(self, line):
    method judge (line 208) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Gmail_LLM_6 (line 228) | class SingleTask_Gmail_LLM_6(SingleTask):
    method __init__ (line 229) | def __init__(self, args):
    method judge_page (line 237) | def judge_page(self, line):
    method _get_text_content (line 242) | def _get_text_content(self, line):
    method judge (line 247) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Gmail_LLM_7 (line 270) | class SingleTask_Gmail_LLM_7(SingleTask):
    method __init__ (line 271) | def __init__(self, args):
    method judge_page (line 279) | def judge_page(self, line):
    method _get_text_content (line 284) | def _get_text_content(self, line):
    method judge (line 289) | def judge(self, xml_compressed_tree, line):

FILE: evaluation/tasks/llm_evaluator.py
  class LLMEvaluator (line 9) | class LLMEvaluator:
    method __init__ (line 10) | def __init__(self, api_key: str = "API_KEY"):
    method encode_image (line 18) | def encode_image(self, image_path: str) -> str:
    method _post (line 22) | def _post(self, payload: Dict[str, Any]) -> Dict[str, Any]:
    method _extract_message_content (line 34) | def _extract_message_content(self, response_json: Dict[str, Any]) -> str:
    method _parse_json_content (line 41) | def _parse_json_content(self, content: str) -> Dict[str, Any]:
    method analyze_text (line 50) | def analyze_text(self, text: str, task_prompt: str) -> Dict[str, Any]:
    method analyze_screenshot (line 67) | def analyze_screenshot(self, image_path: str, task_prompt: str) -> Dic...

FILE: evaluation/tasks/map_me/map.py
  class SingleTask_Mapme_1 (line 9) | class SingleTask_Mapme_1(SingleTask):
    method judge (line 11) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Mapme_2 (line 24) | class SingleTask_Mapme_2(SingleTask):
    method judge (line 26) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Mapme_3 (line 39) | class SingleTask_Mapme_3(SingleTask):
    method judge (line 41) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Mapme_4 (line 54) | class SingleTask_Mapme_4(SingleTask):
    method judge (line 56) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Mapme_5 (line 69) | class SingleTask_Mapme_5(SingleTask):
    method judge (line 71) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Mapme_6 (line 84) | class SingleTask_Mapme_6(SingleTask):
    method judge (line 86) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Mapme_7 (line 99) | class SingleTask_Mapme_7(SingleTask):
    method judge (line 101) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Mapme_8 (line 114) | class SingleTask_Mapme_8(SingleTask):
    method judge (line 116) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Mapme_9 (line 129) | class SingleTask_Mapme_9(SingleTask):
    method judge (line 131) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Mapme_10 (line 144) | class SingleTask_Mapme_10(SingleTask):
    method judge (line 146) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Mapme_11 (line 159) | class SingleTask_Mapme_11(SingleTask):
    method judge_page (line 161) | def judge_page(self, xml_compressed_tree):
    method judge (line 171) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Mapme_12 (line 189) | class SingleTask_Mapme_12(SingleTask):
    method judge_page (line 191) | def judge_page(self, xml_compressed_tree):
    method judge (line 201) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Mapme_13 (line 208) | class SingleTask_Mapme_13(SingleTask):
    method judge_page (line 210) | def judge_page(self, xml_compressed_tree):
    method judge (line 220) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Mapme_14 (line 227) | class SingleTask_Mapme_14(SingleTask):
    method judge_page (line 229) | def judge_page(self, xml_compressed_tree):
    method judge (line 239) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Mapme_15 (line 246) | class SingleTask_Mapme_15(SingleTask):
    method judge_page (line 248) | def judge_page(self, xml_compressed_tree):
    method judge (line 258) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Mapme_LLM_1 (line 265) | class SingleTask_Mapme_LLM_1(SingleTask):
    method __init__ (line 266) | def __init__(self, args):
    method judge_page (line 273) | def judge_page(self, line):
    method _get_text_content (line 278) | def _get_text_content(self, line):
    method judge (line 283) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Mapme_LLM_2 (line 311) | class SingleTask_Mapme_LLM_2(SingleTask):
    method __init__ (line 312) | def __init__(self, args):
    method judge_page (line 319) | def judge_page(self, line):
    method _get_text_content (line 324) | def _get_text_content(self, line):
    method judge (line 329) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Mapme_LLM_3 (line 357) | class SingleTask_Mapme_LLM_3(SingleTask):
    method __init__ (line 358) | def __init__(self, args):
    method judge_page (line 365) | def judge_page(self, line):
    method _get_text_content (line 370) | def _get_text_content(self, line):
    method judge (line 375) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Mapme_LLM_4 (line 403) | class SingleTask_Mapme_LLM_4(SingleTask):
    method __init__ (line 404) | def __init__(self, args):
    method judge_page (line 411) | def judge_page(self, line):
    method _get_text_content (line 416) | def _get_text_content(self, line):
    method judge (line 421) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Mapme_LLM_5 (line 449) | class SingleTask_Mapme_LLM_5(SingleTask):
    method __init__ (line 450) | def __init__(self, args):
    method judge_page (line 457) | def judge_page(self, line):
    method _get_text_content (line 462) | def _get_text_content(self, line):
    method judge (line 467) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Mapme_LLM_6 (line 495) | class SingleTask_Mapme_LLM_6(SingleTask):
    method __init__ (line 496) | def __init__(self, args):
    method judge_page (line 503) | def judge_page(self, line):
    method _get_text_content (line 508) | def _get_text_content(self, line):
    method judge (line 513) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Mapme_LLM_7 (line 541) | class SingleTask_Mapme_LLM_7(SingleTask):
    method __init__ (line 542) | def __init__(self, args):
    method judge_page (line 549) | def judge_page(self, line):
    method _get_text_content (line 554) | def _get_text_content(self, line):
    method judge (line 559) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Mapme_LLM_8 (line 587) | class SingleTask_Mapme_LLM_8(SingleTask):
    method __init__ (line 588) | def __init__(self, args):
    method judge_page (line 595) | def judge_page(self, line):
    method _get_text_content (line 600) | def _get_text_content(self, line):
    method judge (line 605) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Mapme_LLM_9 (line 633) | class SingleTask_Mapme_LLM_9(SingleTask):
    method __init__ (line 634) | def __init__(self, args):
    method judge_page (line 641) | def judge_page(self, line):
    method _get_text_content (line 646) | def _get_text_content(self, line):
    method judge (line 651) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Mapme_LLM_10 (line 679) | class SingleTask_Mapme_LLM_10(SingleTask):
    method __init__ (line 680) | def __init__(self, args):
    method judge_page (line 687) | def judge_page(self, line):
    method _get_text_content (line 692) | def _get_text_content(self, line):
    method judge (line 697) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Mapme_LLM_11 (line 725) | class SingleTask_Mapme_LLM_11(SingleTask):
    method __init__ (line 726) | def __init__(self, args):
    method judge_page (line 733) | def judge_page(self, line):
    method _get_screenshot_path (line 738) | def _get_screenshot_path(self, line):
    method judge (line 746) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Mapme_LLM_12 (line 779) | class SingleTask_Mapme_LLM_12(SingleTask):
    method __init__ (line 780) | def __init__(self, args):
    method judge_page (line 787) | def judge_page(self, line):
    method _get_screenshot_path (line 792) | def _get_screenshot_path(self, line):
    method judge (line 800) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Mapme_LLM_13 (line 828) | class SingleTask_Mapme_LLM_13(SingleTask):
    method __init__ (line 829) | def __init__(self, args):
    method judge_page (line 836) | def judge_page(self, line):
    method _get_screenshot_path (line 841) | def _get_screenshot_path(self, line):
    method judge (line 849) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Mapme_LLM_14 (line 877) | class SingleTask_Mapme_LLM_14(SingleTask):
    method __init__ (line 878) | def __init__(self, args):
    method judge_page (line 885) | def judge_page(self, line):
    method _get_screenshot_path (line 890) | def _get_screenshot_path(self, line):
    method judge (line 898) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Mapme_LLM_15 (line 926) | class SingleTask_Mapme_LLM_15(SingleTask):
    method __init__ (line 927) | def __init__(self, args):
    method judge_page (line 934) | def judge_page(self, line):
    method _get_screenshot_path (line 939) | def _get_screenshot_path(self, line):
    method judge (line 947) | def judge(self, xml_compressed_tree, line):

FILE: evaluation/tasks/pimusic/pimusic.py
  function extract_songs (line 17) | def extract_songs(xml_compressed_tree) -> List[Dict]:
  function parse_duration (line 45) | def parse_duration(duration):
  function extract_info (line 55) | def extract_info(xml_compressed_tree):
  function check_selected (line 65) | def check_selected(xml_compressed_tree, key_filter):
  class SingleTask_pimusic_1 (line 83) | class SingleTask_pimusic_1(SingleTask):
    method judge_page (line 85) | def judge_page(self, line):
    method judge (line 90) | def judge(self, xml_compressed_tree, line):
  class SingleTask_pimusic_2 (line 103) | class SingleTask_pimusic_2(SingleTask):
    method judge_page (line 105) | def judge_page(self, line):
    method judge (line 110) | def judge(self, xml_compressed_tree, line):
  class SingleTask_pimusic_3 (line 123) | class SingleTask_pimusic_3(SingleTask):
    method judge_page (line 125) | def judge_page(self, line):
    method judge (line 130) | def judge(self, xml_compressed_tree, line):
  class SingleTask_pimusic_4 (line 143) | class SingleTask_pimusic_4(SingleTask):
    method judge_page (line 145) | def judge_page(self, line):
    method judge (line 150) | def judge(self, xml_compressed_tree, line):
  class SingleTask_pimusic_5 (line 163) | class SingleTask_pimusic_5(SingleTask):
    method judge_page (line 165) | def judge_page(self, line):
    method judge (line 170) | def judge(self, xml_compressed_tree, line):
  class SingleTask_pimusic_6 (line 183) | class SingleTask_pimusic_6(SingleTask):
    method judge (line 185) | def judge(self, xml_compressed_tree, line):
  class SingleTask_pimusic_7 (line 198) | class SingleTask_pimusic_7(SingleTask):
    method judge_page (line 202) | def judge_page(self, xml_compressed_tree):
    method judge (line 209) | def judge(self, xml_compressed_tree, line):
  class SingleTask_pimusic_8 (line 237) | class SingleTask_pimusic_8(SingleTask):
    method judge_page (line 241) | def judge_page(self, xml_compressed_tree):
    method judge (line 248) | def judge(self, xml_compressed_tree, line):
  class SingleTask_pimusic_9 (line 283) | class SingleTask_pimusic_9(SingleTask):
    method judge_page (line 285) | def judge_page(self, xml_compressed_tree):
    method judge (line 288) | def judge(self, xml_compressed_tree, line):
  class SingleTask_pimusic_10 (line 309) | class SingleTask_pimusic_10(SingleTask):
    method judge_page (line 311) | def judge_page(self, xml_compressed_tree):
    method judge (line 318) | def judge(self, xml_compressed_tree, line):
  class SingleTask_pimusic_11 (line 334) | class SingleTask_pimusic_11(SingleTask):
    method judge_page (line 336) | def judge_page(self, xml_compressed_tree):
    method judge (line 343) | def judge(self, xml_compressed_tree, line):
  class SingleTask_pimusic_12 (line 359) | class SingleTask_pimusic_12(SingleTask):
    method judge_page (line 362) | def judge_page(self, xml_compressed_tree):
    method judge (line 365) | def judge(self, xml_compressed_tree, line):
  class SingleTask_pimusic_LLM_1 (line 390) | class SingleTask_pimusic_LLM_1(SingleTask):
    method __init__ (line 391) | def __init__(self, args):
    method judge_page (line 399) | def judge_page(self, line):
    method judge (line 404) | def judge(self, xml_compressed_tree, line):
  class SingleTask_pimusic_LLM_2 (line 426) | class SingleTask_pimusic_LLM_2(SingleTask):
    method __init__ (line 427) | def __init__(self, args):
    method judge_page (line 435) | def judge_page(self, line):
    method judge (line 440) | def judge(self, xml_compressed_tree, line):
  class SingleTask_pimusic_LLM_3 (line 462) | class SingleTask_pimusic_LLM_3(SingleTask):
    method __init__ (line 463) | def __init__(self, args):
    method judge_page (line 471) | def judge_page(self, line):
    method judge (line 476) | def judge(self, xml_compressed_tree, line):
  class SingleTask_pimusic_LLM_4 (line 498) | class SingleTask_pimusic_LLM_4(SingleTask):
    method __init__ (line 499) | def __init__(self, args):
    method judge_page (line 507) | def judge_page(self, line):
    method judge (line 512) | def judge(self, xml_compressed_tree, line):
  class SingleTask_pimusic_LLM_5 (line 534) | class SingleTask_pimusic_LLM_5(SingleTask):
    method __init__ (line 535) | def __init__(self, args):
    method judge_page (line 543) | def judge_page(self, line):
    method judge (line 548) | def judge(self, xml_compressed_tree, line):
  class SingleTask_pimusic_LLM_6 (line 570) | class SingleTask_pimusic_LLM_6(SingleTask):
    method __init__ (line 571) | def __init__(self, args):
    method judge_page (line 579) | def judge_page(self, line):
    method judge (line 584) | def judge(self, xml_compressed_tree, line):
  class SingleTask_pimusic_LLM_7 (line 606) | class SingleTask_pimusic_LLM_7(SingleTask):
    method __init__ (line 607) | def __init__(self, args):
    method judge_page (line 614) | def judge_page(self, line):
    method _get_screenshot_path (line 619) | def _get_screenshot_path(self, line):
    method judge (line 627) | def judge(self, xml_compressed_tree, line):
  class SingleTask_pimusic_LLM_8 (line 659) | class SingleTask_pimusic_LLM_8(SingleTask):
    method __init__ (line 660) | def __init__(self, args):
    method judge_page (line 667) | def judge_page(self, line):
    method _get_screenshot_path (line 672) | def _get_screenshot_path(self, line):
    method judge (line 680) | def judge(self, xml_compressed_tree, line):
  class SingleTask_pimusic_LLM_9 (line 716) | class SingleTask_pimusic_LLM_9(SingleTask):
    method __init__ (line 717) | def __init__(self, args):
    method judge_page (line 725) | def judge_page(self, line):
    method _get_screenshot_path (line 730) | def _get_screenshot_path(self, line):
    method judge (line 738) | def judge(self, xml_compressed_tree, line):
  class SingleTask_pimusic_LLM_10 (line 776) | class SingleTask_pimusic_LLM_10(SingleTask):
    method __init__ (line 777) | def __init__(self, args):
    method judge_page (line 785) | def judge_page(self, line):
    method _get_screenshot_path (line 790) | def _get_screenshot_path(self, line):
    method judge (line 798) | def judge(self, xml_compressed_tree, line):
  class SingleTask_pimusic_LLM_11 (line 830) | class SingleTask_pimusic_LLM_11(SingleTask):
    method __init__ (line 831) | def __init__(self, args):
    method judge_page (line 838) | def judge_page(self, line):
    method _get_screenshot_path (line 843) | def _get_screenshot_path(self, line):
    method judge (line 851) | def judge(self, xml_compressed_tree, line):
  class SingleTask_pimusic_LLM_12 (line 883) | class SingleTask_pimusic_LLM_12(SingleTask):
    method __init__ (line 884) | def __init__(self, args):
    method judge_page (line 891) | def judge_page(self, line):
    method _get_screenshot_path (line 896) | def _get_screenshot_path(self, line):
    method judge (line 904) | def judge(self, xml_compressed_tree, line):

FILE: evaluation/tasks/reddit/reddit.py
  class SingleTask_Reddit_LLM_1 (line 10) | class SingleTask_Reddit_LLM_1(SingleTask):
    method __init__ (line 11) | def __init__(self, args):
    method judge_page (line 20) | def judge_page(self, line):
    method _get_screenshot_path (line 25) | def _get_screenshot_path(self, line):
    method judge (line 33) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Reddit_LLM_2 (line 50) | class SingleTask_Reddit_LLM_2(SingleTask):
    method __init__ (line 51) | def __init__(self, args):
    method judge_page (line 59) | def judge_page(self, line):
    method _get_screenshot_path (line 64) | def _get_screenshot_path(self, line):
    method judge (line 72) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Reddit_LLM_3 (line 89) | class SingleTask_Reddit_LLM_3(SingleTask):
    method __init__ (line 90) | def __init__(self, args):
    method judge_page (line 100) | def judge_page(self, line):
    method _get_screenshot_path (line 105) | def _get_screenshot_path(self, line):
    method judge (line 113) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Reddit_LLM_4 (line 133) | class SingleTask_Reddit_LLM_4(SingleTask):
    method __init__ (line 134) | def __init__(self, args):
    method judge_page (line 144) | def judge_page(self, line):
    method _get_screenshot_path (line 149) | def _get_screenshot_path(self, line):
    method judge (line 157) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Reddit_LLM_5 (line 177) | class SingleTask_Reddit_LLM_5(SingleTask):
    method __init__ (line 178) | def __init__(self, args):
    method judge_page (line 187) | def judge_page(self, line):
    method _get_screenshot_path (line 192) | def _get_screenshot_path(self, line):
    method judge (line 200) | def judge(self, xml_compressed_tree, line):

FILE: evaluation/tasks/setting/setting.py
  class SingleTask_Setting_0 (line 9) | class SingleTask_Setting_0(SingleTask):
    method judge (line 11) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Setting_1 (line 21) | class SingleTask_Setting_1(SingleTask):
    method judge_page (line 23) | def judge_page(self, xml_compressed_tree):
    method judge (line 30) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Setting_2 (line 42) | class SingleTask_Setting_2(SingleTask):
    method judge_page (line 44) | def judge_page(self, xml_compressed_tree):
    method judge (line 51) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Setting_3 (line 61) | class SingleTask_Setting_3(SingleTask):
    method judge (line 63) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Setting_4 (line 73) | class SingleTask_Setting_4(SingleTask):
    method judge_page (line 75) | def judge_page(self, xml_compressed_tree):
    method judge (line 82) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Setting_5 (line 92) | class SingleTask_Setting_5(SingleTask):
    method judge_page (line 94) | def judge_page(self, xml_compressed_tree):
    method judge (line 101) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Setting_6 (line 113) | class SingleTask_Setting_6(SingleTask):
    method judge_page (line 115) | def judge_page(self, xml_compressed_tree):
    method judge (line 122) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Setting_7 (line 136) | class SingleTask_Setting_7(SingleTask):
    method judge_page (line 138) | def judge_page(self, xml_compressed_tree):
    method setting_ch (line 145) | def setting_ch(self, xml_compressed_tree):
    method judge (line 170) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Setting_8 (line 198) | class SingleTask_Setting_8(SingleTask):
    method judge_page (line 200) | def judge_page(self, xml_compressed_tree):
    method judge (line 207) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Setting_9 (line 225) | class SingleTask_Setting_9(SingleTask):
    method judge (line 227) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Setting_10 (line 239) | class SingleTask_Setting_10(SingleTask):
    method judge (line 241) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Setting_11 (line 253) | class SingleTask_Setting_11(SingleTask):
    method judge_page (line 255) | def judge_page(self, xml_compressed_tree):
    method judge (line 260) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Setting_12 (line 272) | class SingleTask_Setting_12(SingleTask):
    method judge_page (line 274) | def judge_page(self, xml_compressed_tree):
    method judge (line 279) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Setting_13 (line 291) | class SingleTask_Setting_13(SingleTask):
    method judge_page (line 293) | def judge_page(self, xml_compressed_tree):
    method judge (line 300) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Setting_14 (line 315) | class SingleTask_Setting_14(SingleTask):
    method judge (line 317) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Setting_15 (line 336) | class SingleTask_Setting_15(SingleTask):
    method judge_page (line 338) | def judge_page(self, xml_compressed_tree):
    method judge (line 345) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Setting_16 (line 361) | class SingleTask_Setting_16(SingleTask):
    method judge (line 363) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Setting_17 (line 378) | class SingleTask_Setting_17(SingleTask):
    method judge (line 380) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Setting_18 (line 399) | class SingleTask_Setting_18(SingleTask):
    method judge_page (line 401) | def judge_page(self, xml_compressed_tree):
    method setting_18_ch (line 408) | def setting_18_ch(self, xml_compressed_tree):
    method judge (line 433) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Setting_19 (line 462) | class SingleTask_Setting_19(SingleTask):
    method judge_page (line 464) | def judge_page(self, xml_compressed_tree):
    method judge (line 474) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Setting_20 (line 486) | class SingleTask_Setting_20(SingleTask):
    method judge (line 488) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Setting_21 (line 500) | class SingleTask_Setting_21(SingleTask):
    method judge_page (line 502) | def judge_page(self, xml_compressed_tree):
    method judge (line 510) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Setting_22 (line 516) | class SingleTask_Setting_22(SingleTask):
    method judge (line 518) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Setting_LLM_0 (line 540) | class SingleTask_Setting_LLM_0(SingleTask):
    method __init__ (line 541) | def __init__(self, args):
    method judge_page (line 549) | def judge_page(self, line):
    method _get_screenshot_path (line 554) | def _get_screenshot_path(self, line):
    method judge (line 562) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Setting_LLM_5 (line 583) | class SingleTask_Setting_LLM_5(SingleTask):
    method __init__ (line 584) | def __init__(self, args):
    method judge_page (line 591) | def judge_page(self, line):
    method _get_screenshot_path (line 596) | def _get_screenshot_path(self, line):
    method judge (line 604) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Setting_LLM_7 (line 632) | class SingleTask_Setting_LLM_7(SingleTask):
    method __init__ (line 633) | def __init__(self, args):
    method judge_page (line 640) | def judge_page(self, line):
    method _get_screenshot_path (line 645) | def _get_screenshot_path(self, line):
    method judge (line 653) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Setting_LLM_14 (line 697) | class SingleTask_Setting_LLM_14(SingleTask):
    method __init__ (line 698) | def __init__(self, args):
    method judge_page (line 705) | def judge_page(self, line):
    method judge (line 710) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Setting_LLM_18 (line 731) | class SingleTask_Setting_LLM_18(SingleTask):
    method __init__ (line 732) | def __init__(self, args):
    method judge_page (line 739) | def judge_page(self, line):
    method _get_screenshot_path (line 744) | def _get_screenshot_path(self, line):
    method judge (line 752) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Setting_LLM_1 (line 797) | class SingleTask_Setting_LLM_1(SingleTask):
    method __init__ (line 798) | def __init__(self, args):
    method judge_page (line 805) | def judge_page(self, line):
    method _get_screenshot_path (line 810) | def _get_screenshot_path(self, line):
    method judge (line 818) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Setting_LLM_2 (line 846) | class SingleTask_Setting_LLM_2(SingleTask):
    method __init__ (line 847) | def __init__(self, args):
    method judge_page (line 854) | def judge_page(self, line):
    method _get_screenshot_path (line 859) | def _get_screenshot_path(self, line):
    method judge (line 867) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Setting_LLM_3 (line 893) | class SingleTask_Setting_LLM_3(SingleTask):
    method __init__ (line 894) | def __init__(self, args):
    method judge_page (line 902) | def judge_page(self, line):
    method _get_screenshot_path (line 907) | def _get_screenshot_path(self, line):
    method judge (line 915) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Setting_LLM_4 (line 943) | class SingleTask_Setting_LLM_4(SingleTask):
    method __init__ (line 944) | def __init__(self, args):
    method judge_page (line 951) | def judge_page(self, line):
    method _get_screenshot_path (line 956) | def _get_screenshot_path(self, line):
    method judge (line 964) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Setting_LLM_6 (line 990) | class SingleTask_Setting_LLM_6(SingleTask):
    method __init__ (line 991) | def __init__(self, args):
    method judge_page (line 998) | def judge_page(self, line):
    method _get_screenshot_path (line 1003) | def _get_screenshot_path(self, line):
    method judge (line 1011) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Setting_LLM_8 (line 1041) | class SingleTask_Setting_LLM_8(SingleTask):
    method __init__ (line 1042) | def __init__(self, args):
    method judge_page (line 1049) | def judge_page(self, line):
    method _get_screenshot_path (line 1054) | def _get_screenshot_path(self, line):
    method judge (line 1062) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Setting_LLM_9 (line 1095) | class SingleTask_Setting_LLM_9(SingleTask):
    method __init__ (line 1096) | def __init__(self, args):
    method judge_page (line 1103) | def judge_page(self, line):
    method judge (line 1108) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Setting_LLM_10 (line 1129) | class SingleTask_Setting_LLM_10(SingleTask):
    method __init__ (line 1130) | def __init__(self, args):
    method judge_page (line 1137) | def judge_page(self, line):
    method judge (line 1142) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Setting_LLM_11 (line 1163) | class SingleTask_Setting_LLM_11(SingleTask):
    method __init__ (line 1164) | def __init__(self, args):
    method judge_page (line 1171) | def judge_page(self, line):
    method _get_screenshot_path (line 1176) | def _get_screenshot_path(self, line):
    method judge (line 1184) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Setting_LLM_12 (line 1210) | class SingleTask_Setting_LLM_12(SingleTask):
    method __init__ (line 1211) | def __init__(self, args):
    method judge_page (line 1218) | def judge_page(self, line):
    method _get_screenshot_path (line 1223) | def _get_screenshot_path(self, line):
    method judge (line 1231) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Setting_LLM_13 (line 1257) | class SingleTask_Setting_LLM_13(SingleTask):
    method __init__ (line 1258) | def __init__(self, args):
    method judge_page (line 1265) | def judge_page(self, line):
    method _get_screenshot_path (line 1270) | def _get_screenshot_path(self, line):
    method judge (line 1278) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Setting_LLM_15 (line 1309) | class SingleTask_Setting_LLM_15(SingleTask):
    method __init__ (line 1310) | def __init__(self, args):
    method judge_page (line 1318) | def judge_page(self, line):
    method _get_screenshot_path (line 1323) | def _get_screenshot_path(self, line):
    method judge (line 1331) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Setting_LLM_16 (line 1364) | class SingleTask_Setting_LLM_16(SingleTask):
    method __init__ (line 1365) | def __init__(self, args):
    method judge_page (line 1372) | def judge_page(self, line):
    method judge (line 1377) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Setting_LLM_17 (line 1398) | class SingleTask_Setting_LLM_17(SingleTask):
    method __init__ (line 1399) | def __init__(self, args):
    method judge_page (line 1406) | def judge_page(self, line):
    method judge (line 1411) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Setting_LLM_19 (line 1432) | class SingleTask_Setting_LLM_19(SingleTask):
    method __init__ (line 1433) | def __init__(self, args):
    method judge_page (line 1440) | def judge_page(self, line):
    method _get_screenshot_path (line 1445) | def _get_screenshot_path(self, line):
    method judge (line 1453) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Setting_LLM_20 (line 1481) | class SingleTask_Setting_LLM_20(SingleTask):
    method __init__ (line 1482) | def __init__(self, args):
    method judge_page (line 1489) | def judge_page(self, line):
    method judge (line 1494) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Setting_LLM_21 (line 1515) | class SingleTask_Setting_LLM_21(SingleTask):
    method __init__ (line 1516) | def __init__(self, args):
    method judge_page (line 1524) | def judge_page(self, line):
    method _get_screenshot_path (line 1529) | def _get_screenshot_path(self, line):
    method judge (line 1537) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Setting_LLM_22 (line 1563) | class SingleTask_Setting_LLM_22(SingleTask):
    method __init__ (line 1564) | def __init__(self, args):
    method judge_page (line 1571) | def judge_page(self, line):
    method judge (line 1576) | def judge(self, xml_compressed_tree, line):

FILE: evaluation/tasks/tiktok/tiktok.py
  class SingleTask_TikTok_LLM_1 (line 10) | class SingleTask_TikTok_LLM_1(SingleTask):
    method __init__ (line 11) | def __init__(self, args):
    method judge_page (line 20) | def judge_page(self, line):
    method _get_screenshot_path (line 25) | def _get_screenshot_path(self, line):
    method judge (line 33) | def judge(self, xml_compressed_tree, line):
  class SingleTask_TikTok_LLM_2 (line 47) | class SingleTask_TikTok_LLM_2(SingleTask):
    method __init__ (line 48) | def __init__(self, args):
    method judge_page (line 56) | def judge_page(self, line):
    method _get_text_content (line 61) | def _get_text_content(self, line):
    method judge (line 66) | def judge(self, xml_compressed_tree, line):
  class SingleTask_TikTok_LLM_3 (line 81) | class SingleTask_TikTok_LLM_3(SingleTask):
    method __init__ (line 82) | def __init__(self, args):
    method judge_page (line 91) | def judge_page(self, line):
    method _get_screenshot_path (line 96) | def _get_screenshot_path(self, line):
    method judge (line 104) | def judge(self, xml_compressed_tree, line):
  class SingleTask_TikTok_LLM_4 (line 119) | class SingleTask_TikTok_LLM_4(SingleTask):
    method __init__ (line 120) | def __init__(self, args):
    method judge_page (line 129) | def judge_page(self, line):
    method _get_text_content (line 134) | def _get_text_content(self, line):
    method judge (line 139) | def judge(self, xml_compressed_tree, line):
  class SingleTask_TikTok_LLM_5 (line 154) | class SingleTask_TikTok_LLM_5(SingleTask):
    method __init__ (line 155) | def __init__(self, args):
    method judge_page (line 164) | def judge_page(self, line):
    method _get_screenshot_path (line 169) | def _get_screenshot_path(self, line):
    method judge (line 177) | def judge(self, xml_compressed_tree, line):
  class SingleTask_TikTok_LLM_6 (line 192) | class SingleTask_TikTok_LLM_6(SingleTask):
    method __init__ (line 193) | def __init__(self, args):
    method judge_page (line 202) | def judge_page(self, line):
    method _get_screenshot_path (line 207) | def _get_screenshot_path(self, line):
    method judge (line 215) | def judge(self, xml_compressed_tree, line):

FILE: evaluation/tasks/zoom/zoom.py
  class SingleTask_Zoom_1 (line 10) | class SingleTask_Zoom_1(SingleTask):
    method judge_page (line 12) | def judge_page(self, xml_compressed_tree):
    method judge (line 17) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Zoom_2 (line 29) | class SingleTask_Zoom_2(SingleTask):
    method judge_page (line 31) | def judge_page(self, xml_compressed_tree):
    method judge (line 36) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Zoom_3 (line 51) | class SingleTask_Zoom_3(SingleTask):
    method judge_page (line 53) | def judge_page(self, xml_compressed_tree):
    method judge (line 58) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Zoom_4 (line 80) | class SingleTask_Zoom_4(SingleTask):
    method judge_page (line 82) | def judge_page(self, xml_compressed_tree):
    method _get_screenshot_path (line 87) | def _get_screenshot_path(self, line):
    method judge (line 95) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Zoom_5 (line 116) | class SingleTask_Zoom_5(SingleTask):
    method judge_page (line 118) | def judge_page(self, xml_compressed_tree):
    method judge (line 124) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Zoom_LLM_1 (line 136) | class SingleTask_Zoom_LLM_1(SingleTask):
    method __init__ (line 137) | def __init__(self, args):
    method judge_page (line 145) | def judge_page(self, line):
    method _get_screenshot_path (line 150) | def _get_screenshot_path(self, line):
    method judge (line 158) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Zoom_LLM_2 (line 187) | class SingleTask_Zoom_LLM_2(SingleTask):
    method __init__ (line 188) | def __init__(self, args):
    method judge_page (line 197) | def judge_page(self, line):
    method _get_screenshot_path (line 202) | def _get_screenshot_path(self, line):
    method judge (line 210) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Zoom_LLM_3 (line 244) | class SingleTask_Zoom_LLM_3(SingleTask):
    method __init__ (line 245) | def __init__(self, args):
    method judge_page (line 255) | def judge_page(self, line):
    method _get_screenshot_path (line 260) | def _get_screenshot_path(self, line):
    method judge (line 268) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Zoom_LLM_4 (line 310) | class SingleTask_Zoom_LLM_4(SingleTask):
    method __init__ (line 311) | def __init__(self, args):
    method judge_page (line 319) | def judge_page(self, line):
    method _get_screenshot_path (line 324) | def _get_screenshot_path(self, line):
    method judge (line 332) | def judge(self, xml_compressed_tree, line):
  class SingleTask_Zoom_LLM_5 (line 363) | class SingleTask_Zoom_LLM_5(SingleTask):
    method __init__ (line 364) | def __init__(self, args):
    method judge_page (line 372) | def judge_page(self, line):
    method _get_screenshot_path (line 377) | def _get_screenshot_path(self, line):
    method judge (line 385) | def judge(self, xml_compressed_tree, line):

FILE: evaluation/utils.py
  function find_matching_subtrees (line 10) | def find_matching_subtrees(tree, search_str):
  function find_subtrees_of_parents_with_key (line 55) | def find_subtrees_of_parents_with_key(tree, search_key):
  function get_avd_serial_number (line 87) | def get_avd_serial_number(avd_name):
  function extract_bounds (line 112) | def extract_bounds(node, path=""):
  function execute_adb (line 124) | def execute_adb(adb_command, type="cmd", output=True, port=None):
  function list_all_devices (line 144) | def list_all_devices(type="cmd", port=None):
  function get_adb_device_name (line 156) | def get_adb_device_name(avd_name=None):
  function find_free_ports (line 167) | def find_free_ports(start_port=6060):
  function clone_avd (line 179) | def clone_avd(src_avd_name, tar_avd_name, android_avd_home):

FILE: generate_result.py
  function find_all_task_files (line 20) | def find_all_task_files(all_task_config_path) -> List[str]:
  function find_all_traces_files (line 32) | def find_all_traces_files(traces_path_fold) -> Dict[str, Dict[str, str]]:
  function evaluate_all_tasks (line 52) | def evaluate_all_tasks(tasks: List[Evaluation_Task]):
  function evaluate_input_dir (line 62) | def evaluate_input_dir(input_dir, task_yamls, create_time, args):
  function calculate_cloud_percentage (line 87) | def calculate_cloud_percentage(output_folder, agent_name, input_folder):
  function output_to_excel (line 161) | def output_to_excel(args):
  function parse_args (line 212) | def parse_args():
  function main (line 226) | def main():

FILE: ios_agent/actions.py
  function _physical_to_logical (line 35) | def _physical_to_logical(x: int, y: int) -> Tuple[int, int]:
  function _logical_to_physical (line 49) | def _logical_to_physical(x: int, y: int) -> Tuple[int, int]:
  function _get_wda_session_url (line 63) | def _get_wda_session_url(wda_url: str, session_id: Optional[str], endpoi...
  class IOSActionHandler (line 72) | class IOSActionHandler:
    method __init__ (line 75) | def __init__(
    method tap (line 83) | def tap(self, x: int, y: int, delay: float = 1.0) -> bool:
    method double_tap (line 112) | def double_tap(self, x: int, y: int, delay: float = 1.0) -> bool:
    method long_press (line 145) | def long_press(self, x: int, y: int, duration: float = 3.0, delay: flo...
    method swipe (line 175) | def swipe(
    method back (line 210) | def back(self, delay: float = 1.0) -> bool:
    method home (line 245) | def home(self, delay: float = 1.0) -> bool:
    method launch_app (line 257) | def launch_app(self, app_name: str, delay: float = 1.0) -> bool:
    method type_text (line 278) | def type_text(self, text: str, frequency: int = 60) -> bool:
    method clear_text (line 293) | def clear_text(self) -> bool:
    method hide_keyboard (line 315) | def hide_keyboard(self) -> bool:
    method get_current_app (line 326) | def get_current_app(self) -> str:
    method get_screen_size (line 351) | def get_screen_size(self) -> tuple[int, int]:

FILE: ios_agent/application/mail/pipeline.py
  class MailConfig (line 30) | class MailConfig:
    method __init__ (line 33) | def __init__(self, task_dir=None, screenshot_dir=None):
  function _set_single_step_instruction (line 41) | def _set_single_step_instruction(task_agent: IOSTask, instruction: str, ...
  function create_mail_pipeline_overview (line 72) | def create_mail_pipeline_overview() -> str:
  function build_step_instructions (line 81) | def build_step_instructions():
  function _get_active_bundle_id (line 118) | def _get_active_bundle_id(wda_url: str) -> Optional[str]:
  function _is_mail_bundle (line 138) | def _is_mail_bundle(bundle_id: Optional[str]) -> bool:
  function main (line 146) | def main():

FILE: ios_agent/application/mail/rag_system.py
  class MailScreenshotAnalyzer (line 27) | class MailScreenshotAnalyzer:
    method __init__ (line 30) | def __init__(self, agent):
    method analyze_email_screenshot (line 33) | def analyze_email_screenshot(self, screenshot_path: str) -> Dict[str, ...
    method _parse_response (line 113) | def _parse_response(self, response: str) -> Dict[str, Any]:
  class MailRAGSystem (line 147) | class MailRAGSystem:
    method __init__ (line 150) | def __init__(self, agent, screenshot_dir: str):
    method find_email_screenshots (line 156) | def find_email_screenshots(self) -> List[str]:
    method analyze_screenshots (line 182) | def analyze_screenshots(self, max_screenshots: Optional[int] = None):
    method _is_email_content (line 205) | def _is_email_content(self, email_info: Dict[str, Any]) -> bool:
    method remove_duplicates (line 227) | def remove_duplicates(self):
    method generate_report (line 277) | def generate_report(self) -> str:
    method save_report (line 392) | def save_report(self, output_path: str):
    method save_json_data (line 399) | def save_json_data(self, output_path: str):
  function main (line 406) | def main():

FILE: ios_agent/connection.py
  class ConnectionType (line 10) | class ConnectionType(Enum):
  class DeviceInfo (line 17) | class DeviceInfo:
  class IOSConnection (line 27) | class IOSConnection:
    method __init__ (line 36) | def __init__(self, wda_url: str = "http://localhost:8100"):
    method list_devices (line 47) | def list_devices(self) -> list[DeviceInfo]:
    method _get_device_details (line 98) | def _get_device_details(self, udid: str) -> dict[str, str]:
    method is_connected (line 127) | def is_connected(self, device_id: Optional[str] = None) -> bool:
    method is_wda_ready (line 136) | def is_wda_ready(self, timeout: int = 2) -> bool:
    method start_wda_session (line 150) | def start_wda_session(self) -> tuple[bool, str]:
    method get_wda_status (line 178) | def get_wda_status(self) -> dict:

FILE: ios_agent/controller.py
  class IOSController (line 12) | class IOSController:
    method __init__ (line 20) | def __init__(self, wda_url: str = "http://localhost:8100", session_id:...
    method get_device_size (line 38) | def get_device_size(self) -> Tuple[int, int]:
    method get_current_activity (line 42) | def get_current_activity(self) -> str:
    method get_current_app (line 51) | def get_current_app(self) -> str:
    method tap (line 55) | def tap(self, x: int, y: int) -> bool:
    method text (line 59) | def text(self, input_str: str) -> bool:
    method long_press (line 68) | def long_press(self, x: int, y: int, duration: int = 3000) -> bool:
    method swipe (line 79) | def swipe(self, x: int, y: int, direction: str, dist: str = "medium", ...
    method back (line 122) | def back(self) -> bool:
    method home (line 126) | def home(self) -> bool:
    method enter (line 130) | def enter(self) -> bool:
    method launch_app (line 134) | def launch_app(self, app_name: str) -> bool:
    method save_screenshot (line 138) | def save_screenshot(self, file_path: str) -> bool:
    method get_screenshot (line 143) | def get_screenshot(self) -> Screenshot:
    method get_xml (line 147) | def get_xml(self, prefix: str = "", save_dir: str = "") -> str:

FILE: ios_agent/executor.py
  class IOSExecutor (line 11) | class IOSExecutor:
    method __init__ (line 19) | def __init__(self, wda_url: str = "http://localhost:8100", session_id:...
    method get_screenshot (line 36) | def get_screenshot(self) -> Screenshot:
    method tap (line 44) | def tap(self, x: int, y: int) -> dict:
    method text (line 58) | def text(self, input_str: str) -> dict:
    method type (line 83) | def type(self, input_str: str) -> dict:
    method long_press (line 87) | def long_press(self, x: int, y: int) -> dict:
    method swipe (line 101) | def swipe(self, x: int, y: int, direction: str, dist: str = "medium") ...
    method back (line 164) | def back(self) -> dict:
    method home (line 178) | def home(self) -> dict:
    method wait (line 192) | def wait(self, interval: int = 5) -> dict:
    method enter (line 208) | def enter(self) -> dict:
    method launch (line 224) | def launch(self, app_name: str) -> dict:
    method finish (line 238) | def finish(self, message: Optional[str] = None) -> dict:
    method get_current_app (line 252) | def get_current_app(self) -> str:
    method get_screen_size (line 256) | def get_screen_size(self) -> tuple[int, int]:
    method set_elem_list (line 260) | def set_elem_list(self, xml_path_or_string: str):
    method tap_by_index (line 281) | def tap_by_index(self, index: int) -> dict:
    method long_press_by_index (line 317) | def long_press_by_index(self, index: int) -> dict:
    method swipe_by_index (line 340) | def swipe_by_index(self, index: int, direction: str, dist: str = "medi...
    method __call__ (line 365) | def __call__(self, code_snippet: str):
    method do (line 447) | def do(self, action=None, element=None, **kwargs):
    method update_screenshot (line 540) | def update_screenshot(self, prefix=None, suffix=None):

FILE: ios_agent/hierarchy.py
  class IOSElement (line 9) | class IOSElement:
  function get_page_source (line 20) | def get_page_source(
  function parse_bounds (line 189) | def parse_bounds(bounds_str: str) -> Optional[Tuple[Tuple[int, int], Tup...
  function get_element_bounds (line 232) | def get_element_bounds(element: ET.Element) -> Optional[Tuple[Tuple[int,...
  function get_element_id (line 272) | def get_element_id(element: ET.Element) -> str:
  function is_interactive_element (line 304) | def is_interactive_element(element: ET.Element) -> bool:
  function traverse_ios_tree (line 371) | def traverse_ios_tree(
  function get_ios_elements (line 454) | def get_ios_elements(xml_string: str) -> List[IOSElement]:

FILE: ios_agent/labeling.py
  function _get_scale_factor (line 22) | def _get_scale_factor(img_path: str) -> float:
  function draw_bbox_multi_ios (line 63) | def draw_bbox_multi_ios(

FILE: ios_agent/recorder.py
  class IOSRecorder (line 18) | class IOSRecorder:
    method __init__ (line 26) | def __init__(self, id: str, instruction: str, page_executor, config=No...
    method update_before (line 69) | def update_before(self, controller, need_screenshot: bool = False, nee...
    method update_after (line 258) | def update_after(self, exe_res, response: str):
    method update_after_cot (line 276) | def update_after_cot(self, exe_res, response: str, prompt_his: Optiona...
    method get_latest_xml (line 308) | def get_latest_xml(self) -> str:
    method _save_trace (line 328) | def _save_trace(self):

FILE: ios_agent/run_ios_agent.py
  class IOSConfig (line 30) | class IOSConfig:
    method __init__ (line 32) | def __init__(self, task_dir=None, screenshot_dir=None):
  function main (line 37) | def main():

FILE: ios_agent/screenshot.py
  class Screenshot (line 16) | class Screenshot:
  function get_screenshot (line 24) | def get_screenshot(
  function _get_screenshot_wda (line 56) | def _get_screenshot_wda(
  function _get_screenshot_idevice (line 90) | def _get_screenshot_idevice(
  function _create_fallback_screenshot (line 134) | def _create_fallback_screenshot(is_sensitive: bool) -> Screenshot:
  function save_screenshot (line 152) | def save_screenshot(screenshot: Screenshot, file_path: str) -> bool:
  function get_screenshot_png (line 164) | def get_screenshot_png(

FILE: ios_agent/task.py
  class IOSTask (line 16) | class IOSTask:
    method __init__ (line 24) | def __init__(self, instruction: str, controller, page_executor, agent,...
    method set_system_prompt (line 44) | def set_system_prompt(self, instruction: str):
    method run_step (line 55) | def run_step(self, round_count: int):

FILE: model_training/R1-V/src/distill_r1/create_hf_dataset.py
  function create_r1_train_dataset (line 11) | def create_r1_train_dataset(
  function create_val_dataset (line 59) | def create_val_dataset(

FILE: model_training/R1-V/src/distill_r1/filter_r1.py
  function extract_answer_from_query (line 7) | def extract_answer_from_query(query_results: str) -> str | None:
  function validate_qa_pairs (line 80) | def validate_qa_pairs(input_file: str, output_dir: str, verbose: bool = ...

FILE: model_training/R1-V/src/distill_r1/query_r1.py
  function format_query (line 21) | def format_query(qa_dict: Dict, v2=False) -> str:
  function write_to_jsonl (line 32) | def write_to_jsonl(result: Dict, filename: str):
  function query_r1 (line 38) | def query_r1(qa_pair: Dict, output_file: str, model: str = "deepseek-ai/...
  function process_qa_pairs_parallel (line 69) | def process_qa_pairs_parallel(qa_pairs: List[Dict], output_file: str, ma...

FILE: model_training/R1-V/src/eval/test_qwen2vl_counting_superclevr.py
  function extract_number_answer (line 88) | def extract_number_answer(output_str):

FILE: model_training/R1-V/src/eval/test_qwen2vl_geoqa_multigpu.py
  function get_eval_config (line 16) | def get_eval_config():
  function prepare_test_messages (line 30) | def prepare_test_messages(testset_path):
  function init_model (line 58) | def init_model(model_path, gpu_id):
  function answer_a_batch_question_qwen (line 72) | def answer_a_batch_question_qwen(batch_messages, model, processor):
  function infer_on_single_gpu (line 94) | def infer_on_single_gpu(model_path, device_id, chunk_of_tested_messages,...
  function multi_gpu_inference (line 112) | def multi_gpu_inference(prompts, gpu_ids, model_path, batch_size):
  function compute_metrics (line 145) | def compute_metrics(testset_data, all_predicts):

FILE: model_training/R1-V/src/r1-v/local_scripts/create_vision_cot_data.py
  function get_image_data_url (line 47) | def get_image_data_url(image_input):
  function gpt4o_query (line 70) | def gpt4o_query(image, prompt, max_retries=5, initial_delay=3):
  function process_single_item (line 120) | def process_single_item(example):
  function main (line 136) | def main():

FILE: model_training/R1-V/src/r1-v/local_scripts/prepare_hf_data.py
  function extract_problem_solution (line 32) | def extract_problem_solution(gpt4o_response):
  function load_image_from_path (line 58) | def load_image_from_path(image_path):
  function process_raw_data (line 67) | def process_raw_data(raw_data):
  function has_empty_tags (line 132) | def has_empty_tags(text):
  function has_answer_pattern (line 138) | def has_answer_pattern(text):
  function has_valid_image_size (line 144) | def has_valid_image_size(example): # for Qwen2-VL-2B's processor require...

FILE: model_training/R1-V/src/r1-v/setup.py
  function deps_list (line 80) | def deps_list(*pkgs):

FILE: model_training/R1-V/src/r1-v/src/open_r1/evaluate.py
  function prompt_fn (line 37) | def prompt_fn(line, task_name: str = None):

FILE: model_training/R1-V/src/r1-v/src/open_r1/generate.py
  function build_distilabel_pipeline (line 22) | def build_distilabel_pipeline(

FILE: model_training/R1-V/src/r1-v/src/open_r1/grpo.py
  class GRPOScriptArguments (line 30) | class GRPOScriptArguments(ScriptArguments):
  function accuracy_reward (line 53) | def accuracy_reward(completions, solution, **kwargs):
  function format_reward (line 96) | def format_reward(completions, **kwargs):
  function main (line 117) | def main(script_args, training_args, model_args):

FILE: model_training/R1-V/src/r1-v/src/open_r1/grpo_agent.py
  function extract_function_call (line 37) | def extract_function_call(text):
  function extract_state_assessment (line 44) | def extract_state_assessment(text):
  function extract_thinking (line 51) | def extract_thinking(text):
  function calculate_semantic_similarity (line 58) | def calculate_semantic_similarity(text1, text2):
  class GRPOScriptArguments (line 67) | class GRPOScriptArguments(ScriptArguments):
  function accuracy_reward (line 90) | def accuracy_reward(completions, solution, **kwargs):
  function format_reward (line 125) | def format_reward(completions, **kwargs):
  function main (line 175) | def main(script_args, training_args, model_args):

FILE: model_training/R1-V/src/r1-v/src/open_r1/sft.py
  class SFTConfig (line 66) | class SFTConfig(trl.SFTConfig):
  function convert_example (line 93) | def convert_example(example):
  function collate_fn (line 144) | def collate_fn(examples):
  function main (line 168) | def main(script_args, training_args, model_args):

FILE: model_training/R1-V/src/r1-v/src/open_r1/trainer/grpo_trainer.py
  class Qwen2VLGRPOTrainer (line 63) | class Qwen2VLGRPOTrainer(Trainer):
    method __init__ (line 147) | def __init__(
    method _set_signature_columns_if_needed (line 327) | def _set_signature_columns_if_needed(self):
    method _get_per_token_logps (line 337) | def _get_per_token_logps(self, model, input_ids, attention_mask, pixel...
    method _prepare_inputs (line 352) | def _prepare_inputs(self, inputs: dict[str, Union[torch.Tensor, Any]])...
    method compute_loss (line 355) | def compute_loss(self, model, inputs, return_outputs=False, num_items_...
    method log (line 490) | def log(self, logs: dict[str, float], start_time: Optional[float] = No...
    method create_model_card (line 499) | def create_model_card(

FILE: model_training/R1-V/src/r1-v/src/open_r1/trainer/vllm_grpo_trainer.py
  class RepeatRandomSampler (line 82) | class RepeatRandomSampler(Sampler):
    method __init__ (line 100) | def __init__(self, data_source, repeat_count: int):
    method __iter__ (line 105) | def __iter__(self):
    method __len__ (line 113) | def __len__(self):
  class Qwen2VLGRPOVLLMTrainer (line 117) | class Qwen2VLGRPOVLLMTrainer(Trainer):
    method __init__ (line 118) | def __init__(
    method _set_signature_columns_if_needed (line 468) | def _set_signature_columns_if_needed(self):
    method _get_train_sampler (line 477) | def _get_train_sampler(self):
    method _get_per_token_logps (line 481) | def _get_per_token_logps(
    method _prepare_inputs (line 517) | def _prepare_inputs(
    method compute_loss (line 761) | def compute_loss(
    method log (line 826) | def log(self, logs: dict[str, float], start_time: Optional[float] = No...

FILE: model_training/R1-V/src/r1-v/src/open_r1/trainer/vllm_grpo_trainer_modified.py
  class Qwen2VLGRPOVLLMTrainerModified (line 81) | class Qwen2VLGRPOVLLMTrainerModified(Trainer):
    method __init__ (line 82) | def __init__(
    method _set_signature_columns_if_needed (line 380) | def _set_signature_columns_if_needed(self):
    method _get_per_token_logps (line 389) | def _get_per_token_logps(
    method _prepare_inputs (line 425) | def _prepare_inputs(
    method compute_loss (line 712) | def compute_loss(
    method log (line 765) | def log(self, logs: dict[str, float], start_time: Optional[float] = No...

FILE: page_executor/simple_vision_executor.py
  class AndroidElement (line 7) | class AndroidElement:
    method __init__ (line 8) | def __init__(self, uid, bbox, attrib):
    method __print__ (line 13) | def __print__(self):
  function get_id_from_element (line 19) | def get_id_from_element(elem):
  function traverse_tree (line 34) | def traverse_tree(xml_path, elem_list, attrib, add_index=False):
  class VisionExecutor (line 70) | class VisionExecutor(TextOnlyExecutor):
    method __init__ (line 71) | def __init__(self, controller, config):
    method set_elem_list (line 90) | def set_elem_list(self, xml_path):
    method tap (line 114) | def tap(self, index):
    method text (line 121) | def text(self, input_str):
    method type (line 125) | def type(self, input_str):
    method long_press (line 129) | def long_press(self, index):
    method swipe (line 135) | def swipe(self, index, direction, dist):
    method back (line 142) | def back(self):
    method home (line 146) | def home(self):
    method wait (line 150) | def wait(self, interval=5):
    method enter (line 156) | def enter(self):
    method launch (line 160) | def launch(self, app_name):
    method finish (line 164) | def finish(self, message=None):

FILE: page_executor/text_executor.py
  function remove_leading_zeros_in_string (line 11) | def remove_leading_zeros_in_string(s):
  class TextOnlyExecutor (line 15) | class TextOnlyExecutor:
    method __init__ (line 16) | def __init__(self, controller, config):
    method __get_current_status__ (line 36) | def __get_current_status__(self):
    method modify_relative_bbox (line 44) | def modify_relative_bbox(self, relative_bbox):
    method __call__ (line 52) | def __call__(self, code_snippet):
    method __get_class_methods__ (line 71) | def __get_class_methods__(self, include_dunder=False, exclude_inherite...
    method update_screenshot (line 90) | def update_screenshot(self, prefix=None, suffix=None):
    method do (line 102) | def do(self, action=None, element=None, **kwargs):
    method get_relative_bbox_center (line 132) | def get_relative_bbox_center(self, instruction, screenshot):
    method tap (line 150) | def tap(self, element):
    method long_press (line 161) | def long_press(self, element):
    method swipe (line 172) | def swipe(self, element=None, **kwargs):
    method type (line 191) | def type(self, **kwargs):
    method press_enter (line 199) | def press_enter(self):
    method press_back (line 203) | def press_back(self):
    method press_home (line 207) | def press_home(self):
    method finish (line 211) | def finish(self, message=None):
    method wait (line 215) | def wait(self):
    method launch (line 219) | def launch(self, **kwargs):

FILE: page_executor/utils.py
  function _add_text (line 7) | def _add_text(instruction, image):
  function plot_bbox (line 35) | def plot_bbox(bbox, screenshot, instruction=None):
  function call_dino (line 46) | def call_dino(instruction, screenshot_path):
  function get_relative_bbox_center (line 53) | def get_relative_bbox_center(page, instruction, screenshot):

FILE: prepare_data/rl/convert_to_hf_vl.py
  function show_image_info (line 12) | def show_image_info(image, title):
  function convert_json_to_hf_dataset (line 19) | def convert_json_to_hf_dataset(json_file_path, output_dir, test_size=0.0...

FILE: prepare_data/visual_model_data/data_maker.py
  function image_to_base64 (line 254) | def image_to_base64(image_path):
  class Agent (line 258) | class Agent:
    method act (line 261) | def act(self, messages: List[Dict[str, Any]]) -> str:
    method prompt_to_message (line 264) | def prompt_to_message(self, prompt, images):
    method system_prompt (line 267) | def system_prompt(self, instruction) -> str:
  class VisualInterfaceAgent (line 270) | class VisualInterfaceAgent(Agent):
    method __init__ (line 271) | def __init__(
    method act (line 292) | def act(self, messages: List[Dict[str, Any]]) -> str:
    method prompt_to_message (line 321) | def prompt_to_message(self, image, content_text):
  class GPTAgent (line 343) | class GPTAgent(Agent):
    method __init__ (line 344) | def __init__(
    method act (line 365) | def act(self, messages: List[Dict[str, Any]]) -> str:
    method prompt_to_message (line 381) | def prompt_to_message(self, content):
  function extract_function_call (line 389) | def extract_function_call(text):
  function extract_state_assessment (line 396) | def extract_state_assessment(text):
  function extract_reasoning (line 403) | def extract_reasoning(text):
  function parse_interaction_history (line 414) | def parse_interaction_history(history):
  function list_sorted_files_in_directory (line 428) | def list_sorted_files_in_directory(directory):

FILE: prepare_data/visual_model_data/sft_data_maker.py
  function extract_function_call (line 5) | def extract_function_call(text):
  function extract_state_assessment (line 12) | def extract_state_assessment(text):
  function extract_state_assessment_full (line 19) | def extract_state_assessment_full(text):
  function extract_thinking (line 26) | def extract_thinking(text):
  function extract_function_call_detail (line 33) | def extract_function_call_detail(text):
  function print_image_size (line 40) | def print_image_size(image_path):
  function convert_to_alpaca (line 93) | def convert_to_alpaca(data):

FILE: recorder/json_recoder.py
  function get_compressed_xml (line 11) | def get_compressed_xml(xml_path, type="plain_text", version="v1"):
  class JSONRecorder (line 28) | class JSONRecorder:
    method __init__ (line 29) | def __init__(self, id, instruction, page_executor, config):
    method update_response_deprecated (line 56) | def update_response_deprecated(self, controller, response=None, prompt...
    method test_per_step (line 93) | def test_per_step(self, step, controller):
    method update_before (line 104) | def update_before(self, controller, prompt="** XML **", need_screensho...
    method dectect_auto_stop (line 156) | def dectect_auto_stop(self):
    method get_latest_xml (line 170) | def get_latest_xml(self):
    method get_latest_xml_tree (line 185) | def get_latest_xml_tree(self):
    method update_execution (line 196) | def update_execution(self, exe_res):
    method update_after_cot (line 203) | def update_after_cot(self, exe_res, rep, ui_text, action, cloud_status...

FILE: templates/android_screenshot_template.py
  function get_template_prompt (line 863) | def get_template_prompt(prompt, app):

FILE: templates/packages.py
  function find_closest (line 58) | def find_closest(input_str, dict):
  function find_package (line 82) | def find_package(input_str: str) -> str:
  function find_app (line 86) | def find_app(input_str: str) -> str:

FILE: tools/check_result_multiprocess.py
  function draw_cross_on_image (line 16) | def draw_cross_on_image(img, coordinates):
  function draw_arrow_on_image (line 26) | def draw_arrow_on_image(img, start, end):
  function create_text_image (line 40) | def create_text_image(text, base_image, font_size=24, font_name='Songti ...
  function merge_text (line 70) | def merge_text(img, text_image, position=(0, 0)):
  function merge_text_up (line 84) | def merge_text_up(img, text_image, position=(0, 0)):
  function merge_images (line 108) | def merge_images(images):
  function make_merge_pic (line 147) | def make_merge_pic(log_path, save_path=None):
  function single_worker (line 246) | def single_worker(all_log_path, log, save_path):
  function check_all_log (line 256) | def check_all_log(all_log_path, save_path=None):

FILE: tools/modify_mobile_to_avd.py
  function update_device_ini (line 5) | def update_device_ini(avd_dir, device_name):
  function update_config_files (line 18) | def update_config_files(avd_dir, device_name, sdk_dir):
  function main (line 37) | def main(avd_dir, sdk_dir, device_name):

FILE: tools/modify_mobile_to_docker.py
  function update_device_ini (line 6) | def update_device_ini(avd_dir, device_name, save_dir):
  function update_config_files (line 22) | def update_config_files(avd_dir, device_name, save_dir):
  function main (line 45) | def main(avd_dir, device_name, save_dir):

FILE: utils_mobile/and_controller.py
  class AndroidController (line 15) | class AndroidController:
    method __init__ (line 16) | def __init__(self, device, type="cmd", instance=None):
    method execute_adb (line 32) | def execute_adb(self, adb_command, type="cmd", output=True):
    method get_device_size (line 54) | def get_device_size(self):
    method get_screenshot (line 69) | def get_screenshot(self, prefix, save_dir):
    method save_screenshot (line 83) | def save_screenshot(self, save_path):
    method get_xml (line 96) | def get_xml(self, prefix, save_dir):
    method get_ac_xml (line 129) | def get_ac_xml(self, prefix, save_dir):
    method get_current_activity (line 154) | def get_current_activity(self):
    method get_current_app (line 162) | def get_current_app(self):
    method back (line 167) | def back(self):
    method enter (line 172) | def enter(self):
    method home (line 177) | def home(self):
    method tap (line 182) | def tap(self, x, y):
    method text (line 187) | def text(self, input_str):
    method long_press (line 208) | def long_press(self, x, y, duration=1000):
    method kill_package (line 213) | def kill_package(self, package_name):
    method swipe (line 217) | def swipe(self, x, y, direction, dist: Union[str, int] = "medium", qui...
    method swipe_precise (line 245) | def swipe_precise(self, start, end, duration=400):
    method launch_app (line 252) | def launch_app(self, package_name):
    method start_screen_record (line 257) | def start_screen_record(self, prefix):
    method launch (line 262) | def launch(self, package_name):
    method run_command (line 266) | def run_command(self, command):
    method check_ac_survive (line 270) | def check_ac_survive(self):

FILE: utils_mobile/specialCheck.py
  function bounds_to_coords (line 6) | def bounds_to_coords(bounds_string):
  function coords_to_bounds (line 12) | def coords_to_bounds(bounds):
  function check_valid_bounds (line 16) | def check_valid_bounds(bounds):
  function check_point_containing (line 23) | def check_point_containing(bounds, x, y, window, threshold=0):
  function check_bounds_containing (line 33) | def check_bounds_containing(bounds_contained, bounds_containing):
  function check_bounds_intersection (line 43) | def check_bounds_intersection(bounds1, bounds2):
  function get_bounds_area (line 51) | def get_bounds_area(bounds):
  function get_bounds_center (line 56) | def get_bounds_center(bounds):
  function calculate_point_distance (line 61) | def calculate_point_distance(x1, y1, x2, y2):
  function compare_bounds_area (line 66) | def compare_bounds_area(bounds1, bounds2):
  function compare_y_in_bounds (line 75) | def compare_y_in_bounds(bounds1, bounds2):
  class MiniMapSpecialCheck (line 87) | class MiniMapSpecialCheck:
    method __init__ (line 88) | def __init__(self, xml_string, root):
    method check (line 92) | def check(self):
    method check_page (line 106) | def check_page(self):
    method get_filter_base_node (line 137) | def get_filter_base_node(self, node, page_type):
    method check_filter (line 174) | def check_filter(self, page_type):
    method get_route_base_node (line 199) | def get_route_base_node(self, node, page_type):
    method check_route (line 223) | def check_route(self, page_type):
    method get_search_result_base_node (line 241) | def get_search_result_base_node(self, node, page_type):
    method check_search_result (line 265) | def check_search_result(self, page_type):
  class WeiXinSpecialCheck (line 287) | class WeiXinSpecialCheck:
    method __init__ (line 288) | def __init__(self, xml_string, root):
    method check (line 292) | def check(self):
    method check_page (line 305) | def check_page(self):
    method check_moments_icons (line 325) | def check_moments_icons(self, page_type):
    method get_search_base_node (line 336) | def get_search_base_node(self, node, page_type):
    method check_search (line 360) | def check_search(self, page_type):
    method get_menu_base_node (line 378) | def get_menu_base_node(self, node, page_type):
    method check_menu (line 396) | def check_menu(self, page_type):
  class MeituanSpecialCheck (line 432) | class MeituanSpecialCheck:
    method __init__ (line 433) | def __init__(self, xml_string, root):
    method check (line 437) | def check(self):
    method child_index (line 451) | def child_index(self, parent, node):
    method remove_children_overlap_with_bounds (line 458) | def remove_children_overlap_with_bounds(self, node, overlap_bounds, cu...
    method remove_overlap (line 469) | def remove_overlap(self):
    method check_page (line 505) | def check_page(self):
    method get_home_base_node (line 531) | def get_home_base_node(self, node, page_type):
    method check_home (line 555) | def check_home(self, page_type):
    method get_favourite_base_node (line 573) | def get_favourite_base_node(self, node, page_type):
    method check_favourite (line 598) | def check_favourite(self, page_type):
    method get_search_base_node (line 619) | def get_search_base_node(self, node, page_type):
    method check_search (line 644) | def check_search(self, page_type):

FILE: utils_mobile/utils.py
  function get_compressed_xml (line 23) | def get_compressed_xml(xml_path, type="json"):
  function handle_backoff (line 34) | def handle_backoff(details):
  function handle_giveup (line 38) | def handle_giveup(details):
  function get_completion_glm4 (line 49) | def get_completion_glm4(prompt, glm4_key):
  function time_within_ten_secs (line 60) | def time_within_ten_secs(time1, time2):
  function print_with_color (line 80) | def print_with_color(text: str, color=""):
  function draw_bbox_multi (line 102) | def draw_bbox_multi(img_path, output_path, elem_list, record_mode=False,...
  function draw_grid (line 141) | def draw_grid(img_path, output_path):
  function encode_image (line 176) | def encode_image(image_path):
  function start_screen_record (line 185) | def start_screen_record(self, file_name):
  function write_jsonl (line 191) | def write_jsonl(data: List[dict], path: str, append: bool = False):
  function del_file (line 197) | def del_file(path):
  function copy_directory (line 204) | def copy_directory(source_dir, target_dir):
  function remove_punctuation (line 224) | def remove_punctuation(input_string):
  function contains_chinese (line 234) | def contains_chinese(text):
  function split_chunks (line 240) | def split_chunks(lst, num_chunks):
  function glm_call (line 253) | def glm_call(prompt, temperature=0.7, top_p=0.9):
  function get_xml_list (line 283) | def get_xml_list(xml_path):
  function dump_xml (line 295) | def dump_xml(controller, device_name=None, accessiblity=False, task_id="...
  function load_json (line 310) | def load_json(path, encoding='utf-8'):
  function save_json (line 314) | def save_json(obj, path):
  function load_jsonl (line 319) | def load_jsonl(path, encoding='utf-8'):
  function save_jsonl (line 327) | def save_jsonl(obj, path):
  function write_jsonl (line 333) | def write_jsonl(data: List[dict], path: str, append: bool = False):
  function del_file (line 339) | def del_file(path):
  function copy_directory (line 346) | def copy_directory(source_dir, target_dir):
  function remove_punctuation (line 366) | def remove_punctuation(input_string):
  function contains_chinese (line 376) | def contains_chinese(text):
  function split_chunks (line 382) | def split_chunks(lst, num_chunks):
  function glm_call (line 395) | def glm_call(prompt, temperature=0.7, top_p=0.9):
  class OpenAIEngine (line 425) | class OpenAIEngine:
    method __init__ (line 426) | def __init__(
    method generate (line 447) | def generate(self, messages) -> str:
  function extract_bounds (line 466) | def extract_bounds(node, path=""):

FILE: utils_mobile/xml_tool.py
  function get_words_in_certain_length (line 12) | def get_words_in_certain_length(text, length=10):
  class UIXMLTree (line 20) | class UIXMLTree:
    method __init__ (line 21) | def __init__(self):
    method process (line 38) | def process(self, xml_string, app_info=None, level=1, str_type="json",...
    method insert_node (line 71) | def insert_node(self, parent, index, attrib_dict):
    method append_node (line 79) | def append_node(self, parent, attrib_dict):
    method find_smallest_enclosing_node (line 87) | def find_smallest_enclosing_node(self, root, bounds):
    method find_nodes_for_bounds (line 97) | def find_nodes_for_bounds(self, root, bounds_list):
    method dict_to_plain_text (line 131) | def dict_to_plain_text(self, xml_dict, indent=0):
    method should_remove_node (line 141) | def should_remove_node(self, node):
    method child_index (line 168) | def child_index(self, parent, node):
    method merge_attribute_in_one_line (line 175) | def merge_attribute_in_one_line(self, node):
    method get_xpath (line 221) | def get_xpath(self, node):
    method get_attr_count (line 237) | def get_attr_count(self, collection_key, key):
    method inc_attr_count (line 244) | def inc_attr_count(self, collection_key, key):
    method get_xpath_new (line 253) | def get_xpath_new(self, node):
    method get_xpath_all_new (line 293) | def get_xpath_all_new(self, node):
    method mid_order_remove (line 299) | def mid_order_remove(self, node):
    method get_all_bounds (line 364) | def get_all_bounds(self, node, parent_keys):
    method remove_children_overlap_with_bounds (line 380) | def remove_children_overlap_with_bounds(self, node, overlap_bounds, cu...
    method remove_overlap (line 391) | def remove_overlap(self):
    method dump_tree (line 425) | def dump_tree(self):
    method mid_order_reindex (line 429) | def mid_order_reindex(self, node):
    method merge_description (line 476) | def merge_description(self, p_desc, c_desc):
    method can_merge_bounds (line 487) | def can_merge_bounds(self, parent_bounds, child_bounds):
    method mid_order_merge (line 501) | def mid_order_merge(self, node):
    method traverse_dict (line 533) | def traverse_dict(self, _dict):
    method merge_none_act (line 561) | def merge_none_act(self):
    method reindex (line 564) | def reindex(self):
    method special_check (line 568) | def special_check(self):
    method xml_sparse (line 576) | def xml_sparse(self):
    method dump_xpath (line 595) | def dump_xpath(self):
    method dump_name (line 599) | def dump_name(self):
    method get_recycle_nodes (line 603) | def get_recycle_nodes(self, root):
    method same_subtree (line 611) | def same_subtree(self, tree1, tree2):
    method check_unique (line 624) | def check_unique(self, node, node_list):
    method merge_recycle_list (line 630) | def merge_recycle_list(self, recycle_nodes):
    method check_scroll_bottom (line 642) | def check_scroll_bottom(self, tree1, tree2):

FILE: vllm_script/run_server_visual_cot.py
  class OpenAIRequest (line 29) | class OpenAIRequest(BaseModel):
  function chat_completions (line 38) | async def chat_completions(request: OpenAIRequest):
Condensed preview — 200 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (5,214K chars).
[
  {
    "path": ".gitignore",
    "chars": 10,
    "preview": ".DS_Store\n"
  },
  {
    "path": "Communication.md",
    "chars": 234,
    "preview": "We provide QR codes for joining the HKUDS discussion groups on WeChat and Feishu.\n\nYou can join by scanning the QR codes"
  },
  {
    "path": "LICENSE",
    "chars": 1102,
    "preview": "MIT License\n\nCopyright (c) 2024 xuyifan-0731\nCopyright (c) 2025 yangqin-jiang\n\nPermission is hereby granted, free of cha"
  },
  {
    "path": "PhoneClaw/README.md",
    "chars": 20694,
    "preview": "# PhoneClaw — iOS Ralph Loop GUI Agent\n\nAn autonomous iOS GUI automation agent built on the **Ralph Loop** methodology: "
  },
  {
    "path": "PhoneClaw/__init__.py",
    "chars": 88,
    "preview": "\"\"\"PhoneClaw: iOS GUI Agent based on Ralph Loop (EXECUTE → EVALUATE → FIX → REPEAT).\"\"\"\n"
  },
  {
    "path": "PhoneClaw/actions.py",
    "chars": 17629,
    "preview": "\"\"\"Action execution for iOS devices via WebDriverAgent.\"\"\"\n\nimport time\nfrom typing import Optional, Tuple\n\n# iOS app bu"
  },
  {
    "path": "PhoneClaw/agent.py",
    "chars": 6262,
    "preview": "\"\"\"OpenRouter VLM agent for PhoneClaw.\n\nOpenRouter (https://openrouter.ai) provides unified access to many VLMs\n(GPT-4o,"
  },
  {
    "path": "PhoneClaw/connection.py",
    "chars": 5573,
    "preview": "\"\"\"iOS device connection management via WebDriverAgent.\"\"\"\n\nimport subprocess\nimport time\nfrom dataclasses import datacl"
  },
  {
    "path": "PhoneClaw/controller.py",
    "chars": 5518,
    "preview": "\"\"\"iOS Controller - adapts iOS device control to Android-Lab's controller interface.\"\"\"\n\nimport os\nimport time\nfrom typi"
  },
  {
    "path": "PhoneClaw/embeddings.py",
    "chars": 5413,
    "preview": "\"\"\"Embedding utilities for semantic deduplication in PhoneClaw.\n\nUses an OpenAI-compatible embeddings API to compute den"
  },
  {
    "path": "PhoneClaw/evaluator.py",
    "chars": 4266,
    "preview": "\"\"\"Evaluator for PhoneClaw Ralph Loop.\n\nUses a VLM to determine whether the current screen satisfies a subtask's\nsuccess"
  },
  {
    "path": "PhoneClaw/executor.py",
    "chars": 20063,
    "preview": "\"\"\"iOS executor for PhoneClaw - adapts iOS actions to Android-Lab interface.\"\"\"\n\nimport os\nimport time\nfrom typing impor"
  },
  {
    "path": "PhoneClaw/experience.py",
    "chars": 23420,
    "preview": "\"\"\"Experience log for PhoneClaw — procedural memory.\n\nPersists a JSON file at ~/.PhoneClaw/experience_log.json that accu"
  },
  {
    "path": "PhoneClaw/hierarchy.py",
    "chars": 11650,
    "preview": "\"\"\"iOS UI Hierarchy - get and parse iOS page source for element labeling.\"\"\"\n\nimport xml.etree.ElementTree as ET\nfrom ty"
  },
  {
    "path": "PhoneClaw/keepalive.py",
    "chars": 9576,
    "preview": "\"\"\"Screen keepalive for PhoneClaw.\n\nStrategy\n--------\nPrimary — ``idleTimerDisabled`` (no touch required)\n    WDA expose"
  },
  {
    "path": "PhoneClaw/labeling.py",
    "chars": 5472,
    "preview": "\"\"\"iOS Screenshot Labeling - draw bounding boxes and labels on screenshots.\"\"\"\n\nimport cv2\nimport os\nfrom typing import "
  },
  {
    "path": "PhoneClaw/learn.py",
    "chars": 20591,
    "preview": "\"\"\"Learning mode for PhoneClaw — record human demonstrations and extract lessons.\n\nHow it works\n------------\n1. DemoReco"
  },
  {
    "path": "PhoneClaw/loop.py",
    "chars": 20130,
    "preview": "\"\"\"Ralph Loop orchestrator for PhoneClaw.\n\nImplements the core EXECUTE → EVALUATE → FIX → REPEAT cycle:\n\n  Outer loop: i"
  },
  {
    "path": "PhoneClaw/memory.py",
    "chars": 19559,
    "preview": "\"\"\"User memory / profile for PhoneClaw interactive mode.\n\nPersists a JSON file at PhoneClaw/data/user_profile.json that "
  },
  {
    "path": "PhoneClaw/planner.py",
    "chars": 5022,
    "preview": "\"\"\"Task Planner for PhoneClaw.\n\nUses an LLM to decompose a high-level task description into an ordered list of\natomic su"
  },
  {
    "path": "PhoneClaw/prompts.py",
    "chars": 24111,
    "preview": "\"\"\"Prompts for PhoneClaw Ralph Loop agent.\n\nEight prompt sets:\n  1. PLANNER_PROMPT          - task → structured subtask "
  },
  {
    "path": "PhoneClaw/recorder.py",
    "chars": 9681,
    "preview": "\"\"\"Recorder for PhoneClaw - logs per-step traces including Ralph Loop evaluation results.\"\"\"\n\nimport json\nimport os\nimpo"
  },
  {
    "path": "PhoneClaw/run_phoneclaw.py",
    "chars": 36626,
    "preview": "#!/usr/bin/env python3\n\"\"\"\nPhoneClaw: iOS GUI Agent based on Ralph Loop\n\n--- Single-task mode ---\n\n    export OPENROUTER"
  },
  {
    "path": "PhoneClaw/screenshot.py",
    "chars": 4808,
    "preview": "\"\"\"Screenshot utilities for capturing iOS device screen.\"\"\"\n\nimport base64\nimport os\nimport subprocess\nimport tempfile\ni"
  },
  {
    "path": "PhoneClaw/state.py",
    "chars": 6804,
    "preview": "\"\"\"State persistence for PhoneClaw Ralph Loop.\n\nSaves and restores task progress to the filesystem so that:\n- Tasks can "
  },
  {
    "path": "README.md",
    "chars": 25480,
    "preview": "<div align=\"center\">\n  <picture>\n      <img src=\"./figures/phoneagent_logo.png\" width=\"20%\" style=\"border: none; box-sha"
  },
  {
    "path": "adb_client.py",
    "chars": 4662,
    "preview": "import os\nimport shutil\nimport subprocess\nimport time\nfrom flask import Flask, request, jsonify\n\n\ndef list_all_devices()"
  },
  {
    "path": "agent/__init__.py",
    "chars": 682,
    "preview": "try:\n    from .mllm.claude_model import *\nexcept:\n    print(\"Claude LLM is not available.\")\ntry:\n    from .llm.glm4 impo"
  },
  {
    "path": "agent/llm/glm4.py",
    "chars": 1363,
    "preview": "from zhipuai import ZhipuAI\n\nfrom agent.model import *\n\n\nclass GLM4Agent(OpenAIAgent):\n    def __init__(\n            sel"
  },
  {
    "path": "agent/llm/qwen_llm_model.py",
    "chars": 678,
    "preview": "from agent.model import *\n\n\nclass QwenLLMAgent(OpenAIAgent):\n    def __init__(\n            self,\n            api_key: st"
  },
  {
    "path": "agent/mllm/claude_model.py",
    "chars": 3967,
    "preview": "import http.client\n\nimport anthropic\n\nfrom agent.model import *\n\nclass Claude_official(OpenAIAgent):\n    def __init__(\n "
  },
  {
    "path": "agent/mllm/qwen_model.py",
    "chars": 1960,
    "preview": "from http import HTTPStatus\n\nimport dashscope\n\nfrom agent.model import *\n\n\nclass QwenAgent(OpenAIAgent):\n    def __init_"
  },
  {
    "path": "agent/model.py",
    "chars": 5245,
    "preview": "from typing import List, Dict, Any\n\nimport backoff\nimport requests\nfrom openai import OpenAI\n\nfrom agent.utils import *\n"
  },
  {
    "path": "agent/utils.py",
    "chars": 1235,
    "preview": "import base64\nimport copy\n\n\ndef encode_image(image_path):\n    with open(image_path, \"rb\") as image_file:\n        return "
  },
  {
    "path": "app_data/README.md",
    "chars": 476,
    "preview": "Due to the fact that the AndroidLab image does not support Android Studio on macOS, some app tasks must be configured ma"
  },
  {
    "path": "configs/example_xml_cloud_hyper.yaml",
    "chars": 534,
    "preview": "agent:\n    name: OpenAIAgent\n    args:\n        api_key: \"EMPTY\"\n        api_base: \"http://localhost:8002/v1\"\n        mod"
  },
  {
    "path": "configs/glm-4-plus-mac-XML.yaml",
    "chars": 474,
    "preview": "agent:\n  name: GLM4Agent\n  args:\n    model_key:\n    model_name: \"glm-4-plus\"\n    max_new_tokens: 512\n    temperature: 0."
  },
  {
    "path": "configs/gpt-4o-linux-XML.yaml",
    "chars": 435,
    "preview": "agent:\n  name: OpenAIAgent\n  args:\n    api_key:\n    api_base:\n    model_name: \"gpt-4o-2024-05-13\"\n    max_new_tokens: 51"
  },
  {
    "path": "configs/gpt-4o-mac-SoM.yaml",
    "chars": 476,
    "preview": "agent:\n  name: OpenAIAgent\n  args:\n    api_key:\n    api_base:\n    model_name: \"gpt-4o-2024-05-13\"\n    max_new_tokens: 51"
  },
  {
    "path": "docs/modify_androidlab.md",
    "chars": 6206,
    "preview": "# How to modify the Androidlab\n\n## How to Modify the Backbone Model\n\nThe `Agent` class has been predefined in the `agent"
  },
  {
    "path": "docs/new_apps.md",
    "chars": 6313,
    "preview": "# Mobile App Task Evaluation Overview\n\nThis document introduces evaluation tasks for four mobile applications designed t"
  },
  {
    "path": "docs/prepare_for_linux.md",
    "chars": 3762,
    "preview": "### Prepare Docker on linux(x86_64)\n\n1. Install Docker on your machine. Make sure your machine already supports KVM. You"
  },
  {
    "path": "docs/prepare_for_mac.md",
    "chars": 4626,
    "preview": "### Prepare AVD on mac(arm64)\n\n1. Install Android Studio and ADB on your machine, and start a virtual machine to ensure "
  },
  {
    "path": "eval.py",
    "chars": 3668,
    "preview": "import os\nimport argparse\nimport yaml\n\nfrom agent import get_agent\nfrom evaluation.auto_test import *\nfrom evaluation.pa"
  },
  {
    "path": "evaluation/auto_test.py",
    "chars": 14924,
    "preview": "import datetime\nimport time\nfrom evaluation.configs import TaskConfig\nfrom evaluation.docker_utils import create_docker_"
  },
  {
    "path": "evaluation/config/bluecoins.yaml",
    "chars": 2869,
    "preview": "APP: bluecoins\npackage: com.rammigsoftware.bluecoins\ntasks:\n  # query\n  - task_id: bluecoins_1\n    task: Could you tell "
  },
  {
    "path": "evaluation/config/calendar.yaml",
    "chars": 2418,
    "preview": "APP: calendar\npackage: com.skuld.calendario\ntasks:\n  - task_id: calendar_1\n    task: I want to add an event at 5:00PM to"
  },
  {
    "path": "evaluation/config/cantook.yaml",
    "chars": 1828,
    "preview": "APP: Cantook\npackage: Cantook\ntasks:\n  # query\n  - task_id: cantook_1\n    task: Do I have Pride and Prejudice on my book"
  },
  {
    "path": "evaluation/config/chrome.yaml",
    "chars": 1098,
    "preview": "APP: Chrome\npackage: com.android.chrome\ntasks:\n    - task_id: chrome_1\n      task: Find the address and founding date of"
  },
  {
    "path": "evaluation/config/clock.yaml",
    "chars": 4511,
    "preview": "APP: clock\npackage: com.google.android.deskclock\ntasks:\n  - task_id: clock_1\n    category: Alarm set\n    task: Set an al"
  },
  {
    "path": "evaluation/config/contacts.yaml",
    "chars": 3025,
    "preview": "APP: Contacts\npackage: com.google.android.contacts\ntasks:\n  - task_id: contacts_1\n    category: add contacts' informatio"
  },
  {
    "path": "evaluation/config/gmail.yaml",
    "chars": 1607,
    "preview": "APP: Gmail\npackage: com.android.gmail\ntasks:\n    - task_id: gmail_1\n      task: 'Edit an email addressed to user_test@gm"
  },
  {
    "path": "evaluation/config/map.yaml",
    "chars": 2796,
    "preview": "APP: map.me\npackage: com.mapswithme.maps.pro\ntasks:\n  - task_id: map_1\n    task: Check the walking distance and time bet"
  },
  {
    "path": "evaluation/config/pimusic.yaml",
    "chars": 2003,
    "preview": "APP: Pi Music Player\npackage: com.Project100Pi.themusicplayer\ntasks:\n  # query\n  - task_id: pimusic_1\n    task: Tell me "
  },
  {
    "path": "evaluation/config/reddit.yaml",
    "chars": 821,
    "preview": "APP: Reddit\npackage: com.android.reddit\ntasks:\n    - task_id: reddit_1\n      task: Join the ChatGPT discussion group\n   "
  },
  {
    "path": "evaluation/config/setting.yaml",
    "chars": 4235,
    "preview": "APP: Settings\npackage: com.android.settings\ntasks:\n  - task_id: setting_0\n    category: Network\n    task: Turn on airpla"
  },
  {
    "path": "evaluation/config/tiktok.yaml",
    "chars": 1055,
    "preview": "APP: TikTok\npackage: com.android.tiktok\ntasks:\n    - task_id: tiktok_1\n      task: 'Go to the homepage of \"IShowSpeed\"'\n"
  },
  {
    "path": "evaluation/config/zoom.yaml",
    "chars": 974,
    "preview": "APP: zoom\npackage: us.zoom.videomeetings\ntasks:\n  # query\n  - task_id: zoom_1\n    task: Join meeting 1234567890. (You sh"
  },
  {
    "path": "evaluation/configs.py",
    "chars": 4398,
    "preview": "import importlib\nimport os\nfrom dataclasses import dataclass\nfrom typing import Optional\n\nimport yaml\n\n\nclass AppConfig:"
  },
  {
    "path": "evaluation/definition.py",
    "chars": 6534,
    "preview": "import sys\nimport re\nfrom openai import OpenAI\nfrom zhipuai import ZhipuAI\nfrom agent import *\nfrom utils_mobile.and_con"
  },
  {
    "path": "evaluation/docker_utils.py",
    "chars": 3833,
    "preview": "import json\nimport subprocess\nimport time\n\nimport requests\n\n\ndef run_docker_command(command):\n    full_command = f\"{comm"
  },
  {
    "path": "evaluation/evaluation.py",
    "chars": 14492,
    "preview": "import templates.seeact_screenshot_prompts as SeeActPrompts\nfrom evaluation.definition import *\nfrom evaluation.utils im"
  },
  {
    "path": "evaluation/parallel.py",
    "chars": 1014,
    "preview": "from queue import Queue\nimport concurrent\nfrom evaluation.auto_test import *\n\n\ndef task_done_callback(future, docker_ins"
  },
  {
    "path": "evaluation/task.py",
    "chars": 14462,
    "preview": "from collections import defaultdict\nfrom typing import Generic, TypeVar\nfrom concurrent.futures import ThreadPoolExecuto"
  },
  {
    "path": "evaluation/tasks/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "evaluation/tasks/bluecoins/__init__.py",
    "chars": 762,
    "preview": "from .bluecoins import *\n\nfunction_map = {\n    'bluecoins_1': SingleTask_bluecoins_LLM_1,\n    'bluecoins_2': SingleTask_"
  },
  {
    "path": "evaluation/tasks/bluecoins/bluecoins.py",
    "chars": 52387,
    "preview": "import re\nfrom typing import Dict\nimport base64\nimport requests\nimport os\nfrom typing import Dict, Any, List\nimport json"
  },
  {
    "path": "evaluation/tasks/calendar/__init__.py",
    "chars": 684,
    "preview": "from .calendar import *\n\nfunction_map = {\n    'calendar_1': SingleTask_calendar_LLM_1,\n    'calendar_2': SingleTask_cale"
  },
  {
    "path": "evaluation/tasks/calendar/calendar.py",
    "chars": 41440,
    "preview": "from evaluation.task import *\nfrom evaluation.tasks.llm_evaluator import LLMEvaluator\nimport re\nfrom typing import Dict\n"
  },
  {
    "path": "evaluation/tasks/cantook/__init__.py",
    "chars": 565,
    "preview": "from .cantook import *\n\nfunction_map = {\n    'cantook_1': SingleTask_cantook_LLM_1,\n    'cantook_2': SingleTask_cantook_"
  },
  {
    "path": "evaluation/tasks/cantook/cantook.py",
    "chars": 32127,
    "preview": "from evaluation.task import SingleTask\nfrom evaluation.utils import find_matching_subtrees\nfrom evaluation.tasks.llm_eva"
  },
  {
    "path": "evaluation/tasks/chrome/__init__.py",
    "chars": 329,
    "preview": "from .chrome import *\n\nfunction_map = {\n    'chrome_1': SingleTask_Chrome_LLM_1,\n    'chrome_2': SingleTask_Chrome_LLM_2"
  },
  {
    "path": "evaluation/tasks/chrome/chrome.py",
    "chars": 10528,
    "preview": "from evaluation.task import *\nfrom evaluation.tasks.llm_evaluator import LLMEvaluator\nimport base64\nimport requests\nimpo"
  },
  {
    "path": "evaluation/tasks/clock/__init__.py",
    "chars": 1130,
    "preview": "from .clock import *\n\nfunction_map = {\n    'clock_1': SingleTask_Clock_LLM_1,\n    'clock_2': SingleTask_Clock_LLM_2,\n   "
  },
  {
    "path": "evaluation/tasks/clock/clock.py",
    "chars": 91912,
    "preview": "from evaluation.task import *\nfrom evaluation.tasks.llm_evaluator import LLMEvaluator\n\n\nimport re\nfrom typing import Dic"
  },
  {
    "path": "evaluation/tasks/contacts/__init__.py",
    "chars": 731,
    "preview": "from .contacts import *\n\nfunction_map = {\n    'contacts_1': SingleTask_Contacts_LLM_1,\n    'contacts_2': SingleTask_Cont"
  },
  {
    "path": "evaluation/tasks/contacts/contacts.py",
    "chars": 38254,
    "preview": "from evaluation.task import *\nimport requests\nimport json\nimport base64\nimport traceback\nimport os\nfrom typing import Di"
  },
  {
    "path": "evaluation/tasks/gmail/__init__.py",
    "chars": 314,
    "preview": "from .gmail import *\n\nfunction_map = {\n    'gmail_1': SingleTask_Gmail_LLM_1,\n    'gmail_2': SingleTask_Gmail_LLM_2,\n   "
  },
  {
    "path": "evaluation/tasks/gmail/gmail.py",
    "chars": 11689,
    "preview": "from evaluation.task import *\nimport base64\nimport requests\nimport os\nfrom typing import Dict, Any, List\nimport json\nimp"
  },
  {
    "path": "evaluation/tasks/human_ground_turth/ground_truth_length.json",
    "chars": 2667,
    "preview": "{\n  \"bluecoins_1\": 3,\n  \"bluecoins_10\": 9,\n  \"bluecoins_11\": 8,\n  \"bluecoins_12\": 10,\n  \"bluecoins_13\": 10,\n  \"bluecoins"
  },
  {
    "path": "evaluation/tasks/llm_evaluator.py",
    "chars": 3164,
    "preview": "import base64\nimport requests\nimport os\nfrom typing import Dict, Any\nimport json\nimport traceback\n\n\nclass LLMEvaluator:\n"
  },
  {
    "path": "evaluation/tasks/map_me/__init__.py",
    "chars": 606,
    "preview": "from .map import *\n\nfunction_map = {\n    'map_1': SingleTask_Mapme_LLM_1,\n    'map_2': SingleTask_Mapme_LLM_2,\n    'map_"
  },
  {
    "path": "evaluation/tasks/map_me/map.py",
    "chars": 36271,
    "preview": "from evaluation.task import *\nfrom evaluation.tasks.llm_evaluator import LLMEvaluator\nimport base64\nimport requests\nimpo"
  },
  {
    "path": "evaluation/tasks/pimusic/__init__.py",
    "chars": 565,
    "preview": "from .pimusic import *\n\nfunction_map = {\n    'pimusic_1': SingleTask_pimusic_LLM_1,\n    'pimusic_2': SingleTask_pimusic_"
  },
  {
    "path": "evaluation/tasks/pimusic/pimusic.py",
    "chars": 31872,
    "preview": "import re\nfrom typing import Dict, List\n\nfrom evaluation.task import SingleTask\nfrom evaluation.utils import find_matchi"
  },
  {
    "path": "evaluation/tasks/reddit/__init__.py",
    "chars": 247,
    "preview": "from .reddit import *\n\nfunction_map = {\n    'reddit_1': SingleTask_Reddit_LLM_1,\n    'reddit_2': SingleTask_Reddit_LLM_2"
  },
  {
    "path": "evaluation/tasks/reddit/reddit.py",
    "chars": 8259,
    "preview": "from evaluation.task import *\nfrom evaluation.tasks.llm_evaluator import LLMEvaluator\nimport base64\nimport requests\nimpo"
  },
  {
    "path": "evaluation/tasks/setting/__init__.py",
    "chars": 1058,
    "preview": "from .setting import *\n\nfunction_map = {\n    'setting_0': SingleTask_Setting_LLM_0,\n    'setting_1': SingleTask_Setting_"
  },
  {
    "path": "evaluation/tasks/setting/setting.py",
    "chars": 61599,
    "preview": "from evaluation.task import *\nfrom evaluation.tasks.llm_evaluator import LLMEvaluator\nimport base64\nimport requests\nimpo"
  },
  {
    "path": "evaluation/tasks/tiktok/__init__.py",
    "chars": 288,
    "preview": "from .tiktok import *\n\nfunction_map = {\n    'tiktok_1': SingleTask_TikTok_LLM_1,\n    'tiktok_2': SingleTask_TikTok_LLM_2"
  },
  {
    "path": "evaluation/tasks/tiktok/tiktok.py",
    "chars": 8634,
    "preview": "from evaluation.task import *\nfrom evaluation.tasks.llm_evaluator import LLMEvaluator\nimport base64\nimport requests\nimpo"
  },
  {
    "path": "evaluation/tasks/zoom/__init__.py",
    "chars": 225,
    "preview": "from .zoom import *\n\nfunction_map = {\n    'zoom_1': SingleTask_Zoom_LLM_1,\n    'zoom_2': SingleTask_Zoom_LLM_2,\n    'zoo"
  },
  {
    "path": "evaluation/tasks/zoom/zoom.py",
    "chars": 16766,
    "preview": "from evaluation.task import *\nfrom evaluation.tasks.llm_evaluator import LLMEvaluator\nimport base64\nimport requests\nimpo"
  },
  {
    "path": "evaluation/utils.py",
    "chars": 8808,
    "preview": "import getpass\nimport os\nimport shutil\nimport socket\nimport subprocess\n\nfrom evaluation.docker_utils import execute_adb_"
  },
  {
    "path": "generate_result.py",
    "chars": 10656,
    "preview": "import argparse\nimport concurrent.futures\nimport datetime\nimport os\nimport re\nfrom tqdm import tqdm\nfrom collections imp"
  },
  {
    "path": "ios_agent/README.md",
    "chars": 10174,
    "preview": "# iOS Agent\n\nAn intelligent iOS automation framework that enables GUI agents to interact with iOS applications through W"
  },
  {
    "path": "ios_agent/__init__.py",
    "chars": 463,
    "preview": "\"\"\"iOS Agent for Android-Lab - iOS device automation support.\"\"\"\n\nfrom ios_agent.connection import IOSConnection\nfrom io"
  },
  {
    "path": "ios_agent/actions.py",
    "chars": 13284,
    "preview": "\"\"\"Action execution for iOS devices via WebDriverAgent.\"\"\"\n\nimport time\nfrom typing import Optional, Tuple\n\n# iOS app bu"
  },
  {
    "path": "ios_agent/application/mail/__init__.py",
    "chars": 43,
    "preview": "\"\"\"\nMail application pipeline package.\n\"\"\"\n"
  },
  {
    "path": "ios_agent/application/mail/pipeline.py",
    "chars": 18979,
    "preview": "#!/usr/bin/env python3\n\"\"\"\nMail Pipeline for iOS Agent\n\nTask: Open Mail app, locate the inbox/mail list, and sequentiall"
  },
  {
    "path": "ios_agent/application/mail/rag_system.py",
    "chars": 21082,
    "preview": "#!/usr/bin/env python3\n\"\"\"\nMail Screenshot RAG System\n\nThis system analyzes screenshots saved by the mail agent and gene"
  },
  {
    "path": "ios_agent/application/mail/run_mail_and_rag.sh",
    "chars": 2016,
    "preview": "#!/bin/bash\n\n# Combined launcher: first run Mail GUI agent, then run Mail RAG analysis\n#\n# Usage:\n#   ./run_mail_and_rag"
  },
  {
    "path": "ios_agent/connection.py",
    "chars": 5903,
    "preview": "\"\"\"iOS device connection management via WebDriverAgent.\"\"\"\n\nimport subprocess\nimport time\nfrom dataclasses import datacl"
  },
  {
    "path": "ios_agent/controller.py",
    "chars": 7330,
    "preview": "\"\"\"iOS Controller - adapts iOS device control to Android-Lab's controller interface.\"\"\"\n\nimport os\nimport time\nfrom typi"
  },
  {
    "path": "ios_agent/executor.py",
    "chars": 22128,
    "preview": "\"\"\"iOS executor for Android-Lab - adapts iOS actions to Android-Lab interface.\"\"\"\n\nimport time\nfrom typing import Option"
  },
  {
    "path": "ios_agent/hierarchy.py",
    "chars": 19694,
    "preview": "\"\"\"iOS UI Hierarchy - get and parse iOS page source for element labeling.\"\"\"\n\nimport xml.etree.ElementTree as ET\nfrom ty"
  },
  {
    "path": "ios_agent/labeling.py",
    "chars": 7589,
    "preview": "\"\"\"iOS Screenshot Labeling - draw bounding boxes and labels on screenshots.\"\"\"\n\nimport cv2\nimport os\nfrom typing import "
  },
  {
    "path": "ios_agent/prompts.py",
    "chars": 6964,
    "preview": "\"\"\"iOS-specific prompts for Android-Lab agent.\"\"\"\n\nSYSTEM_PROMPT_IOS_MLLM_DIRECT = '''You are an intelligent agent that "
  },
  {
    "path": "ios_agent/recorder.py",
    "chars": 16125,
    "preview": "\"\"\"iOS Recorder - adapts iOS device recording to Android-Lab's recorder interface.\"\"\"\n\nimport json\nimport os\nimport sys\n"
  },
  {
    "path": "ios_agent/run_ios_agent.py",
    "chars": 7197,
    "preview": "#!/usr/bin/env python3\n\"\"\"\niOS Agent Runner for Android-Lab\n\nThis script uses Android-Lab's framework to run iOS automat"
  },
  {
    "path": "ios_agent/screenshot.py",
    "chars": 5086,
    "preview": "\"\"\"Screenshot utilities for capturing iOS device screen.\"\"\"\n\nimport base64\nimport os\nimport subprocess\nimport tempfile\ni"
  },
  {
    "path": "ios_agent/task.py",
    "chars": 4939,
    "preview": "\"\"\"iOS Task - adapts iOS device automation to Android-Lab's task framework.\"\"\"\n\nimport re\nimport sys\nimport time\nfrom pa"
  },
  {
    "path": "model_training/R1-V/src/distill_r1/README.md",
    "chars": 3815,
    "preview": "# R1 Reasoning Dataset Generation \n\n\n\n## QA Pairs Generation\n\nWe create a `scene description` by combining the objects ("
  },
  {
    "path": "model_training/R1-V/src/distill_r1/create_hf_dataset.py",
    "chars": 3916,
    "preview": "import json\nimport os\nimport random\nfrom datasets import load_dataset\nfrom tqdm import tqdm\n\nrandom.seed(1234)\nVAL_NUM ="
  },
  {
    "path": "model_training/R1-V/src/distill_r1/filter_r1.py",
    "chars": 5888,
    "preview": "import json\nimport re\nfrom pathlib import Path\n\n\n\ndef extract_answer_from_query(query_results: str) -> str | None:\n    \""
  },
  {
    "path": "model_training/R1-V/src/distill_r1/generate_scene_qa_pairs.ipynb",
    "chars": 28818,
    "preview": "{\n \"cells\": [\n  {\n   \"cell_type\": \"code\",\n   \"execution_count\": 1,\n   \"id\": \"3a704ea6-2e61-4aaa-97aa-416579c9bc13\",\n   \""
  },
  {
    "path": "model_training/R1-V/src/distill_r1/prompt.py",
    "chars": 1253,
    "preview": "R1_SYS_PROMPT = \"\"\"You are DeepSeek-R1, an AI assistant created exclusively by the Chinese Company DeepSeek. You'll prov"
  },
  {
    "path": "model_training/R1-V/src/distill_r1/query_r1.py",
    "chars": 4287,
    "preview": "import json\nimport random\nimport os \nfrom openai import OpenAI\nfrom tqdm import tqdm\nimport concurrent.futures\nfrom typi"
  },
  {
    "path": "model_training/R1-V/src/eval/logs/counting_results_superclevr_200_qwen2vl_2b_instruct_grpo100_legacy.json",
    "chars": 62351,
    "preview": "{\n  \"accuracy\": 82.5,\n  \"results\": [\n    {\n      \"question\": {\n        \"image_path\": \"/home/chenliang/images/superCLEVR_"
  },
  {
    "path": "model_training/R1-V/src/eval/logs/counting_results_superclevr_200_qwen2vl_2b_instruct_legacy.json",
    "chars": 92106,
    "preview": "{\n  \"accuracy\": 48.0,\n  \"results\": [\n    {\n      \"question\": {\n        \"image_path\": \"/home/chenliang/images/superCLEVR_"
  },
  {
    "path": "model_training/R1-V/src/eval/logs/geoqa_test_qwen2vl_7b_grpo_2epochs_legacy.json",
    "chars": 567136,
    "preview": "{\n  \"accuracy\": 38.726790450928384,\n  \"results\": [\n    {\n      \"question\": {\n        \"image_path\": \"/home/chenliang/Geo1"
  },
  {
    "path": "model_training/R1-V/src/eval/logs/geoqa_test_qwen2vl_7b_instruct_legacy.json",
    "chars": 744375,
    "preview": "{\n  \"accuracy\": 30.636604774535808,\n  \"results\": [\n    {\n      \"question\": {\n        \"image_path\": \"/home/chenliang/Geo1"
  },
  {
    "path": "model_training/R1-V/src/eval/prompts/geoqa_test_prompts.jsonl",
    "chars": 205697,
    "preview": "{\"image_path\": \"./Geo170K/images/test/0.png\", \"question\": \"As shown in the figure, in triangle ABC, it is known that ang"
  },
  {
    "path": "model_training/R1-V/src/eval/prompts/superclevr_test200_counting_problems.jsonl",
    "chars": 27228,
    "preview": "{\"image_path\": \"./images/superCLEVR_new_025000.png\", \"question\": \"How many different items are there in the image?\", \"gr"
  },
  {
    "path": "model_training/R1-V/src/eval/test_qwen2vl_counting_superclevr.py",
    "chars": 3885,
    "preview": "from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor\nfrom qwen_vl_utils import process"
  },
  {
    "path": "model_training/R1-V/src/eval/test_qwen2vl_geoqa.py",
    "chars": 3996,
    "preview": "from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor\nfrom qwen_vl_utils import process"
  },
  {
    "path": "model_training/R1-V/src/eval/test_qwen2vl_geoqa_multigpu.py",
    "chars": 7921,
    "preview": "from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor\nfrom qwen_vl_utils import process"
  },
  {
    "path": "model_training/R1-V/src/r1-v/.gitignore",
    "chars": 3474,
    "preview": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packagi"
  },
  {
    "path": "model_training/R1-V/src/r1-v/LICENSE",
    "chars": 11357,
    "preview": "                                 Apache License\n                           Version 2.0, January 2004\n                   "
  },
  {
    "path": "model_training/R1-V/src/r1-v/Makefile",
    "chars": 486,
    "preview": ".PHONY: style quality\n\n# make sure to test the local checkout in scripts and not the pre-installed one (don't use quotes"
  },
  {
    "path": "model_training/R1-V/src/r1-v/configs/ddp.yaml",
    "chars": 319,
    "preview": "compute_environment: LOCAL_MACHINE\ndebug: false\ndistributed_type: MULTI_GPU\ndowncast_bf16: 'no'\ngpu_ids: all\nmachine_ran"
  },
  {
    "path": "model_training/R1-V/src/r1-v/configs/qwen2vl_sft_config.yaml",
    "chars": 870,
    "preview": "# Model arguments\nmodel_name_or_path: Qwen/Qwen2-VL-2B-Instruct\nmodel_revision: main\ntorch_dtype: bfloat16\n\n# Data train"
  },
  {
    "path": "model_training/R1-V/src/r1-v/configs/zero2.yaml",
    "chars": 467,
    "preview": "compute_environment: LOCAL_MACHINE\ndebug: false\ndeepspeed_config:\n  deepspeed_multinode_launcher: standard\n  offload_opt"
  },
  {
    "path": "model_training/R1-V/src/r1-v/configs/zero3.yaml",
    "chars": 498,
    "preview": "compute_environment: LOCAL_MACHINE\ndebug: false\ndeepspeed_config:\n  deepspeed_multinode_launcher: standard\n  offload_opt"
  },
  {
    "path": "model_training/R1-V/src/r1-v/local_scripts/create_vision_cot_data.py",
    "chars": 4963,
    "preview": "import argparse\nimport base64\nimport concurrent.futures\nimport io\nimport json\nimport os\nimport random\nimport re\nimport t"
  },
  {
    "path": "model_training/R1-V/src/r1-v/local_scripts/lmms_eval_qwen2vl.sh",
    "chars": 1973,
    "preview": "export HF_HOME=\"<CACHE_DIR>\"\nexport HF_TOKEN=\"<HF_TOKEN>\"\nexport HF_HUB_ENABLE_HF_TRANSFER=\"1\"\n\nexport API_TYPE=\"<API_TY"
  },
  {
    "path": "model_training/R1-V/src/r1-v/local_scripts/prepare_hf_data.py",
    "chars": 4855,
    "preview": "import matplotlib.pyplot as plt\nimport seaborn as sns\nimport pandas as pd\nimport random\nfrom typing import List, Dict\nim"
  },
  {
    "path": "model_training/R1-V/src/r1-v/local_scripts/train_aria_moe.sh",
    "chars": 2033,
    "preview": "#!/bin/bash\n\nexport NCCL_BLOCKING_WAIT=0\nexport TOKENIZERS_PARALLELISM=false\nexport OMP_NUM_THREADS=8\nexport NCCL_IB_DIS"
  },
  {
    "path": "model_training/R1-V/src/r1-v/local_scripts/train_qwen2_vl.sh",
    "chars": 1880,
    "preview": "#!/bin/bash\n\nexport NCCL_BLOCKING_WAIT=0\nexport TOKENIZERS_PARALLELISM=false\nexport OMP_NUM_THREADS=8\nexport NCCL_IB_DIS"
  },
  {
    "path": "model_training/R1-V/src/r1-v/local_scripts/zero1_no_optimizer.json",
    "chars": 625,
    "preview": "{\n\t\"zero_optimization\": {\n\t\t\"stage\": 1,\n\t\t\"allgather_partitions\": true,\n\t\t\"allgather_bucket_size\": 1e9,\n\t\t\"overlap_comm\""
  },
  {
    "path": "model_training/R1-V/src/r1-v/local_scripts/zero2.json",
    "chars": 1028,
    "preview": "{\n    \"fp16\": {\n        \"enabled\": \"auto\",\n        \"loss_scale\": 0,\n        \"loss_scale_window\": 1000,\n        \"initial_"
  },
  {
    "path": "model_training/R1-V/src/r1-v/local_scripts/zero2_fix.json",
    "chars": 830,
    "preview": "{\n    \"fp16\": {\n        \"enabled\": \"auto\",\n        \"loss_scale\": 0,\n        \"loss_scale_window\": 1000,\n        \"initial_"
  },
  {
    "path": "model_training/R1-V/src/r1-v/local_scripts/zero3.json",
    "chars": 1101,
    "preview": "{\n    \"fp16\": {\n        \"enabled\": \"auto\",\n        \"loss_scale\": 0,\n        \"loss_scale_window\": 1000,\n        \"initial_"
  },
  {
    "path": "model_training/R1-V/src/r1-v/local_scripts/zero3.yaml",
    "chars": 498,
    "preview": "compute_environment: LOCAL_MACHINE\ndebug: false\ndeepspeed_config:\n  deepspeed_multinode_launcher: standard\n  offload_opt"
  },
  {
    "path": "model_training/R1-V/src/r1-v/local_scripts/zero3_offload.json",
    "chars": 1288,
    "preview": "{\n    \"fp16\": {\n        \"enabled\": \"auto\",\n        \"loss_scale\": 0,\n        \"loss_scale_window\": 1000,\n        \"initial_"
  },
  {
    "path": "model_training/R1-V/src/r1-v/local_scripts/zero3_offload_fix.json",
    "chars": 1090,
    "preview": "{\n    \"fp16\": {\n        \"enabled\": \"auto\",\n        \"loss_scale\": 0,\n        \"loss_scale_window\": 1000,\n        \"initial_"
  },
  {
    "path": "model_training/R1-V/src/r1-v/run_grpo.sh",
    "chars": 763,
    "preview": "cd src/r1-v\n\nexport DEBUG_MODE=\"true\"\nexport LOG_PATH=\"./debug_log_2b.txt\"\n\n\n\ntorchrun --nproc_per_node=\"8\" \\\n    --nnod"
  },
  {
    "path": "model_training/R1-V/src/r1-v/setup.cfg",
    "chars": 696,
    "preview": "[isort]\ndefault_section = FIRSTPARTY\nensure_newline_before_comments = True\nforce_grid_wrap = 0\ninclude_trailing_comma = "
  },
  {
    "path": "model_training/R1-V/src/r1-v/setup.py",
    "chars": 4597,
    "preview": "# Copyright 2025 The HuggingFace Team. All rights reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"Lic"
  },
  {
    "path": "model_training/R1-V/src/r1-v/src/open_r1/__init__.py",
    "chars": 0,
    "preview": ""
  },
  {
    "path": "model_training/R1-V/src/r1-v/src/open_r1/evaluate.py",
    "chars": 2453,
    "preview": "# Copyright 2025 The HuggingFace Team. All rights reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"Lic"
  },
  {
    "path": "model_training/R1-V/src/r1-v/src/open_r1/generate.py",
    "chars": 4806,
    "preview": "# Copyright 2025 The HuggingFace Team. All rights reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"Lic"
  },
  {
    "path": "model_training/R1-V/src/r1-v/src/open_r1/grpo.py",
    "chars": 7391,
    "preview": "# Copyright 2025 The HuggingFace Team. All rights reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"Lic"
  },
  {
    "path": "model_training/R1-V/src/r1-v/src/open_r1/grpo_agent.py",
    "chars": 10699,
    "preview": "# Copyright 2025 The HuggingFace Team. All rights reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"Lic"
  },
  {
    "path": "model_training/R1-V/src/r1-v/src/open_r1/sft.py",
    "chars": 11100,
    "preview": "# Copyright 2025 The HuggingFace Team. All rights reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"Lic"
  },
  {
    "path": "model_training/R1-V/src/r1-v/src/open_r1/trainer/__init__.py",
    "chars": 280,
    "preview": "from .grpo_trainer import Qwen2VLGRPOTrainer\nfrom .vllm_grpo_trainer import Qwen2VLGRPOVLLMTrainer \nfrom .vllm_grpo_trai"
  },
  {
    "path": "model_training/R1-V/src/r1-v/src/open_r1/trainer/grpo_trainer.py",
    "chars": 30126,
    "preview": "# Copyright 2025 The HuggingFace Team. All rights reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"Lic"
  },
  {
    "path": "model_training/R1-V/src/r1-v/src/open_r1/trainer/vllm_grpo_trainer.py",
    "chars": 37270,
    "preview": "# Copyright 2025 The HuggingFace Team. All rights reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"Lic"
  },
  {
    "path": "model_training/R1-V/src/r1-v/src/open_r1/trainer/vllm_grpo_trainer_modified.py",
    "chars": 36563,
    "preview": "# Copyright 2025 The HuggingFace Team. All rights reserved.\n#\n# Licensed under the Apache License, Version 2.0 (the \"Lic"
  },
  {
    "path": "model_training/R1-V/src/requirements.txt",
    "chars": 5876,
    "preview": "absl-py==2.1.0\naccelerate==1.3.0\naenum==3.1.15\naiohappyeyeballs==2.4.6\naiohttp==3.11.12\naiohttp-cors==0.7.0\naiosignal==1"
  },
  {
    "path": "model_training/R1-V/src/scripts/run_grpo_clevr.sh",
    "chars": 886,
    "preview": "export DEBUG_MODE=\"true\" # Enable Debug if you want to see the rollout of model during RL\nexport LOG_PATH=\"./debug_log_2"
  },
  {
    "path": "model_training/R1-V/src/scripts/run_grpo_vllm.sh",
    "chars": 1245,
    "preview": "#!/bin/bash\n\n# The latest vllm==0.7.2 is required for this script: pip3 install vllm==0.7.2 \n\n\nexport DEBUG_MODE=\"true\"\n"
  },
  {
    "path": "model_training/R1-V/src/scripts/run_grpo_vllm_qwen25vl.sh",
    "chars": 1885,
    "preview": "#!/bin/bash\n\n\n# The latest vllm==0.7.3 is required for this script: pip3 install vllm==0.7.3\n# The latest transformers i"
  },
  {
    "path": "model_training/R1-V/src/scripts/run_grpo_vllm_qwen25vl_agent.sh",
    "chars": 2256,
    "preview": "#!/bin/bash\n\n\n# The latest vllm==0.7.3 is required for this script: pip3 install vllm==0.7.3\n# The latest transformers i"
  },
  {
    "path": "model_training/R1-V/src/scripts/run_sft_clevr.sh",
    "chars": 165,
    "preview": "ACCELERATE_LOG_LEVEL=info accelerate launch --config_file src/r1-v/configs/zero2.yaml src/r1-v/src/open_r1/sft.py --conf"
  },
  {
    "path": "model_training/R1-V/src/scripts/test_grpo_geoqa_multigpu.sh",
    "chars": 502,
    "preview": "r1_v_path=/workspace/xxx/github/R1-V\ncd ${r1_v_path}\n\nmodel_path=${r1_v_path}/output/train@geo170k/checkpoint-30\nbatch_s"
  },
  {
    "path": "model_training/README.md",
    "chars": 2124,
    "preview": "## Training Scripts\n\n### SFT Training Scripts\n\nThe SFT (Supervised Fine-Tuning) related scripts are located in the `llam"
  },
  {
    "path": "model_training/llama_factory_scripts/deepspeed_config_zero2.json",
    "chars": 741,
    "preview": "{\n    \"train_micro_batch_size_per_gpu\": 1,\n    \"gradient_accumulation_steps\": 16,\n    \"zero_optimization\": {\n      \"stag"
  },
  {
    "path": "model_training/llama_factory_scripts/deepspeed_config_zero3.json",
    "chars": 1326,
    "preview": "{\n    \"train_batch_size\": \"auto\",\n    \"train_micro_batch_size_per_gpu\": \"auto\",\n    \"gradient_accumulation_steps\": \"auto"
  },
  {
    "path": "model_training/llama_factory_scripts/full_tuning_setting_3B_vl_fix.sh",
    "chars": 1237,
    "preview": "\nnohup deepspeed --include localhost:1,2 ./../src/train.py \\\n    --stage sft \\\n    --do_train \\\n    --model_name_or_path"
  },
  {
    "path": "page_executor/__init__.py",
    "chars": 44,
    "preview": "from .text_executor import TextOnlyExecutor\n"
  },
  {
    "path": "page_executor/simple_vision_executor.py",
    "chars": 6695,
    "preview": "import time\nimport xml.etree.ElementTree as ET\n\nfrom page_executor.text_executor import TextOnlyExecutor\n\n\nclass Android"
  },
  {
    "path": "page_executor/text_executor.py",
    "chars": 10604,
    "preview": "import inspect\nimport json\nimport re\nimport time\nfrom functools import partial\n\nfrom templates.packages import find_pack"
  },
  {
    "path": "page_executor/utils.py",
    "chars": 2746,
    "preview": "import textwrap\n\nimport cv2\nimport requests\n\n\ndef _add_text(instruction, image):\n    screen_height, screen_width, _ = im"
  },
  {
    "path": "prepare_data/README.md",
    "chars": 2044,
    "preview": "## Overview\n\nThis directory provides scripts and datasets for generating fine-tuning data with long reasoning chains and"
  },
  {
    "path": "prepare_data/rl/convert_to_hf_vl.py",
    "chars": 3293,
    "preview": "import json\nimport pandas as pd\nfrom datasets import Dataset, DatasetDict, Features, Image, Value\nimport os\nimport base6"
  },
  {
    "path": "prepare_data/visual_model_data/data_maker.py",
    "chars": 21225,
    "preview": "import json\nimport re\nimport os\nimport random\nimport time\nimport logging\nfrom typing import List, Dict, Any\nimport base6"
  },
  {
    "path": "prepare_data/visual_model_data/o1_data_visual_cot_pimusic.json",
    "chars": 1645928,
    "preview": "[\n{\n    \"task\": \"You should use pi_music to complete the following task: Open the entire \\\"ALBUMS\\\" list by the artist A"
  },
  {
    "path": "prepare_data/visual_model_data/sft_data_maker.py",
    "chars": 4618,
    "preview": "import json\nimport re\nimport random  \nfrom PIL import Image\ndef extract_function_call(text):\n    pattern = r'<CALLED_FUN"
  },
  {
    "path": "recorder/__init__.py",
    "chars": 39,
    "preview": "from .json_recoder import JSONRecorder\n"
  },
  {
    "path": "recorder/json_recoder.py",
    "chars": 8560,
    "preview": "import json\nimport os\n\nimport jsonlines\n\nfrom utils_mobile.utils import draw_bbox_multi\nfrom utils_mobile.xml_tool impor"
  },
  {
    "path": "requirements.txt",
    "chars": 390,
    "preview": "backoff==2.2.1\nInquirerPy==0.3.4\nlxml==4.9.3\nopenai\njsonlines\nnumpy\nprotobuf\nBeautifulSoup4\ntoml\naioconsole\npython-doten"
  },
  {
    "path": "templates/__init__.py",
    "chars": 116,
    "preview": "from .android_screenshot_template import *\nfrom .seeact_screenshot_prompts import *\nfrom .text_only_mobile import *\n"
  },
  {
    "path": "templates/android_screenshot_template.py",
    "chars": 36878,
    "preview": "SYSTEM_PROMPT_ANDROID_MULTI = '''# Setup\nYou are a professional android operation agent assistant that can fulfill user'"
  },
  {
    "path": "templates/packages.py",
    "chars": 3166,
    "preview": "apps_dict = {\n    \"桌面\": \"com.google.android.apps.nexuslauncher\",\n    \"Spotify\": \"com.spotify.music\",\n    \"Contacts\": \"co"
  },
  {
    "path": "templates/seeact_screenshot_prompts.py",
    "chars": 4179,
    "preview": "QUERY_SYSTEM_PROMPT = \"\"\"\nYou are assisting humans doing smartphone navigation tasks step by step. At each stage, you ca"
  },
  {
    "path": "templates/text_only_mobile.py",
    "chars": 32459,
    "preview": "SYSTEM_PROMPT_ANDROID_TEXT_EXPLORATION_GLM = '''# Setup\r\nYou are a professional android operation agent assistant that c"
  },
  {
    "path": "test_script/all_test_cloud_v1_hyper.sh",
    "chars": 14755,
    "preview": "python ./../eval.py -n all_cloud_v1_hyper -c ./../configs/example_xml_cloud_hyper.yaml --task_id zoom_1\npython ./../eval"
  },
  {
    "path": "test_script/all_test_cloud_v1_hyper_add.sh",
    "chars": 2642,
    "preview": "python ./../eval.py -n all_cloud_v1_hyper -c ./../configs/example_xml_cloud_hyper.yaml --task_id gmail_1\npython ./../eva"
  },
  {
    "path": "tools/check_result_multiprocess.py",
    "chars": 11134,
    "preview": "import json\nimport math\nimport os\nfrom multiprocessing import Pool\n\nimport chardet\nimport jsonlines\nimport matplotlib.py"
  },
  {
    "path": "tools/delete_blank.py",
    "chars": 636,
    "preview": "import os\nimport shutil\n\nfolder = \"/Users/xuyifan/Desktop/agent/pipeline-mobile/logs/evaluation\"\nfiles = os.listdir(fold"
  },
  {
    "path": "tools/modify_mobile_to_avd.py",
    "chars": 1749,
    "preview": "import argparse\nimport os\n\n\ndef update_device_ini(avd_dir, device_name):\n    device_ini_path = os.path.join(avd_dir, f'{"
  },
  {
    "path": "tools/modify_mobile_to_docker.py",
    "chars": 2231,
    "preview": "import argparse\nimport os\nimport shutil\n\n\ndef update_device_ini(avd_dir, device_name, save_dir):\n    device_ini_path = o"
  },
  {
    "path": "utils_mobile/and_controller.py",
    "chars": 11884,
    "preview": "import base64\nimport getpass\nimport os\nimport subprocess\nimport time\nfrom typing import Union\n\nfrom evaluation.docker_ut"
  },
  {
    "path": "utils_mobile/specialCheck.py",
    "chars": 24897,
    "preview": "import math\nimport re\nfrom collections import deque\n\n\ndef bounds_to_coords(bounds_string):\n    pattern = r\"\\[(-?\\d+),(-?"
  },
  {
    "path": "utils_mobile/utils.py",
    "chars": 15044,
    "preview": "import base64\nimport json\nimport re\nimport shutil\nimport time\nfrom datetime import datetime, timedelta\nfrom pathlib impo"
  },
  {
    "path": "utils_mobile/xml_tool.py",
    "chars": 26365,
    "preview": "import copy\nimport json\nimport uuid\nfrom typing import Dict\n\nimport xmltodict\nfrom lxml import etree\n\nfrom utils_mobile."
  },
  {
    "path": "vllm_script/run_server_visual_cot.py",
    "chars": 2442,
    "preview": "import os \nimport uvicorn\nfrom fastapi import FastAPI, HTTPException\nfrom pydantic import BaseModel\nfrom vllm import LLM"
  }
]

// ... and 2 more files (download for full content)

About this extraction

This page contains the full source code of the HKUDS/OpenPhone GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 200 files (57.2 MB), approximately 1.2M tokens, and a symbol index with 2015 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — a free GitHub-repo-to-text converter for AI. Built by Nikandr Surkov.

Copied to clipboard!