Showing preview only (4,368K chars total). Download the full file or copy to clipboard to get everything.
Repository: aiming-lab/AutoResearchClaw
Branch: main
Commit: 258dae2bb28f
Files: 422
Total size: 4.1 MB
Directory structure:
gitextract_tp1xyq09/
├── .gitignore
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── config.researchclaw.example.yaml
├── docs/
│ ├── BUG_FIX_DOCUMENT_20260316.md
│ ├── BUG_TRACKER.md
│ ├── CHANGELOG_ANTHROPIC_ADAPTER.md
│ ├── PIPELINE_TEST_LOG_R5.md
│ ├── README_AR.md
│ ├── README_CN.md
│ ├── README_DE.md
│ ├── README_ES.md
│ ├── README_FR.md
│ ├── README_JA.md
│ ├── README_KO.md
│ ├── README_PT.md
│ ├── README_RU.md
│ ├── TESTER_GUIDE.md
│ ├── TESTER_GUIDE_CN.md
│ ├── TESTER_GUIDE_JA.md
│ ├── agent_figure_and_benchmark_plan.md
│ ├── figure_prompts/
│ │ ├── case_a_meta_learning.md
│ │ └── case_b_rlhf_alignment.md
│ ├── integration-guide.md
│ ├── issue_tracker_v9.md
│ ├── iteration_plan_v8.md
│ ├── iteration_showcase_narrative.md
│ ├── metaclaw-integration-plan.md
│ ├── next_phase_showcase_plan.md
│ ├── pipeline_critical_fixes_v8.md
│ ├── rate_limit_fix_plan.md
│ ├── sandbox_environment_fix_plan.md
│ └── showcase/
│ └── SHOWCASE.md
├── prompts.default.yaml
├── pyproject.toml
├── researchclaw/
│ ├── __init__.py
│ ├── __main__.py
│ ├── adapters.py
│ ├── agents/
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── benchmark_agent/
│ │ │ ├── __init__.py
│ │ │ ├── acquirer.py
│ │ │ ├── orchestrator.py
│ │ │ ├── selector.py
│ │ │ ├── surveyor.py
│ │ │ └── validator.py
│ │ ├── code_searcher/
│ │ │ ├── __init__.py
│ │ │ ├── agent.py
│ │ │ ├── cache.py
│ │ │ ├── github_client.py
│ │ │ ├── pattern_extractor.py
│ │ │ └── query_gen.py
│ │ └── figure_agent/
│ │ ├── __init__.py
│ │ ├── codegen.py
│ │ ├── critic.py
│ │ ├── decision.py
│ │ ├── integrator.py
│ │ ├── nano_banana.py
│ │ ├── orchestrator.py
│ │ ├── planner.py
│ │ ├── renderer.py
│ │ └── style_config.py
│ ├── assessor/
│ │ ├── __init__.py
│ │ ├── comparator.py
│ │ ├── rubrics.py
│ │ ├── scorer.py
│ │ └── venue_recommender.py
│ ├── calendar/
│ │ ├── __init__.py
│ │ ├── data/
│ │ │ └── conferences.yaml
│ │ ├── deadlines.py
│ │ ├── planner.py
│ │ └── reminder.py
│ ├── cli.py
│ ├── collaboration/
│ │ ├── __init__.py
│ │ ├── dedup.py
│ │ ├── publisher.py
│ │ ├── repository.py
│ │ └── subscriber.py
│ ├── config.py
│ ├── copilot/
│ │ ├── __init__.py
│ │ ├── branching.py
│ │ ├── controller.py
│ │ ├── feedback.py
│ │ └── modes.py
│ ├── dashboard/
│ │ ├── __init__.py
│ │ ├── broadcaster.py
│ │ ├── collector.py
│ │ └── metrics.py
│ ├── data/
│ │ ├── __init__.py
│ │ ├── benchmark_knowledge.yaml
│ │ ├── dataset_registry.yaml
│ │ ├── docker_profiles.yaml
│ │ ├── framework_docs/
│ │ │ ├── axolotl.md
│ │ │ ├── llamafactory.md
│ │ │ ├── peft.md
│ │ │ ├── transformers_training.md
│ │ │ └── trl.md
│ │ └── seminal_papers.yaml
│ ├── docker/
│ │ ├── Dockerfile
│ │ ├── Dockerfile.biology
│ │ ├── Dockerfile.chemistry
│ │ ├── Dockerfile.economics
│ │ ├── Dockerfile.generic
│ │ ├── Dockerfile.math
│ │ ├── Dockerfile.physics
│ │ └── entrypoint.sh
│ ├── domains/
│ │ ├── __init__.py
│ │ ├── adapters/
│ │ │ ├── __init__.py
│ │ │ ├── biology.py
│ │ │ ├── chemistry.py
│ │ │ ├── economics.py
│ │ │ ├── generic.py
│ │ │ ├── math.py
│ │ │ ├── ml.py
│ │ │ ├── neuroscience.py
│ │ │ ├── physics.py
│ │ │ ├── robotics.py
│ │ │ └── security.py
│ │ ├── detector.py
│ │ ├── experiment_schema.py
│ │ ├── profiles/
│ │ │ ├── _generic.yaml
│ │ │ ├── biology_genomics.yaml
│ │ │ ├── biology_protein.yaml
│ │ │ ├── biology_singlecell.yaml
│ │ │ ├── chemistry_molprop.yaml
│ │ │ ├── chemistry_qm.yaml
│ │ │ ├── economics_empirical.yaml
│ │ │ ├── mathematics_numerical.yaml
│ │ │ ├── mathematics_optimization.yaml
│ │ │ ├── ml_compression.yaml
│ │ │ ├── ml_generative.yaml
│ │ │ ├── ml_generic.yaml
│ │ │ ├── ml_graph.yaml
│ │ │ ├── ml_nlp.yaml
│ │ │ ├── ml_rl.yaml
│ │ │ ├── ml_tabular.yaml
│ │ │ ├── ml_vision.yaml
│ │ │ ├── neuroscience_computational.yaml
│ │ │ ├── neuroscience_imaging.yaml
│ │ │ ├── physics_pde.yaml
│ │ │ ├── physics_quantum.yaml
│ │ │ ├── physics_simulation.yaml
│ │ │ ├── robotics_control.yaml
│ │ │ └── security_detection.yaml
│ │ └── prompt_adapter.py
│ ├── evolution.py
│ ├── experiment/
│ │ ├── __init__.py
│ │ ├── agentic_sandbox.py
│ │ ├── code_agent.py
│ │ ├── colab_sandbox.py
│ │ ├── docker_sandbox.py
│ │ ├── evaluators/
│ │ │ ├── __init__.py
│ │ │ └── convergence.py
│ │ ├── factory.py
│ │ ├── git_manager.py
│ │ ├── harness_template.py
│ │ ├── metrics.py
│ │ ├── runner.py
│ │ ├── sandbox.py
│ │ ├── ssh_sandbox.py
│ │ ├── validator.py
│ │ └── visualize.py
│ ├── feedback/
│ │ └── FEEDBACK_ANALYSIS_PROMPT.md
│ ├── hardware.py
│ ├── health.py
│ ├── knowledge/
│ │ ├── __init__.py
│ │ ├── base.py
│ │ └── graph/
│ │ ├── __init__.py
│ │ ├── builder.py
│ │ ├── entities.py
│ │ ├── query.py
│ │ ├── relations.py
│ │ └── visualizer.py
│ ├── literature/
│ │ ├── __init__.py
│ │ ├── arxiv_client.py
│ │ ├── cache.py
│ │ ├── models.py
│ │ ├── novelty.py
│ │ ├── openalex_client.py
│ │ ├── search.py
│ │ ├── semantic_scholar.py
│ │ ├── trends.py
│ │ └── verify.py
│ ├── llm/
│ │ ├── __init__.py
│ │ ├── acp_client.py
│ │ ├── anthropic_adapter.py
│ │ └── client.py
│ ├── mcp/
│ │ ├── __init__.py
│ │ ├── client.py
│ │ ├── registry.py
│ │ ├── server.py
│ │ ├── tools.py
│ │ └── transport.py
│ ├── memory/
│ │ ├── __init__.py
│ │ ├── decay.py
│ │ ├── embeddings.py
│ │ ├── experiment_memory.py
│ │ ├── ideation_memory.py
│ │ ├── retriever.py
│ │ ├── store.py
│ │ └── writing_memory.py
│ ├── metaclaw_bridge/
│ │ ├── __init__.py
│ │ ├── config.py
│ │ ├── lesson_to_skill.py
│ │ ├── prm_gate.py
│ │ ├── session.py
│ │ ├── skill_feedback.py
│ │ └── stage_skill_map.py
│ ├── overleaf/
│ │ ├── __init__.py
│ │ ├── conflict.py
│ │ ├── formatter.py
│ │ ├── sync.py
│ │ └── watcher.py
│ ├── pipeline/
│ │ ├── __init__.py
│ │ ├── _domain.py
│ │ ├── _helpers.py
│ │ ├── code_agent.py
│ │ ├── contracts.py
│ │ ├── executor.py
│ │ ├── experiment_diagnosis.py
│ │ ├── experiment_repair.py
│ │ ├── opencode_bridge.py
│ │ ├── paper_verifier.py
│ │ ├── runner.py
│ │ ├── stage_impls/
│ │ │ ├── __init__.py
│ │ │ ├── _analysis.py
│ │ │ ├── _code_generation.py
│ │ │ ├── _execution.py
│ │ │ ├── _experiment_design.py
│ │ │ ├── _literature.py
│ │ │ ├── _paper_writing.py
│ │ │ ├── _review_publish.py
│ │ │ ├── _synthesis.py
│ │ │ └── _topic.py
│ │ ├── stages.py
│ │ └── verified_registry.py
│ ├── project/
│ │ ├── __init__.py
│ │ ├── idea_pool.py
│ │ ├── manager.py
│ │ ├── models.py
│ │ └── scheduler.py
│ ├── prompts.py
│ ├── quality.py
│ ├── report.py
│ ├── server/
│ │ ├── __init__.py
│ │ ├── app.py
│ │ ├── dialog/
│ │ │ ├── __init__.py
│ │ │ ├── intents.py
│ │ │ ├── router.py
│ │ │ └── session.py
│ │ ├── middleware/
│ │ │ ├── __init__.py
│ │ │ └── auth.py
│ │ ├── routes/
│ │ │ ├── __init__.py
│ │ │ ├── chat.py
│ │ │ ├── pipeline.py
│ │ │ ├── projects.py
│ │ │ └── voice.py
│ │ └── websocket/
│ │ ├── __init__.py
│ │ ├── events.py
│ │ └── manager.py
│ ├── servers/
│ │ ├── __init__.py
│ │ ├── cloud_executor.py
│ │ ├── dispatcher.py
│ │ ├── monitor.py
│ │ ├── registry.py
│ │ ├── slurm_executor.py
│ │ └── ssh_executor.py
│ ├── skills/
│ │ ├── __init__.py
│ │ ├── builtin/
│ │ │ ├── __init__.py
│ │ │ ├── domain/
│ │ │ │ ├── cv-classification/
│ │ │ │ │ └── SKILL.md
│ │ │ │ ├── cv-detection/
│ │ │ │ │ └── SKILL.md
│ │ │ │ ├── nlp-alignment/
│ │ │ │ │ └── SKILL.md
│ │ │ │ ├── nlp-pretraining/
│ │ │ │ │ └── SKILL.md
│ │ │ │ └── rl-policy-optimization/
│ │ │ │ └── SKILL.md
│ │ │ ├── experiment/
│ │ │ │ ├── experimental-design/
│ │ │ │ │ └── SKILL.md
│ │ │ │ ├── meta-analysis/
│ │ │ │ │ └── SKILL.md
│ │ │ │ └── systematic-review/
│ │ │ │ └── SKILL.md
│ │ │ └── tooling/
│ │ │ ├── data-loading/
│ │ │ │ └── SKILL.md
│ │ │ ├── distributed-training/
│ │ │ │ └── SKILL.md
│ │ │ ├── mixed-precision/
│ │ │ │ └── SKILL.md
│ │ │ └── pytorch-training/
│ │ │ └── SKILL.md
│ │ ├── loader.py
│ │ ├── matcher.py
│ │ ├── registry.py
│ │ └── schema.py
│ ├── templates/
│ │ ├── __init__.py
│ │ ├── compiler.py
│ │ ├── conference.py
│ │ ├── converter.py
│ │ ├── results_table_builder.py
│ │ └── styles/
│ │ ├── iclr_2025/
│ │ │ ├── iclr2025_conference.bst
│ │ │ └── iclr2025_conference.sty
│ │ ├── iclr_2026/
│ │ │ ├── iclr2026_conference.bst
│ │ │ └── iclr2026_conference.sty
│ │ ├── icml_2025/
│ │ │ ├── icml2025.bst
│ │ │ └── icml2025.sty
│ │ ├── icml_2026/
│ │ │ ├── icml2026.bst
│ │ │ └── icml2026.sty
│ │ ├── neurips_2024/
│ │ │ └── neurips_2024.sty
│ │ └── neurips_2025/
│ │ └── neurips_2025.sty
│ ├── trends/
│ │ ├── __init__.py
│ │ ├── auto_topic.py
│ │ ├── daily_digest.py
│ │ ├── feeds.py
│ │ ├── opportunity_finder.py
│ │ └── trend_analyzer.py
│ ├── utils/
│ │ ├── __init__.py
│ │ ├── sanitize.py
│ │ └── thinking_tags.py
│ ├── voice/
│ │ ├── __init__.py
│ │ ├── commands.py
│ │ ├── synthesizer.py
│ │ └── transcriber.py
│ ├── web/
│ │ ├── __init__.py
│ │ ├── _ssrf.py
│ │ ├── agent.py
│ │ ├── crawler.py
│ │ ├── pdf_extractor.py
│ │ ├── scholar.py
│ │ └── search.py
│ ├── wizard/
│ │ ├── __init__.py
│ │ ├── quickstart.py
│ │ ├── templates.py
│ │ └── validator.py
│ └── writing_guide.py
├── scripts/
│ ├── metaclaw_start.sh
│ ├── plot_iteration_showcase.py
│ ├── test_beast_mode_e2e.py
│ ├── test_code_agent_live.py
│ ├── test_code_agent_sandbox.py
│ └── test_codegen_v2.py
├── sentinel.sh
├── tests/
│ ├── __init__.py
│ ├── conftest.py
│ ├── e2e_docker_sandbox.py
│ ├── e2e_real_llm.py
│ ├── test_anthropic.py
│ ├── test_assessor.py
│ ├── test_benchmark_agent.py
│ ├── test_calendar.py
│ ├── test_cli.py
│ ├── test_code_agent.py
│ ├── test_code_searcher.py
│ ├── test_collaboration.py
│ ├── test_compiler.py
│ ├── test_convergence_evaluator.py
│ ├── test_copilot.py
│ ├── test_decision_agent.py
│ ├── test_domain_detector.py
│ ├── test_entry_point_validation.py
│ ├── test_experiment_diagnosis.py
│ ├── test_experiment_repair.py
│ ├── test_experiment_schema.py
│ ├── test_figure_agent.py
│ ├── test_knowledge_graph.py
│ ├── test_mcp.py
│ ├── test_memory_system.py
│ ├── test_metaclaw_bridge/
│ │ ├── __init__.py
│ │ ├── test_config.py
│ │ ├── test_lesson_to_skill.py
│ │ ├── test_prm_gate.py
│ │ ├── test_session.py
│ │ ├── test_skill_feedback.py
│ │ └── test_stage_skill_map.py
│ ├── test_metric_parser.py
│ ├── test_minimax_provider.py
│ ├── test_neuroscience_domain.py
│ ├── test_opencode_bridge.py
│ ├── test_overleaf.py
│ ├── test_paper_verifier.py
│ ├── test_project_manager.py
│ ├── test_prompt_adapter.py
│ ├── test_rc_adapters.py
│ ├── test_rc_cache.py
│ ├── test_rc_checkpoint.py
│ ├── test_rc_citation_resolve.py
│ ├── test_rc_citation_verify.py
│ ├── test_rc_cli.py
│ ├── test_rc_config.py
│ ├── test_rc_contracts.py
│ ├── test_rc_docker_sandbox.py
│ ├── test_rc_e2e_regression.py
│ ├── test_rc_evolution.py
│ ├── test_rc_executor.py
│ ├── test_rc_hardware.py
│ ├── test_rc_health.py
│ ├── test_rc_kb.py
│ ├── test_rc_literature.py
│ ├── test_rc_llm.py
│ ├── test_rc_novelty.py
│ ├── test_rc_preflight.py
│ ├── test_rc_prompts.py
│ ├── test_rc_quality.py
│ ├── test_rc_report.py
│ ├── test_rc_runner.py
│ ├── test_rc_sanitization.py
│ ├── test_rc_sentinel.py
│ ├── test_rc_stages.py
│ ├── test_rc_templates.py
│ ├── test_rc_validator.py
│ ├── test_results_table_builder.py
│ ├── test_robotics_adapter.py
│ ├── test_servers.py
│ ├── test_skills_library.py
│ ├── test_ssh_and_colab_sandbox.py
│ ├── test_trends.py
│ ├── test_universal_codegen_integration.py
│ ├── test_v6_improvements.py
│ ├── test_verified_registry.py
│ ├── test_web_crawler.py
│ ├── test_web_integration.py
│ ├── test_web_pdf_extractor.py
│ ├── test_web_platform.py
│ ├── test_web_scholar.py
│ └── test_web_search.py
└── website/
├── features.html
├── getting-started.html
├── index.html
├── papers.html
├── pipeline.html
└── style.css
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
HANDOFF_METACLAW_SKILL_LOOP.md
.venv/
__pycache__/
*.pyc
*.egg-info/
dist/
build/
workspaces/
.claude/*
!.claude/agents/
!.claude/agents/*.md
!.claude/skills/
!.claude/skills/**/SKILL.md
.claude/settings.local.json
# Experiment run artifacts (local only)
artifacts/
output/
experiment_metaclaw/
promotional/
# Legacy experiment artifacts (pre-v5)
exp/
logs/
writing/
# Root-level config (local overrides, not committed)
/config.yaml
# Sensitive / credentials
user_token_cache.json
*.secret
.env
.env.*
config_run*.yaml
# Literature search cache
.researchclaw_cache/
# Playwright MCP logs
.playwright-mcp/
# Internal dev/debug docs (not for public)
docs/internal/
docs/kb/
docs/plans/
docs/BUGFIX_TRACKER*.md
docs/IMPROVEMENT_PLAN*.md
docs/IMPROVEMENT_*_EXECUTION.md
docs/OPTIMIZATION_PLAN*.md
docs/MULTI_CASE_EVALUATION*.md
docs/pipeline_quality_issues*.md
docs/autobench-loop.md
RESEARCHCLAW_AGENTS.md
RESEARCHCLAW_CLAUDE.md
# Task-specific config files (keep example template only)
config_agent_*.yaml
config_case*.yaml
config_v8_case*.yaml
pipeline_run_*.log
benchmarks/
# Logo generation prompts
image/logo_prompt.md
# macOS
.DS_Store
run.log
# Misc temp files
.history/
.serena/
cli_pause
暂停
进入
连续失败
重试一次
.venv_arc/
/config.arc.yaml
config_*.yaml
# Frontend (local dev only)
frontend/
# Test outputs and run logs (local only)
test_outputs*/
records/
run*_full_log.txt
mdpdf.log
scripts/md2pdf.py
# Local docs (not for public)
docs/tasks/
docs/feature_expansion_analysis.*
docs/tester_guide_cn.*
================================================
FILE: CONTRIBUTING.md
================================================
# Contributing to AutoResearchClaw
## Setup
1. Fork and clone the repo
2. Create a venv and install with dev extras:
```
python3 -m venv .venv && source .venv/bin/activate
pip install -e ".[dev]"
```
3. Generate your local config:
```
researchclaw init
```
4. Edit `config.arc.yaml` with your LLM settings
## Config Convention
- `config.researchclaw.example.yaml` — tracked template (do not add secrets)
- `config.arc.yaml` — your local config (gitignored, created by `researchclaw init`)
- `config.yaml` — also gitignored, supported as fallback
## Running Tests
```
pytest tests/
```
## Checking Your Environment
```
researchclaw doctor
```
## PR Guidelines
- Branch from main
- One concern per PR
- Ensure `pytest tests/` passes
- Include tests for new functionality
================================================
FILE: LICENSE
================================================
MIT License
Copyright (c) 2026 Aiming Lab
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: README.md
================================================
<p align="center">
<img src="image/logo.png" width="700" alt="AutoResearchClaw Logo">
</p>
<h2 align="center"><b>Chat an Idea. Get a Paper. Fully Autonomous & Self-Evolving.</b></h2>
<p align="center">
<b><i><font size="5">Just chat with <a href="#openclaw-integration">OpenClaw</a>: "Research X" → done.</font></i></b>
</p>
<p align="center">
<img src="image/framework_v2.png" width="100%" alt="AutoResearchClaw Framework">
</p>
<p align="center">
<a href="LICENSE"><img src="https://img.shields.io/badge/License-MIT-yellow.svg" alt="MIT License"></a>
<a href="https://python.org"><img src="https://img.shields.io/badge/Python-3.11%2B-3776AB?logo=python&logoColor=white" alt="Python 3.11+"></a>
<a href="#testing"><img src="https://img.shields.io/badge/Tests-1823%20passed-brightgreen?logo=pytest&logoColor=white" alt="1823 Tests Passed"></a>
<a href="https://github.com/aiming-lab/AutoResearchClaw"><img src="https://img.shields.io/badge/GitHub-AutoResearchClaw-181717?logo=github" alt="GitHub"></a>
<a href="#openclaw-integration"><img src="https://img.shields.io/badge/OpenClaw-Compatible-ff4444?logo=data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAyNCAyNCI+PHBhdGggZD0iTTEyIDJDNi40OCAyIDIgNi40OCAyIDEyczQuNDggMTAgMTAgMTAgMTAtNC40OCAxMC0xMFMxNy41MiAyIDEyIDJ6IiBmaWxsPSJ3aGl0ZSIvPjwvc3ZnPg==" alt="OpenClaw Compatible"></a>
<a href="https://discord.gg/u4ksqW5P"><img src="https://img.shields.io/badge/Discord-Join%20Community-5865F2?logo=discord&logoColor=white" alt="Discord"></a>
</p>
<p align="center">
<a href="docs/README_CN.md">🇨🇳 中文</a> ·
<a href="docs/README_JA.md">🇯🇵 日本語</a> ·
<a href="docs/README_KO.md">🇰🇷 한국어</a> ·
<a href="docs/README_FR.md">🇫🇷 Français</a> ·
<a href="docs/README_DE.md">🇩🇪 Deutsch</a> ·
<a href="docs/README_ES.md">🇪🇸 Español</a> ·
<a href="docs/README_PT.md">🇧🇷 Português</a> ·
<a href="docs/README_RU.md">🇷🇺 Русский</a> ·
<a href="docs/README_AR.md">🇸🇦 العربية</a>
</p>
<p align="center">
<a href="docs/showcase/SHOWCASE.md">🏆 Paper Showcase</a> · <a href="docs/integration-guide.md">📖 Integration Guide</a> · <a href="https://discord.gg/u4ksqW5P">💬 Discord Community</a>
</p>
---
<table>
<tr>
<td width="18%">
<a href="docs/showcase/SHOWCASE.md"><img src="docs/showcase/thumbnails/paper_I_random_matrix-01.png" width="120" alt="Sample Paper"/></a>
</td>
<td valign="middle">
<b>🏆 Generated Paper Showcase</b><br><br>
<b>8 papers across 8 domains</b> — math, statistics, biology, computing, NLP, RL, vision, robustness — generated fully autonomously with zero human intervention.<br><br>
<a href="docs/showcase/SHOWCASE.md"><img src="https://img.shields.io/badge/View_Full_Showcase_→-All_8_Papers-d73a49?style=for-the-badge" alt="View Showcase"></a>
</td>
</tr>
</table>
---
> **🧪 We're looking for testers!** Try the pipeline with your own research idea — from any field — and [tell us what you think](docs/TESTER_GUIDE.md). Your feedback directly shapes the next version. **[→ Testing Guide](docs/TESTER_GUIDE.md)** | **[→ 中文测试指南](docs/TESTER_GUIDE_CN.md)** | **[→ 日本語テストガイド](docs/TESTER_GUIDE_JA.md)**
---
## 🔥 News
- **[03/22/2026]** [v0.3.2](https://github.com/aiming-lab/AutoResearchClaw/releases/tag/v0.3.2) — **Cross-Platform Support + Major Stability** — AutoResearchClaw now runs on any ACP-compatible agent backend (Claude Code, Codex CLI, Copilot CLI, Gemini CLI, Kimi CLI) and supports messaging platforms (Discord, Telegram, Lark, WeChat) via OpenClaw bridge. New CLI-agent code generation backend delegates Stages 10 & 13 to external CLI agents with budget control and timeout management. Also includes anti-fabrication system (VerifiedRegistry + experiment diagnosis & repair loop), 100+ bug fixes, modular executor refactoring, `--resume` auto-detection, LLM retry hardening, and community-reported fixes.
- **[03/18/2026]** [v0.3.1](https://github.com/aiming-lab/AutoResearchClaw/releases/tag/v0.3.1) — **OpenCode Beast Mode + Community Contributions** — New "Beast Mode" routes complex code generation to [OpenCode](https://github.com/anomalyco/opencode) with automatic complexity scoring and graceful fallback. Added Novita AI provider support, thread-safety hardening, improved LLM output parsing robustness, and 20+ bug fixes from community PRs and internal audit.
- **[03/17/2026]** [v0.3.0](https://github.com/aiming-lab/AutoResearchClaw/releases/tag/v0.3.0) — **MetaClaw Integration** — AutoResearchClaw now supports [MetaClaw](https://github.com/aiming-lab/MetaClaw) cross-run learning: pipeline failures → structured lessons → reusable skills, injected into all 23 stages. **+18.3%** robustness in controlled experiments. Opt-in (`metaclaw_bridge.enabled: true`), fully backward-compatible. See [Integration Guide](#-metaclaw-integration).
- **[03/16/2026]** [v0.2.0](https://github.com/aiming-lab/AutoResearchClaw/releases/tag/v0.2.0) — Three multi-agent subsystems (CodeAgent, BenchmarkAgent, FigureAgent), hardened Docker sandbox with network-policy-aware execution, 4-round paper quality audit (AI-slop detection, 7-dim review scoring, NeurIPS checklist), and 15+ bug fixes from production runs.
- **[03/15/2026]** [v0.1.0](https://github.com/aiming-lab/AutoResearchClaw/releases/tag/v0.1.0) — We release AutoResearchClaw: a fully autonomous 23-stage research pipeline that turns a single research idea into a conference-ready paper. No human intervention required.
---
## ⚡ One Command. One Paper.
```bash
pip install -e . && researchclaw setup && researchclaw init && researchclaw run --topic "Your research idea here" --auto-approve
```
---
## 🤔 What Is This?
**You think it. AutoResearchClaw writes it.**
Drop a research topic — get back a full academic paper with real literature from OpenAlex, Semantic Scholar & arXiv, hardware-aware sandbox experiments (GPU/MPS/CPU auto-detected), statistical analysis, multi-agent peer review, and conference-ready LaTeX targeting NeurIPS/ICML/ICLR. No babysitting. No copy-pasting. No hallucinated references.
<table>
<tr><td>📄</td><td><code>paper_draft.md</code></td><td>Full academic paper (Introduction, Related Work, Method, Experiments, Results, Conclusion)</td></tr>
<tr><td>📐</td><td><code>paper.tex</code></td><td>Conference-ready LaTeX (NeurIPS / ICLR / ICML templates)</td></tr>
<tr><td>📚</td><td><code>references.bib</code></td><td>Real BibTeX references from OpenAlex, Semantic Scholar and arXiv — auto-pruned to match inline citations</td></tr>
<tr><td>🔍</td><td><code>verification_report.json</code></td><td>4-layer citation integrity + relevance verification (arXiv, CrossRef, DataCite, LLM)</td></tr>
<tr><td>🧪</td><td><code>experiment runs/</code></td><td>Generated code + sandbox results + structured JSON metrics</td></tr>
<tr><td>📊</td><td><code>charts/</code></td><td>Auto-generated condition comparison charts with error bars and confidence intervals</td></tr>
<tr><td>📝</td><td><code>reviews.md</code></td><td>Multi-agent peer review with methodology-evidence consistency checks</td></tr>
<tr><td>🧬</td><td><code>evolution/</code></td><td>Self-learning lessons extracted from each run</td></tr>
<tr><td>📦</td><td><code>deliverables/</code></td><td>All final outputs in one folder — compile-ready for Overleaf</td></tr>
</table>
The pipeline runs **end-to-end without human intervention**. When experiments fail, it self-heals. When hypotheses don't hold, it pivots. When citations are fake, it kills them.
🌍 **Run it anywhere.** AutoResearchClaw isn't locked to a single platform. Use it standalone via CLI, plug it into [OpenClaw](https://github.com/openclaw/openclaw), or wire it up through any ACP-compatible agent — 🤖 Claude Code, 💻 Codex CLI, 🐙 Copilot CLI, ♊ Gemini CLI, 🌙 Kimi CLI, you name it. And because OpenClaw bridges to messaging platforms, you can kick off a full research run from 💬 Discord, ✈️ Telegram, 🐦 Lark (飞书), 💚 WeChat, or wherever your team already hangs out. One topic in, one paper out — no matter where you type it.
---
## 🚀 Quick Start
```bash
# 1. Clone & install
git clone https://github.com/aiming-lab/AutoResearchClaw.git
cd AutoResearchClaw
python3 -m venv .venv && source .venv/bin/activate
pip install -e .
# 2. Setup (interactive — installs OpenCode beast mode, checks Docker/LaTeX)
researchclaw setup
# 3. Configure
researchclaw init # Interactive: choose LLM provider, creates config.arc.yaml
# Or manually: cp config.researchclaw.example.yaml config.arc.yaml
# 4. Run
export OPENAI_API_KEY="sk-..."
researchclaw run --config config.arc.yaml --topic "Your research idea" --auto-approve
```
Output → `artifacts/rc-YYYYMMDD-HHMMSS-<hash>/deliverables/` — compile-ready LaTeX, BibTeX, experiment code, charts.
<details>
<summary>📝 Minimum required config</summary>
```yaml
project:
name: "my-research"
research:
topic: "Your research topic here"
llm:
base_url: "https://api.openai.com/v1"
api_key_env: "OPENAI_API_KEY"
primary_model: "gpt-4o"
fallback_models: ["gpt-4o-mini"]
experiment:
mode: "sandbox"
sandbox:
python_path: ".venv/bin/python"
```
</details>
---
## 🧠 What Makes It Different
| Capability | How It Works |
|-----------|-------------|
| **🔄 PIVOT / REFINE Loop** | Stage 15 autonomously decides: PROCEED, REFINE (tweak params), or PIVOT (new direction). Artifacts auto-versioned. |
| **🤖 Multi-Agent Debate** | Hypothesis generation, result analysis, and peer review each use structured multi-perspective debate. |
| **🧬 Self-Learning** | Lessons extracted per run (decision rationale, runtime warnings, metric anomalies) with 30-day time-decay. Future runs learn from past mistakes. |
| **📚 Knowledge Base** | Every run builds structured KB across 6 categories (decisions, experiments, findings, literature, questions, reviews). |
| **🛡️ Sentinel Watchdog** | Background quality monitor: NaN/Inf detection, paper-evidence consistency, citation relevance scoring, anti-fabrication guard. |
---
## 🦞 OpenClaw Integration
<table>
<tr>
<td>

**AutoResearchClaw is an [OpenClaw](https://github.com/openclaw/openclaw)-compatible service.** Install it in OpenClaw and launch autonomous research with a single message — or use it standalone via CLI, Claude Code, or any AI coding assistant.

</td>
</tr>
</table>
### 🚀 Use with OpenClaw (Recommended)
If you already use [OpenClaw](https://github.com/openclaw/openclaw) as your AI assistant:
```
1️⃣ Share the GitHub repo URL with OpenClaw
2️⃣ OpenClaw auto-reads RESEARCHCLAW_AGENTS.md → understands the pipeline
3️⃣ Say: "Research [your topic]"
4️⃣ Done — OpenClaw clones, installs, configures, runs, and returns results
```
**That's it.** OpenClaw handles `git clone`, `pip install`, config setup, and pipeline execution automatically. You just chat.
<details>
<summary>💡 What happens under the hood</summary>
1. OpenClaw reads `RESEARCHCLAW_AGENTS.md` → learns the research orchestrator role
2. OpenClaw reads `README.md` → understands installation and pipeline structure
3. OpenClaw copies `config.researchclaw.example.yaml` → `config.yaml`
4. Asks for your LLM API key (or uses your environment variable)
5. Runs `pip install -e .` + `researchclaw run --topic "..." --auto-approve`
6. Returns the paper, LaTeX, experiments, and citations
</details>
### 🔌 OpenClaw Bridge (Advanced)
For deeper integration, AutoResearchClaw includes a **bridge adapter system** with 6 optional capabilities:
```yaml
# config.arc.yaml
openclaw_bridge:
use_cron: true # ⏰ Scheduled research runs
use_message: true # 💬 Progress notifications (Discord/Slack/Telegram)
use_memory: true # 🧠 Cross-session knowledge persistence
use_sessions_spawn: true # 🔀 Spawn parallel sub-sessions for concurrent stages
use_web_fetch: true # 🌐 Live web search during literature review
use_browser: false # 🖥️ Browser-based paper collection
```
Each flag activates a typed adapter protocol. When OpenClaw provides these capabilities, the adapters consume them without code changes. See [`docs/integration-guide.md`](docs/integration-guide.md) for full details.
### ACP (Agent Client Protocol)
AutoResearchClaw can use **any ACP-compatible coding agent** as its LLM backend — no API keys required. The agent communicates via [acpx](https://github.com/openclaw/acpx), maintaining a single persistent session across all 23 pipeline stages.
| Agent | Command | Notes |
|-------|---------|-------|
| Claude Code | `claude` | Anthropic |
| Codex CLI | `codex` | OpenAI |
| Copilot CLI | `gh` | GitHub |
| Gemini CLI | `gemini` | Google |
| OpenCode | `opencode` | SST |
| Kimi CLI | `kimi` | Moonshot |
```yaml
# config.yaml — ACP example
llm:
provider: "acp"
acp:
agent: "claude" # Any ACP-compatible agent CLI command
cwd: "." # Working directory for the agent
# No base_url or api_key needed — the agent handles its own auth.
```
```bash
# Just run — the agent uses its own credentials
researchclaw run --config config.yaml --topic "Your research idea" --auto-approve
```
### 🛠️ Other Ways to Run
| Method | How |
|--------|-----|
| **Standalone CLI** | `researchclaw setup` → `researchclaw init` → `researchclaw run --topic "..." --auto-approve` |
| **Python API** | `from researchclaw.pipeline import Runner; Runner(config).run()` |
| **Claude Code** | Reads `RESEARCHCLAW_CLAUDE.md` — just say *"Run research on [topic]"* |
| **Copilot CLI** | `researchclaw run --topic "..."` with `llm.acp.agent: "gh"` |
| **OpenCode** | Reads `.claude/skills/` — same natural language interface |
| **Any AI CLI** | Provide `RESEARCHCLAW_AGENTS.md` as context → agent auto-bootstraps |
---
## 🔬 Pipeline: 23 Stages, 8 Phases
```
Phase A: Research Scoping Phase E: Experiment Execution
1. TOPIC_INIT 12. EXPERIMENT_RUN
2. PROBLEM_DECOMPOSE 13. ITERATIVE_REFINE ← self-healing
Phase B: Literature Discovery Phase F: Analysis & Decision
3. SEARCH_STRATEGY 14. RESULT_ANALYSIS ← multi-agent
4. LITERATURE_COLLECT ← real API 15. RESEARCH_DECISION ← PIVOT/REFINE
5. LITERATURE_SCREEN [gate]
6. KNOWLEDGE_EXTRACT Phase G: Paper Writing
16. PAPER_OUTLINE
Phase C: Knowledge Synthesis 17. PAPER_DRAFT
7. SYNTHESIS 18. PEER_REVIEW ← evidence check
8. HYPOTHESIS_GEN ← debate 19. PAPER_REVISION
Phase D: Experiment Design Phase H: Finalization
9. EXPERIMENT_DESIGN [gate] 20. QUALITY_GATE [gate]
10. CODE_GENERATION 21. KNOWLEDGE_ARCHIVE
11. RESOURCE_PLANNING 22. EXPORT_PUBLISH ← LaTeX
23. CITATION_VERIFY ← relevance check
```
> **Gate stages** (5, 9, 20) pause for human approval or auto-approve with `--auto-approve`. On rejection, the pipeline rolls back.
> **Decision loops**: Stage 15 can trigger REFINE (→ Stage 13) or PIVOT (→ Stage 8), with automatic artifact versioning.
<details>
<summary>📋 What Each Phase Does</summary>
| Phase | What Happens |
|-------|-------------|
| **A: Scoping** | LLM decomposes the topic into a structured problem tree with research questions |
| **A+: Hardware** | Auto-detects GPU (NVIDIA CUDA / Apple MPS / CPU-only), warns if local hardware is limited, adapts code generation accordingly |
| **B: Literature** | Multi-source search (OpenAlex → Semantic Scholar → arXiv) for real papers, screens by relevance, extracts knowledge cards |
| **C: Synthesis** | Clusters findings, identifies research gaps, generates testable hypotheses via multi-agent debate |
| **D: Design** | Designs experiment plan, generates hardware-aware runnable Python (GPU tier → package selection), estimates resource needs |
| **E: Execution** | Runs experiments in sandbox, detects NaN/Inf and runtime bugs, self-heals code via targeted LLM repair |
| **F: Analysis** | Multi-agent analysis of results; autonomous PROCEED / REFINE / PIVOT decision with rationale |
| **G: Writing** | Outlines → section-by-section drafting (5,000-6,500 words) → peer reviews (with methodology-evidence consistency) → revises with length guard |
| **H: Finalization** | Quality gate, knowledge archival, LaTeX export with conference template, citation integrity + relevance verification |
</details>
---
## ✨ Key Features
| Feature | Description |
|---------|------------|
| **📚 Multi-Source Literature** | Real papers from OpenAlex, Semantic Scholar & arXiv — query expansion, deduplication, circuit breaker with graceful degradation |
| **🔍 4-Layer Citation Verification** | arXiv ID check → CrossRef/DataCite DOI → Semantic Scholar title match → LLM relevance scoring. Hallucinated refs auto-removed. |
| **🖥️ Hardware-Aware Execution** | Auto-detects GPU (NVIDIA CUDA / Apple MPS / CPU-only) and adapts code generation, imports, and experiment scale accordingly |
| **🦾 OpenCode Beast Mode** | Complex experiments auto-routed to [OpenCode](https://github.com/anomalyco/opencode) — generates multi-file projects with custom architectures, training loops, and ablation studies. Install via `researchclaw setup`. |
| **🧪 Sandbox Experiments** | AST-validated code, immutable harness, NaN/Inf fast-fail, self-healing repair, iterative refinement (up to 10 rounds), partial result capture |
| **📝 Conference-Grade Writing** | NeurIPS/ICML/ICLR templates, section-by-section drafting (5,000-6,500 words), anti-fabrication guard, revision length guard, anti-disclaimer enforcement |
| **📐 Template Switching** | `neurips_2025`, `iclr_2026`, `icml_2026` — Markdown → LaTeX with math, tables, figures, cross-refs, `\cite{}` |
| **🛡️ Anti-Fabrication** | VerifiedRegistry enforces ground-truth experiment data in papers. Auto-diagnoses failed experiments and repairs them before writing. Unverified numbers sanitized. |
| **🚦 Quality Gates** | 3 human-in-the-loop gates (Stages 5, 9, 20) with rollback. Skip with `--auto-approve`. |
---
## 🧠 MetaClaw Integration
**AutoResearchClaw + [MetaClaw](https://github.com/aiming-lab/MetaClaw) = A pipeline that learns from every run.**
MetaClaw adds **cross-run knowledge transfer** to AutoResearchClaw. When enabled, the pipeline automatically captures lessons from failures and warnings, converts them into reusable skills, and injects those skills into all 23 pipeline stages on subsequent runs — so the same mistakes are never repeated.
### How It Works
```
Run N executes → failures/warnings captured as Lessons
↓
MetaClaw Lesson → Skill conversion
↓
arc-* Skill files stored in ~/.metaclaw/skills/
↓
Run N+1 → build_overlay() injects skills into every LLM prompt
↓
LLM avoids known pitfalls → higher quality, fewer retries
```
### Quick Setup
```bash
# 1. Install MetaClaw (if not already)
pip install metaclaw
# 2. Enable in your config
```
```yaml
# config.arc.yaml
metaclaw_bridge:
enabled: true
proxy_url: "http://localhost:30000" # MetaClaw proxy (optional)
skills_dir: "~/.metaclaw/skills" # Where skills are stored
fallback_url: "https://api.openai.com/v1" # Direct LLM fallback
fallback_api_key: "" # API key for fallback URL
lesson_to_skill:
enabled: true
min_severity: "warning" # Convert warnings + errors
max_skills_per_run: 3
```
```bash
# 3. Run as usual — MetaClaw works transparently
researchclaw run --config config.arc.yaml --topic "Your idea" --auto-approve
```
After each run, check `~/.metaclaw/skills/arc-*/SKILL.md` to see the skills your pipeline has learned.
### Experiment Results
In controlled A/B experiments (same topic, same LLM, same configuration):
| Metric | Baseline | With MetaClaw | Improvement |
|--------|----------|---------------|-------------|
| Stage retry rate | 10.5% | 7.9% | **-24.8%** |
| Refine cycle count | 2.0 | 1.2 | **-40.0%** |
| Pipeline stage completion | 18/19 | 19/19 | **+5.3%** |
| Overall robustness score (composite) | 0.714 | 0.845 | **+18.3%** |
> Composite robustness score is a weighted average of stage completion rate (40%), retry reduction (30%), and refine cycle efficiency (30%).
### Backward Compatibility
- **Default: OFF.** If `metaclaw_bridge` is absent or `enabled: false`, the pipeline behaves exactly as before.
- **No new dependencies.** MetaClaw is optional — the core pipeline works without it.
- **All 1,823 existing tests pass** with the integration code present.
---
## ⚙️ Configuration Reference
<details>
<summary>Click to expand full configuration reference</summary>
```yaml
# === Project ===
project:
name: "my-research" # Project identifier
mode: "docs-first" # docs-first | semi-auto | full-auto
# === Research ===
research:
topic: "..." # Research topic (required)
domains: ["ml", "nlp"] # Research domains for literature search
daily_paper_count: 8 # Target papers per search query
quality_threshold: 4.0 # Minimum quality score for papers
# === Runtime ===
runtime:
timezone: "America/New_York" # For timestamps
max_parallel_tasks: 3 # Concurrent experiment limit
approval_timeout_hours: 12 # Gate stage timeout
retry_limit: 2 # Retry count on stage failure
# === LLM ===
llm:
provider: "openai-compatible" # openai | openrouter | deepseek | minimax | acp | openai-compatible
base_url: "https://..." # API endpoint (required for openai-compatible)
api_key_env: "OPENAI_API_KEY" # Env var for API key (required for openai-compatible)
api_key: "" # Or hardcode key here
primary_model: "gpt-4o" # Primary model
fallback_models: ["gpt-4o-mini"] # Fallback chain
s2_api_key: "" # Semantic Scholar API key (optional, higher rate limits)
acp: # Only used when provider: "acp"
agent: "claude" # ACP agent CLI command (claude, codex, gemini, etc.)
cwd: "." # Working directory for the agent
# === Experiment ===
experiment:
mode: "sandbox" # simulated | sandbox | docker | ssh_remote
time_budget_sec: 300 # Max execution time per run (default: 300s)
max_iterations: 10 # Max optimization iterations
metric_key: "val_loss" # Primary metric name
metric_direction: "minimize" # minimize | maximize
sandbox:
python_path: ".venv/bin/python"
gpu_required: false
allowed_imports: [math, random, json, csv, numpy, torch, sklearn]
max_memory_mb: 4096
docker:
image: "researchclaw/experiment:latest"
network_policy: "setup_only" # none | setup_only | pip_only | full
gpu_enabled: true
memory_limit_mb: 8192
auto_install_deps: true # Auto-detect imports → requirements.txt
ssh_remote:
host: "" # GPU server hostname
gpu_ids: [] # Available GPU IDs
remote_workdir: "/tmp/researchclaw_experiments"
opencode: # OpenCode Beast Mode (auto-installed via `researchclaw setup`)
enabled: true # Master switch (default: true)
auto: true # Auto-trigger without confirmation (default: true)
complexity_threshold: 0.2 # 0.0-1.0 — higher = only trigger on complex experiments
model: "" # Override model (empty = use llm.primary_model)
timeout_sec: 600 # Max seconds for OpenCode generation
max_retries: 1 # Retry count on failure
workspace_cleanup: true # Remove temp workspace after collection
code_agent: # CodeAgent v2 — multi-phase code generation
enabled: true # Use CodeAgent instead of legacy single-prompt codegen
architecture_planning: true # Generate deep implementation blueprint before coding
sequential_generation: true # Generate files one-by-one following dependency DAG
hard_validation: true # AST-based validation gates (blocks identical ablations, hardcoded metrics)
hard_validation_max_repairs: 2 # Max repair attempts when validation fails
exec_fix_max_iterations: 3 # Execution-in-the-loop fix attempts
exec_fix_timeout_sec: 60 # Timeout per exec-fix attempt
benchmark_agent: # BenchmarkAgent — automated dataset & baseline selection
enabled: true # Enable 4-agent benchmark pipeline (Surveyor→Selector→Acquirer→Validator)
enable_hf_search: true # Search HuggingFace Datasets
enable_web_search: true # Search Google Scholar for benchmarks
tier_limit: 2 # Dataset tier filtering (1=small/cached, 2=medium, 3=large)
min_benchmarks: 1 # Minimum datasets required
min_baselines: 2 # Minimum baseline methods required
figure_agent: # FigureAgent — academic figure generation
enabled: true # Enable 5-agent figure pipeline (Planner→CodeGen→Renderer→Critic→Integrator)
min_figures: 3 # Minimum figures to generate
max_figures: 8 # Maximum figures
max_iterations: 3 # Critic-driven refinement iterations
dpi: 300 # Output resolution
strict_mode: false # Fail pipeline if figure generation fails
repair: # Anti-fabrication experiment repair
enabled: true # Auto-diagnose and repair failed experiments
max_cycles: 3 # Repair retry loops
min_completion_rate: 0.5 # >=50% conditions must complete to proceed
min_conditions: 2 # At least 2 conditions for valid experiment
use_opencode: true # Route repairs through OpenCode Beast Mode
# === Web Search (Optional) ===
web_search:
enabled: true # Enable web-augmented literature search
tavily_api_key_env: "TAVILY_API_KEY" # Tavily API key env var (optional)
enable_scholar: true # Google Scholar search
enable_pdf_extraction: true # Extract text from PDFs
max_web_results: 10 # Max web results per query
# === Export ===
export:
target_conference: "neurips_2025" # neurips_2025 | iclr_2026 | icml_2026
authors: "Anonymous"
bib_file: "references"
# === Prompts ===
prompts:
custom_file: "" # Path to custom prompts YAML (empty = defaults)
# === Security ===
security:
hitl_required_stages: [5, 9, 20] # Stages requiring human approval
allow_publish_without_approval: false
redact_sensitive_logs: true
# === Knowledge Base ===
knowledge_base:
backend: "markdown" # markdown | obsidian
root: "docs/kb"
# === Notifications ===
notifications:
channel: "console" # console | discord | slack
target: ""
# === MetaClaw Bridge (Optional) ===
metaclaw_bridge:
enabled: false # Set to true to enable cross-run learning
proxy_url: "http://localhost:30000" # MetaClaw proxy URL
skills_dir: "~/.metaclaw/skills" # Where arc-* skills are stored
fallback_url: "" # Direct LLM fallback when proxy is down
fallback_api_key: "" # API key for fallback endpoint
lesson_to_skill:
enabled: true # Auto-convert lessons to skills
min_severity: "warning" # Minimum severity to convert
max_skills_per_run: 3 # Max new skills per pipeline run
prm: # Process Reward Model quality gate (optional)
enabled: false # Use LLM-as-judge to score stage outputs
model: "gpt-5.4" # PRM judge model
votes: 3 # Majority vote count
gate_stages: [5, 9, 15, 20] # Stages to apply PRM gates
# === OpenClaw Bridge ===
openclaw_bridge:
use_cron: false # Scheduled research runs
use_message: false # Progress notifications
use_memory: false # Cross-session knowledge persistence
use_sessions_spawn: false # Spawn parallel sub-sessions
use_web_fetch: false # Live web search
use_browser: false # Browser-based paper collection
```
</details>
---
## 🙏 Acknowledgments
Inspired by:
- 🔬 [AI Scientist](https://github.com/SakanaAI/AI-Scientist) (Sakana AI) — Automated research pioneer
- 🧠 [AutoResearch](https://github.com/karpathy/autoresearch) (Andrej Karpathy) — End-to-end research automation
- 🌐 [FARS](https://analemma.ai/blog/introducing-fars/) (Analemma) — Fully Automated Research System
---
## 📄 License
MIT — see [LICENSE](LICENSE) for details.
---
## 📌 Citation
If you find AutoResearchClaw useful, please cite:
```bibtex
@misc{liu2026autoresearchclaw,
author = {Liu, Jiaqi and Xia, Peng and Han, Siwei and Qiu, Shi and Zhang, Letian and Chen, Guiming and Tu, Haoqin and Yang, Xinyu and Zhou, Jiawei and Zhu, Hongtu and Li, Yun and Zhou, Yuyin and Zheng, Zeyu and Xie, Cihang and Ding, Mingyu and Yao, Huaxiu},
title = {AutoResearchClaw: Fully Autonomous Research from Idea to Paper},
year = {2026},
organization = {GitHub},
url = {https://github.com/aiming-lab/AutoResearchClaw},
}
```
<p align="center">
<sub>Built with 🦞 by the AutoResearchClaw team</sub>
</p>
================================================
FILE: config.researchclaw.example.yaml
================================================
project:
name: "my-research"
mode: "full-auto"
research:
topic: "Your research topic here"
domains:
- "machine-learning"
daily_paper_count: 10
quality_threshold: 4.0
runtime:
timezone: "America/New_York"
max_parallel_tasks: 3
approval_timeout_hours: 12
retry_limit: 2
notifications:
channel: "console"
target: ""
on_stage_start: true
on_stage_fail: true
on_gate_required: true
knowledge_base:
backend: "markdown"
root: "docs/kb"
openclaw_bridge:
use_cron: false
use_message: false
use_memory: false
use_sessions_spawn: false
use_web_fetch: false
use_browser: false
llm:
provider: "openai-compatible"
base_url: "https://api.openai.com/v1"
api_key_env: "OPENAI_API_KEY"
api_key: ""
primary_model: "gpt-4o"
fallback_models:
- "gpt-4.1"
- "gpt-4o-mini"
# --- MiniMax provider example ---
# provider: "minimax"
# api_key_env: "MINIMAX_API_KEY"
# primary_model: "MiniMax-M2.5"
# fallback_models:
# - "MiniMax-M2.5-highspeed"
security:
hitl_required_stages: [5, 9, 20]
allow_publish_without_approval: false
redact_sensitive_logs: true
experiment:
# ★ mode 决定实验结果的真实性
# "sandbox" — 在本地沙盒中实际执行生成的 Python 代码,产出真实实验数据
# "docker" — 在 Docker 容器中执行,支持 GPU 直通、依赖自动安装、内存隔离
# "simulated" — 不执行代码,使用公式生成假数据(仅用于框架开发调试,不应用于论文生成)
mode: "sandbox"
time_budget_sec: 300
max_iterations: 10
metric_key: "primary_metric"
metric_direction: "minimize"
sandbox:
# Use ".venv/Scripts/python.exe" on Windows
python_path: ".venv/bin/python3"
gpu_required: false
max_memory_mb: 4096
# Docker sandbox settings (only used when mode: "docker")
# Build image first: docker build -t researchclaw/experiment:latest researchclaw/docker/
docker:
image: "researchclaw/experiment:latest"
gpu_enabled: true
# gpu_device_ids: [0] # empty = all GPUs
memory_limit_mb: 8192
network_policy: "setup_only" # none | setup_only | pip_only | full
# pip_pre_install: ["torchdiffeq", "einops"]
auto_install_deps: true
shm_size_mb: 2048
keep_containers: false
ssh_remote:
host: "" # SSH hostname or IP
user: "" # SSH username (default: current user)
port: 22 # SSH port
key_path: "" # Path to private key (default: ~/.ssh/id_rsa)
gpu_ids: [] # e.g. [0, 1] for CUDA_VISIBLE_DEVICES
remote_workdir: "/tmp/researchclaw_experiments"
remote_python: "python3"
setup_commands: [] # e.g. ["source ~/venv/bin/activate", "pip install torch"]
# Docker-over-SSH (most secure remote execution)
use_docker: false # Set true to run experiments inside Docker on remote host
docker_image: "researchclaw/experiment:latest"
docker_network_policy: "none" # none | full
docker_memory_limit_mb: 8192
docker_shm_size_mb: 2048
# OpenCode Beast Mode — external AI coding agent for complex experiments
# Install: npm i -g opencode-ai@latest (or use `researchclaw setup`)
opencode:
enabled: true # Master switch (default: true)
auto: true # Auto-trigger without confirmation (default: true)
complexity_threshold: 0.2 # 0.0-1.0 — higher = only trigger on complex experiments
model: "" # Override model (empty = use llm.primary_model)
timeout_sec: 600 # Max seconds for OpenCode generation
max_retries: 1 # Retry count on failure
workspace_cleanup: true # Remove temp workspace after collection
# ============================================================================
# SSH Remote Examples
# ============================================================================
#
# 1. Lab server (bare Python, basic sandboxing):
# experiment:
# mode: "ssh_remote"
# ssh_remote:
# host: "gpu-server.lab.edu"
# user: "researcher"
# key_path: "~/.ssh/id_rsa"
# gpu_ids: [0]
# remote_python: "python3"
#
# 2. Lab server (Docker — most secure):
# experiment:
# mode: "ssh_remote"
# ssh_remote:
# host: "gpu-server.lab.edu"
# user: "researcher"
# key_path: "~/.ssh/id_rsa"
# gpu_ids: [0]
# use_docker: true
# docker_image: "researchclaw/experiment:latest"
# docker_network_policy: "none"
#
# 3. Colab via SSH tunnel:
# experiment:
# mode: "ssh_remote"
# ssh_remote:
# host: "localhost"
# port: 12345
# user: "root"
# remote_python: "python3"
# setup_commands:
# - "pip install torch torchvision -q"
#
# 4. Colab via Google Drive (most robust, no SSH needed):
# experiment:
# mode: "colab_drive"
# colab_drive:
# drive_root: "~/Library/CloudStorage/GoogleDrive-you@gmail.com/My Drive/researchclaw"
# poll_interval_sec: 30
# timeout_sec: 3600
# setup_script: "pip install torch torchvision -q"
# # Then in Colab: run the colab_worker.py that appears in your Drive
colab_drive:
drive_root: "" # Local path to Google Drive mount
poll_interval_sec: 30 # How often to check for results
timeout_sec: 3600 # Max wait per experiment (1 hour)
setup_script: "" # Shell commands to run before each experiment
# Scientific Visualization Agent (Code-to-Viz + Nano Banana)
# Uses a Decision Agent to analyze paper content and determine:
# - Code figures (bar charts, line plots) → Matplotlib/TikZ
# - Image figures (architecture, flowcharts) → Gemini Nano Banana
figure_agent:
enabled: true
min_figures: 3
max_figures: 10
max_iterations: 3
render_timeout_sec: 30
# Security: Docker sandbox for visualization code execution
# use_docker: null # null = auto-detect, true = force, false = disable
docker_image: "researchclaw/experiment:latest"
# Output format: "python" (Matplotlib/Seaborn) or "latex" (TikZ/PGFPlots)
output_format: "python"
# Nano Banana (Gemini native image generation)
nano_banana_enabled: true
# gemini_api_key: "" # or set GEMINI_API_KEY env var
gemini_model: "gemini-2.5-flash-image"
strict_mode: false
dpi: 300
# === Prompts ===
# Customize LLM prompts by pointing to your own YAML file.
# Copy prompts.default.yaml, edit the prompts you want, and set the path here.
prompts:
custom_file: "" # e.g. "my_prompts.yaml" (empty = use built-in defaults)
# === MetaClaw Integration ===
# Enable the MetaClaw bridge to get skill injection, PRM quality gates,
# and continuous learning from research pipeline failures.
# Requires MetaClaw to be running: metaclaw start --mode skills_only
metaclaw_bridge:
enabled: false
proxy_url: "http://localhost:30000" # MetaClaw proxy endpoint
skills_dir: "~/.metaclaw/skills" # MetaClaw skills directory
fallback_url: "" # Direct LLM URL if proxy is down
fallback_api_key: ""
# PRM quality gate: LLM-as-judge scoring at gate stages
prm:
enabled: false
api_base: "" # OpenAI-compatible API for PRM judge
api_key_env: "PRM_API_KEY"
api_key: ""
model: "gpt-5.4"
votes: 3 # Majority vote count
gate_stages: [5, 9, 15, 20] # Stages to apply PRM gating
# Lesson-to-skill: auto-convert pipeline failures into MetaClaw skills
lesson_to_skill:
enabled: true
min_severity: "error" # Only convert error-level lessons
max_skills_per_run: 3
================================================
FILE: docs/BUG_FIX_DOCUMENT_20260316.md
================================================
# Bug Fix Document — AutoResearchClaw Pipeline
> 生成日期:2026-03-16
> 反馈来源:2 位测试者(user1: CV 方向 / GPU 环境, user2: Windows 环境)
> 总计问题:9 个
## 📊 总览
| 分类 | 数量 |
|------|------|
| 🔴 确认的 Bug(需修复) | **4** |
| 🟠 架构改进(强烈建议) | **2** |
| 🔵 功能需求 | **3** |
## 🔥 修复优先级
| 优先级 | ID | 问题 | 阶段 | 涉及文件 |
|--------|----|------|------|----------|
| 🔴 CRITICAL | BUG-001 | 论文硬件信息与实际不一致 | PAPER_DRAFT (17) | `executor.py`, `prompts.py` |
| 🔴 CRITICAL | BUG-002 | Windows 环境 Docker 不可用导致实验链式失败 | EXPERIMENT_RUN (12) | `factory.py`, `docker_sandbox.py` |
| 🔴 HIGH | BUG-003 | 论文内容自相矛盾(承诺评测但未执行) | PAPER_DRAFT (17), PEER_REVIEW (18) | `executor.py`, `prompts.py` |
| 🔴 HIGH | BUG-004 | 生成代码缺少数值稳定性防护(NaN/Inf) | CODE_GENERATION (10) | `code_agent.py`, `prompts.py` |
| 🟠 HIGH | ARCH-001 | Stage 17 过于严格的 hard block 策略 | PAPER_DRAFT (17) | `executor.py` |
| 🟠 HIGH | ARCH-002 | Idea 降级时不询问用户确认 | EXPERIMENT_DESIGN (9), RESEARCH_DECISION (15) | `executor.py`, `stages.py` |
---
## 确认的 Bug — 详细修复方案
### 🔴 `BUG-001` — 论文硬件信息与实际机器不一致
| 字段 | 内容 |
|------|------|
| **严重程度** | CRITICAL |
| **所属阶段** | PAPER_DRAFT (Stage 17) |
| **报告者** | user1 |
**问题描述:**
论文中声称使用 A100 GPU 训练,但测试者实际机器上是 A5000。Pipeline 在 Stage 1 检测了硬件并保存到 `hardware_profile.json`,但在论文生成阶段完全没有利用这个信息来约束 LLM 输出。
**根因分析:**
- `executor.py` 第 1226-1233 行:Stage 1 (TOPIC_INIT) 检测硬件,保存 `hardware_profile.json`,包含 `gpu_name`、`vram_gb` 等
- `executor.py` 第 2352-2391 行:硬件信息 **仅** 用于 CODE_GENERATION 阶段的代码生成 hints
- `executor.py` 第 5776-5848 行:PAPER_DRAFT 阶段构建 prompt 时,**没有注入硬件 profile 信息**
- LLM 在缺少约束的情况下会「幻觉」出常见的高端硬件名称(如 A100)
**涉及文件:**
- `researchclaw/pipeline/executor.py`(PAPER_DRAFT 阶段的 prompt 构建部分,约第 5776-5960 行)
- `researchclaw/prompts.py`(paper writing prompt 模板)
**修复方案:**
1. 在 PAPER_DRAFT 阶段的 prompt 构建中,读取 `stage-01/hardware_profile.json`
2. 将实际硬件信息(GPU 型号、VRAM、CPU 等)作为 **硬性约束** 注入 prompt,例如:
```
HARDWARE CONSTRAINT: The experiments were run on the following hardware:
- GPU: {gpu_name} ({vram_gb} GB VRAM)
- CPU: {cpu_info}
You MUST use this exact hardware specification in the paper. Do NOT substitute with other GPU models.
```
3. 在 PEER_REVIEW (Stage 18) 的 prompt 中增加一条审核规则:验证 paper 中提到的硬件是否与 `hardware_profile.json` 一致
**修复后预期行为:**
论文中的硬件描述必须与实际运行环境一致。
<details>
<summary>原始反馈证据</summary>
> 然后就是paper和实验中有一些misalign的地方,比如paper里写说用的A100,实际上机器里的是A5000
</details>
---
### 🔴 `BUG-002` — Windows 环境下 Docker 不可用导致实验链式失败
| 字段 | 内容 |
|------|------|
| **严重程度** | CRITICAL |
| **所属阶段** | EXPERIMENT_RUN (Stage 12) → 链式影响到 Stage 13, 14, 17 |
| **报告者** | user2 |
**问题描述:**
在 Windows 环境下,Docker 不可用时 Pipeline 直接崩溃(`[WinError 2] The system cannot find the file specified`),导致所有后续阶段连锁失败。用户最终看到的是 Stage 17 的误导性错误「没有实验数据无法写论文」,完全看不到真正的根因。
**根因分析:**
- `experiment/factory.py` 第 25-29 行:当 `config.experiment.mode == "docker"` 时调用 `DockerSandbox.check_docker_available()`,如果 Docker 不可用直接 raise `RuntimeError`,**没有自动 fallback 到 subprocess sandbox**
- `docker_sandbox.py` 第 337、366 行:Docker volume mount 使用 POSIX 风格路径(如 `{staging_dir}:/workspace`),在 Windows 上可能导致挂载失败
- **链式失败:** Stage 12 crash → 无 metrics → Stage 13 空跑(`refine_sandbox_v1` 到 `v9` 都失败) → Stage 14 空 `experiment_summary.json` → Stage 17 hard block
- 用户看到的错误完全不提 Docker,只说「no metrics」,非常误导
**涉及文件:**
- `researchclaw/experiment/factory.py`(第 25-29 行,sandbox 创建逻辑)
- `researchclaw/experiment/docker_sandbox.py`(第 337、366、384 行,路径和命令构建)
- `researchclaw/pipeline/executor.py`(第 6000-6020 行,Stage 17 hard block)
**修复方案:**
1. `factory.py`:当 Docker 不可用时,自动 fallback 到 subprocess sandbox 模式,而不是 raise RuntimeError。增加日志 warning 告知用户:
```python
if not DockerSandbox.check_docker_available():
logger.warning("Docker not available, falling back to subprocess sandbox mode")
return SubprocessSandbox(...)
```
2. `docker_sandbox.py`:修复 Windows 路径兼容性问题,使用 `pathlib.PureWindowsPath` 或 `os.path` 正确处理跨平台路径
3. 在 Stage 12 的错误信息中明确指出是 Docker 问题,而不是让错误沿链传播变成「no metrics」
**修复后预期行为:**
Windows 用户即使没有 Docker,Pipeline 也能通过 subprocess sandbox 完成实验。即使实验部分失败,错误信息应清晰指向根因。
<details>
<summary>原始反馈证据</summary>
> 我跑了两次 两次都有stage fail 最后没有生成报告
压缩包中 `experiment_summary.json` stderr: `[WinError 2] The system cannot find the file specified`
`pipeline_summary.json`: `"final_status": "failed"`, `"stages_failed": 1`
`stage-17/paper_draft.md`: `Experiment stage produced no metrics (status: failed/timeout). Cannot write a paper without real experimental data.`
</details>
---
### 🔴 `BUG-003` — 论文内容自相矛盾(承诺评测数据集但未实际执行)
| 字段 | 内容 |
|------|------|
| **严重程度** | HIGH |
| **所属阶段** | PAPER_DRAFT (Stage 17), PEER_REVIEW (Stage 18) |
| **报告者** | user1 |
**问题描述:**
论文前半部分按照用户的 topic 描述声称会在 MME、DocVQA、TextVQA 等数据集上评测,但实际实验阶段因为环境原因未能完成这些评测。论文后半部分在 Limitation 中又说「没有在这些数据集上评估」,形成自相矛盾。
**根因分析:**
- `prompts.py` 第 2006-2018 行:有 EVIDENCE-BOUNDING RULES(Rule 7-9),但这些只是 prompt 中的 **建议**,LLM 可以忽略
- `executor.py` 第 5647-5715 行:`_detect_result_contradictions()` 函数检测 null/negative results,但只生成 advisory text 注入 prompt,**不做硬性阻断**
- `executor.py` 第 6432-6443 行:PEER_REVIEW 阶段收集 `actual_run_count` 作为 evidence,但 **没有自动扫描 paper 文本提取声称的数据集列表并与实际评测记录对比**
- 核心问题:**缺少 claim-evidence 的自动对齐验证**
**涉及文件:**
- `researchclaw/pipeline/executor.py`(第 5647-5715 行、5944-5956 行、6432-6443 行)
- `researchclaw/prompts.py`(第 2006-2049 行、2124-2138 行)
**修复方案:**
1. 在 PAPER_DRAFT 阶段的 prompt 中,**明确列出** 实际完成评测的数据集和指标(从 `experiment_summary.json` 提取),硬性要求 LLM **只能**声称在这些数据集上进行了评测:
```
ACTUAL EVALUATED DATASETS: [ImageNet-val (reconstruction)]
You MUST NOT claim evaluation on any dataset not listed above.
If the original research plan included additional datasets that were not evaluated,
explain this honestly in the Limitations section WITHOUT first claiming you did evaluate them.
```
2. 在 PEER_REVIEW (Stage 18) 增加一个专项检查:自动提取 paper 中所有提到的 benchmark/dataset 名称,与 `experiment_summary.json` 中的实际 metrics keys 对比,不一致则标记为 CRITICAL discrepancy
3. 在 PAPER_REVISION (Stage 19) 中把这些 discrepancy 作为必须修改的 reviewer comment
**修复后预期行为:**
论文中不会出现「前面说评测了 X,后面说没评测 X」的自相矛盾。所有评测声明必须有实验数据支撑。
<details>
<summary>原始反馈证据</summary>
> 以及就是paper中有一些自相矛盾的地方,比如前面按照我的要求,说会在哪几个数据集上面进行评估,后面又没有测,然后在limitation说我们没有在这几个数据集上评估
</details>
---
### 🔴 `BUG-004` — 生成代码缺少数值稳定性防护(NaN/Inf 导致实验提前终止)
| 字段 | 内容 |
|------|------|
| **严重程度** | HIGH |
| **所属阶段** | CODE_GENERATION (Stage 10), ITERATIVE_REFINE (Stage 13) |
| **报告者** | user1 |
**问题描述:**
实验训练过程中出现 `loss = inf` → `loss = nan` 的数值爆炸,触发 harness 的 NaN 检测后实验提前终止。代码生成阶段没有在生成的训练代码中加入数值稳定性保护。
**根因分析:**
- `code_agent.py`:**完全没有** 关于数值稳定性的 prompt 指令。4 个阶段(Planning → Code Generation → Execution-in-the-Loop → Multi-Agent Review)都不检查 NaN guard
- `experiment/harness_template.py` 第 45-62 行:有 `check_value()` 做 NaN/Inf 检测,但这是 **opt-in 机制**——只有生成代码主动调用 `self.check_value(loss, "loss")` 才有效
- `executor.py` 第 779-900 行:`_detect_runtime_issues()` 在运行 **之后** 检测 NaN,但此时实验已经失败了
- `executor.py` 第 3915-3956 行:Stage 13 检测到 NaN 后调用 LLM 做 `iterative_repair`,但修复质量不稳定
**涉及文件:**
- `researchclaw/pipeline/code_agent.py`(prompt 构建,所有阶段)
- `researchclaw/prompts.py`(代码生成相关 prompt)
- `researchclaw/experiment/harness_template.py`(第 45-62 行)
**修复方案:**
1. 在 `code_agent.py` 的代码生成 prompt 中,增加 **强制性** 数值稳定性要求:
```
NUMERICAL STABILITY REQUIREMENTS (MANDATORY):
- Add gradient clipping (max_norm=1.0) to all optimizer steps
- Check loss for NaN/Inf before backward pass: if not math.isfinite(loss): skip this batch
- Use torch.amp.GradScaler for mixed precision training if applicable
- Add learning rate warmup for the first 5-10% of training steps
- Use self.check_value(loss, "loss") from experiment harness for NaN tracking
```
2. 在 `harness_template.py` 中,将 `check_value()` 改为 **自动 hook** 而非 opt-in——在 `finalize()` 中自动检查 metrics 是否为 finite
3. 在 Multi-Agent Review 阶段(`code_agent.py` Phase 4)增加数值稳定性作为必审项
**修复后预期行为:**
生成的训练代码默认包含 gradient clipping 和 NaN guard,训练过程中数值爆炸能被及时 catch 并恢复,而不是直接终止。
<details>
<summary>原始反馈证据</summary>
> 好像是他的代码写错了之类的
压缩包中 `experiment_summary.json` stderr:
```
WARNING: loss = inf (non-finite, skipped)
WARNING: loss = nan (non-finite, skipped)
WARNING: loss = nan (non-finite, skipped)
WARNING: loss = nan (non-finite, skipped)
WARNING: loss = nan (non-finite, skipped)
FAIL: Too many NaN/Inf values detected. Stopping experiment early.
```
</details>
---
## 架构改进 — 强烈建议
### 🟠 `ARCH-001` — Stage 17 (PAPER_DRAFT) 过于严格的 hard block 策略
| 字段 | 内容 |
|------|------|
| **严重程度** | HIGH |
| **所属阶段** | PAPER_DRAFT (Stage 17) |
| **报告者** | user2(链式影响) |
**问题描述:**
当实验阶段没有产出完整 metrics 时,Stage 17 直接 FAILED,不尝试用已有数据写论文。这导致前面 1-16 阶段的全部成果被浪费。
**根因分析:**
- `executor.py` 第 6000-6020 行:当 `has_real_metrics == False` 且 domain 为 empirical 时,直接返回 `StageStatus.FAILED`
- Stage 13 (ITERATIVE_REFINE) 的中间迭代可能产出了部分有效 metrics,但 Stage 17 只看 `experiment_summary.json` 的 final best_run
**涉及文件:**
- `researchclaw/pipeline/executor.py`(第 6000-6020 行)
**修复方案:**
将 hard block 改为 soft degradation:
1. 如果有部分 metrics(即使不完整),用已有数据写论文
2. 在 prompt 中明确告知 LLM 数据不完整,要求在 Abstract 和 Limitations 中如实说明
3. 只有在 **完全没有任何数据**(甚至没有 stage-07 synthesis 和 stage-08 hypotheses)的极端情况下才 hard block
4. 在输出的 `paper_draft.md` 头部加 warning 标记,方便后续阶段识别
**修复后预期行为:**
实验部分失败时,Pipeline 仍能生成一篇带有诚实 Limitations 的论文,用户至少得到有价值的输出。
---
### 🟠 `ARCH-002` — Idea 被降级到弱版本时不询问用户
| 字段 | 内容 |
|------|------|
| **严重程度** | HIGH |
| **所属阶段** | EXPERIMENT_DESIGN (Stage 9), RESEARCH_DECISION (Stage 15) |
| **报告者** | user1 |
**问题描述:**
用户给了一个复杂的 strong idea(如 VAE+ViT 统一编码器 + 多数据集评测),Pipeline 因资源限制(数据集不可用、GPU 不够、环境配不好)自动降级到 weaker 版本,但不通知或征求用户意见。用户认为降级后的研究「变得没啥意义」。
**根因分析:**
- `executor.py` 第 2220-2236 行:LLM 生成的实验计划无效时,使用 topic-derived fallback,**不询问用户**
- `executor.py` 第 4618-4640 行:RESEARCH_DECISION 检测 degenerate cycle 时只给 LLM advisory,**不暂停**
- `stages.py` 第 109-115 行:GATE_STAGES 只包含 Stage 5、9、20,不包含 Stage 15
- `agents/benchmark_agent/orchestrator.py` 第 314-322 行:BenchmarkAgent 验证失败时 silent retry,最终 silent proceed
**涉及文件:**
- `researchclaw/pipeline/executor.py`(第 2220-2236 行、4618-4640 行)
- `researchclaw/pipeline/stages.py`(GATE_STAGES 定义)
- `researchclaw/agents/benchmark_agent/orchestrator.py`(第 314-322 行)
**修复方案:**
1. 在 EXPERIMENT_DESIGN (Stage 9) 中,当检测到 significant downgrade(如:用户要求的数据集不可用、GPU 不满足要求、关键组件被简化)时,生成一个 **downgrade summary** 并暂停等待用户确认
2. 在 RESEARCH_DECISION (Stage 15) 中,将 REFINE → weaker idea 的决策标记为 GATE,需要用户 approve
3. 可以通过 `auto_approve` 参数让用户选择是否跳过这些确认(保持向后兼容)
**修复后预期行为:**
Pipeline 在降级研究方案前通知用户,用户可以选择:接受降级、提供更多资源(如更大的 GPU)、或终止当前 run。
<details>
<summary>原始反馈证据</summary>
> 对,还有就是比如我提出了一个相对strong的idea,而他因为各种原因(比如数据集找不到,环境配不好,gpu不够)之类的,给我fallback到weaker的idea之后,我感觉这个时候应该询问一下用户要不要继续跑
>
> 因为很多时候他继续跑的内容就会变得没啥意义
</details>
---
## 功能需求
### 🔵 `FEAT-001` — 论文生成后增加一致性反馈循环
- **报告者:** user1
- **描述:** 在论文生成之后,增加专门的 consistency check,检查 paper 中的声明与实际实验结果是否一致
- **建议:** 可以在 PEER_REVIEW (Stage 18) 的 prompt 中增加 claim-evidence alignment 专项检查。或者在 Stage 17 和 18 之间加一个轻量级的自动验证步骤
<details>
<summary>原始反馈</summary>
> 感觉这个可以在paper生成之后,加一些相关的consistence feedback之类的?
</details>
### 🔵 `FEAT-002` — 从 Related Works 的 GitHub 学习 Common Practice
- **报告者:** user1
- **描述:** 当前 Pipeline 的 literature 阶段只读论文,不看对应的开源代码。用户建议访问 related works 的 GitHub repo,学习 paper 中不会写的实现细节(tricks、common practice),缓解论文内容过于古老的问题
- **建议:** 在 KNOWLEDGE_EXTRACT (Stage 6) 或 EXPERIMENT_DESIGN (Stage 9) 增加 GitHub repo 分析能力。可以用 GitHub API 搜索 related works 的 repo,提取 README、主要代码结构、训练配置等信息
<details>
<summary>原始反馈</summary>
> 对就是我觉得即使不拿来用,visit related works的github也是有必要的,这样可以看到其他工作的common practice(一些不会在paper中出现的细节),应该会挺有用的。感觉可以缓解一下paper内容过于古老的问题
</details>
### 🔵 `FEAT-003` — 代码应该复用 Related Works 的框架
- **报告者:** user1
- **描述:** 当前代码都是 LLM 从零写的简单文件,用户建议从 most related works 中选一个合适的框架来用,就像真实研究中的做法
- **建议:** 可以在 BenchmarkAgent 或 CODE_GENERATION 阶段增加框架选择逻辑——从相关论文的开源实现中挑选合适的 codebase 作为起点,而不是从零生成。这是一个较大的改动,可以作为长期目标
<details>
<summary>原始反馈</summary>
> 以及他现在写的代码都比较简单,都是自己写几个文件对吧。我在想或许可以从most related works里面选一个合适的框架来用?我们平时也是这样的对吧。当然这个比较复杂,可以先不考虑
</details>
---
## 附录:按测试者分组
### 测试者:`user1`
- **学科/领域:** 计算机视觉(CV),统一图像编解码器
- **运行环境:** GPU 服务器(A5000),使用 Codex 监控
- **总计问题:** 6
- **确认 Bug:** 3(BUG-001, BUG-003, BUG-004)
- **架构改进:** 1(ARCH-002)
- **功能需求:** 3(FEAT-001, FEAT-002, FEAT-003)
| ID | 问题 | 状态 | 严重程度 |
|----|------|------|---------|
| BUG-001 | 论文硬件信息与实际不一致 | confirmed | CRITICAL |
| BUG-003 | 论文内容自相矛盾 | confirmed | HIGH |
| BUG-004 | 代码缺少数值稳定性防护 | confirmed | HIGH |
| ARCH-002 | Idea 降级不询问用户 | confirmed | HIGH |
| FEAT-001 | 一致性反馈循环 | feature_request | — |
| FEAT-002 | 从 GitHub 学习 common practice | feature_request | — |
| FEAT-003 | 复用 related works 框架 | feature_request | — |
### 测试者:`user2`
- **学科/领域:** 未知(topic 与纳米药物递送相关)
- **运行环境:** Windows
- **总计问题:** 2
- **确认 Bug:** 1(BUG-002)
- **架构改进:** 1(ARCH-001)
| ID | 问题 | 状态 | 严重程度 |
|----|------|------|---------|
| BUG-002 | Windows Docker 链式失败 | confirmed | CRITICAL |
| ARCH-001 | Stage 17 过于严格的 hard block | confirmed | HIGH |
---
## 修复执行指引
> 本文档设计为可由另一台机器上的 Claude Code agent 直接读取并执行修复。
> 建议按优先级从上到下依次修复,每修复一个 Bug 运行相关测试验证。
**修复顺序建议:**
1. BUG-002(Docker fallback)→ 解除 Windows 用户的完全阻塞
2. BUG-001(硬件一致性)→ 简单修复,prompt 注入即可
3. BUG-004(NaN guard)→ prompt 层面修复,影响面大
4. BUG-003(claim-evidence 对齐)→ 需要新增验证逻辑
5. ARCH-001(soft degradation)→ 改变 Stage 17 策略
6. ARCH-002(用户确认 Gate)→ 需要状态机和 Gate 逻辑调整
================================================
FILE: docs/BUG_TRACKER.md
================================================
# Bug Tracker & TODO
> 实验运行期间发现的 bug 和待修复事项。实验结束后统一修复。
## 已发现的 Bug
### BUG-01: Stage 2 合约缺少 queries.json 输出 (已修复)
- **状态**: ✅ 已修复 (commit `19c74a0`)
- **描述**: `contracts.py` 中 Stage 2 (PROBLEM_DECOMPOSE) 的 `output_files` 包含 `queries.json`,但实际实现只生成 `problem_tree.md`。`queries.json` 实际在 Stage 3 生成。
- **影响**: Pipeline 在 Stage 2 直接失败
- **修复**: 从 Stage 2 output_files 移除 `queries.json`,从 Stage 3 input_files 移除 `queries.json`
### BUG-02: gpt-5.4 持续 429 限流
- **状态**: ⏳ 待观察
- **描述**: 同时运行多个 pipeline 时,gpt-5.4 频繁返回 429。fallback 机制可以兜底但速度大幅下降。
- **影响**: 运行时间显著增加(Case 2 上轮从 ~2.5h 增至 ~6h)
- **建议**: 考虑增加 pipeline 间的启动间隔,或实现全局 API 调用速率协调
### BUG-03: S2/arXiv 文献搜索 429 限流
- **状态**: ✅ 已缓解 (commit `63c5a7d` circuit breaker)
- **描述**: Semantic Scholar 和 arXiv API 在并发请求时频繁 429
- **影响**: 文献收集阶段延迟,但 circuit breaker 保证最终完成
### BUG-04: Stage 10 深度质量检查 — 类方法不足
- **状态**: ✅ 已加强 (远程 commit `855c201`)
- **描述**: 生成的代码中多个类只有 1 个非 dunder 方法,质量检查报告 "algorithm classes should have at least __init__ + one core method"
- **影响**: 代码质量评分降低,但不阻塞 pipeline
- **远程修复**: 新增 Check 6 — ablation 子类必须 override 父类至少一个非 dunder 方法,否则报警告。修复写入 `validator.py` 和 `executor.py` 的 repair prompt。
### BUG-05: Stage 10 深度质量检查 — UnboundLocalError 风险
- **状态**: ✅ 已修复 (远程 commit `855c201`)
- **描述**: 生成代码中变量只在 if 分支内赋值,但在分支外使用(如 main.py:289 `mask`, main.py:300 `out` 等)
- **影响**: 生成的实验代码可能在运行时崩溃
- **远程修复**: 新增 `auto_fix_unbound_locals()` 函数(`validator.py`),在 Stage 10 代码生成后自动检测 if-only 变量并插入 `var = None` 初始化。`executor.py` 在深度检查前调用。
### BUG-05 更新: UnboundLocalError 问题在 v8r3 中大幅恶化
- **状态**: ✅ 已修复 (被 `auto_fix_unbound_locals()` 覆盖)
- **描述**: v8r3 中 Case 3 (PEFT) 生成的代码有 **47 处** UnboundLocalError 风险(data.py 27 处, methods.py 20 处, main.py 2 处),远超 v8r2 的 8 处。Case 2 也有 8 处。
- **根因**: LLM 生成的代码模式为 `if cond: x = val` 后直接 `use(x)`,缺少 else 分支或默认值初始化
- **远程修复**: 程序化自动修复已集成到 Stage 10 pipeline 中
### BUG-06: P9 Metric direction mismatch
- **状态**: ✅ 已修复
- **描述**: 配置写 `minimize` 但实验代码声明 `direction=higher`,自动纠正为 `maximize`
- **影响**: 可能影响实验结果的正确性
- **修复**: (1) Stage 9 prompt 中注入 `metric_direction` 约束; (2) Stage 12 code_generation prompt 中强制 METRIC_DEF direction 与 config 一致; (3) 取消 auto-correction,改为仅 warn 并保持 config 值
### BUG-07: Stage 23 CITATION_VERIFY 失败率高
- **状态**: ✅ 已修复
- **描述**: 上轮 Case 1 和 Case 3 都在 Stage 23 失败(28/29),仅 Case 2 通过
- **影响**: 最终 pipeline 状态标记为 failed
- **根因**: (1) `_check_citation_relevance()` 最多只处理 30 个 citation,超出的无评分; (2) 无评分的 citation 在 hard cap 排序时被当作 0.0 分全部删除
- **修复**: (1) 改为分批处理所有 citation (batch=30); (2) 无评分 citation 默认 0.7(已验证=大概率相关)
### BUG-08: CodeGen `'str' object has no attribute 'get'` (v8r3 新发现)
- **状态**: ✅ 已修复
- **严重度**: 中 — 不阻塞 pipeline(有 fallback),但连续失败 6 次
- **描述**: Case 1 在 Stage 14 (RESULT_ANALYSIS) 触发 CodeGen 时连续报 `'str' object has no attribute 'get'`。疑似 LLM 返回了纯字符串而非 dict,代码对返回值调 `.get()` 导致 AttributeError。
- **远程修复**: executor.py 中 `_check_ablation_effectiveness` 等函数已加 `isinstance` 保护
- **本地修复**: `code_agent.py` 中 `_parse_json` 结果增加 `isinstance(review, dict)` 检查
### BUG-09: FigureAgent 无法生成图表 (v8r3 新发现)
- **状态**: ✅ 已修复
- **描述**: Case 1 Stage 14 中 `FigureAgent produced no charts, falling back`。FigureAgent 可能因上游 CodeGen 失败或数据格式问题无法生成图表。
- **影响**: 论文缺少可视化图表,影响质量分数
- **根因**: `_condition_summaries` 在 metrics 不含 `/` 分隔符时为空,导致 Planner 没有数据
- **修复**: (1) 从 `metrics_summary` fallback 构建 condition_summaries; (2) 从 `structured_results` 二次 fallback; (3) 向 FigureAgent 传入 `best_run_metrics` 作为数据源兜底
### BUG-10: Degenerate refine cycle (v8r3 新发现)
- **状态**: ✅ 已修复 (远程 commit `e30443e`)
- **描述**: Case 1 出现 `P6: Degenerate refine cycle detected, injecting PROCEED hint`。Pipeline 检测到实验迭代循环没有实质进展,自动注入 PROCEED 跳出。
- **远程修复**: 根因是 LLM 在迭代 refine 时重命名/替换 condition 名称导致漂移。修复方案:在 `iterative_improve` prompt 中注入 `exp_plan.yaml` 锚定,并禁止改名条件。
## 远程额外修复(BUG_TRACKER 未记录的问题)
### RFix-01: Baselines dict→list 转换 (commit `855c201`)
- 若 LLM 输出 baselines 为 dict 而非 list,`executor.py` 现在自动转换为 `list(dict.keys())`
### RFix-02: Gymnasium 环境版本 v4→v5 (commit `855c201`)
- `benchmark_knowledge.yaml` 中 HalfCheetah-v4→v5, Hopper-v4→v5
### RFix-03: Time budget 注入到 Stage 9 (commit `855c201`)
- 实验设计 prompt 中增加 `time_budget_sec` 约束,防止生成超时的实验方案
### RFix-04: 代码模板 optimizers.py→models.py (commit `855c201`)
- 代码生成模板从 `optimizers.py` 改为 `models.py`,并禁止生成只有 import/pass 的 stub 文件
### RFix-05: RL 稳定性修复提示 (commit `e30443e`)
- `iterative_repair` prompt 中增加 gradient clipping、LR cap、reward normalization、NaN guard 等常见 RL 修复建议
## 待修复汇总
| Bug | 优先级 | 状态 |
|-----|--------|------|
| BUG-02 gpt-5.4 限流 | 低 | ⏳ 待观察 (外部限制) |
所有代码层面的 bug 已修复。
## 待办事项 (TODO)
- [x] 拉取远程更新,对比 bug 修复状态
- [x] 更新 BUG_TRACKER 标注远程已修复项
- [x] 修复 BUG-06: 在 experiment design 阶段校验 metric direction 一致性
- [x] 修复 BUG-07: 分析 Stage 23 引用验证高失败率原因
- [x] 完善 BUG-08: CodeGen 调用处增加 str 类型保护
- [x] 修复 BUG-09: FigureAgent 输入数据格式检查
- [ ] 分析本轮 (v8r3) 三个 case 的质量分数,对比上轮 (v8r2)
- [ ] 考虑增加 pipeline 间的 API 调用协调机制
## 历史质量分数对比
| 版本 | Case 1 (Graph-RAG) | Case 2 (Diffusion) | Case 3 (PEFT) | 平均 |
|------|--------------------|--------------------|---------------|------|
| v8r2 | 5.2/10 | 8.0/10 | 5.8/10 | 6.3 |
| v8r3 | 待定 | 待定 | 待定 | 待定 |
---
*最后更新: 2026-03-16*
================================================
FILE: docs/CHANGELOG_ANTHROPIC_ADAPTER.md
================================================
# Anthropic Messages API Adapter — 改动说明
> 本文档详细描述了为 ResearchClaw LLM 模块引入 Anthropic Messages API 原生支持的改动内容,
> 并通过架构图说明本次改动 **不影响现有 OpenAI / OpenRouter / DeepSeek 等 provider 的任何行为**。
---
## 目录
1. [改动背景](#1-改动背景)
2. [架构总览 — 改动前后对比](#2-架构总览--改动前后对比)
3. [核心设计:适配器模式](#3-核心设计适配器模式)
4. [调用流程详解](#4-调用流程详解)
5. [对现有 Provider 零影响的保证](#5-对现有-provider-零影响的保证)
6. [变更文件清单](#6-变更文件清单)
7. [异常处理与重试机制](#7-异常处理与重试机制)
8. [配置示例](#8-配置示例)
9. [新增依赖](#9-新增依赖)
---
## 1. 改动背景
ResearchClaw 的 LLM 模块原先仅支持 **OpenAI Chat Completions API 格式**(含兼容此格式的 OpenRouter、DeepSeek 等)。
Anthropic 的 Claude 系列模型使用独立的 **Messages API**,其请求/响应结构与 OpenAI 格式存在显著差异:
| 差异点 | OpenAI 格式 | Anthropic 格式 |
|---|---|---|
| 认证方式 | `Authorization: Bearer <key>` | `x-api-key: <key>` |
| System 消息 | 放在 `messages` 数组中 | 独立的 `system` 字段 |
| 端点路径 | `/v1/chat/completions` | `/v1/messages` |
| 响应结构 | `choices[0].message.content` | `content[0].text` |
| Token 统计 | `prompt_tokens` / `completion_tokens` | `input_tokens` / `output_tokens` |
为了原生支持 Anthropic API 而不影响现有功能,我们采用了 **适配器模式(Adapter Pattern)**。
---
## 2. 架构总览 — 改动前后对比
### 改动前
```mermaid
graph TB
subgraph "create_llm_client (工厂函数)"
A[config.llm.provider] -->|"acp"| B[ACPClient]
A -->|"其他所有"| C["内联构造 LLMClient<br/>使用 PROVIDER_PRESETS 填充 base_url"]
end
C --> D["_raw_call()<br/>urllib → OpenAI /chat/completions"]
D --> E[LLMResponse]
style B fill:#e1f5fe
style C fill:#e8f5e9
style D fill:#e8f5e9
```
### 改动后
```mermaid
graph TB
subgraph "create_llm_client (工厂函数)"
A[config.llm.provider] -->|"acp"| B[ACPClient]
A -->|"其他所有"| C["LLMClient.from_rc_config()<br/>使用 PROVIDER_PRESETS 填充 base_url"]
end
C -->|"provider == anthropic"| F["挂载 AnthropicAdapter"]
C -->|"其他 provider"| G["_anthropic = None"]
subgraph "_raw_call() 内部分支"
H{"self._anthropic<br/>是否存在?"}
H -->|"是 (Anthropic)"| I["AnthropicAdapter.chat_completion()<br/>httpx → Anthropic /v1/messages"]
H -->|"否 (OpenAI 等)"| J["原有逻辑不变<br/>urllib → OpenAI /chat/completions"]
end
F --> H
G --> H
I --> K["返回 OpenAI 兼容格式 dict"]
J --> K
K --> L["统一解析 → LLMResponse"]
style B fill:#e1f5fe
style F fill:#fff3e0
style I fill:#fff3e0
style G fill:#e8f5e9
style J fill:#e8f5e9
style L fill:#f3e5f5
```
> 绿色 = 原有逻辑(未修改),橙色 = 新增 Anthropic 路径,紫色 = 共享的统一出口。
---
## 3. 核心设计:适配器模式
```mermaid
classDiagram
class LLMClient {
-LLMConfig config
-AnthropicAdapter _anthropic
+chat(messages, ...) LLMResponse
+preflight() tuple
-_call_with_retry(model, ...) LLMResponse
-_raw_call(model, ...) LLMResponse
}
class AnthropicAdapter {
-str base_url
-str api_key
-int timeout_sec
+chat_completion(model, messages, ...) dict
}
class LLMResponse {
+str content
+str model
+int prompt_tokens
+int completion_tokens
}
LLMClient "1" *-- "0..1" AnthropicAdapter : _anthropic
LLMClient ..> LLMResponse : returns
AnthropicAdapter ..> LLMResponse : "返回 OpenAI 兼容 dict\n由 LLMClient 统一解析"
note for AnthropicAdapter "仅当 provider=='anthropic' 时实例化\n其他 provider 时 _anthropic = None"
```
**关键设计决策:**
- `AnthropicAdapter` 是 `LLMClient` 的一个 **可选内部组件**,不是独立的客户端类
- 适配器返回 **OpenAI 兼容格式的 dict**,由 `_raw_call()` 的统一出口解析为 `LLMResponse`
- 当 `_anthropic is None` 时,`_raw_call()` 走 **完全不变的原有 OpenAI 路径**
---
## 4. 调用流程详解
以下时序图展示了两种 provider 各自的完整调用链路:
### OpenAI / OpenRouter / DeepSeek(原有流程,零改动)
```mermaid
sequenceDiagram
participant Caller as 调用方
participant Client as LLMClient
participant Raw as _raw_call()
participant API as OpenAI API
Caller->>Client: chat(messages)
Client->>Client: _call_with_retry(model, ...)
Client->>Raw: _raw_call(model, ...)
Note over Raw: self._anthropic is None<br/>→ 走 else 分支 (原有逻辑)
Raw->>API: urllib POST /chat/completions
API-->>Raw: {"choices": [...], "usage": {...}}
Raw-->>Client: LLMResponse
Client-->>Caller: LLMResponse
```
### Anthropic(新增流程)
```mermaid
sequenceDiagram
participant Caller as 调用方
participant Client as LLMClient
participant Raw as _raw_call()
participant Adapter as AnthropicAdapter
participant API as Anthropic API
Caller->>Client: chat(messages)
Client->>Client: _call_with_retry(model, ...)
Client->>Raw: _raw_call(model, ...)
Note over Raw: self._anthropic 存在<br/>→ 走 if 分支
Raw->>Adapter: chat_completion(model, messages, ...)
Note over Adapter: 1. 提取 system 消息<br/>2. 构建 Anthropic 请求体<br/>3. httpx POST /v1/messages
Adapter->>API: httpx POST /v1/messages
API-->>Adapter: {"content": [...], "usage": {...}}
Note over Adapter: 转换为 OpenAI 兼容格式
Adapter-->>Raw: {"choices": [...], "usage": {...}}
Note over Raw: 统一解析(与 OpenAI 路径完全相同)
Raw-->>Client: LLMResponse
Client-->>Caller: LLMResponse
```
---
## 5. 对现有 Provider 零影响的保证
```mermaid
graph LR
subgraph "provider != 'anthropic' 时的代码路径"
A["from_rc_config()"] --> B["PROVIDER_PRESETS 填充 base_url ✅"]
B --> C["LLMClient.__init__()"]
C --> D["self._anthropic = None"]
D --> E["_raw_call()"]
E --> F{"self._anthropic?"}
F -->|"None → False"| G["else 分支<br/>原有 OpenAI 逻辑<br/>(代码未修改)"]
end
style G fill:#e8f5e9,stroke:#4caf50,stroke-width:3px
style F fill:#fff9c4
```
**零影响的 5 重保证:**
| # | 保证机制 | 说明 |
|---|---|---|
| 1 | **条件初始化** | `AnthropicAdapter` 仅在 `provider == "anthropic"` 时实例化,其他 provider 不触发任何新代码 |
| 2 | **`_anthropic = None`** | `__init__` 中默认设为 `None`,非 Anthropic provider 永远不会进入适配器分支 |
| 3 | **else 分支 = 原代码** | `_raw_call()` 的 else 分支包含的是 **未修改的** OpenAI urllib 调用逻辑 |
| 4 | **PROVIDER_PRESETS 保留** | 恢复了 preset base_url 回退逻辑,`openai` / `openrouter` / `deepseek` 的自动 URL 填充行为与之前一致 |
| 5 | **统一出口** | 两条路径最终都产出相同结构的 dict,由同一段代码解析为 `LLMResponse` |
### PROVIDER_PRESETS 对照表
```mermaid
graph TD
subgraph "PROVIDER_PRESETS(base_url 自动填充)"
P1["openai → https://api.openai.com/v1"]
P2["openrouter → https://openrouter.ai/api/v1"]
P3["deepseek → https://api.deepseek.com/v1"]
P4["anthropic → https://api.anthropic.com"]
P5["openai-compatible → 用户自定义 base_url"]
end
P1 --> |"不变 ✅"| OK1[" "]
P2 --> |"不变 ✅"| OK2[" "]
P3 --> |"不变 ✅"| OK3[" "]
P4 --> |"新增"| OK4[" "]
P5 --> |"不变 ✅"| OK5[" "]
style P1 fill:#e8f5e9
style P2 fill:#e8f5e9
style P3 fill:#e8f5e9
style P4 fill:#fff3e0
style P5 fill:#e8f5e9
```
---
## 6. 变更文件清单
| 文件路径 | 变更类型 | 改动说明 |
|---|---|---|
| `researchclaw/llm/__init__.py` | 修改 | 添加 `"anthropic"` preset;简化工厂函数委托给 `from_rc_config()` |
| `researchclaw/llm/client.py` | 修改 | `from_rc_config()` 恢复 PRESETS 逻辑 + 条件挂载适配器;`_raw_call()` 添加 if/else 分支 |
| `researchclaw/llm/anthropic_adapter.py` | **新增** | `AnthropicAdapter` 类 — Anthropic Messages API → OpenAI 兼容格式转换 |
| `tests/test_anthropic.py` | **新增** | Anthropic API 连通性测试脚本 |
| `pyproject.toml` | 修改 | 添加 `httpx` 为 optional dependency (`[anthropic]` extra) |
| `.gitignore` | 修改 | 添加 `run.log` |
---
## 7. 异常处理与重试机制
Anthropic 适配器内部将 httpx 异常 **转换为 urllib 标准异常**,确保上层重试逻辑无需修改:
```mermaid
graph TD
subgraph "AnthropicAdapter 内部"
A["httpx.HTTPStatusError<br/>(4xx/5xx)"] -->|转换| B["urllib.error.HTTPError<br/>(保留 status_code)"]
C["httpx.ConnectError<br/>httpx.TimeoutException"] -->|转换| D["urllib.error.URLError"]
end
subgraph "_call_with_retry() — 不变"
B --> E{"status code?"}
E -->|"429/500/502/503/504"| F["指数退避重试 ✅"]
E -->|"400"| G["立即抛出(Bad Request)"]
E -->|"403 + model forbidden"| H["跳到下一个 fallback model"]
D --> I["重试直到耗尽 ✅"]
end
style A fill:#fff3e0
style C fill:#fff3e0
style B fill:#e8f5e9
style D fill:#e8f5e9
```
这意味着 Anthropic 路径享有与 OpenAI 路径 **完全相同的重试策略**:指数退避 + jitter + model fallback chain。
---
## 8. 配置示例
### 使用 Anthropic(新增)
```yaml
llm:
provider: anthropic
# base_url 可省略,自动使用 https://api.anthropic.com
api_key_env: ANTHROPIC_API_KEY
primary_model: claude-sonnet-4-20250514
fallback_models:
- claude-haiku-4-5-20251001
```
### 使用 OpenAI(不变)
```yaml
llm:
provider: openai
# base_url 可省略,自动使用 https://api.openai.com/v1
api_key_env: OPENAI_API_KEY
primary_model: gpt-4o
fallback_models:
- gpt-4.1
- gpt-4o-mini
```
### 使用 OpenRouter(不变)
```yaml
llm:
provider: openrouter
api_key_env: OPENROUTER_API_KEY
primary_model: anthropic/claude-sonnet-4-20250514
```
---
## 9. 新增依赖
| 依赖 | 版本要求 | 安装方式 | 说明 |
|---|---|---|---|
| `httpx` | `>=0.24` | `pip install researchclaw[anthropic]` | **可选依赖**,仅 Anthropic provider 需要 |
不使用 Anthropic provider 的用户 **无需安装 httpx**,`pip install researchclaw` 的行为完全不变。
---
> **总结**: 本次改动通过适配器模式在 `_raw_call()` 内部添加了一条 Anthropic 专用路径。
> 当 provider 不是 `"anthropic"` 时,`self._anthropic` 为 `None`,代码执行路径与改动前 **完全一致**,
> 不触及任何新增代码,不引入任何新依赖。
================================================
FILE: docs/PIPELINE_TEST_LOG_R5.md
================================================
# Pipeline Test Log — Round 5 (main branch)
> **分支**: `main` @ `e95527f`
> **日期**: 2026-03-18
> **目的**: 全面测试 main 分支 Pipeline 端到端流程,覆盖多领域、纯计算实验
> **环境**: Python 3.x, numpy 2.4.3, scipy 1.17.1, sklearn 1.8.0, matplotlib 3.10.8
> **LLM**: gpt-5.4 (fallback: gpt-5.1, gpt-4o) via Azure OpenAI
---
## 测试选题
| ID | 领域 | 主题 | metric_direction | 关键依赖 |
|----|------|------|-----------------|---------|
| N | 计算物理 | 随机矩阵理论:Marchenko-Pastur 分布的有限维修正分析 | minimize | numpy, scipy |
| O | 计算经济学 | 弱工具变量下 IV 估计量的 Monte Carlo 偏差-方差权衡 | minimize | numpy, scipy, sklearn |
| P | 计算流行病学 | SIR/SEIR 模型参数可辨识性:合成数据下的结构化似然分析 | maximize | numpy, scipy |
| Q | 数学/数值分析 | Krylov 子空间方法求解稀疏线性系统:预条件策略对比 | minimize | numpy, scipy |
### 选题原则
- 所有实验纯计算/模拟,无需外部数据集或 GPU
- 核心依赖仅 numpy/scipy/sklearn,sandbox 即可执行
- 覆盖 4 个不同领域:物理、经济学、流行病学、数值分析
- 避免 R4 中被 topic refinement 强行引向 ML 的问题——本轮 topic 描述更具体
### 备选 Topic(未选用)
1. **Agent/RL**: 网格世界中多智能体 emergent communication 的涌现 — 需要 gymnasium,sandbox 兼容性不确定
2. **信号处理**: 压缩感知中 RIP 条件的经验验证 — 可行但领域覆盖与 Q 重叠
3. **统计学**: Bayesian 变点检测的 MCMC 采样效率对比 — 可行,备用
4. **图论**: 随机图上 Erdos-Renyi 相变阈值的数值验证 — 可行,备用
---
## 运行状态
| Pipeline | Config | Run ID | PID | 启动时间 (UTC) | 最终阶段 | 状态 | 总耗时 |
|----------|--------|--------|-----|---------------|---------|------|--------|
| N | config_test_N.yaml | `rc-20260318-174754-fc94f2` | 2036352 | 17:47 | 28/29 (S23 fail) | ⚠️ 近完美 | ~2.5h |
| O | config_test_O.yaml | `rc-20260318-174826-01c0f3` | 2037261 | 17:48 | 28/29 (S23 fail) | ⚠️ 近完美 | ~3.0h |
| P | config_test_P.yaml | `rc-20260318-174900-d5371f` | 2037826 | 17:49 | 29/29 ✅ | ✅ 完美通过 | ~2.4h |
| Q | config_test_Q.yaml | `rc-20260318-174935-d0a717` | 2038664 | 17:49 | 28/29 (S23 fail) | ⚠️ 近完美 | ~2.5h |
---
## 观测记录
### OBS-R5-01: S2 + arXiv 429 限流(预期行为)(17:48 UTC)
- **严重度**: 🟢 预期行为
- **描述**: 4个并行 Pipeline 同时触发 S2/arXiv 429 限流
- S2 circuit breaker: 120s cooldown (trip #1)
- arXiv circuit breaker: 180s cooldown (trip #1)
- **关联**: R4-OBS-02 同类问题
- **影响**: 文献搜索阶段延迟增加,不阻塞
### OBS-R5-02: Pipeline Q 触发 IMP-35 Topic Refinement (17:49 UTC)
- **严重度**: 🟡 值得关注
- **描述**: Krylov 子空间方法的 topic 被评为 4/10,系统建议 refine 为 ML 相关方向
- 原始: "Comparative Analysis of Preconditioning Strategies for Krylov Subspace Methods..."
- 建议: "Learned preconditioner selection for Krylov solvers on sparse linear systems..."
- **评估**: IMP-35 倾向于把所有 topic 往 ML 方向引导(R4-OBS-03 同类问题)
- **影响**: 纯数值分析 topic 可能被扭曲为 ML topic,但实验代码仍应聚焦原始问题
### OBS-R5-03: 初始进度检查 (~17:55 UTC)
- N: Stage 7/SYNTHESIS ✅ 快速推进
- O: Stage 6/KNOWLEDGE_EXTRACT ✅ 正常
- P: Stage 4/LITERATURE_COLLECT — 稍慢(429 影响)
- Q: Stage 5/LITERATURE_SCREEN ✅ 正常
### OBS-R5-04: CodeSearcher query_gen.py TypeError (18:20 UTC)
- **严重度**: 🟡 中 — 不阻塞但影响代码质量
- **描述**: `researchclaw/agents/code_searcher/query_gen.py:149` 调用 `llm.chat()` 时传入不支持的 `user` 关键字参数
```
TypeError: LLMClient.chat() got an unexpected keyword argument 'user'
```
- **影响**: CodeSearcher 无法使用 LLM 生成 GitHub 搜索 query,退化到基于规则的 query
- **关联**: R4-BUG-02 (GitHub 401) — 401 问题仍在(无 GITHUB_TOKEN),加上此 TypeError 意味着 CodeSearcher 基本失效
- **需要修复**: ✅ 是 — query_gen.py 中 `llm.chat()` 调用签名与 LLMClient 接口不匹配
### OBS-R5-05: gpt-5.4 Read Timeout 导致 fallback (18:30 UTC)
- **严重度**: 🟡 中 — 自动 fallback 工作正常
- **描述**: Pipeline N 在代码生成阶段遭遇多次 gpt-5.4 read timeout
- 触发 fallback 到 gpt-5.1 或 gpt-4o
- 代码生成请求因 token 量大,更容易超时
- **影响**: 代码生成速度下降,但不阻塞
### OBS-R5-06: Sandbox execution timeout 60s (18:35 UTC)
- **严重度**: 🟡 中 — 影响代码验证
- **描述**: Pipeline O 代码生成阶段的 sandbox 验证执行超时(60s)
- 可能是验证生成的实验代码能否运行
- 代码生成后的 AST 验证 + 试运行超时
- **影响**: 代码可能未经充分验证就进入下一阶段
### OBS-R5-07: Stage 10 Deep Quality — Copy-paste Detection (18:35 UTC)
- **严重度**: 🟡 中 — 代码质量问题
- **描述**: Pipeline O 的 models.py 中检测到多组 copy-paste 类:
1. `FixedFullerOneBiasReducedBaseline` vs `FixedFullerFourAggressiveShrinkageBaseline` (16 vs 16 lines)
2. `FirstStageStrengthOnlyRiskSurfaceBaseline` vs `NoLeverageGeometryRiskSurfaceAblation` (9 vs 9 lines)
3. 多个 ablation 类仅 0-1 个非 dunder 方法
- **评估**: 这是 R4-BUG-13 的同类问题 — ablation 类之间差异不足
- **关联**: BUG-13 (copy-paste ablation)
### OBS-R5-08: 所有 Pipeline 在 Stage 10 停留超 25 分钟 (18:41 UTC)
- **严重度**: 🟢 预期行为
- **描述**: 代码生成是最重的 LLM 调用阶段,N=1 attempt, O/P=3 attempts, Q=3 attempts
- **评估**: 多次 attempt 表明 code validation loop 在工作,自动修复代码中的问题
- **耗时**: N=2441s (~41min), O=2485s (~41min), P=2796s (~47min), Q=2976s (~50min)
### OBS-R5-09: 所有已执行实验在 Stage 12 首次运行均失败 (18:55 UTC)
- **严重度**: 🔴 高 — 系统性 numpy 2.x API 不兼容
- **描述**: 3个已完成 Stage 12 的 Pipeline 均在首次实验运行失败:
- **N**: `AttributeError: module 'numpy' has no attribute 'trapz'`
- numpy 2.0 移除了 `np.trapz`,应使用 `np.trapezoid`
- **O**: `numpy.linalg.LinAlgError: 1-dimensional array given. Array must be two-dimensional`
- 代码向 linalg 函数传入了 1D 数组
- **P**: `AttributeError: module 'numpy' has no attribute 'erfinv'`
- `erfinv` 从未存在于 numpy 中,应使用 `scipy.special.erfinv`
- **根因**: gpt-5.4 生成的代码使用了已在 numpy 2.x 中移除或不存在的 API
- **关联**: R5-BUG-02 (见下方)
### OBS-R5-10: Stage 13 自动修复正确修复 numpy.trapz → numpy.trapezoid (18:55 UTC)
- **严重度**: 🟢 正面发现
- **描述**: Pipeline N 的 Stage 13 (ITERATIVE_REFINE) 成功检测到 `np.trapz` 错误并:
1. 创建了 `_trapz()` 包装函数
2. 内部使用 `np.trapezoid(y, x)` 替代
3. 同时创建了 `_cumulative_trapezoid_1d()` 辅助函数
- **评估**: 自我修复机制在 numpy API 变更场景中工作良好
### OBS-R5-11: Pipeline Q Stage 09 YAML 解析警告 (18:40 UTC)
- **严重度**: 🟢 低 — 自动恢复
- **描述**: Pipeline Q 的 Stage 09 LLM 返回内容无法直接解析为 YAML
- 返回了 38089 字符的响应,远超预期
- content extraction fallback 正常工作
- **影响**: 无实际影响,pipeline 继续正常运行
### OBS-R5-12: Stage 13 自动修复成功修复所有 numpy 2.x 不兼容 (19:10 UTC)
- **严重度**: 🟢 正面发现
- **描述**: 所有 4 个 Pipeline 的 Stage 13 成功修复了 Stage 12 首次运行失败:
- N: `np.trapz` → `np.trapezoid` (wrapper function) ✅
- O: 1D→2D array reshape 修复 ✅
- P: `np.erfinv` → `scipy.special.erfinv` ✅
- Q: 修复后成功运行 ✅
- **评估**: 自我修复机制可靠,但首次成功率仍可改善
### OBS-R5-13: 所有 4 个 Pipeline 首次 Research Decision 均为 REFINE (19:23-19:49 UTC)
- **严重度**: 🟡 值得关注
- **描述**: 所有 Pipeline 在第一轮实验后都被判定需要 refine
- 这可能意味着:(a) 实验结果不够有说服力 (b) 系统对首轮结果过于严格
- N、P、Q 在第二轮后仍被 refine → 达到 max refine (2次) → 下次将 forced PROCEED
- O 在第一轮 refine 中
- **影响**: Pipeline 总耗时增加(每次 refine 约增加 15-30 分钟实验时间)
### OBS-R5-14: Pipeline N 首先进入论文写作阶段 (~19:57 UTC)
- **严重度**: 🟢 正面进展
- **描述**: Pipeline N (Marchenko-Pastur) 完成 2 轮 refine,被 forced PROCEED 到 Stage 16
- Stage 14 (RESULT_ANALYSIS) 耗时 553s (~9min)
- Stage 15 decision 耗时 15s
### OBS-R5-15: Pipeline P 完美完成 29/29 stages! (20:13 UTC)
- **严重度**: 🟢🟢🟢 重大正面发现
- **描述**: Pipeline P (SIR/SEIR 流行病学) 是 R5 第一个(也是唯一一个)完美完成的 Pipeline
- 所有 29 个 stage 成功,0 失败
- 完整交付物:paper.tex (539行), references.bib (405行), 5 张图表, code package
- Stage 23 citation verify 成功验证 44 条引用
- LaTeX 编译成功(paper.aux, paper.log 生成)
- 总耗时约 2.4 小时
- **评估**: 这是本项目自 R0 以来第一次有 Pipeline 完整通过所有 29 个 stage
- R0: Pipeline A 29/29 但那是在较旧版本上
- R4: 所有 4 个 Pipeline 在 Stage 20 被拒(2/10 质量分)
- R5: Pipeline P 通过了 Stage 20(degraded 但非 rejected)
### OBS-R5-16: N 和 Q 在 Stage 23 (Citation Verify) 失败 (20:14-20:21 UTC)
- **严重度**: 🟡 中 — 不影响论文本身
- **描述**: N 和 Q 的 Stage 23 因 `references_verified.bib` 缺失而失败
- 错误信息: `Missing or empty output: references_verified.bib`
- Stage 23 耗时 0s — 意味着在验证前就失败了
- Pipeline P 的 Stage 23 成功(11s),说明这不是系统性问题
- **关联**: R5-BUG-04 (见下方)
### OBS-R5-17: Pipeline O 大量 ablation failure (20:20 UTC)
- **严重度**: 🟡 中 — 代码质量问题
- **描述**: Pipeline O (IV estimators) 的 Stage 13 v2 检测到大量 copy-paste ablation 问题
- 8+ 对 conditions 产生完全相同的输出
- 例: `mean_bias_only_jive_evaluation_ablation` ≡ `two_stage_least_squares_wald_baseline`
- 例: `no_instrument_density_geometry_risk_surface_ablation` ≡ `no_leverage_geometry_risk_surface_ablation`
- **关联**: R5-BUG-03, R4-BUG-13 — copy-paste ablation 问题持续存在
### OBS-R5-18: 纸面写作阶段高效 (Stage 16-22)
- **严重度**: 🟢 正面
- **描述**: 所有完成的 Pipeline 在纸面写作阶段均高效运行:
- Stage 16 (PAPER_OUTLINE): 99-119s
- Stage 17 (PAPER_DRAFT): 374-406s (~6-7min)
- Stage 18 (PEER_REVIEW): 72s
- Stage 19 (PAPER_REVISION): 242-277s (~4min)
- Stage 20 (QUALITY_GATE): 9-12s
- Stage 21 (KNOWLEDGE_ARCHIVE): 42-51s
- Stage 22 (EXPORT_PUBLISH): 122-130s (~2min)
- **总计**: 纸面写作 + 导出约 15 分钟
### OBS-R5-19: Pipeline N 论文承认实验失败 (20:14 UTC)
- **严重度**: 🟡 中 — 影响论文质量
- **描述**: Pipeline N 的 paper_draft.md 中写道:
> "the current execution failed before producing any analyzable spectral metrics"
- **分析**: 虽然 Stage 13 成功修复了 numpy 2.x 错误并重新运行了实验,但论文写作阶段可能
没有从修复后的实验结果中获取数据,而是检测到了第一次失败的状态
- **关联**: 可能是 Stage 14 (RESULT_ANALYSIS) 没有正确读取 Stage 13 v2/v3 的结果
---
## 新发现 Bug
### R5-BUG-01: CodeSearcher query_gen.py — LLMClient.chat() 签名不匹配 ✅ 已修复
- **严重度**: 🟡 中 — 不阻塞 pipeline 但降低代码质量
- **文件**: `researchclaw/agents/code_searcher/query_gen.py:149`
- **描述**:
- `llm.chat()` 被调用为 `llm.chat(system=..., user=..., max_tokens=...)`
- 实际签名是 `chat(messages: list[dict], *, system=, max_tokens=)`
- `user` 不是有效参数 → `TypeError`
- 另外代码错误地用 `asyncio.run()` 包装同步方法
- **修复**:
- 改为 `llm.chat([{"role": "user", "content": prompt}], system=..., max_tokens=...)`
- 移除不必要的 `asyncio.run()` 和 `chat_sync` 分支
- **影响**: 修复后 CodeSearcher 可正常使用 LLM 生成搜索查询(仍需 GITHUB_TOKEN)
### R5-BUG-02: 代码生成使用已弃用/不存在的 numpy 2.x API(系统性)
- **严重度**: 🔴 高 — 导致所有实验首次运行失败
- **描述**: gpt-5.4 生成的代码使用了已在 numpy 2.0 中移除的 API:
- `np.trapz` → 应使用 `np.trapezoid` (numpy 2.0 breaking change)
- `np.erfinv` → 从未存在于 numpy,应使用 `scipy.special.erfinv`
- `np.bool` / `np.int` 等 → 已在 numpy 1.24+ 移除
- **根因**: LLM 训练数据包含大量 numpy 1.x 代码,未适应 2.x 变化
- **自动修复**: Stage 13 (ITERATIVE_REFINE) 成功修复了这些问题 ✅
- **建议**: 在代码生成 prompt 中添加 numpy 2.x 兼容性提示,减少首次失败
### R5-BUG-03: Pipeline O copy-paste ablation 检测(已知问题复现)
- **严重度**: 🟡 中
- **描述**: Stage 10 deep quality check 检测到多组近似相同的 ablation 类
- Fuller1 vs Fuller4: 仅超参数不同,方法体相同
- Risk surface baseline vs ablation: 方法签名和体积完全相同
- **关联**: R4-BUG-13 (BUG-13 copy-paste ablation) — 该问题跨轮次持续存在
- **建议**: 需要在代码生成阶段强化 ablation 差异性检查
### R5-BUG-04: Stage 23 Citation Verify — references_verified.bib 缺失 ✅ FIXED
- **严重度**: 🔴 高 — 3/4 Pipeline 受影响
- **描述**: N、O 和 Q 在 Stage 23 因 `references_verified.bib` 未生成而失败
- 错误: `Missing or empty output: references_verified.bib`
- Stage 23 耗时 0s,说明在输出验证前就失败了
- Pipeline P 的 Stage 23 成功(11s),同一引用验证逻辑正常工作
- **根因分析**:
- Stage 23 在无引用时正确写入空的 `references_verified.bib`(executor.py L9082)
- 但 contract validation(executor.py L9351)拒绝 `st_size == 0` 的文件
- Pipeline P 有 19KB 的 references.bib → 验证后非空 → 通过
- N/O/Q 无引用 → Stage 23 写空文件 → 被 contract validation 拒绝
- **修复**: 将空文件改为写入 BibTeX 注释 `% No references to verify\n`(executor.py L9085-9086)
- 文件非空,通过 contract validation,同时语义上表示"无引用"
### R5-BUG-05: 论文未使用修复后的实验结果 ✅ FIXED
- **严重度**: 🔴 高 — 影响论文科学价值
- **描述**: Pipeline N/Q 的论文包含 "quality 2/10" 警告,声称实验失败
但 Stage 13 成功修复了 numpy 错误并产生了完整的实验结果(论文表格中实际包含真实数据)
- **根因分析**: Stage 14 LLM analysis 在所有三次 refine 迭代中均给出 2/10(包括最新的非版本化 stage-14),
而 BUG-23 guard(executor.py L7184)在 `_analysis_rating <= 2` 时强制 `has_real_metrics = False`,
即使 `_collect_raw_experiment_metrics()` 已成功从 Stage 13 stdout 解析出真实指标
- **注**: `_read_prior_artifact` 排序是正确的 — 非版本化目录确实是最新的(rollback 时旧目录会被重命名为 `_vN`)
- **修复**: 在 BUG-23 guard 中增加 `not _has_parsed_metrics` 条件(executor.py L7187)
- 当 Stage 13 refinement 产生了可解析的真实指标时,不再被 analysis rating 覆盖
- 同时保留了原始 BUG-23 防护:在确实没有真实指标时仍会触发
---
## 总结
### 整体评价
R5 是目前最成功的测试轮次:
| 指标 | R4 (feat/universal-codegen) | R5 (main) |
|------|---------------------------|-----------|
| 完美通过 (29/29) | 0/4 | **1/4 (Pipeline P)** |
| 近完美 (28/29) | 0/4 | **3/4 (N, O, Q)** |
| Stage 20 通过 | 0/4 (all rejected 2/10) | **4/4 (all degraded/pass)** |
| 崩溃/严重失败 | 1/4 (Pipeline K crash) | **0/4** |
| 平均完成阶段 | ~25/29 | **28.25/29** |
| 平均耗时 | ~3.5h | **~2.6h** |
### 关键改进
1. **Stage 20 Quality Gate 不再阻塞**: R4 中所有 Pipeline 被 2/10 拒绝,R5 全部通过
2. **自我修复能力可靠**: Stage 13 成功修复了所有 numpy 2.x API 不兼容问题
3. **跨领域能力验证**: 物理、经济学、流行病学、数值分析 4 个不同领域均可完成
4. **无崩溃**: 4/4 Pipeline 全部正常完成,无任何进程级崩溃
### 关键问题(全部已修复)
1. ✅ **R5-BUG-05**: BUG-23 guard 过度激进 → 论文声称实验失败
2. ✅ **R5-BUG-04**: Stage 23 写入空 bib 文件被 contract validation 拒绝 → 3/4 失败
3. ✅ **R5-BUG-01**: CodeSearcher query_gen.py 签名不匹配
4. ✅ **R5-BUG-02**: 代码生成使用已弃用 numpy 2.x API — 已在 7 个 prompt 中添加兼容性警告
5. ✅ **R5-BUG-03**: copy-paste ablation — 新增 <1% 近似检测 + prompt 强化
6. ✅ **R5-BUG-06**: LaTeX microtype 字体错误 — 已添加 `\usepackage{lmodern}`
### R5-BUG-06: LaTeX 编译失败 — pdfTeX font expansion 错误 ✅ FIXED
- **严重度**: 🟡 中
- **描述**: Pipeline Q 的 paper.tex 编译失败
```
pdfTeX error (font expansion): auto expansion is only possible with scalable
Fatal error occurred, no output PDF file produced!
```
- **根因**: `\usepackage[T1]{fontenc}` 激活了 T1 编码,但未加载可缩放字体(lmodern)
- **修复**: 在 `researchclaw/templates/conference.py` 的 NEURIPS_2024、NEURIPS_2025、GENERIC 三个模板中
在 `fontenc` 之后添加 `\usepackage{lmodern}`
### R5-BUG-02: numpy 2.x API 不兼容 ✅ FIXED
- **修复范围**: 在以下 7 个 prompt 位置添加了 numpy 2.x 兼容性警告
- `prompts.default.yaml` (legacy code_generation)
- `prompts.py`: architecture_planning, generate_single_file, code_repair, iterative_improve, iterative_repair, code_exec_fix
### R5-BUG-03: copy-paste ablation ✅ IMPROVED
- **修复**: executor.py 新增 P8 近似检测(<1% relative diff → warning),补充了原有的精确匹配检测
- **注**: prompt 中已有 Rule 9 (ABLATION DIFFERENTIATION) 和 Rule 8 (METHOD RICHNESS) 的引导
### 后续排查结论
- **`_read_prior_artifact` 排序**: ✅ 确认正确 — 非版本化目录确实是最新的(rollback 重命名旧目录为 `_vN`)
- **Stage 14 quality rating 问题**: 所有 3 次 refine 迭代的 Stage 14 均给出 2/10 → 这是 LLM 分析偏保守的问题,
但 BUG-05 的修复已绕过该问题(信任实际解析出的指标)
### 交付物检查
| Pipeline | paper.tex | references.bib | charts | code | LaTeX编译 |
|----------|-----------|---------------|--------|------|----------|
| N | ✅ | ❌ (S23 fail) | ✅ | ✅ | 未检查 |
| O | ✅ | ❌ (S23 fail) | ✅ | ✅ | 未检查 |
| P | ✅ (539行) | ✅ (405行) | ✅ (5张) | ✅ | ✅ |
| Q | ✅ | ❌ (S23 fail) | ✅ | ✅ | 未检查 |
### Pipeline 时间分布(以 Pipeline P 为例)
| 阶段 | 耗时 | 说明 |
|------|------|------|
| S1-S9 (研究+设计) | ~20min | 含 429 限流延迟 |
| S10 (代码生成) | ~47min | 最重的 LLM 阶段,3 次 attempt |
| S11 (资源规划) | ~14s | |
| S12-S13 (实验+修复) | ~15min | 首次失败 + 自动修复 + 重运行 × 2轮 refine |
| S14-S15 (分析+决策) | ~10min | 含 2 轮 refine 循环 |
| S16-S22 (论文写作+导出) | ~15min | |
| S23 (引用验证) | ~11s | |
| **总计** | **~2.4h** | |
================================================
FILE: docs/README_AR.md
================================================
<p align="center">
<img src="../image/logo.png" width="700" alt="AutoResearchClaw Logo">
</p>
<h2 align="center"><b>شارك فكرة. احصل على ورقة بحثية. مؤتمت بالكامل & ذاتي التطور.</b></h2>
<p align="center">
<b><i><font size="5">تحدث مع <a href="#-تكامل-openclaw">OpenClaw</a>: «ابحث عن X» → تمّ.</font></i></b>
</p>
<p align="center">
<img src="../image/framework_v2.png" width="100%" alt="AutoResearchClaw Framework">
</p>
<p align="center">
<a href="../LICENSE"><img src="https://img.shields.io/badge/License-MIT-yellow.svg" alt="MIT License"></a>
<a href="https://python.org"><img src="https://img.shields.io/badge/Python-3.11%2B-3776AB?logo=python&logoColor=white" alt="Python 3.11+"></a>
<a href="#الاختبار"><img src="https://img.shields.io/badge/Tests-1823%20passed-brightgreen?logo=pytest&logoColor=white" alt="1823 Tests Passed"></a>
<a href="https://github.com/aiming-lab/AutoResearchClaw"><img src="https://img.shields.io/badge/GitHub-AutoResearchClaw-181717?logo=github" alt="GitHub"></a>
<a href="#-تكامل-openclaw"><img src="https://img.shields.io/badge/OpenClaw-Compatible-ff4444?logo=data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAyNCAyNCI+PHBhdGggZD0iTTEyIDJDNi40OCAyIDIgNi40OCAyIDEyczQuNDggMTAgMTAgMTAgMTAtNC40OCAxMC0xMFMxNy41MiAyIDEyIDJ6IiBmaWxsPSJ3aGl0ZSIvPjwvc3ZnPg==" alt="OpenClaw Compatible"></a>
<a href="https://discord.gg/u4ksqW5P"><img src="https://img.shields.io/badge/Discord-Join%20Community-5865F2?logo=discord&logoColor=white" alt="Discord"></a>
</p>
<p align="center">
<a href="../README.md">🇺🇸 English</a> ·
<a href="README_CN.md">🇨🇳 中文</a> ·
<a href="README_JA.md">🇯🇵 日本語</a> ·
<a href="README_KO.md">🇰🇷 한국어</a> ·
<a href="README_FR.md">🇫🇷 Français</a> ·
<a href="README_DE.md">🇩🇪 Deutsch</a> ·
<a href="README_ES.md">🇪🇸 Español</a> ·
<a href="README_PT.md">🇧🇷 Português</a> ·
<a href="README_RU.md">🇷🇺 Русский</a> ·
<a href="README_AR.md">🇸🇦 العربية</a>
</p>
<p align="center">
<a href="showcase/SHOWCASE.md">🏆 معرض الأوراق</a> · <a href="integration-guide.md">📖 دليل التكامل</a> · <a href="https://discord.gg/u4ksqW5P">💬 مجتمع Discord</a>
</p>
---
<table>
<tr>
<td width="18%">
<a href="showcase/SHOWCASE.md"><img src="showcase/thumbnails/paper_I_random_matrix-01.png" width="120" alt="ورقة نموذجية"/></a>
</td>
<td valign="middle">
<b>🏆 معرض الأوراق المُولّدة</b><br><br>
<b>8 أوراق في 8 مجالات</b> — الرياضيات، الإحصاء، الأحياء، الحوسبة، NLP، RL، الرؤية الحاسوبية، المتانة — مُولّدة بشكل مستقل تماماً بدون تدخل بشري.<br><br>
<a href="showcase/SHOWCASE.md"><img src="https://img.shields.io/badge/عرض_المعرض_الكامل_→-جميع_الأوراق_الـ8-d73a49?style=for-the-badge" alt="عرض المعرض"></a>
</td>
</tr>
</table>
---
> **🧪 نبحث عن مختبرين!** جرّب خط الأنابيب بفكرتك البحثية الخاصة — من أي مجال — و[أخبرنا برأيك](TESTER_GUIDE.md). ملاحظاتك تشكّل الإصدار القادم مباشرة. **[→ Testing Guide](TESTER_GUIDE.md)** | **[→ 中文测试指南](TESTER_GUIDE_CN.md)** | **[→ 日本語テストガイド](TESTER_GUIDE_JA.md)**
---
## 🔥 News
- **[03/22/2026]** [v0.3.2](https://github.com/aiming-lab/AutoResearchClaw/releases/tag/v0.3.2) — **دعم متعدد المنصات + استقرار كبير** — يعمل AutoResearchClaw الآن مع أي وكيل متوافق مع ACP (Claude Code، Codex CLI، Copilot CLI، Gemini CLI، Kimi CLI) ويدعم منصات المراسلة (Discord، Telegram، Lark، WeChat) عبر جسر OpenClaw. واجهة خلفية جديدة لتوليد الكود عبر CLI-agent تفوّض المرحلتين 10 و13 لوكلاء CLI خارجيين مع التحكم في الميزانية وإدارة المهلة الزمنية. يتضمن نظام مكافحة التلفيق (VerifiedRegistry + حلقة تشخيص وإصلاح التجارب)، 100+ إصلاح أخطاء، إعادة هيكلة modular executor، كشف تلقائي لـ `--resume`، تعزيز إعادة محاولات LLM، وإصلاحات المجتمع.
- **[03/18/2026]** [v0.3.1](https://github.com/aiming-lab/AutoResearchClaw/releases/tag/v0.3.1) — **OpenCode Beast Mode + Community Contributions** — New "Beast Mode" routes complex code generation to [OpenCode](https://github.com/anomalyco/opencode) with automatic complexity scoring and graceful fallback. Added Novita AI provider support, thread-safety hardening, improved LLM output parsing robustness, and 20+ bug fixes from community PRs and internal audit.
- **[03/17/2026]** [v0.3.0](https://github.com/aiming-lab/AutoResearchClaw/releases/tag/v0.3.0) — **MetaClaw Integration** — AutoResearchClaw now supports [MetaClaw](https://github.com/aiming-lab/MetaClaw) cross-run learning: pipeline failures → structured lessons → reusable skills, injected into all 23 stages. **+18.3%** robustness in controlled experiments. Opt-in (`metaclaw_bridge.enabled: true`), fully backward-compatible. See [Integration Guide](#-metaclaw-integration).
- **[03/16/2026]** [v0.2.0](https://github.com/aiming-lab/AutoResearchClaw/releases/tag/v0.2.0) — Three multi-agent subsystems (CodeAgent, BenchmarkAgent, FigureAgent), hardened Docker sandbox with network-policy-aware execution, 4-round paper quality audit (AI-slop detection, 7-dim review scoring, NeurIPS checklist), and 15+ bug fixes from production runs.
- **[03/15/2026]** [v0.1.0](https://github.com/aiming-lab/AutoResearchClaw/releases/tag/v0.1.0) — We release AutoResearchClaw: a fully autonomous 23-stage research pipeline that turns a single research idea into a conference-ready paper. No human intervention required.
---
## ⚡ أمر واحد. ورقة واحدة.
```bash
pip install -e . && researchclaw setup && researchclaw init && researchclaw run --topic "Your research idea here" --auto-approve
```
---
## 🤔 ما هذا؟
**أنت تفكر. AutoResearchClaw يكتب.**
أعطِ موضوعاً بحثياً — احصل على ورقة أكاديمية كاملة مع أدبيات حقيقية من OpenAlex و Semantic Scholar و arXiv، وتجارب في بيئة معزولة واعية بالعتاد (كشف تلقائي لـ GPU/MPS/CPU)، وتحليل إحصائي، ومراجعة أقران متعددة الوكلاء، و LaTeX جاهز للمؤتمرات يستهدف NeurIPS/ICML/ICLR. بدون مراقبة. بدون نسخ ولصق. بدون مراجع مُلفّقة.
<table>
<tr><td>📄</td><td><code>paper_draft.md</code></td><td>ورقة أكاديمية كاملة (مقدمة، أعمال سابقة، المنهجية، التجارب، النتائج، الخاتمة)</td></tr>
<tr><td>📐</td><td><code>paper.tex</code></td><td>LaTeX جاهز للمؤتمرات (قوالب NeurIPS / ICLR / ICML)</td></tr>
<tr><td>📚</td><td><code>references.bib</code></td><td>مراجع BibTeX حقيقية من OpenAlex و Semantic Scholar و arXiv — مُنقّحة تلقائياً لمطابقة الاستشهادات المضمّنة</td></tr>
<tr><td>🔍</td><td><code>verification_report.json</code></td><td>تحقق من سلامة الاستشهادات على 4 طبقات + التحقق من الصلة (arXiv، CrossRef، DataCite، LLM)</td></tr>
<tr><td>🧪</td><td><code>experiment runs/</code></td><td>كود مُولّد + نتائج البيئة المعزولة + مقاييس JSON منظمة</td></tr>
<tr><td>📊</td><td><code>charts/</code></td><td>رسوم بيانية مُولّدة تلقائياً لمقارنة الظروف مع أشرطة الخطأ وفترات الثقة</td></tr>
<tr><td>📝</td><td><code>reviews.md</code></td><td>مراجعة أقران متعددة الوكلاء مع فحص اتساق المنهجية والأدلة</td></tr>
<tr><td>🧬</td><td><code>evolution/</code></td><td>دروس تعلّم ذاتي مستخلصة من كل تشغيل</td></tr>
<tr><td>📦</td><td><code>deliverables/</code></td><td>جميع المخرجات النهائية في مجلد واحد — جاهزة للترجمة على Overleaf</td></tr>
</table>
يعمل خط الأنابيب **من البداية إلى النهاية بدون تدخل بشري**. عندما تفشل التجارب، يصلح نفسه. عندما لا تصمد الفرضيات، يغيّر المسار. عندما تكون الاستشهادات مُلفّقة، يزيلها.
🌍 **شغّله من أي مكان.** AutoResearchClaw ليس مقيّدًا بمنصة واحدة. استخدمه مستقلاً عبر CLI، أو وصّله بـ [OpenClaw](https://github.com/openclaw/openclaw)، أو ادمجه مع أي وكيل متوافق مع ACP — 🤖 Claude Code، 💻 Codex CLI، 🐙 Copilot CLI، ♊ Gemini CLI، 🌙 Kimi CLI، وغيرها. بفضل جسر الرسائل في OpenClaw، يمكنك إطلاق بحث كامل من 💬 Discord، ✈️ Telegram، 🐦 Lark (飞书)، 💚 WeChat، أو أي منصة يستخدمها فريقك بالفعل. موضوع واحد كمُدخل، ورقة بحثية كمُخرج — بغض النظر عن المكان الذي تكتب منه.
---
## 🚀 البداية السريعة
```bash
# 1. استنساخ وتثبيت
git clone https://github.com/aiming-lab/AutoResearchClaw.git
cd AutoResearchClaw
python3 -m venv .venv && source .venv/bin/activate
pip install -e .
# 2. الإعداد (تفاعلي — يثبّت OpenCode beast mode، يتحقق من Docker/LaTeX)
researchclaw setup
# 3. التهيئة
researchclaw init # تفاعلي: اختر مزوّد LLM، ينشئ config.arc.yaml
# أو يدوياً: cp config.researchclaw.example.yaml config.arc.yaml
# 4. التشغيل
export OPENAI_API_KEY="sk-..."
researchclaw run --config config.arc.yaml --topic "Your research idea" --auto-approve
```
المخرجات → `artifacts/rc-YYYYMMDD-HHMMSS-<hash>/deliverables/` — LaTeX و BibTeX وكود التجارب والرسوم البيانية جاهزة للترجمة.
<details>
<summary>📝 الحد الأدنى من التهيئة المطلوبة</summary>
```yaml
project:
name: "my-research"
research:
topic: "Your research topic here"
llm:
base_url: "https://api.openai.com/v1"
api_key_env: "OPENAI_API_KEY"
primary_model: "gpt-4o"
fallback_models: ["gpt-4o-mini"]
experiment:
mode: "sandbox"
sandbox:
python_path: ".venv/bin/python"
```
</details>
---
## 🧠 ما الذي يميّزه
| القدرة | كيف يعمل |
|-----------|-------------|
| **🔄 حلقة PIVOT / REFINE** | المرحلة 15 تقرر بشكل مستقل: PROCEED أو REFINE (تعديل المعاملات) أو PIVOT (اتجاه جديد). المخرجات تُحفظ بإصدارات تلقائياً. |
| **🤖 نقاش متعدد الوكلاء** | توليد الفرضيات وتحليل النتائج ومراجعة الأقران تستخدم نقاشاً منظماً بوجهات نظر متعددة. |
| **🧬 التعلّم الذاتي** | دروس مستخلصة من كل تشغيل (مبررات القرارات، تحذيرات وقت التشغيل، شذوذ المقاييس) مع تناقص زمني بنصف عمر 30 يوماً. التشغيلات المستقبلية تتعلم من الأخطاء السابقة. |
| **📚 قاعدة المعرفة** | كل تشغيل يبني قاعدة معرفة منظمة عبر 6 فئات (قرارات، تجارب، اكتشافات، أدبيات، أسئلة، مراجعات). |
| **🛡️ الحارس المراقب Sentinel** | مراقب جودة في الخلفية: كشف NaN/Inf، اتساق الورقة والأدلة، تقييم صلة الاستشهادات، حماية ضد التلفيق. |
---
## 🦞 تكامل OpenClaw
<table>
<tr>
<td>
**AutoResearchClaw هو خدمة متوافقة مع [OpenClaw](https://github.com/openclaw/openclaw).** قم بتثبيته في OpenClaw وابدأ بحثاً مستقلاً برسالة واحدة — أو استخدمه بشكل مستقل عبر سطر الأوامر أو Claude Code أو أي مساعد برمجة بالذكاء الاصطناعي.
</td>
</tr>
</table>
### 🚀 الاستخدام مع OpenClaw (موصى به)
إذا كنت تستخدم [OpenClaw](https://github.com/openclaw/openclaw) بالفعل كمساعد ذكاء اصطناعي:
```
1️⃣ شارك رابط مستودع GitHub مع OpenClaw
2️⃣ OpenClaw يقرأ تلقائياً RESEARCHCLAW_AGENTS.md → يفهم خط الأنابيب
3️⃣ قل: "ابحث عن [موضوعك]"
4️⃣ تم — OpenClaw يستنسخ، يثبّت، يهيّئ، يشغّل، ويعيد النتائج
```
**هذا كل شيء.** يتعامل OpenClaw مع `git clone`، `pip install`، إعداد التهيئة، وتنفيذ خط الأنابيب تلقائياً. أنت فقط تتحدث.
<details>
<summary>💡 ماذا يحدث خلف الكواليس</summary>
1. يقرأ OpenClaw ملف `RESEARCHCLAW_AGENTS.md` → يتعلم دور منسّق البحث
2. يقرأ OpenClaw ملف `README.md` → يفهم التثبيت وبنية خط الأنابيب
3. ينسخ OpenClaw ملف `config.researchclaw.example.yaml` → `config.yaml`
4. يسأل عن مفتاح API لنموذج اللغة (أو يستخدم متغير البيئة)
5. يشغّل `pip install -e .` + `researchclaw run --topic "..." --auto-approve`
6. يعيد الورقة و LaTeX والتجارب والاستشهادات
</details>
### 🔌 جسر OpenClaw (متقدم)
للتكامل الأعمق، يتضمن AutoResearchClaw **نظام محوّلات جسر** مع 6 إمكانيات اختيارية:
```yaml
# config.arc.yaml
openclaw_bridge:
use_cron: true # ⏰ عمليات تشغيل بحث مجدولة
use_message: true # 💬 إشعارات التقدم (Discord/Slack/Telegram)
use_memory: true # 🧠 استمرارية المعرفة عبر الجلسات
use_sessions_spawn: true # 🔀 إطلاق جلسات فرعية متوازية للمراحل المتزامنة
use_web_fetch: true # 🌐 بحث ويب مباشر أثناء مراجعة الأدبيات
use_browser: false # 🖥️ جمع الأوراق عبر المتصفح
```
كل علامة تفعّل بروتوكول محوّل مُحدد النوع. عندما يوفر OpenClaw هذه الإمكانيات، تستهلكها المحوّلات بدون تغييرات في الكود. راجع [`integration-guide.md`](integration-guide.md) للتفاصيل الكاملة.
### ACP (Agent Client Protocol)
يمكن لـ AutoResearchClaw استخدام **أي وكيل برمجة متوافق مع ACP** كواجهة خلفية لنموذج اللغة — بدون الحاجة لمفاتيح API. يتواصل الوكيل عبر [acpx](https://github.com/openclaw/acpx)، ويحافظ على جلسة واحدة مستمرة عبر جميع مراحل خط الأنابيب الـ 23.
| الوكيل | الأمر | ملاحظات |
|-------|---------|-------|
| Claude Code | `claude` | Anthropic |
| Codex CLI | `codex` | OpenAI |
| Copilot CLI | `gh` | GitHub |
| Gemini CLI | `gemini` | Google |
| OpenCode | `opencode` | SST |
| Kimi CLI | `kimi` | Moonshot |
```yaml
# config.yaml — مثال ACP
llm:
provider: "acp"
acp:
agent: "claude" # أي أمر CLI لوكيل متوافق مع ACP
cwd: "." # دليل العمل للوكيل
# لا حاجة لـ base_url أو api_key — الوكيل يدير مصادقته بنفسه.
```
```bash
# فقط شغّل — الوكيل يستخدم بيانات اعتماده الخاصة
researchclaw run --config config.yaml --topic "Your research idea" --auto-approve
```
### 🛠️ طرق أخرى للتشغيل
| الطريقة | الكيفية |
|--------|-----|
| **سطر أوامر مستقل** | `researchclaw setup` → `researchclaw init` → `researchclaw run --topic "..." --auto-approve` |
| **واجهة Python البرمجية** | `from researchclaw.pipeline import Runner; Runner(config).run()` |
| **Claude Code** | يقرأ `RESEARCHCLAW_CLAUDE.md` — فقط قل *"شغّل بحثاً عن [موضوع]"* |
| **Copilot CLI** | `researchclaw run --topic "..."` مع `llm.acp.agent: "gh"` |
| **OpenCode** | يقرأ `.claude/skills/` — نفس واجهة اللغة الطبيعية |
| **أي واجهة ذكاء اصطناعي** | قدّم `RESEARCHCLAW_AGENTS.md` كسياق → الوكيل يبدأ تلقائياً |
---
## 🔬 خط الأنابيب: 23 مرحلة، 8 أطوار
```
Phase A: تحديد نطاق البحث Phase E: تنفيذ التجارب
1. TOPIC_INIT 12. EXPERIMENT_RUN
2. PROBLEM_DECOMPOSE 13. ITERATIVE_REFINE ← إصلاح ذاتي
Phase B: اكتشاف الأدبيات Phase F: التحليل والقرار
3. SEARCH_STRATEGY 14. RESULT_ANALYSIS ← متعدد الوكلاء
4. LITERATURE_COLLECT ← API حقيقي 15. RESEARCH_DECISION ← PIVOT/REFINE
5. LITERATURE_SCREEN [بوابة]
6. KNOWLEDGE_EXTRACT Phase G: كتابة الورقة
16. PAPER_OUTLINE
Phase C: توليف المعرفة 17. PAPER_DRAFT
7. SYNTHESIS 18. PEER_REVIEW ← فحص الأدلة
8. HYPOTHESIS_GEN ← نقاش 19. PAPER_REVISION
Phase D: تصميم التجارب Phase H: الإنهاء
9. EXPERIMENT_DESIGN [بوابة] 20. QUALITY_GATE [بوابة]
10. CODE_GENERATION 21. KNOWLEDGE_ARCHIVE
11. RESOURCE_PLANNING 22. EXPORT_PUBLISH ← LaTeX
23. CITATION_VERIFY ← فحص الصلة
```
> **مراحل البوابات** (5، 9، 20) تتوقف للحصول على موافقة بشرية أو موافقة تلقائية مع `--auto-approve`. عند الرفض، يعود خط الأنابيب للخلف.
> **حلقات القرار**: يمكن للمرحلة 15 تفعيل REFINE (→ المرحلة 13) أو PIVOT (→ المرحلة 8)، مع إصدار تلقائي للمخرجات.
<details>
<summary>📋 ماذا يفعل كل طور</summary>
| الطور | ما يحدث |
|-------|-------------|
| **A: تحديد النطاق** | يفكك نموذج اللغة الموضوع إلى شجرة مشاكل منظمة مع أسئلة بحثية |
| **A+: العتاد** | كشف تلقائي لـ GPU (NVIDIA CUDA / Apple MPS / CPU فقط)، تحذير إذا كان العتاد المحلي محدوداً، تكييف توليد الكود وفقاً لذلك |
| **B: الأدبيات** | بحث متعدد المصادر (OpenAlex → Semantic Scholar → arXiv) عن أوراق حقيقية، فرز حسب الصلة، استخلاص بطاقات معرفية |
| **C: التوليف** | تجميع النتائج، تحديد فجوات البحث، توليد فرضيات قابلة للاختبار عبر نقاش متعدد الوكلاء |
| **D: التصميم** | تصميم خطة التجارب، توليد كود Python قابل للتشغيل واعٍ بالعتاد (مستوى GPU → اختيار الحزم)، تقدير احتياجات الموارد |
| **E: التنفيذ** | تشغيل التجارب في بيئة معزولة، كشف NaN/Inf وأخطاء وقت التشغيل، إصلاح ذاتي للكود عبر إصلاح مُستهدف بنموذج اللغة |
| **F: التحليل** | تحليل متعدد الوكلاء للنتائج؛ قرار مستقل PROCEED / REFINE / PIVOT مع المبررات |
| **G: الكتابة** | مخطط → صياغة قسم بقسم (5,000-6,500 كلمة) → مراجعات أقران (مع اتساق المنهجية والأدلة) → مراجعة مع حماية الطول |
| **H: الإنهاء** | بوابة جودة، أرشفة المعرفة، تصدير LaTeX مع قالب المؤتمر، التحقق من سلامة الاستشهادات + الصلة |
</details>
---
## ✨ الميزات الرئيسية
| الميزة | الوصف |
|---------|------------|
| **📚 أدبيات متعددة المصادر** | أوراق حقيقية من OpenAlex و Semantic Scholar و arXiv — توسيع الاستعلام، إزالة التكرار، قاطع دائرة مع تدهور أنيق |
| **🔍 تحقق من الاستشهادات على 4 طبقات** | فحص arXiv ID → CrossRef/DataCite DOI → مطابقة عنوان Semantic Scholar → تقييم صلة LLM. المراجع المُلفّقة تُزال تلقائياً. |
| **🖥️ تنفيذ واعٍ بالعتاد** | كشف تلقائي لـ GPU (NVIDIA CUDA / Apple MPS / CPU فقط) مع تكييف توليد الكود والاستيرادات ونطاق التجارب |
| **🦾 OpenCode Beast Mode** | التجارب المعقدة تُوجّه تلقائياً إلى [OpenCode](https://github.com/anomalyco/opencode) — يولّد مشاريع متعددة الملفات مع بنى مخصصة وحلقات تدريب ودراسات استئصال. التثبيت عبر `researchclaw setup`. |
| **🧪 تجارب في بيئة معزولة** | كود مُتحقق بـ AST، إطار غير قابل للتعديل، فشل سريع عند NaN/Inf، إصلاح ذاتي، تحسين تكراري (حتى 10 جولات)، التقاط نتائج جزئية |
| **📝 كتابة بمستوى المؤتمرات** | قوالب NeurIPS/ICML/ICLR، صياغة قسم بقسم (5,000-6,500 كلمة)، حماية ضد التلفيق، حماية طول المراجعة، فرض مضاد لإخلاءات المسؤولية |
| **📐 تبديل القوالب** | `neurips_2025`، `iclr_2026`، `icml_2026` — Markdown → LaTeX مع رياضيات وجداول وأشكال ومراجع تبادلية و `\cite{}` |
| **🚦 بوابات الجودة** | 3 بوابات بمشاركة بشرية (المراحل 5، 9، 20) مع إمكانية التراجع. تخطّ باستخدام `--auto-approve`. |
---
## 🧠 تكامل MetaClaw
**AutoResearchClaw + [MetaClaw](https://github.com/aiming-lab/MetaClaw) = خط أنابيب يتعلم من كل تشغيل.**
يضيف MetaClaw **نقل المعرفة عبر التشغيلات** إلى AutoResearchClaw. عند التفعيل، يلتقط خط الأنابيب تلقائياً الدروس من الإخفاقات والتحذيرات، ويحوّلها إلى مهارات قابلة لإعادة الاستخدام، ويحقنها في جميع مراحل خط الأنابيب الـ 23 في التشغيلات اللاحقة — بحيث لا تتكرر نفس الأخطاء أبداً.
### كيف يعمل
```
Run N ينفّذ → الإخفاقات/التحذيرات تُلتقط كـ Lessons
↓
MetaClaw Lesson → تحويل إلى Skill
↓
ملفات arc-* Skill تُخزّن في ~/.metaclaw/skills/
↓
Run N+1 → build_overlay() يحقن المهارات في كل أمر LLM
↓
LLM يتجنب المزالق المعروفة → جودة أعلى، محاولات أقل
```
### الإعداد السريع
```bash
# 1. تثبيت MetaClaw (إذا لم يكن مُثبّتاً)
pip install metaclaw
# 2. التفعيل في التهيئة
```
```yaml
# config.arc.yaml
metaclaw_bridge:
enabled: true
proxy_url: "http://localhost:30000" # وكيل MetaClaw (اختياري)
skills_dir: "~/.metaclaw/skills" # أين تُخزّن المهارات
fallback_url: "https://api.openai.com/v1" # بديل LLM مباشر
fallback_api_key: "" # مفتاح API لعنوان البديل
lesson_to_skill:
enabled: true
min_severity: "warning" # تحويل التحذيرات + الأخطاء
max_skills_per_run: 3
```
```bash
# 3. شغّل كالمعتاد — MetaClaw يعمل بشفافية
researchclaw run --config config.arc.yaml --topic "Your idea" --auto-approve
```
بعد كل تشغيل، تحقق من `~/.metaclaw/skills/arc-*/SKILL.md` لمشاهدة المهارات التي تعلّمها خط أنابيبك.
### نتائج التجارب
في تجارب A/B مُحكمة (نفس الموضوع، نفس LLM، نفس التهيئة):
| المقياس | خط الأساس | مع MetaClaw | التحسين |
|---------|----------|---------------|----------|
| معدل إعادة المحاولة لكل مرحلة | 10.5% | 7.9% | **-24.8%** |
| عدد دورات REFINE | 2.0 | 1.2 | **-40.0%** |
| إكمال مراحل خط الأنابيب | 18/19 | 19/19 | **+5.3%** |
| درجة المتانة الإجمالية (مركّبة) | 0.714 | 0.845 | **+18.3%** |
> درجة المتانة المركّبة هي متوسط مرجّح لمعدل إكمال المراحل (40%) وتقليل المحاولات (30%) وكفاءة دورات REFINE (30%).
### التوافق العكسي
- **الافتراضي: مُعطّل.** إذا كان `metaclaw_bridge` غائباً أو `enabled: false`، يعمل خط الأنابيب تماماً كما كان.
- **بدون تبعيات جديدة.** MetaClaw اختياري — خط الأنابيب الأساسي يعمل بدونه.
- **جميع الاختبارات الـ 1,823 الحالية تنجح** مع وجود كود التكامل.
---
## ⚙️ مرجع التهيئة
<details>
<summary>انقر لتوسيع مرجع التهيئة الكامل</summary>
```yaml
# === المشروع ===
project:
name: "my-research" # معرّف المشروع
mode: "docs-first" # docs-first | semi-auto | full-auto
# === البحث ===
research:
topic: "..." # موضوع البحث (مطلوب)
domains: ["ml", "nlp"] # مجالات البحث للبحث في الأدبيات
daily_paper_count: 8 # عدد الأوراق المستهدف لكل استعلام بحث
quality_threshold: 4.0 # الحد الأدنى لدرجة جودة الأوراق
# === وقت التشغيل ===
runtime:
timezone: "America/New_York" # للطوابع الزمنية
max_parallel_tasks: 3 # حد التجارب المتزامنة
approval_timeout_hours: 12 # مهلة مرحلة البوابة
retry_limit: 2 # عدد إعادة المحاولة عند فشل المرحلة
# === نموذج اللغة ===
llm:
provider: "openai-compatible" # openai | openrouter | deepseek | minimax | acp | openai-compatible
base_url: "https://..." # نقطة نهاية API (مطلوب لـ openai-compatible)
api_key_env: "OPENAI_API_KEY" # متغير بيئة لمفتاح API (مطلوب لـ openai-compatible)
api_key: "" # أو ضع المفتاح هنا مباشرة
primary_model: "gpt-4o" # النموذج الأساسي
fallback_models: ["gpt-4o-mini"] # سلسلة النماذج الاحتياطية
s2_api_key: "" # مفتاح Semantic Scholar API (اختياري، حدود معدل أعلى)
acp: # يُستخدم فقط عند provider: "acp"
agent: "claude" # أمر CLI لوكيل ACP (claude، codex، gemini، إلخ)
cwd: "." # دليل العمل للوكيل
# === التجارب ===
experiment:
mode: "sandbox" # simulated | sandbox | docker | ssh_remote
time_budget_sec: 300 # أقصى وقت تنفيذ لكل تشغيل (الافتراضي: 300 ثانية)
max_iterations: 10 # أقصى عدد تكرارات التحسين
metric_key: "val_loss" # اسم المقياس الأساسي
metric_direction: "minimize" # minimize | maximize
sandbox:
python_path: ".venv/bin/python"
gpu_required: false
allowed_imports: [math, random, json, csv, numpy, torch, sklearn]
max_memory_mb: 4096
docker:
image: "researchclaw/experiment:latest"
network_policy: "setup_only" # none | setup_only | pip_only | full
gpu_enabled: true
memory_limit_mb: 8192
auto_install_deps: true # كشف تلقائي للاستيراد → requirements.txt
ssh_remote:
host: "" # اسم مضيف خادم GPU
gpu_ids: [] # معرّفات GPU المتاحة
remote_workdir: "/tmp/researchclaw_experiments"
opencode: # OpenCode Beast Mode (يُثبّت تلقائياً عبر `researchclaw setup`)
enabled: true # المفتاح الرئيسي (الافتراضي: true)
auto: true # تشغيل تلقائي بدون تأكيد (الافتراضي: true)
complexity_threshold: 0.2 # 0.0-1.0 — أعلى = فقط للتجارب المعقدة
model: "" # تجاوز النموذج (فارغ = يستخدم llm.primary_model)
timeout_sec: 600 # أقصى ثوانٍ لتوليد OpenCode
max_retries: 1 # عدد المحاولات عند الفشل
workspace_cleanup: true # حذف مساحة العمل المؤقتة بعد الجمع
# === التصدير ===
export:
target_conference: "neurips_2025" # neurips_2025 | iclr_2026 | icml_2026
authors: "Anonymous"
bib_file: "references"
# === الأوامر النصية ===
prompts:
custom_file: "" # مسار ملف YAML للأوامر المخصصة (فارغ = الافتراضي)
# === الأمان ===
security:
hitl_required_stages: [5, 9, 20] # المراحل التي تتطلب موافقة بشرية
allow_publish_without_approval: false
redact_sensitive_logs: true
# === قاعدة المعرفة ===
knowledge_base:
backend: "markdown" # markdown | obsidian
root: "docs/kb"
# === الإشعارات ===
notifications:
channel: "console" # console | discord | slack
target: ""
# === جسر MetaClaw (اختياري) ===
metaclaw_bridge:
enabled: false # اضبط على true لتفعيل التعلم عبر التشغيلات
proxy_url: "http://localhost:30000" # عنوان وكيل MetaClaw
skills_dir: "~/.metaclaw/skills" # أين تُخزّن مهارات arc-*
fallback_url: "" # بديل LLM مباشر عند عدم توفر الوكيل
fallback_api_key: "" # مفتاح API لنقطة نهاية البديل
lesson_to_skill:
enabled: true # تحويل الدروس إلى مهارات تلقائياً
min_severity: "warning" # أدنى شدة للتحويل
max_skills_per_run: 3 # أقصى مهارات جديدة لكل تشغيل
# === جسر OpenClaw ===
openclaw_bridge:
use_cron: false # عمليات تشغيل بحث مجدولة
use_message: false # إشعارات التقدم
use_memory: false # استمرارية المعرفة عبر الجلسات
use_sessions_spawn: false # إطلاق جلسات فرعية متوازية
use_web_fetch: false # بحث ويب مباشر
use_browser: false # جمع الأوراق عبر المتصفح
```
</details>
---
## 🙏 شكر وتقدير
مستوحى من:
- 🔬 [AI Scientist](https://github.com/SakanaAI/AI-Scientist) (Sakana AI) — رائد البحث الآلي
- 🧠 [AutoResearch](https://github.com/karpathy/autoresearch) (Andrej Karpathy) — أتمتة البحث من البداية إلى النهاية
- 🌐 [FARS](https://analemma.ai/blog/introducing-fars/) (Analemma) — نظام بحث مؤتمت بالكامل
---
## 📄 الرخصة
MIT — راجع [LICENSE](../LICENSE) للتفاصيل.
---
## 📌 الاستشهاد
إذا وجدت AutoResearchClaw مفيداً، يرجى الاستشهاد:
```bibtex
@misc{liu2026autoresearchclaw,
  author = {Liu, Jiaqi and Xia, Peng and Han, Siwei and Qiu, Shi and Zhang, Letian and Chen, Guiming and Tu, Haoqin and Yang, Xinyu and Zhou, Jiawei and Zhu, Hongtu and Li, Yun and Zhou, Yuyin and Zheng, Zeyu and Xie, Cihang and Ding, Mingyu and Yao, Huaxiu},
title = {AutoResearchClaw: Fully Autonomous Research from Idea to Paper},
year = {2026},
organization = {GitHub},
url = {https://github.com/aiming-lab/AutoResearchClaw},
}
```
<p align="center">
<sub>بُني بـ 🦞 بواسطة فريق AutoResearchClaw</sub>
</p>
================================================
FILE: docs/README_CN.md
================================================
<p align="center">
<img src="../image/logo.png" width="700" alt="AutoResearchClaw Logo">
</p>
<h2 align="center"><b>聊一个想法。出一篇论文。全自动 & 自演化。</b></h2>
<p align="center">
<b><i><font size="5">直接与 <a href="#-openclaw-集成">OpenClaw</a> 对话:"研究 X" → 搞定。</font></i></b>
</p>
<p align="center">
<img src="../image/framework_v2.png" width="100%" alt="AutoResearchClaw Framework">
</p>
<p align="center">
<a href="../LICENSE"><img src="https://img.shields.io/badge/License-MIT-yellow.svg" alt="MIT License"></a>
<a href="https://python.org"><img src="https://img.shields.io/badge/Python-3.11%2B-3776AB?logo=python&logoColor=white" alt="Python 3.11+"></a>
<a href="#测试"><img src="https://img.shields.io/badge/Tests-1823%20passed-brightgreen?logo=pytest&logoColor=white" alt="1823 Tests Passed"></a>
<a href="https://github.com/aiming-lab/AutoResearchClaw"><img src="https://img.shields.io/badge/GitHub-AutoResearchClaw-181717?logo=github" alt="GitHub"></a>
<a href="#-openclaw-集成"><img src="https://img.shields.io/badge/OpenClaw-Compatible-ff4444?logo=data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAyNCAyNCI+PHBhdGggZD0iTTEyIDJDNi40OCAyIDIgNi40OCAyIDEyczQuNDggMTAgMTAgMTAgMTAtNC40OCAxMC0xMFMxNy41MiAyIDEyIDJ6IiBmaWxsPSJ3aGl0ZSIvPjwvc3ZnPg==" alt="OpenClaw Compatible"></a>
<a href="https://discord.gg/u4ksqW5P"><img src="https://img.shields.io/badge/Discord-Join%20Community-5865F2?logo=discord&logoColor=white" alt="Discord"></a>
</p>
<p align="center">
<a href="../README.md">🇺🇸 English</a> ·
<a href="README_CN.md">🇨🇳 中文</a> ·
<a href="README_JA.md">🇯🇵 日本語</a> ·
<a href="README_KO.md">🇰🇷 한국어</a> ·
<a href="README_FR.md">🇫🇷 Français</a> ·
<a href="README_DE.md">🇩🇪 Deutsch</a> ·
<a href="README_ES.md">🇪🇸 Español</a> ·
<a href="README_PT.md">🇧🇷 Português</a> ·
<a href="README_RU.md">🇷🇺 Русский</a> ·
<a href="README_AR.md">🇸🇦 العربية</a>
</p>
<p align="center">
<a href="showcase/SHOWCASE.md">🏆 论文展示</a> · <a href="integration-guide.md">📖 集成指南</a> · <a href="https://discord.gg/u4ksqW5P">💬 Discord 社区</a>
</p>
---
<table>
<tr>
<td width="18%">
<a href="showcase/SHOWCASE.md"><img src="showcase/thumbnails/paper_I_random_matrix-01.png" width="120" alt="Sample Paper"/></a>
</td>
<td valign="middle">
<b>🏆 生成论文展示</b><br><br>
<b>8 篇论文覆盖 8 个领域</b> — 数学、统计、生物、计算、NLP、RL、视觉、鲁棒性 — 完全自主生成,零人工干预。<br><br>
<a href="showcase/SHOWCASE.md"><img src="https://img.shields.io/badge/View_Full_Showcase_→-All_8_Papers-d73a49?style=for-the-badge" alt="View Showcase"></a>
</td>
</tr>
</table>
---
> **🧪 我们正在寻找测试者!** 用你自己的研究想法试试这个流水线 — 任何领域 — 然后 [告诉我们你的反馈](TESTER_GUIDE.md)。你的反馈将直接影响下一个版本。 **[→ Testing Guide](TESTER_GUIDE.md)** | **[→ 中文测试指南](TESTER_GUIDE_CN.md)** | **[→ 日本語テストガイド](TESTER_GUIDE_JA.md)**
---
## 🔥 News
- **[03/22/2026]** [v0.3.2](https://github.com/aiming-lab/AutoResearchClaw/releases/tag/v0.3.2) — **跨平台支持 + 重大稳定性更新** — AutoResearchClaw 现已支持任何 ACP 兼容的 AI 代理后端(Claude Code、Codex CLI、Copilot CLI、Gemini CLI、Kimi CLI),并通过 OpenClaw 桥接支持消息平台(Discord、Telegram、飞书、微信)。新增 CLI-agent 代码生成后端,将 Stage 10 和 13 委托给外部 CLI agent,支持预算控制和超时管理。同时包含反数据捏造系统(VerifiedRegistry + 实验诊断与修复循环),100+ 个 bug 修复,模块化 executor 重构,`--resume` 自动检测,LLM 重试加固,以及社区反馈修复。
- **[03/18/2026]** [v0.3.1](https://github.com/aiming-lab/AutoResearchClaw/releases/tag/v0.3.1) — **OpenCode Beast Mode + Community Contributions** — New "Beast Mode" routes complex code generation to [OpenCode](https://github.com/anomalyco/opencode) with automatic complexity scoring and graceful fallback. Added Novita AI provider support, thread-safety hardening, improved LLM output parsing robustness, and 20+ bug fixes from community PRs and internal audit.
- **[03/17/2026]** [v0.3.0](https://github.com/aiming-lab/AutoResearchClaw/releases/tag/v0.3.0) — **MetaClaw Integration** — AutoResearchClaw now supports [MetaClaw](https://github.com/aiming-lab/MetaClaw) cross-run learning: pipeline failures → structured lessons → reusable skills, injected into all 23 stages. **+18.3%** robustness in controlled experiments. Opt-in (`metaclaw_bridge.enabled: true`), fully backward-compatible. See [Integration Guide](#-metaclaw-integration).
- **[03/16/2026]** [v0.2.0](https://github.com/aiming-lab/AutoResearchClaw/releases/tag/v0.2.0) — Three multi-agent subsystems (CodeAgent, BenchmarkAgent, FigureAgent), hardened Docker sandbox with network-policy-aware execution, 4-round paper quality audit (AI-slop detection, 7-dim review scoring, NeurIPS checklist), and 15+ bug fixes from production runs.
- **[03/15/2026]** [v0.1.0](https://github.com/aiming-lab/AutoResearchClaw/releases/tag/v0.1.0) — We release AutoResearchClaw: a fully autonomous 23-stage research pipeline that turns a single research idea into a conference-ready paper. No human intervention required.
---
## ⚡ 一行命令。一篇论文。
```bash
pip install -e . && researchclaw setup && researchclaw init && researchclaw run --topic "Your research idea here" --auto-approve
```
---
## 🤔 这是什么?
**你有一个灵感,AutoResearchClaw 把它写出来。**
输入一个研究主题——获得一篇完整的学术论文,包含来自 OpenAlex、Semantic Scholar 和 arXiv 的真实文献,硬件感知沙箱实验(自动检测 GPU/MPS/CPU),统计分析,多 Agent 同行评审,以及面向 NeurIPS/ICML/ICLR 的顶会级 LaTeX。不需要盯着看。不需要复制粘贴。不会出现幻觉引用。
<table>
<tr><td>📄</td><td><code>paper_draft.md</code></td><td>完整学术论文(引言、相关工作、方法、实验、结果、结论)</td></tr>
<tr><td>📐</td><td><code>paper.tex</code></td><td>适配顶会模板的 LaTeX 文件(NeurIPS / ICLR / ICML)</td></tr>
<tr><td>📚</td><td><code>references.bib</code></td><td>来自 OpenAlex、Semantic Scholar 和 arXiv 的真实 BibTeX 引用——自动精简至与正文引用一致</td></tr>
<tr><td>🔍</td><td><code>verification_report.json</code></td><td>四层引用完整性 + 相关性核查(arXiv、CrossRef、DataCite、LLM)</td></tr>
<tr><td>🧪</td><td><code>experiment runs/</code></td><td>生成的代码 + 沙箱结果 + 结构化 JSON 指标</td></tr>
<tr><td>📊</td><td><code>charts/</code></td><td>自动生成的条件对比图(含误差线和置信区间)</td></tr>
<tr><td>📝</td><td><code>reviews.md</code></td><td>多 Agent 同行评审(含方法论-证据一致性检查)</td></tr>
<tr><td>🧬</td><td><code>evolution/</code></td><td>从每次运行中提取的自学习教训</td></tr>
<tr><td>📦</td><td><code>deliverables/</code></td><td>所有最终产出集中在一个文件夹——可直接上传 Overleaf 编译</td></tr>
</table>
流水线**端到端无需人工介入**运行。实验失败时自动修复。假设不成立时自主转向。引用是假的?自动删除。
🌍 **随处可用。** AutoResearchClaw 不绑定任何单一平台。你可以通过 CLI 独立运行,接入 [OpenClaw](https://github.com/openclaw/openclaw),或对接任何 ACP 兼容的 AI 代理 —— 🤖 Claude Code、💻 Codex CLI、🐙 Copilot CLI、♊ Gemini CLI、🌙 Kimi CLI,应有尽有。而且,借助 OpenClaw 的消息桥接能力,你还可以从 💬 Discord、✈️ Telegram、🐦 飞书、💚 微信,或任何你团队日常使用的平台发起一次完整的研究。输入一个课题,输出一篇论文 —— 无论你在哪里输入。
---
## 🚀 快速开始
```bash
# 1. 克隆 & 安装
git clone https://github.com/aiming-lab/AutoResearchClaw.git
cd AutoResearchClaw
python3 -m venv .venv && source .venv/bin/activate
pip install -e .
# 2. 初始化(交互式 — 安装 OpenCode Beast Mode,检查 Docker/LaTeX)
researchclaw setup
# 3. 配置
researchclaw init # 交互式:选择 LLM 提供商,创建 config.arc.yaml
# 或手动:cp config.researchclaw.example.yaml config.arc.yaml
# 4. 运行
export OPENAI_API_KEY="sk-..."
researchclaw run --config config.arc.yaml --topic "Your research idea" --auto-approve
```
输出 → `artifacts/rc-YYYYMMDD-HHMMSS-<hash>/deliverables/` — 可编译的 LaTeX、BibTeX、实验代码、图表。
<details>
<summary>📝 最小必要配置</summary>
```yaml
project:
name: "my-research"
research:
topic: "Your research topic here"
llm:
base_url: "https://api.openai.com/v1"
api_key_env: "OPENAI_API_KEY"
primary_model: "gpt-4o"
fallback_models: ["gpt-4o-mini"]
experiment:
mode: "sandbox"
sandbox:
python_path: ".venv/bin/python"
```
</details>
---
## 🧠 有什么不同
| 能力 | 工作原理 |
|------|----------|
| **🔄 PIVOT / REFINE 循环** | 第 15 阶段自主决策:PROCEED、REFINE(调参)或 PIVOT(新方向)。产物自动版本化。 |
| **🤖 多 Agent 辩论** | 假设生成、结果分析、同行评审均使用结构化的多视角辩论。 |
| **🧬 自学习** | 每次运行提取教训(决策理由、运行时警告、指标异常),30 天时间衰减。未来运行从过去的错误中学习。 |
| **📚 知识库** | 每次运行在 6 个类别(决策、实验、发现、文献、问题、评审)中构建结构化知识库。 |
| **🛡️ Sentinel 看门狗** | 后台质量监控:NaN/Inf 检测、论文-证据一致性、引用相关性评分、反数据捏造守卫。 |
---
## 🦞 OpenClaw 集成
<table>
<tr>
<td>
**AutoResearchClaw 是 [OpenClaw](https://github.com/openclaw/openclaw) 兼容服务。** 在 OpenClaw 中安装后,一句话即可启动自主研究——也可通过 CLI、Claude Code 或其他 AI 编码助手独立使用。
</td>
</tr>
</table>
### 🚀 通过 OpenClaw 使用(推荐)
如果你已经在使用 [OpenClaw](https://github.com/openclaw/openclaw) 作为 AI 助手:
```
1️⃣ 把 GitHub 仓库地址分享给 OpenClaw
2️⃣ OpenClaw 自动读取 RESEARCHCLAW_AGENTS.md → 理解流水线
3️⃣ 对它说:"帮我研究 [你的主题]"
4️⃣ 完成 — OpenClaw 自动克隆、安装、配置、运行,然后返回结果
```
**就这么简单。** OpenClaw 自动处理 `git clone`、`pip install`、配置和流水线执行。你只需聊天。
<details>
<summary>💡 底层发生了什么</summary>
1. OpenClaw 读取 `RESEARCHCLAW_AGENTS.md` → 学习研究编排器角色
2. OpenClaw 读取 `README.md` → 理解安装方式和流水线结构
3. OpenClaw 复制 `config.researchclaw.example.yaml` → `config.yaml`
4. 向你询问 LLM API Key(或使用环境变量)
5. 运行 `pip install -e .` + `researchclaw run --topic "..." --auto-approve`
6. 返回论文、LaTeX、实验结果和引用
</details>
### 🔌 OpenClaw Bridge(高级功能)
AutoResearchClaw 内置了 **Bridge 适配器系统**,提供 6 个可选集成能力:
```yaml
# config.arc.yaml
openclaw_bridge:
use_cron: true # ⏰ 定时研究任务
use_message: true # 💬 进度通知(Discord/Slack/Telegram)
use_memory: true # 🧠 跨会话知识持久化
use_sessions_spawn: true # 🔀 为并行阶段派生子会话
use_web_fetch: true # 🌐 文献检索中的实时网络搜索
use_browser: false # 🖥️ 基于浏览器的论文采集
```
每个标志激活一个类型化适配器协议。当 OpenClaw 提供对应能力时,适配器无需改代码即可消费。详见 [`integration-guide.md`](integration-guide.md)。
### ACP (Agent Client Protocol)
AutoResearchClaw 可以使用**任何 ACP 兼容的编码 Agent** 作为其 LLM 后端——无需 API 密钥。Agent 通过 [acpx](https://github.com/openclaw/acpx) 通信,在全部 23 个流水线阶段中维持单个持久会话。
| Agent | 命令 | 备注 |
|-------|------|------|
| Claude Code | `claude` | Anthropic |
| Codex CLI | `codex` | OpenAI |
| Copilot CLI | `gh` | GitHub |
| Gemini CLI | `gemini` | Google |
| OpenCode | `opencode` | SST |
| Kimi CLI | `kimi` | Moonshot |
```yaml
# config.yaml — ACP 示例
llm:
provider: "acp"
acp:
agent: "claude" # 任何 ACP 兼容的 Agent CLI 命令
cwd: "." # Agent 的工作目录
# 无需 base_url 或 api_key — Agent 自行处理认证。
```
```bash
# 直接运行 — Agent 使用自己的凭据
researchclaw run --config config.yaml --topic "Your research idea" --auto-approve
```
### 🛠️ 其他运行方式
| 方式 | 怎么用 |
|------|--------|
| **独立 CLI** | `researchclaw setup` → `researchclaw init` → `researchclaw run --topic "..." --auto-approve` |
| **Python API** | `from researchclaw.pipeline import Runner; Runner(config).run()` |
| **Claude Code** | 读取 `RESEARCHCLAW_CLAUDE.md` — 直接说 *"Run research on [主题]"* |
| **Copilot CLI** | `researchclaw run --topic "..."` 配合 `llm.acp.agent: "gh"` |
| **OpenCode** | 读取 `.claude/skills/` — 同样的自然语言交互 |
| **任何 AI CLI** | 提供 `RESEARCHCLAW_AGENTS.md` 作为上下文 → agent 自动引导 |
---
## 🔬 流水线:23 个阶段,8 个阶段组
```
阶段组 A:研究定义 阶段组 E:实验执行
1. TOPIC_INIT 12. EXPERIMENT_RUN
2. PROBLEM_DECOMPOSE 13. ITERATIVE_REFINE ← 自修复
阶段组 B:文献发现 阶段组 F:分析与决策
3. SEARCH_STRATEGY 14. RESULT_ANALYSIS ← 多Agent
4. LITERATURE_COLLECT ← 真实API 15. RESEARCH_DECISION ← PIVOT/REFINE
5. LITERATURE_SCREEN [门控]
6. KNOWLEDGE_EXTRACT 阶段组 G:论文撰写
16. PAPER_OUTLINE
阶段组 C:知识综合 17. PAPER_DRAFT
7. SYNTHESIS 18. PEER_REVIEW ← 证据审查
8. HYPOTHESIS_GEN ← 辩论 19. PAPER_REVISION
阶段组 D:实验设计 阶段组 H:终稿
9. EXPERIMENT_DESIGN [门控] 20. QUALITY_GATE [门控]
10. CODE_GENERATION 21. KNOWLEDGE_ARCHIVE
11. RESOURCE_PLANNING 22. EXPORT_PUBLISH ← LaTeX
23. CITATION_VERIFY ← 相关性审查
```
> **门控阶段**(5、9、20)可暂停等待人工审批,也可用 `--auto-approve` 自动通过。拒绝后流水线回滚。
> **决策循环**:第 15 阶段可触发 REFINE(→ 第 13 阶段)或 PIVOT(→ 第 8 阶段),自动版本化之前的产物。
<details>
<summary>📋 各阶段组职责</summary>
| 阶段组 | 做什么 |
|--------|--------|
| **A:定义** | LLM 将主题分解为结构化问题树和研究问题 |
| **A+:硬件检测** | 自动检测 GPU(NVIDIA CUDA / Apple MPS / 纯 CPU),性能不足时警告用户,据此调整代码生成策略 |
| **B:文献** | 多源搜索(OpenAlex → Semantic Scholar → arXiv)获取真实论文,按相关性筛选,提取知识卡片 |
| **C:综合** | 聚类研究发现,识别研究空白,通过多 Agent 辩论生成可验证假设 |
| **D:设计** | 设计实验方案,生成硬件感知的可运行 Python 代码(GPU 等级 → 包选择),估算资源需求 |
| **E:执行** | 在沙箱中运行实验,检测 NaN/Inf 和运行时 Bug,通过定向 LLM 修复自愈代码 |
| **F:分析** | 多 Agent 分析实验结果;自主 PROCEED / REFINE / PIVOT 决策并附理由 |
| **G:写作** | 大纲 → 分段撰写初稿(5,000-6,500 词)→ 同行评审(含方法论-证据一致性)→ 带长度保障的修订 |
| **H:终稿** | 质量门控,知识归档,LaTeX 导出(适配顶会模板),引用完整性 + 相关性核查 |
</details>
---
## ✨ 核心功能
| 功能 | 说明 |
|------|------|
| **📚 多源文献** | 来自 OpenAlex、Semantic Scholar 和 arXiv 的真实论文——查询扩展、去重、三态熔断器与优雅降级 |
| **🔍 四层引用核查** | arXiv ID 校验 → CrossRef/DataCite DOI → Semantic Scholar 标题匹配 → LLM 相关性评分。幻觉引用自动删除。 |
| **🖥️ 硬件感知执行** | 自动检测 GPU(NVIDIA CUDA / Apple MPS / 纯 CPU),据此调整代码生成、import 和实验规模 |
| **🦾 OpenCode Beast Mode** | 复杂实验自动路由至 [OpenCode](https://github.com/anomalyco/opencode)——生成多文件项目,含自定义架构、训练循环和消融实验。通过 `researchclaw setup` 安装。 |
| **🧪 沙箱实验** | AST 验证代码、不可变 harness、NaN/Inf 快速失败、自修复、迭代优化(最多 10 轮)、部分结果捕获 |
| **📝 顶会级写作** | NeurIPS/ICML/ICLR 模板,分段撰写(5,000-6,500 词),反数据捏造守卫、修订长度保障、反免责声明强制 |
| **📐 模板切换** | `neurips_2025`、`iclr_2026`、`icml_2026` — Markdown → LaTeX,含数学公式、表格、图片、交叉引用、`\cite{}` |
| **🛡️ 反数据捏造** | VerifiedRegistry 强制论文中使用经过验证的实验数据。自动诊断失败实验并在写作前修复。未验证数字被清理。 |
| **🚦 质量门控** | 3 个人工审批门控(阶段 5、9、20),支持回滚。用 `--auto-approve` 跳过。 |
---
## 🧠 MetaClaw 集成
**AutoResearchClaw + [MetaClaw](https://github.com/aiming-lab/MetaClaw) = 一个能从每次运行中学习的流水线。**
MetaClaw 为 AutoResearchClaw 添加了**跨运行知识迁移**。启用后,流水线会自动从失败和警告中提取教训,将其转化为可复用的技能,并在后续运行中注入到全部 23 个阶段——让同样的错误不再重犯。
### 工作原理
```
运行 N 执行 → 失败/警告被捕获为 Lessons
↓
MetaClaw Lesson → Skill 转换
↓
arc-* Skill 文件存储在 ~/.metaclaw/skills/
↓
运行 N+1 → build_overlay() 将技能注入每个 LLM 提示
↓
LLM 规避已知陷阱 → 更高质量,更少重试
```
### 快速配置
```bash
# 1. 安装 MetaClaw(如未安装)
pip install metaclaw
# 2. 在配置中启用
```
```yaml
# config.arc.yaml
metaclaw_bridge:
enabled: true
proxy_url: "http://localhost:30000" # MetaClaw 代理(可选)
skills_dir: "~/.metaclaw/skills" # 技能存储位置
fallback_url: "https://api.openai.com/v1" # 直连 LLM 回退
fallback_api_key: "" # 回退 URL 的 API key
lesson_to_skill:
enabled: true
min_severity: "warning" # 转换 warning + error
max_skills_per_run: 3
```
```bash
# 3. 照常运行 — MetaClaw 透明运作
researchclaw run --config config.arc.yaml --topic "Your idea" --auto-approve
```
每次运行后,查看 `~/.metaclaw/skills/arc-*/SKILL.md` 以了解流水线学到了哪些技能。
### 实验结果
在对照 A/B 实验中(相同主题、相同 LLM、相同配置):
| 指标 | 基线 | 使用 MetaClaw | 改善 |
|------|------|---------------|------|
| 阶段重试率 | 10.5% | 7.9% | **-24.8%** |
| Refine 循环次数 | 2.0 | 1.2 | **-40.0%** |
| 流水线阶段完成率 | 18/19 | 19/19 | **+5.3%** |
| 整体鲁棒性得分(综合) | 0.714 | 0.845 | **+18.3%** |
> 综合鲁棒性得分是阶段完成率(40%)、重试减少(30%)和 Refine 循环效率(30%)的加权平均。
### 向后兼容性
- **默认:关闭。** 如果 `metaclaw_bridge` 不存在或 `enabled: false`,流水线行为与之前完全一致。
- **无新依赖。** MetaClaw 是可选的——核心流水线无需它即可运行。
- **所有 1,823 项现有测试通过**(包含集成代码)。
---
## ⚙️ 配置参考
<details>
<summary>点击展开完整配置参考</summary>
```yaml
# === 项目 ===
project:
name: "my-research" # 项目标识符
mode: "docs-first" # docs-first | semi-auto | full-auto
# === 研究 ===
research:
topic: "..." # 研究主题(必填)
domains: ["ml", "nlp"] # 文献搜索的研究领域
daily_paper_count: 8 # 每个搜索查询的目标论文数
quality_threshold: 4.0 # 论文最低质量分
# === 运行时 ===
runtime:
timezone: "America/New_York" # 用于时间戳
max_parallel_tasks: 3 # 并发实验限制
approval_timeout_hours: 12 # 门控阶段超时
retry_limit: 2 # 阶段失败重试次数
# === LLM ===
llm:
provider: "openai-compatible" # openai | openrouter | deepseek | minimax | acp | openai-compatible
base_url: "https://..." # API 端点(openai-compatible 必填)
api_key_env: "OPENAI_API_KEY" # API key 环境变量(openai-compatible 必填)
api_key: "" # 或直接填写 key
primary_model: "gpt-4o" # 主模型
fallback_models: ["gpt-4o-mini"] # 回退链
s2_api_key: "" # Semantic Scholar API key(可选,更高速率限制)
acp: # 仅在 provider: "acp" 时使用
agent: "claude" # ACP Agent CLI 命令(claude, codex, gemini 等)
cwd: "." # Agent 的工作目录
# === 实验 ===
experiment:
mode: "sandbox" # simulated | sandbox | docker | ssh_remote
time_budget_sec: 300 # 每次运行最大执行时间(默认:300 秒)
max_iterations: 10 # 最大优化迭代次数
metric_key: "val_loss" # 主指标名称
metric_direction: "minimize" # minimize | maximize
sandbox:
python_path: ".venv/bin/python"
gpu_required: false
allowed_imports: [math, random, json, csv, numpy, torch, sklearn]
max_memory_mb: 4096
docker:
image: "researchclaw/experiment:latest"
network_policy: "setup_only" # none | setup_only | pip_only | full
gpu_enabled: true
memory_limit_mb: 8192
auto_install_deps: true # 自动检测 import → requirements.txt
ssh_remote:
host: "" # GPU 服务器主机名
gpu_ids: [] # 可用 GPU ID
remote_workdir: "/tmp/researchclaw_experiments"
opencode: # OpenCode Beast Mode(通过 `researchclaw setup` 自动安装)
enabled: true # 主开关(默认:true)
auto: true # 无需确认自动触发(默认:true)
complexity_threshold: 0.2 # 0.0-1.0 — 越高 = 仅在复杂实验时触发
model: "" # 覆盖模型(空 = 使用 llm.primary_model)
timeout_sec: 600 # OpenCode 生成最大秒数
max_retries: 1 # 失败重试次数
workspace_cleanup: true # 采集后清理临时工作区
code_agent: # CodeAgent v2 — 多阶段代码生成
enabled: true # 使用 CodeAgent 替代传统单 prompt 代码生成
architecture_planning: true # 生成代码前先生成深度实现蓝图
sequential_generation: true # 按依赖 DAG 逐文件生成
hard_validation: true # 基于 AST 的验证门控(拦截相同消融、硬编码指标)
hard_validation_max_repairs: 2 # 验证失败时最大修复次数
exec_fix_max_iterations: 3 # 执行修复循环最大次数
exec_fix_timeout_sec: 60 # 每次执行修复超时(秒)
benchmark_agent: # BenchmarkAgent — 自动数据集和基线选择
enabled: true # 启用 4-agent 基准测试流水线(Surveyor→Selector→Acquirer→Validator)
enable_hf_search: true # 搜索 HuggingFace Datasets
enable_web_search: true # 搜索 Google Scholar 获取基准
tier_limit: 2 # 数据集级别过滤(1=小型/已缓存,2=中型,3=大型)
min_benchmarks: 1 # 最少需要的数据集数量
min_baselines: 2 # 最少需要的基线方法数量
figure_agent: # FigureAgent — 学术图表生成
enabled: true # 启用 5-agent 图表流水线(Planner→CodeGen→Renderer→Critic→Integrator)
min_figures: 3 # 最少生成图表数
max_figures: 8 # 最多生成图表数
max_iterations: 3 # Critic 驱动的迭代优化次数
dpi: 300 # 输出分辨率
strict_mode: false # 图表生成失败时是否阻塞流水线
repair: # 反数据捏造实验修复
enabled: true # 自动诊断并修复失败的实验
max_cycles: 3 # 修复重试循环数
min_completion_rate: 0.5 # >=50% 条件必须完成才可继续
min_conditions: 2 # 有效实验至少需要 2 个条件
use_opencode: true # 通过 OpenCode Beast Mode 进行修复
# === 网络搜索(可选)===
web_search:
enabled: true # 启用网络增强文献搜索
tavily_api_key_env: "TAVILY_API_KEY" # Tavily API key 环境变量(可选)
enable_scholar: true # Google Scholar 搜索
enable_pdf_extraction: true # 从 PDF 中提取文本
max_web_results: 10 # 每次查询最大网络结果数
# === 导出 ===
export:
target_conference: "neurips_2025" # neurips_2025 | iclr_2026 | icml_2026
authors: "Anonymous"
bib_file: "references"
# === Prompts ===
prompts:
custom_file: "" # 自定义 Prompt YAML 路径(空 = 使用默认)
# === 安全 ===
security:
hitl_required_stages: [5, 9, 20] # 需要人工审批的阶段
allow_publish_without_approval: false
redact_sensitive_logs: true
# === 知识库 ===
knowledge_base:
backend: "markdown" # markdown | obsidian
root: "docs/kb"
# === 通知 ===
notifications:
channel: "console" # console | discord | slack
target: ""
# === MetaClaw Bridge(可选)===
metaclaw_bridge:
enabled: false # 设为 true 以启用跨运行学习
proxy_url: "http://localhost:30000" # MetaClaw 代理 URL
skills_dir: "~/.metaclaw/skills" # arc-* 技能的存储位置
fallback_url: "" # 代理不可用时的直连 LLM 回退
fallback_api_key: "" # 回退端点的 API key
lesson_to_skill:
enabled: true # 自动将教训转换为技能
min_severity: "warning" # 转换的最低严重级别
max_skills_per_run: 3 # 每次流水线运行的最大新技能数
prm: # 过程奖励模型质量门控(可选)
enabled: false # 使用 LLM-as-judge 评分阶段产出
model: "gpt-5.4" # PRM 评判模型
votes: 3 # 多数投票次数
gate_stages: [5, 9, 15, 20] # 应用 PRM 门控的阶段
# === OpenClaw Bridge ===
openclaw_bridge:
use_cron: false # 定时研究运行
use_message: false # 进度通知
use_memory: false # 跨会话知识持久化
use_sessions_spawn: false # 派生并行子会话
use_web_fetch: false # 实时网络搜索
use_browser: false # 基于浏览器的论文采集
```
</details>
---
## 🙏 致谢
灵感来源:
- 🔬 [AI Scientist](https://github.com/SakanaAI/AI-Scientist)(Sakana AI)— 自动化研究先驱
- 🧠 [AutoResearch](https://github.com/karpathy/autoresearch)(Andrej Karpathy)— 端到端研究自动化
- 🌐 [FARS](https://analemma.ai/blog/introducing-fars/)(Analemma)— 全自动研究系统
---
## 📄 许可证
MIT — 详见 [LICENSE](../LICENSE)。
---
## 📌 引用
如果你觉得 AutoResearchClaw 有用,请引用:
```bibtex
@misc{liu2026autoresearchclaw,
author = {Liu, Jiaqi and Xia, Peng and Han, Siwei and Qiu, Shi and Zhang, Letian and Chen, Guiming and Tu, Haoqin and Yang, Xinyu and Zhou, Jiawei and Zhu, Hongtu and Li, Yun and Zhou, Yuyin and Zheng, Zeyu and Xie, Cihang and Ding, Mingyu and Yao, Huaxiu},
title = {AutoResearchClaw: Fully Autonomous Research from Idea to Paper},
year = {2026},
organization = {GitHub},
url = {https://github.com/aiming-lab/AutoResearchClaw},
}
```
<p align="center">
<sub>Built with 🦞 by the AutoResearchClaw team</sub>
</p>
================================================
FILE: docs/README_DE.md
================================================
<p align="center">
<img src="../image/logo.png" width="700" alt="AutoResearchClaw Logo">
</p>
<h2 align="center"><b>Idee besprechen. Paper erhalten. Vollautomatisch & selbstentwickelnd.</b></h2>
<p align="center">
<b><i><font size="5">Einfach mit <a href="#-openclaw-integration">OpenClaw</a> chatten: "Research X" → erledigt.</font></i></b>
</p>
<p align="center">
<img src="../image/framework_v2.png" width="100%" alt="AutoResearchClaw Framework">
</p>
<p align="center">
<a href="../LICENSE"><img src="https://img.shields.io/badge/License-MIT-yellow.svg" alt="MIT License"></a>
<a href="https://python.org"><img src="https://img.shields.io/badge/Python-3.11%2B-3776AB?logo=python&logoColor=white" alt="Python 3.11+"></a>
<a href="#testing"><img src="https://img.shields.io/badge/Tests-1823%20passed-brightgreen?logo=pytest&logoColor=white" alt="1823 Tests Passed"></a>
<a href="https://github.com/aiming-lab/AutoResearchClaw"><img src="https://img.shields.io/badge/GitHub-AutoResearchClaw-181717?logo=github" alt="GitHub"></a>
<a href="#-openclaw-integration"><img src="https://img.shields.io/badge/OpenClaw-Compatible-ff4444?logo=data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAyNCAyNCI+PHBhdGggZD0iTTEyIDJDNi40OCAyIDIgNi40OCAyIDEyczQuNDggMTAgMTAgMTAgMTAtNC40OCAxMC0xMFMxNy41MiAyIDEyIDJ6IiBmaWxsPSJ3aGl0ZSIvPjwvc3ZnPg==" alt="OpenClaw Compatible"></a>
<a href="https://discord.gg/u4ksqW5P"><img src="https://img.shields.io/badge/Discord-Join%20Community-5865F2?logo=discord&logoColor=white" alt="Discord"></a>
</p>
<p align="center">
<a href="../README.md">🇺🇸 English</a> ·
<a href="README_CN.md">🇨🇳 中文</a> ·
<a href="README_JA.md">🇯🇵 日本語</a> ·
<a href="README_KO.md">🇰🇷 한국어</a> ·
<a href="README_FR.md">🇫🇷 Français</a> ·
<a href="README_DE.md">🇩🇪 Deutsch</a> ·
<a href="README_ES.md">🇪🇸 Español</a> ·
<a href="README_PT.md">🇧🇷 Português</a> ·
<a href="README_RU.md">🇷🇺 Русский</a> ·
<a href="README_AR.md">🇸🇦 العربية</a>
</p>
<p align="center">
<a href="showcase/SHOWCASE.md">🏆 Paper-Showcase</a> · <a href="integration-guide.md">📖 Integrationsanleitung</a> · <a href="https://discord.gg/u4ksqW5P">💬 Discord-Community</a>
</p>
---
<table>
<tr>
<td width="18%">
<a href="showcase/SHOWCASE.md"><img src="showcase/thumbnails/paper_I_random_matrix-01.png" width="120" alt="Sample Paper"/></a>
</td>
<td valign="middle">
<b>🏆 Showcase generierter Paper</b><br><br>
<b>8 Paper aus 8 Disziplinen</b> — Mathematik, Statistik, Biologie, Informatik, NLP, RL, Vision, Robustheit — vollstaendig autonom generiert ohne menschliches Eingreifen.<br><br>
<a href="showcase/SHOWCASE.md"><img src="https://img.shields.io/badge/View_Full_Showcase_→-All_8_Papers-d73a49?style=for-the-badge" alt="View Showcase"></a>
</td>
</tr>
</table>
---
> **🧪 Wir suchen Tester!** Teste die Pipeline mit deiner eigenen Forschungsidee — aus jedem Fachgebiet — und [sag uns, was du denkst](TESTER_GUIDE.md). Dein Feedback beeinflusst direkt die naechste Version. **[→ Testing Guide](TESTER_GUIDE.md)** | **[→ 中文测试指南](TESTER_GUIDE_CN.md)** | **[→ 日本語テストガイド](TESTER_GUIDE_JA.md)**
---
## 🔥 News
- **[03/22/2026]** [v0.3.2](https://github.com/aiming-lab/AutoResearchClaw/releases/tag/v0.3.2) — **Plattformuebergreifende Unterstuetzung + grosse Stabilitaet** — AutoResearchClaw laeuft jetzt mit jedem ACP-kompatiblen Agenten-Backend (Claude Code, Codex CLI, Copilot CLI, Gemini CLI, Kimi CLI) und unterstuetzt Messaging-Plattformen (Discord, Telegram, Lark, WeChat) ueber die OpenClaw-Bruecke. Neues CLI-Agent-Code-Generierungs-Backend delegiert Stages 10 und 13 an externe CLI-Agenten mit Budgetkontrolle und Timeout-Management. Enthaelt Anti-Fabrication-System (VerifiedRegistry + Experiment-Diagnose- und Reparaturschleife), 100+ Bugfixes, modulares Executor-Refactoring, `--resume` Auto-Erkennung, LLM-Retry-Haertung und Community-Fixes.
- **[03/18/2026]** [v0.3.1](https://github.com/aiming-lab/AutoResearchClaw/releases/tag/v0.3.1) — **OpenCode Beast Mode + Community Contributions** — New "Beast Mode" routes complex code generation to [OpenCode](https://github.com/anomalyco/opencode) with automatic complexity scoring and graceful fallback. Added Novita AI provider support, thread-safety hardening, improved LLM output parsing robustness, and 20+ bug fixes from community PRs and internal audit.
- **[03/17/2026]** [v0.3.0](https://github.com/aiming-lab/AutoResearchClaw/releases/tag/v0.3.0) — **MetaClaw Integration** — AutoResearchClaw now supports [MetaClaw](https://github.com/aiming-lab/MetaClaw) cross-run learning: pipeline failures → structured lessons → reusable skills, injected into all 23 stages. **+18.3%** robustness in controlled experiments. Opt-in (`metaclaw_bridge.enabled: true`), fully backward-compatible. See [Integration Guide](#-metaclaw-integration).
- **[03/16/2026]** [v0.2.0](https://github.com/aiming-lab/AutoResearchClaw/releases/tag/v0.2.0) — Three multi-agent subsystems (CodeAgent, BenchmarkAgent, FigureAgent), hardened Docker sandbox with network-policy-aware execution, 4-round paper quality audit (AI-slop detection, 7-dim review scoring, NeurIPS checklist), and 15+ bug fixes from production runs.
- **[03/15/2026]** [v0.1.0](https://github.com/aiming-lab/AutoResearchClaw/releases/tag/v0.1.0) — We release AutoResearchClaw: a fully autonomous 23-stage research pipeline that turns a single research idea into a conference-ready paper. No human intervention required.
---
## ⚡ Ein Befehl. Ein Paper.
```bash
pip install -e . && researchclaw setup && researchclaw init && researchclaw run --topic "Your research idea here" --auto-approve
```
---
## 🤔 Was ist das?
**Du denkst es. AutoResearchClaw schreibt es.**
Gib ein Forschungsthema ein — erhalte ein vollstaendiges wissenschaftliches Paper mit echter Literatur von OpenAlex, Semantic Scholar und arXiv, hardwarebewussten Sandbox-Experimenten (automatische GPU/MPS/CPU-Erkennung), statistischer Analyse, Multi-Agenten-Peer-Review und konferenzfertigem LaTeX fuer NeurIPS/ICML/ICLR. Kein Babysitting. Kein Kopieren. Keine halluzinierten Referenzen.
<table>
<tr><td>📄</td><td><code>paper_draft.md</code></td><td>Vollstaendiges wissenschaftliches Paper (Einleitung, Verwandte Arbeiten, Methode, Experimente, Ergebnisse, Fazit)</td></tr>
<tr><td>📐</td><td><code>paper.tex</code></td><td>Konferenzfertiges LaTeX (NeurIPS / ICLR / ICML Templates)</td></tr>
<tr><td>📚</td><td><code>references.bib</code></td><td>Echte BibTeX-Referenzen von OpenAlex, Semantic Scholar und arXiv — automatisch bereinigt, um Inline-Zitationen zu entsprechen</td></tr>
<tr><td>🔍</td><td><code>verification_report.json</code></td><td>4-Schicht-Zitationsintegritaets- und Relevanzpruefung (arXiv, CrossRef, DataCite, LLM)</td></tr>
<tr><td>🧪</td><td><code>experiment runs/</code></td><td>Generierter Code + Sandbox-Ergebnisse + strukturierte JSON-Metriken</td></tr>
<tr><td>📊</td><td><code>charts/</code></td><td>Automatisch generierte Vergleichsdiagramme mit Fehlerbalken und Konfidenzintervallen</td></tr>
<tr><td>📝</td><td><code>reviews.md</code></td><td>Multi-Agenten-Peer-Review mit Methodik-Evidenz-Konsistenzpruefungen</td></tr>
<tr><td>🧬</td><td><code>evolution/</code></td><td>Selbstlernende Erkenntnisse aus jedem Durchlauf</td></tr>
<tr><td>📦</td><td><code>deliverables/</code></td><td>Alle finalen Ergebnisse in einem Ordner — kompilierbereit fuer Overleaf</td></tr>
</table>
Die Pipeline laeuft **vollstaendig ohne menschliches Eingreifen**. Wenn Experimente fehlschlagen, repariert sie sich selbst. Wenn Hypothesen nicht bestaetigt werden, schwenkt sie um. Wenn Zitationen gefaelscht sind, entfernt sie diese.
🌍 **Ueberall ausfuehrbar.** AutoResearchClaw ist nicht an eine einzelne Plattform gebunden. Nutzen Sie es eigenstaendig ueber die CLI, verbinden Sie es mit [OpenClaw](https://github.com/openclaw/openclaw), oder integrieren Sie es mit jedem ACP-kompatiblen AI-Agenten — 🤖 Claude Code, 💻 Codex CLI, 🐙 Copilot CLI, ♊ Gemini CLI, 🌙 Kimi CLI und mehr. Dank der Messaging-Bruecke von OpenClaw koennen Sie eine komplette Forschung von 💬 Discord, ✈️ Telegram, 🐦 Lark (飞书), 💚 WeChat oder jeder anderen Plattform starten, die Ihr Team bereits nutzt. Ein Thema rein, ein Paper raus — egal wo Sie tippen.
---
## 🚀 Schnellstart
```bash
# 1. Klonen & installieren
git clone https://github.com/aiming-lab/AutoResearchClaw.git
cd AutoResearchClaw
python3 -m venv .venv && source .venv/bin/activate
pip install -e .
# 2. Setup (interaktiv — installiert OpenCode Beast Mode, prueft Docker/LaTeX)
researchclaw setup
# 3. Konfigurieren
researchclaw init # Interaktiv: LLM-Anbieter waehlen, erstellt config.arc.yaml
# Oder manuell: cp config.researchclaw.example.yaml config.arc.yaml
# 4. Ausfuehren
export OPENAI_API_KEY="sk-..."
researchclaw run --config config.arc.yaml --topic "Your research idea" --auto-approve
```
Ausgabe → `artifacts/rc-YYYYMMDD-HHMMSS-<hash>/deliverables/` — kompilierfertiges LaTeX, BibTeX, Experimentcode, Diagramme.
<details>
<summary>📝 Minimale erforderliche Konfiguration</summary>
```yaml
project:
name: "my-research"
research:
topic: "Your research topic here"
llm:
base_url: "https://api.openai.com/v1"
api_key_env: "OPENAI_API_KEY"
primary_model: "gpt-4o"
fallback_models: ["gpt-4o-mini"]
experiment:
mode: "sandbox"
sandbox:
python_path: ".venv/bin/python"
```
</details>
---
## 🧠 Was macht es anders
| Faehigkeit | Funktionsweise |
|-----------|---------------|
| **🔄 PIVOT / REFINE Schleife** | Stufe 15 entscheidet autonom: PROCEED, REFINE (Parameter anpassen) oder PIVOT (neue Richtung). Artefakte automatisch versioniert. |
| **🤖 Multi-Agenten-Debatte** | Hypothesengenerierung, Ergebnisanalyse und Peer-Review verwenden jeweils strukturierte Multi-Perspektiven-Debatten. |
| **🧬 Selbstlernen** | Erkenntnisse pro Durchlauf extrahiert (Entscheidungsbegruendungen, Laufzeitwarnungen, Metrikanaomalien) mit 30-Tage-Zeitabklingung. Zukuenftige Durchlaeufe lernen aus vergangenen Fehlern. |
| **📚 Wissensdatenbank** | Jeder Durchlauf baut eine strukturierte KB ueber 6 Kategorien auf (Entscheidungen, Experimente, Ergebnisse, Literatur, Fragen, Reviews). |
| **🛡️ Sentinel Watchdog** | Hintergrund-Qualitaetsmonitor: NaN/Inf-Erkennung, Paper-Evidenz-Konsistenz, Zitationsrelevanz-Bewertung, Anti-Fabrikationsschutz. |
---
## 🦞 OpenClaw-Integration
<table>
<tr>
<td>**AutoResearchClaw ist ein [OpenClaw](https://github.com/openclaw/openclaw)-kompatibler Dienst.** Installiere es in OpenClaw und starte autonome Forschung mit einer einzigen Nachricht — oder verwende es eigenstaendig ueber CLI, Claude Code oder jeden anderen KI-Coding-Assistenten.</td>
</tr>
</table>
### 🚀 Verwendung mit OpenClaw (empfohlen)
Wenn du bereits [OpenClaw](https://github.com/openclaw/openclaw) als KI-Assistenten nutzt:
```
1️⃣ Teile die GitHub-Repo-URL mit OpenClaw
2️⃣ OpenClaw liest automatisch RESEARCHCLAW_AGENTS.md → versteht die Pipeline
3️⃣ Sage: "Research [dein Thema]"
4️⃣ Fertig — OpenClaw klont, installiert, konfiguriert, fuehrt aus und liefert Ergebnisse
```
**Das war's.** OpenClaw uebernimmt `git clone`, `pip install`, Konfiguration und Pipeline-Ausfuehrung automatisch. Du chattest einfach.
<details>
<summary>💡 Was unter der Haube passiert</summary>
1. OpenClaw liest `RESEARCHCLAW_AGENTS.md` → lernt die Forschungs-Orchestrator-Rolle
2. OpenClaw liest `README.md` → versteht Installation und Pipeline-Struktur
3. OpenClaw kopiert `config.researchclaw.example.yaml` → `config.yaml`
4. Fragt nach deinem LLM-API-Schluessel (oder verwendet deine Umgebungsvariable)
5. Fuehrt `pip install -e .` + `researchclaw run --topic "..." --auto-approve` aus
6. Liefert Paper, LaTeX, Experimente und Zitationen zurueck
</details>
### 🔌 OpenClaw Bridge (Fortgeschritten)
Fuer tiefere Integration enthaelt AutoResearchClaw ein **Bridge-Adapter-System** mit 6 optionalen Faehigkeiten:
```yaml
# config.arc.yaml
openclaw_bridge:
use_cron: true # ⏰ Geplante Forschungsdurchlaeufe
use_message: true # 💬 Fortschrittsbenachrichtigungen (Discord/Slack/Telegram)
use_memory: true # 🧠 Sitzungsuebergreifende Wissenspersistenz
use_sessions_spawn: true # 🔀 Parallele Sub-Sessions fuer gleichzeitige Stufen
use_web_fetch: true # 🌐 Live-Websuche waehrend der Literaturrecherche
use_browser: false # 🖥️ Browserbasierte Paper-Sammlung
```
Jedes Flag aktiviert ein typisiertes Adapter-Protokoll. Wenn OpenClaw diese Faehigkeiten bereitstellt, nutzen die Adapter sie ohne Codeaenderungen. Siehe [`integration-guide.md`](integration-guide.md) fuer vollstaendige Details.
### ACP (Agent Client Protocol)
AutoResearchClaw kann **jeden ACP-kompatiblen Coding-Agenten** als LLM-Backend verwenden — keine API-Schluessel erforderlich. Der Agent kommuniziert ueber [acpx](https://github.com/openclaw/acpx) und haelt eine einzige persistente Sitzung ueber alle 23 Pipeline-Stufen aufrecht.
| Agent | Befehl | Hinweise |
|-------|--------|----------|
| Claude Code | `claude` | Anthropic |
| Codex CLI | `codex` | OpenAI |
| Copilot CLI | `gh` | GitHub |
| Gemini CLI | `gemini` | Google |
| OpenCode | `opencode` | SST |
| Kimi CLI | `kimi` | Moonshot |
```yaml
# config.yaml — ACP-Beispiel
llm:
provider: "acp"
acp:
agent: "claude" # Jeder ACP-kompatible Agent-CLI-Befehl
cwd: "." # Arbeitsverzeichnis fuer den Agenten
# Kein base_url oder api_key noetig — der Agent verwaltet seine eigene Authentifizierung.
```
```bash
# Einfach ausfuehren — der Agent verwendet seine eigenen Anmeldedaten
researchclaw run --config config.yaml --topic "Your research idea" --auto-approve
```
### 🛠️ Weitere Ausfuehrungsmoeglichkeiten
| Methode | Anleitung |
|---------|-----------|
| **Standalone CLI** | `researchclaw setup` → `researchclaw init` → `researchclaw run --topic "..." --auto-approve` |
| **Python API** | `from researchclaw.pipeline import Runner; Runner(config).run()` |
| **Claude Code** | Liest `RESEARCHCLAW_CLAUDE.md` — sage einfach *"Run research on [Thema]"* |
| **Copilot CLI** | `researchclaw run --topic "..."` mit `llm.acp.agent: "gh"` |
| **OpenCode** | Liest `.claude/skills/` — gleiche natuerliche Sprachschnittstelle |
| **Jeder KI-CLI** | Uebergib `RESEARCHCLAW_AGENTS.md` als Kontext → Agent bootstrappt automatisch |
---
## 🔬 Pipeline: 23 Stufen, 8 Phasen
```
Phase A: Forschungsplanung Phase E: Experimentausfuehrung
1. TOPIC_INIT 12. EXPERIMENT_RUN
2. PROBLEM_DECOMPOSE 13. ITERATIVE_REFINE ← Selbstheilung
Phase B: Literaturrecherche Phase F: Analyse & Entscheidung
3. SEARCH_STRATEGY 14. RESULT_ANALYSIS ← Multi-Agent
4. LITERATURE_COLLECT ← echte API 15. RESEARCH_DECISION ← PIVOT/REFINE
5. LITERATURE_SCREEN [Gate]
6. KNOWLEDGE_EXTRACT Phase G: Papiererstellung
16. PAPER_OUTLINE
Phase C: Wissenssynthese 17. PAPER_DRAFT
7. SYNTHESIS 18. PEER_REVIEW ← Evidenzpruefung
8. HYPOTHESIS_GEN ← Debatte 19. PAPER_REVISION
Phase D: Experimentdesign Phase H: Finalisierung
9. EXPERIMENT_DESIGN [Gate] 20. QUALITY_GATE [Gate]
10. CODE_GENERATION 21. KNOWLEDGE_ARCHIVE
11. RESOURCE_PLANNING 22. EXPORT_PUBLISH ← LaTeX
23. CITATION_VERIFY ← Relevanzpruefung
```
> **Gate-Stufen** (5, 9, 20) pausieren fuer menschliche Genehmigung oder werden mit `--auto-approve` automatisch genehmigt. Bei Ablehnung wird die Pipeline zurueckgesetzt.
> **Entscheidungsschleifen**: Stufe 15 kann REFINE (→ Stufe 13) oder PIVOT (→ Stufe 8) ausloesen, mit automatischer Artefakt-Versionierung.
<details>
<summary>📋 Was jede Phase bewirkt</summary>
| Phase | Beschreibung |
|-------|-------------|
| **A: Planung** | LLM zerlegt das Thema in einen strukturierten Problembaum mit Forschungsfragen |
| **A+: Hardware** | Automatische GPU-Erkennung (NVIDIA CUDA / Apple MPS / nur CPU), Warnung bei eingeschraenkter Hardware, Codegenerierung wird entsprechend angepasst |
| **B: Literatur** | Multi-Source-Suche (OpenAlex → Semantic Scholar → arXiv) nach echten Papern, Relevanzscreening, Extraktion von Wissenskarten |
| **C: Synthese** | Clustering der Ergebnisse, Identifizierung von Forschungsluecken, Generierung testbarer Hypothesen via Multi-Agenten-Debatte |
| **D: Design** | Experimentplan entwerfen, hardwarebewussten ausfuehrbaren Python-Code generieren (GPU-Stufe → Paketauswahl), Ressourcenbedarf schaetzen |
| **E: Ausfuehrung** | Experimente in Sandbox ausfuehren, NaN/Inf und Laufzeitfehler erkennen, Code via gezielter LLM-Reparatur selbst heilen |
| **F: Analyse** | Multi-Agenten-Analyse der Ergebnisse; autonome PROCEED / REFINE / PIVOT Entscheidung mit Begruendung |
| **G: Schreiben** | Gliederung → abschnittsweises Verfassen (5.000-6.500 Woerter) → Peer-Review (mit Methodik-Evidenz-Konsistenz) → Revision mit Laengenpruefung |
| **H: Finalisierung** | Qualitaets-Gate, Wissensarchivierung, LaTeX-Export mit Konferenztemplate, Zitationsintegritaets- und Relevanzpruefung |
</details>
---
## ✨ Hauptfunktionen
| Funktion | Beschreibung |
|----------|-------------|
| **📚 Multi-Source-Literatur** | Echte Paper von OpenAlex, Semantic Scholar und arXiv — Abfrageerweiterung, Deduplizierung, Circuit Breaker mit Graceful Degradation |
| **🔍 4-Schicht-Zitationsverifikation** | arXiv-ID-Pruefung → CrossRef/DataCite-DOI → Semantic-Scholar-Titelabgleich → LLM-Relevanzbewertung. Halluzinierte Refs automatisch entfernt. |
| **🖥️ Hardwarebewusste Ausfuehrung** | Automatische GPU-Erkennung (NVIDIA CUDA / Apple MPS / nur CPU) und Anpassung von Codegenerierung, Imports und Experimentumfang |
| **🦾 OpenCode Beast Mode** | Komplexe Experimente werden automatisch an [OpenCode](https://github.com/anomalyco/opencode) weitergeleitet — generiert Multi-File-Projekte mit individuellen Architekturen, Trainingsschleifen und Ablationsstudien. Installation ueber `researchclaw setup`. |
| **🧪 Sandbox-Experimente** | AST-validierter Code, unveraenderlicher Harness, NaN/Inf-Schnellabbruch, selbstheilende Reparatur, iterative Verfeinerung (bis zu 10 Runden), Teilergebnis-Erfassung |
| **📝 Konferenzqualitaet** | NeurIPS/ICML/ICLR-Templates, abschnittsweises Verfassen (5.000-6.500 Woerter), Anti-Fabrikationsschutz, Revisions-Laengenschutz, Anti-Disclaimer-Durchsetzung |
| **📐 Template-Umschaltung** | `neurips_2025`, `iclr_2026`, `icml_2026` — Markdown → LaTeX mit Mathematik, Tabellen, Abbildungen, Querverweisen, `\cite{}` |
| **🚦 Qualitaets-Gates** | 3 Human-in-the-Loop-Gates (Stufen 5, 9, 20) mit Rollback. Ueberspringen mit `--auto-approve`. |
---
## 🧠 MetaClaw-Integration
**AutoResearchClaw + [MetaClaw](https://github.com/aiming-lab/MetaClaw) = Eine Pipeline, die aus jedem Durchlauf lernt.**
MetaClaw fuegt **durchlaufuebergreifenden Wissenstransfer** zu AutoResearchClaw hinzu. Wenn aktiviert, erfasst die Pipeline automatisch Erkenntnisse aus Fehlern und Warnungen, konvertiert sie in wiederverwendbare Skills und injiziert diese Skills in alle 23 Pipeline-Stufen bei nachfolgenden Durchlaeufen — damit dieselben Fehler nie wiederholt werden.
### Funktionsweise
```
Durchlauf N wird ausgefuehrt → Fehler/Warnungen als Lektionen erfasst
↓
MetaClaw Lektion → Skill-Konvertierung
↓
arc-* Skill-Dateien in ~/.metaclaw/skills/ gespeichert
↓
Durchlauf N+1 → build_overlay() injiziert Skills in jeden LLM-Prompt
↓
LLM vermeidet bekannte Fallstricke → hoehere Qualitaet, weniger Wiederholungen
```
### Schnelleinrichtung
```bash
# 1. MetaClaw installieren (falls nicht vorhanden)
pip install metaclaw
# 2. In der Konfiguration aktivieren
```
```yaml
# config.arc.yaml
metaclaw_bridge:
enabled: true
proxy_url: "http://localhost:30000" # MetaClaw-Proxy (optional)
skills_dir: "~/.metaclaw/skills" # Wo Skills gespeichert werden
fallback_url: "https://api.openai.com/v1" # Direkter LLM-Fallback
fallback_api_key: "" # API-Schluessel fuer Fallback-URL
lesson_to_skill:
enabled: true
min_severity: "warning" # Warnungen + Fehler konvertieren
max_skills_per_run: 3
```
```bash
# 3. Wie gewohnt ausfuehren — MetaClaw arbeitet transparent
researchclaw run --config config.arc.yaml --topic "Your idea" --auto-approve
```
Nach jedem Durchlauf kannst du `~/.metaclaw/skills/arc-*/SKILL.md` pruefen, um die erlernten Skills deiner Pipeline zu sehen.
### Experimentergebnisse
In kontrollierten A/B-Experimenten (gleiches Thema, gleiches LLM, gleiche Konfiguration):
| Metrik | Baseline | Mit MetaClaw | Verbesserung |
|--------|----------|--------------|--------------|
| Stufen-Wiederholungsrate | 10.5% | 7.9% | **-24.8%** |
| Anzahl REFINE-Zyklen | 2.0 | 1.2 | **-40.0%** |
| Pipeline-Stufenabschluss | 18/19 | 19/19 | **+5.3%** |
| Gesamtrobustheitswert (Komposit) | 0.714 | 0.845 | **+18.3%** |
> Der Komposit-Robustheitswert ist ein gewichteter Durchschnitt aus Stufenabschlussrate (40%), Wiederholungsreduktion (30%) und REFINE-Zykluseffizienz (30%).
### Abwaertskompatibilitaet
- **Standard: AUS.** Wenn `metaclaw_bridge` fehlt oder `enabled: false`, verhaelt sich die Pipeline exakt wie zuvor.
- **Keine neuen Abhaengigkeiten.** MetaClaw ist optional — die Kern-Pipeline funktioniert ohne.
- **Alle 1.823 bestehenden Tests bestehen** mit dem Integrationscode.
---
## ⚙️ Konfigurationsreferenz
<details>
<summary>Klicken zum Aufklappen der vollstaendigen Konfigurationsreferenz</summary>
```yaml
# === Projekt ===
project:
name: "my-research" # Projektbezeichner
mode: "docs-first" # docs-first | semi-auto | full-auto
# === Forschung ===
research:
topic: "..." # Forschungsthema (erforderlich)
domains: ["ml", "nlp"] # Forschungsdomaenen fuer Literatursuche
daily_paper_count: 8 # Ziel-Paperzahl pro Suchabfrage
quality_threshold: 4.0 # Mindestqualitaetswert fuer Paper
# === Laufzeit ===
runtime:
timezone: "America/New_York" # Fuer Zeitstempel
max_parallel_tasks: 3 # Limit gleichzeitiger Experimente
approval_timeout_hours: 12 # Gate-Stufen-Timeout
retry_limit: 2 # Wiederholungsanzahl bei Stufenfehler
# === LLM ===
llm:
provider: "openai-compatible" # openai | openrouter | deepseek | minimax | acp | openai-compatible
base_url: "https://..." # API-Endpunkt (erforderlich fuer openai-compatible)
api_key_env: "OPENAI_API_KEY" # Umgebungsvariable fuer API-Schluessel (erforderlich fuer openai-compatible)
api_key: "" # Oder Schluessel direkt eintragen
primary_model: "gpt-4o" # Primaeres Modell
fallback_models: ["gpt-4o-mini"] # Fallback-Kette
s2_api_key: "" # Semantic Scholar API-Schluessel (optional, hoehere Rate-Limits)
acp: # Nur verwendet wenn provider: "acp"
agent: "claude" # ACP-Agent-CLI-Befehl (claude, codex, gemini, etc.)
cwd: "." # Arbeitsverzeichnis fuer den Agenten
# === Experiment ===
experiment:
mode: "sandbox" # simulated | sandbox | docker | ssh_remote
time_budget_sec: 300 # Max. Ausfuehrungszeit pro Durchlauf (Standard: 300s)
max_iterations: 10 # Max. Optimierungsiterationen
metric_key: "val_loss" # Primaerer Metrikname
metric_direction: "minimize" # minimize | maximize
sandbox:
python_path: ".venv/bin/python"
gpu_required: false
allowed_imports: [math, random, json, csv, numpy, torch, sklearn]
max_memory_mb: 4096
docker:
image: "researchclaw/experiment:latest"
network_policy: "setup_only" # none | setup_only | pip_only | full
gpu_enabled: true
memory_limit_mb: 8192
auto_install_deps: true # Automatische Import-Erkennung → requirements.txt
ssh_remote:
host: "" # GPU-Server-Hostname
gpu_ids: [] # Verfuegbare GPU-IDs
remote_workdir: "/tmp/researchclaw_experiments"
opencode: # OpenCode Beast Mode (auto-installiert ueber `researchclaw setup`)
enabled: true # Hauptschalter (Standard: true)
auto: true # Auto-Ausloesung ohne Bestaetigung (Standard: true)
complexity_threshold: 0.2 # 0.0-1.0 — hoeher = nur bei komplexen Experimenten ausloesen
model: "" # Modell ueberschreiben (leer = llm.primary_model verwenden)
timeout_sec: 600 # Max. Sekunden fuer OpenCode-Generierung
max_retries: 1 # Wiederholungsanzahl bei Fehler
workspace_cleanup: true # Temporaeren Workspace nach Sammlung entfernen
# === Export ===
export:
target_conference: "neurips_2025" # neurips_2025 | iclr_2026 | icml_2026
authors: "Anonymous"
bib_file: "references"
# === Prompts ===
prompts:
custom_file: "" # Pfad zur benutzerdefinierten Prompts-YAML (leer = Standardwerte)
# === Sicherheit ===
security:
hitl_required_stages: [5, 9, 20] # Stufen, die menschliche Genehmigung erfordern
allow_publish_without_approval: false
redact_sensitive_logs: true
# === Wissensdatenbank ===
knowledge_base:
backend: "markdown" # markdown | obsidian
root: "docs/kb"
# === Benachrichtigungen ===
notifications:
channel: "console" # console | discord | slack
target: ""
# === MetaClaw Bridge (Optional) ===
metaclaw_bridge:
enabled: false # Auf true setzen fuer durchlaufuebergreifendes Lernen
proxy_url: "http://localhost:30000" # MetaClaw-Proxy-URL
skills_dir: "~/.metaclaw/skills" # Wo arc-* Skills gespeichert werden
fallback_url: "" # Direkter LLM-Fallback wenn Proxy nicht erreichbar
fallback_api_key: "" # API-Schluessel fuer Fallback-Endpunkt
lesson_to_skill:
enabled: true # Lektionen automatisch in Skills konvertieren
min_severity: "warning" # Mindestschwere fuer Konvertierung
max_skills_per_run: 3 # Max. neue Skills pro Pipeline-Durchlauf
# === OpenClaw Bridge ===
openclaw_bridge:
use_cron: false # Geplante Forschungsdurchlaeufe
use_message: false # Fortschrittsbenachrichtigungen
use_memory: false # Sitzungsuebergreifende Wissenspersistenz
use_sessions_spawn: false # Parallele Sub-Sessions starten
use_web_fetch: false # Live-Websuche
use_browser: false # Browserbasierte Paper-Sammlung
```
</details>
---
## 🙏 Danksagungen
Inspiriert von:
- 🔬 [AI Scientist](https://github.com/SakanaAI/AI-Scientist) (Sakana AI) — Pionier der automatisierten Forschung
- 🧠 [AutoResearch](https://github.com/karpathy/autoresearch) (Andrej Karpathy) — End-to-End-Forschungsautomatisierung
- 🌐 [FARS](https://analemma.ai/blog/introducing-fars/) (Analemma) — Fully Automated Research System
---
## 📄 Lizenz
MIT — siehe [LICENSE](../LICENSE) fuer Details.
---
## 📌 Zitation
Wenn du AutoResearchClaw nuetzlich findest, zitiere bitte:
```bibtex
@misc{liu2026autoresearchclaw,
author = {Liu, Jiaqi and Xia, Peng and Han, Siwei and Qiu, Shi and Zhang, Letian and Chen, Guiming and Tu, Haoqin and Yang, Xinyu and Zhou, Jiawei and Zhu, Hongtu and Li, Yun and Zhou, Yuyin and Zheng, Zeyu and Xie, Cihang and Ding, Mingyu and Yao, Huaxiu},
title = {AutoResearchClaw: Fully Autonomous Research from Idea to Paper},
year = {2026},
organization = {GitHub},
url = {https://github.com/aiming-lab/AutoResearchClaw},
}
```
<p align="center">
<sub>Gebaut mit 🦞 vom AutoResearchClaw-Team</sub>
</p>
================================================
FILE: docs/README_ES.md
================================================
<p align="center">
<img src="../image/logo.png" width="700" alt="AutoResearchClaw Logo">
</p>
<h2 align="center"><b>Comparte una idea. Obten un articulo. Totalmente autonomo & autoevolutivo.</b></h2>
<p align="center">
<b><i><font size="5">Chatea con <a href="#-integracion-con-openclaw">OpenClaw</a>: "Investiga X" → hecho.</font></i></b>
</p>
<p align="center">
<img src="../image/framework_v2.png" width="100%" alt="AutoResearchClaw Framework">
</p>
<p align="center">
<a href="../LICENSE"><img src="https://img.shields.io/badge/License-MIT-yellow.svg" alt="MIT License"></a>
<a href="https://python.org"><img src="https://img.shields.io/badge/Python-3.11%2B-3776AB?logo=python&logoColor=white" alt="Python 3.11+"></a>
<a href="#testing"><img src="https://img.shields.io/badge/Tests-1823%20passed-brightgreen?logo=pytest&logoColor=white" alt="1823 Tests Passed"></a>
<a href="https://github.com/aiming-lab/AutoResearchClaw"><img src="https://img.shields.io/badge/GitHub-AutoResearchClaw-181717?logo=github" alt="GitHub"></a>
<a href="#-integracion-con-openclaw"><img src="https://img.shields.io/badge/OpenClaw-Compatible-ff4444?logo=data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAyNCAyNCI+PHBhdGggZD0iTTEyIDJDNi40OCAyIDIgNi40OCAyIDEyczQuNDggMTAgMTAgMTAgMTAtNC40OCAxMC0xMFMxNy41MiAyIDEyIDJ6IiBmaWxsPSJ3aGl0ZSIvPjwvc3ZnPg==" alt="OpenClaw Compatible"></a>
<a href="https://discord.gg/u4ksqW5P"><img src="https://img.shields.io/badge/Discord-Join%20Community-5865F2?logo=discord&logoColor=white" alt="Discord"></a>
</p>
<p align="center">
<a href="../README.md">🇺🇸 English</a> ·
<a href="README_CN.md">🇨🇳 中文</a> ·
<a href="README_JA.md">🇯🇵 日本語</a> ·
<a href="README_KO.md">🇰🇷 한국어</a> ·
<a href="README_FR.md">🇫🇷 Français</a> ·
<a href="README_DE.md">🇩🇪 Deutsch</a> ·
<a href="README_ES.md">🇪🇸 Español</a> ·
<a href="README_PT.md">🇧🇷 Português</a> ·
<a href="README_RU.md">🇷🇺 Русский</a> ·
<a href="README_AR.md">🇸🇦 العربية</a>
</p>
<p align="center">
<a href="showcase/SHOWCASE.md">🏆 Galeria de articulos</a> · <a href="integration-guide.md">📖 Guia de integracion</a> · <a href="https://discord.gg/u4ksqW5P">💬 Comunidad Discord</a>
</p>
---
<table>
<tr>
<td width="18%">
<a href="showcase/SHOWCASE.md"><img src="showcase/thumbnails/paper_I_random_matrix-01.png" width="120" alt="Sample Paper"/></a>
</td>
<td valign="middle">
<b>🏆 Galeria de articulos generados</b><br><br>
<b>8 articulos en 8 dominios</b> — matematicas, estadistica, biologia, computacion, NLP, RL, vision, robustez — generados de forma completamente autonoma sin intervencion humana.<br><br>
<a href="showcase/SHOWCASE.md"><img src="https://img.shields.io/badge/View_Full_Showcase_→-All_8_Papers-d73a49?style=for-the-badge" alt="View Showcase"></a>
</td>
</tr>
</table>
---
> **🧪 Buscamos testers!** Prueba el pipeline con tu propia idea de investigacion — de cualquier campo — y [cuentanos que piensas](TESTER_GUIDE.md). Tu feedback da forma directamente a la proxima version. **[→ Testing Guide](TESTER_GUIDE.md)** | **[→ 中文测试指南](TESTER_GUIDE_CN.md)** | **[→ 日本語テストガイド](TESTER_GUIDE_JA.md)**
---
## 🔥 News
- **[03/22/2026]** [v0.3.2](https://github.com/aiming-lab/AutoResearchClaw/releases/tag/v0.3.2) — **Soporte multiplataforma + estabilidad mayor** — AutoResearchClaw ahora funciona con cualquier agente compatible con ACP (Claude Code, Codex CLI, Copilot CLI, Gemini CLI, Kimi CLI) y soporta plataformas de mensajeria (Discord, Telegram, Lark, WeChat) via el puente OpenClaw. Nuevo backend de generacion de codigo CLI-agent que delega las Stages 10 y 13 a agentes CLI externos con control de presupuesto y gestion de timeouts. Incluye sistema anti-fabricacion (VerifiedRegistry + bucle de diagnostico y reparacion), 100+ correcciones de bugs, refactorizacion modular del executor, auto-deteccion de `--resume`, endurecimiento de reintentos LLM y correcciones de la comunidad.
- **[03/18/2026]** [v0.3.1](https://github.com/aiming-lab/AutoResearchClaw/releases/tag/v0.3.1) — **OpenCode Beast Mode + Community Contributions** — New "Beast Mode" routes complex code generation to [OpenCode](https://github.com/anomalyco/opencode) with automatic complexity scoring and graceful fallback. Added Novita AI provider support, thread-safety hardening, improved LLM output parsing robustness, and 20+ bug fixes from community PRs and internal audit.
- **[03/17/2026]** [v0.3.0](https://github.com/aiming-lab/AutoResearchClaw/releases/tag/v0.3.0) — **MetaClaw Integration** — AutoResearchClaw now supports [MetaClaw](https://github.com/aiming-lab/MetaClaw) cross-run learning: pipeline failures → structured lessons → reusable skills, injected into all 23 stages. **+18.3%** robustness in controlled experiments. Opt-in (`metaclaw_bridge.enabled: true`), fully backward-compatible. See [Integration Guide](#-integracion-metaclaw).
- **[03/16/2026]** [v0.2.0](https://github.com/aiming-lab/AutoResearchClaw/releases/tag/v0.2.0) — Three multi-agent subsystems (CodeAgent, BenchmarkAgent, FigureAgent), hardened Docker sandbox with network-policy-aware execution, 4-round paper quality audit (AI-slop detection, 7-dim review scoring, NeurIPS checklist), and 15+ bug fixes from production runs.
- **[03/15/2026]** [v0.1.0](https://github.com/aiming-lab/AutoResearchClaw/releases/tag/v0.1.0) — We release AutoResearchClaw: a fully autonomous 23-stage research pipeline that turns a single research idea into a conference-ready paper. No human intervention required.
---
## ⚡ Un comando. Un articulo.
```bash
pip install -e . && researchclaw setup && researchclaw init && researchclaw run --topic "Your research idea here" --auto-approve
```
---
## 🤔 Que es esto?
**Tu lo piensas. AutoResearchClaw lo escribe.**
Proporciona un tema de investigacion — recibe un articulo academico completo con literatura real de OpenAlex, Semantic Scholar y arXiv, experimentos en sandbox adaptados al hardware (deteccion automatica GPU/MPS/CPU), analisis estadistico, revision multi-agentes, y LaTeX listo para conferencia orientado a NeurIPS/ICML/ICLR. Sin supervision. Sin copiar y pegar. Sin referencias alucinadas.
<table>
<tr><td>📄</td><td><code>paper_draft.md</code></td><td>Articulo academico completo (Introduccion, Trabajo relacionado, Metodo, Experimentos, Resultados, Conclusion)</td></tr>
<tr><td>📐</td><td><code>paper.tex</code></td><td>LaTeX listo para conferencia (plantillas NeurIPS / ICLR / ICML)</td></tr>
<tr><td>📚</td><td><code>references.bib</code></td><td>Referencias BibTeX reales de OpenAlex, Semantic Scholar y arXiv — auto-depuradas para coincidir con las citas en linea</td></tr>
<tr><td>🔍</td><td><code>verification_report.json</code></td><td>Verificacion de integridad + relevancia de citas en 4 capas (arXiv, CrossRef, DataCite, LLM)</td></tr>
<tr><td>🧪</td><td><code>experiment runs/</code></td><td>Codigo generado + resultados en sandbox + metricas JSON estructuradas</td></tr>
<tr><td>📊</td><td><code>charts/</code></td><td>Graficos de comparacion de condiciones auto-generados con barras de error e intervalos de confianza</td></tr>
<tr><td>📝</td><td><code>reviews.md</code></td><td>Revision por pares multi-agente con verificacion de consistencia metodologia-evidencia</td></tr>
<tr><td>🧬</td><td><code>evolution/</code></td><td>Lecciones de auto-aprendizaje extraidas de cada ejecucion</td></tr>
<tr><td>📦</td><td><code>deliverables/</code></td><td>Todos los entregables finales en una sola carpeta — listos para compilar en Overleaf</td></tr>
</table>
El pipeline se ejecuta **de principio a fin sin intervencion humana**. Cuando los experimentos fallan, se auto-repara. Cuando las hipotesis no se sostienen, pivotea. Cuando las citas son falsas, las elimina.
🌍 **Ejecutalo en cualquier lugar.** AutoResearchClaw no esta atado a una sola plataforma. Usalo de forma independiente por CLI, conectalo a [OpenClaw](https://github.com/openclaw/openclaw), o integralo con cualquier agente compatible con ACP — 🤖 Claude Code, 💻 Codex CLI, 🐙 Copilot CLI, ♊ Gemini CLI, 🌙 Kimi CLI, y mas. Gracias al puente de mensajeria de OpenClaw, puedes iniciar una investigacion completa desde 💬 Discord, ✈️ Telegram, 🐦 Lark (飞书), 💚 WeChat, o cualquier plataforma que tu equipo ya utilice. Un tema de entrada, un paper de salida — sin importar donde lo escribas.
---
## 🚀 Inicio rapido
```bash
# 1. Clonar e instalar
git clone https://github.com/aiming-lab/AutoResearchClaw.git
cd AutoResearchClaw
python3 -m venv .venv && source .venv/bin/activate
pip install -e .
# 2. Setup (interactivo — instala OpenCode beast mode, verifica Docker/LaTeX)
researchclaw setup
# 3. Configurar
researchclaw init # Interactivo: elegir proveedor LLM, crea config.arc.yaml
# O manualmente: cp config.researchclaw.example.yaml config.arc.yaml
# 4. Ejecutar
export OPENAI_API_KEY="sk-..."
researchclaw run --config config.arc.yaml --topic "Your research idea" --auto-approve
```
Salida → `artifacts/rc-YYYYMMDD-HHMMSS-<hash>/deliverables/` — LaTeX listo para compilar, BibTeX, codigo experimental, graficos.
<details>
<summary>📝 Configuracion minima requerida</summary>
```yaml
project:
name: "my-research"
research:
topic: "Your research topic here"
llm:
base_url: "https://api.openai.com/v1"
api_key_env: "OPENAI_API_KEY"
primary_model: "gpt-4o"
fallback_models: ["gpt-4o-mini"]
experiment:
mode: "sandbox"
sandbox:
python_path: ".venv/bin/python"
```
</details>
---
## 🧠 Que lo hace diferente
| Capacidad | Como funciona |
|-----------|--------------|
| **🔄 Bucle PIVOT / REFINE** | La etapa 15 decide de forma autonoma: PROCEED, REFINE (ajustar parametros) o PIVOT (nueva direccion). Artefactos auto-versionados. |
| **🤖 Debate multi-agente** | La generacion de hipotesis, el analisis de resultados y la revision por pares utilizan cada uno debate estructurado multi-perspectiva. |
| **🧬 Auto-aprendizaje** | Lecciones extraidas por ejecucion (justificacion de decisiones, advertencias de ejecucion, anomalias de metricas) con decaimiento temporal de 30 dias. Las ejecuciones futuras aprenden de errores pasados. |
| **📚 Base de conocimiento** | Cada ejecucion construye una KB estructurada en 6 categorias (decisiones, experimentos, hallazgos, literatura, preguntas, revisiones). |
| **🛡️ Vigilante Sentinel** | Monitor de calidad en segundo plano: deteccion NaN/Inf, consistencia articulo-evidencia, puntuacion de relevancia de citas, guardia anti-fabricacion. |
---
## 🦞 Integracion con OpenClaw
<table>
<tr>
<td>

**AutoResearchClaw es un servicio compatible con [OpenClaw](https://github.com/openclaw/openclaw).** Instalalo en OpenClaw y lanza investigacion autonoma con un solo mensaje — o usalo de forma independiente via CLI, Claude Code o cualquier asistente de programacion con IA.

</td>
</tr>
</table>
### 🚀 Uso con OpenClaw (Recomendado)
Si ya usas [OpenClaw](https://github.com/openclaw/openclaw) como tu asistente de IA:
```
1️⃣ Comparte la URL del repositorio de GitHub con OpenClaw
2️⃣ OpenClaw lee automaticamente RESEARCHCLAW_AGENTS.md → comprende el pipeline
3️⃣ Di: "Research [tu tema]"
4️⃣ Listo — OpenClaw clona, instala, configura, ejecuta y devuelve los resultados
```
**Eso es todo.** OpenClaw se encarga de `git clone`, `pip install`, configuracion y ejecucion del pipeline automaticamente. Tu solo chateas.
<details>
<summary>💡 Que sucede internamente</summary>
1. OpenClaw lee `RESEARCHCLAW_AGENTS.md` → aprende el rol de orquestador de investigacion
2. OpenClaw lee `README.md` → comprende la instalacion y la estructura del pipeline
3. OpenClaw copia `config.researchclaw.example.yaml` → `config.yaml`
4. Solicita tu clave API del LLM (o usa tu variable de entorno)
5. Ejecuta `pip install -e .` + `researchclaw run --topic "..." --auto-approve`
6. Devuelve el articulo, LaTeX, experimentos y citas
</details>
### 🔌 Bridge de OpenClaw (Avanzado)
Para una integracion mas profunda, AutoResearchClaw incluye un **sistema de adaptadores bridge** con 6 capacidades opcionales:
```yaml
# config.arc.yaml
openclaw_bridge:
use_cron: true # ⏰ Ejecuciones de investigacion programadas
use_message: true # 💬 Notificaciones de progreso (Discord/Slack/Telegram)
use_memory: true # 🧠 Persistencia de conocimiento entre sesiones
use_sessions_spawn: true # 🔀 Generar sub-sesiones paralelas para etapas concurrentes
use_web_fetch: true # 🌐 Busqueda web en vivo durante la revision de literatura
use_browser: false # 🖥️ Recopilacion de articulos basada en navegador
```
Cada flag activa un protocolo de adaptador tipado. Cuando OpenClaw proporciona estas capacidades, los adaptadores las consumen sin cambios en el codigo. Consulta [`integration-guide.md`](integration-guide.md) para mas detalles.
### ACP (Agent Client Protocol)
AutoResearchClaw puede usar **cualquier agente de programacion compatible con ACP** como backend LLM — sin necesidad de claves API. El agente se comunica via [acpx](https://github.com/openclaw/acpx), manteniendo una sola sesion persistente a traves de las 23 etapas del pipeline.
| Agente | Comando | Notas |
|--------|---------|-------|
| Claude Code | `claude` | Anthropic |
| Codex CLI | `codex` | OpenAI |
| Copilot CLI | `gh` | GitHub |
| Gemini CLI | `gemini` | Google |
| OpenCode | `opencode` | SST |
| Kimi CLI | `kimi` | Moonshot |
```yaml
# config.yaml — ejemplo ACP
llm:
provider: "acp"
acp:
agent: "claude" # Cualquier comando CLI de agente compatible con ACP
cwd: "." # Directorio de trabajo para el agente
# No se necesita base_url ni api_key — el agente gestiona su propia autenticacion.
```
```bash
# Solo ejecuta — el agente usa sus propias credenciales
researchclaw run --config config.yaml --topic "Your research idea" --auto-approve
```
### 🛠️ Otras formas de ejecucion
| Metodo | Como |
|--------|------|
| **CLI independiente** | `researchclaw setup` → `researchclaw init` → `researchclaw run --topic "..." --auto-approve` |
| **API de Python** | `from researchclaw.pipeline import Runner; Runner(config).run()` |
| **Claude Code** | Lee `RESEARCHCLAW_CLAUDE.md` — solo di *"Run research on [tema]"* |
| **Copilot CLI** | `researchclaw run --topic "..."` con `llm.acp.agent: "gh"` |
| **OpenCode** | Lee `.claude/skills/` — la misma interfaz en lenguaje natural |
| **Cualquier CLI de IA** | Proporciona `RESEARCHCLAW_AGENTS.md` como contexto → el agente se auto-configura |
---
## 🔬 Pipeline: 23 etapas, 8 fases
```
Fase A: Alcance de investigacion Fase E: Ejecucion de experimentos
1. TOPIC_INIT 12. EXPERIMENT_RUN
2. PROBLEM_DECOMPOSE 13. ITERATIVE_REFINE ← auto-reparacion
Fase B: Descubrimiento de literatura Fase F: Analisis y decision
3. SEARCH_STRATEGY 14. RESULT_ANALYSIS ← multi-agente
4. LITERATURE_COLLECT ← API real 15. RESEARCH_DECISION ← PIVOT/REFINE
5. LITERATURE_SCREEN [compuerta]
6. KNOWLEDGE_EXTRACT Fase G: Redaccion del articulo
16. PAPER_OUTLINE
Fase C: Sintesis de conocimiento 17. PAPER_DRAFT
7. SYNTHESIS 18. PEER_REVIEW ← verif. evidencia
8. HYPOTHESIS_GEN ← debate 19. PAPER_REVISION
Fase D: Diseno experimental Fase H: Finalizacion
9. EXPERIMENT_DESIGN [compuerta] 20. QUALITY_GATE [compuerta]
10. CODE_GENERATION 21. KNOWLEDGE_ARCHIVE
11. RESOURCE_PLANNING 22. EXPORT_PUBLISH ← LaTeX
23. CITATION_VERIFY ← verif. relevancia
```
> Las **etapas con compuerta** (5, 9, 20) se pausan para aprobacion humana o se auto-aprueban con `--auto-approve`. Al rechazar, el pipeline retrocede.
> **Bucles de decision**: La etapa 15 puede activar REFINE (→ Etapa 13) o PIVOT (→ Etapa 8), con versionado automatico de artefactos.
<details>
<summary>📋 Que hace cada fase</summary>
| Fase | Que sucede |
|------|-----------|
| **A: Alcance** | El LLM descompone el tema en un arbol de problemas estructurado con preguntas de investigacion |
| **A+: Hardware** | Deteccion automatica de GPU (NVIDIA CUDA / Apple MPS / solo CPU), advierte si el hardware local es limitado, adapta la generacion de codigo en consecuencia |
| **B: Literatura** | Busqueda multi-fuente (OpenAlex → Semantic Scholar → arXiv) de articulos reales, filtrado por relevancia, extraccion de fichas de conocimiento |
| **C: Sintesis** | Agrupa hallazgos, identifica brechas de investigacion, genera hipotesis comprobables mediante debate multi-agente |
| **D: Diseno** | Disena plan experimental, genera Python ejecutable adaptado al hardware (nivel de GPU → seleccion de paquetes), estima necesidades de recursos |
| **E: Ejecucion** | Ejecuta experimentos en sandbox, detecta NaN/Inf y errores en tiempo de ejecucion, auto-repara codigo mediante reparacion LLM dirigida |
| **F: Analisis** | Analisis multi-agente de resultados; decision autonoma PROCEED / REFINE / PIVOT con justificacion |
| **G: Redaccion** | Esquema → redaccion seccion por seccion (5,000-6,500 palabras) → revision por pares (con consistencia metodologia-evidencia) → revision con guardia de longitud |
| **H: Finalizacion** | Compuerta de calidad, archivado de conocimiento, exportacion LaTeX con plantilla de conferencia, verificacion de integridad + relevancia de citas |
</details>
---
## ✨ Caracteristicas principales
| Caracteristica | Descripcion |
|----------------|------------|
| **📚 Literatura multi-fuente** | Articulos reales de OpenAlex, Semantic Scholar y arXiv — expansion de consultas, deduplicacion, circuit breaker con degradacion gradual |
| **🔍 Verificacion de citas en 4 capas** | Verificacion de arXiv ID → DOI CrossRef/DataCite → coincidencia de titulo Semantic Scholar → puntuacion de relevancia LLM. Referencias alucinadas auto-eliminadas. |
| **🖥️ Ejecucion adaptada al hardware** | Deteccion automatica de GPU (NVIDIA CUDA / Apple MPS / solo CPU) y adaptacion de la generacion de codigo, imports y escala experimental |
| **🦾 OpenCode Beast Mode** | Los experimentos complejos se enrutan automaticamente a [OpenCode](https://github.com/anomalyco/opencode) — genera proyectos multi-archivo con arquitecturas personalizadas, bucles de entrenamiento y estudios de ablacion. Instalacion via `researchclaw setup`. |
| **🧪 Experimentos en sandbox** | Codigo validado por AST, harness inmutable, fallo rapido NaN/Inf, reparacion auto-curativa, refinamiento iterativo (hasta 10 rondas), captura de resultados parciales |
| **📝 Redaccion de calidad conferencia** | Plantillas NeurIPS/ICML/ICLR, redaccion seccion por seccion (5,000-6,500 palabras), guardia anti-fabricacion, guardia de longitud en revision, enforcement anti-disclaimer |
| **📐 Cambio de plantilla** | `neurips_2025`, `iclr_2026`, `icml_2026` — Markdown → LaTeX con formulas, tablas, figuras, referencias cruzadas, `\cite{}` |
| **🚦 Compuertas de calidad** | 3 compuertas con intervencion humana posible (etapas 5, 9, 20) con retroceso. Omitir con `--auto-approve`. |
---
## 🧠 Integracion MetaClaw
**AutoResearchClaw + [MetaClaw](https://github.com/aiming-lab/MetaClaw) = Un pipeline que aprende de cada ejecucion.**
MetaClaw agrega **transferencia de conocimiento entre ejecuciones** a AutoResearchClaw. Cuando esta habilitado, el pipeline captura automaticamente lecciones de fallos y advertencias, las convierte en habilidades reutilizables, e inyecta esas habilidades en las 23 etapas del pipeline en ejecuciones posteriores — para que los mismos errores nunca se repitan.
### Como funciona
```
Ejecucion N se ejecuta → fallos/advertencias capturados como Lecciones
↓
MetaClaw Leccion → conversion a Habilidad
↓
Archivos de habilidades arc-* almacenados en ~/.metaclaw/skills/
↓
Ejecucion N+1 → build_overlay() inyecta habilidades en cada prompt LLM
↓
El LLM evita trampas conocidas → mayor calidad, menos reintentos
```
### Configuracion rapida
```bash
# 1. Instalar MetaClaw (si no esta instalado)
pip install metaclaw
# 2. Habilitar en tu configuracion
```
```yaml
# config.arc.yaml
metaclaw_bridge:
enabled: true
proxy_url: "http://localhost:30000" # Proxy MetaClaw (opcional)
skills_dir: "~/.metaclaw/skills" # Donde se almacenan las habilidades
fallback_url: "https://api.openai.com/v1" # Fallback directo al LLM
fallback_api_key: "" # Clave API para la URL de fallback
lesson_to_skill:
enabled: true
min_severity: "warning" # Convertir advertencias + errores
max_skills_per_run: 3
```
```bash
# 3. Ejecuta como siempre — MetaClaw funciona de forma transparente
researchclaw run --config config.arc.yaml --topic "Your idea" --auto-approve
```
Despues de cada ejecucion, revisa `~/.metaclaw/skills/arc-*/SKILL.md` para ver las habilidades que tu pipeline ha aprendido.
### Resultados experimentales
En experimentos controlados A/B (mismo tema, mismo LLM, misma configuracion):
| Metrica | Linea base | Con MetaClaw | Mejora |
|---------|------------|--------------|--------|
| Tasa de reintento de etapas | 10.5% | 7.9% | **-24.8%** |
| Conteo de ciclos REFINE | 2.0 | 1.2 | **-40.0%** |
| Completacion de etapas del pipeline | 18/19 | 19/19 | **+5.3%** |
| Puntuacion de robustez global (compuesta) | 0.714 | 0.845 | **+18.3%** |
> La puntuacion de robustez compuesta es un promedio ponderado de la tasa de completacion de etapas (40%), reduccion de reintentos (30%) y eficiencia de ciclos REFINE (30%).
### Retrocompatibilidad
- **Por defecto: DESACTIVADO.** Si `metaclaw_bridge` esta ausente o `enabled: false`, el pipeline se comporta exactamente como antes.
- **Sin nuevas dependencias.** MetaClaw es opcional — el pipeline base funciona sin el.
- **Los 1,823 tests existentes pasan** con el codigo de integracion presente.
---
## ⚙️ Referencia de configuracion
<details>
<summary>Haz clic para expandir la referencia completa de configuracion</summary>
```yaml
# === Proyecto ===
project:
name: "my-research" # Identificador del proyecto
mode: "docs-first" # docs-first | semi-auto | full-auto
# === Investigacion ===
research:
topic: "..." # Tema de investigacion (requerido)
domains: ["ml", "nlp"] # Dominios de investigacion para busqueda de literatura
daily_paper_count: 8 # Articulos objetivo por consulta de busqueda
quality_threshold: 4.0 # Puntuacion minima de calidad para articulos
# === Tiempo de ejecucion ===
runtime:
timezone: "America/New_York" # Para marcas de tiempo
max_parallel_tasks: 3 # Limite de experimentos concurrentes
approval_timeout_hours: 12 # Timeout de etapas con compuerta
retry_limit: 2 # Numero de reintentos por fallo de etapa
# === LLM ===
llm:
provider: "openai-compatible" # openai | openrouter | deepseek | minimax | acp | openai-compatible
base_url: "https://..." # Endpoint de API (requerido para openai-compatible)
api_key_env: "OPENAI_API_KEY" # Variable de entorno para la clave API (requerido para openai-compatible)
api_key: "" # O codifica la clave aqui directamente
primary_model: "gpt-4o" # Modelo principal
fallback_models: ["gpt-4o-mini"] # Cadena de fallback
s2_api_key: "" # Clave API de Semantic Scholar (opcional, mayores limites de tasa)
acp: # Solo se usa cuando provider: "acp"
agent: "claude" # Comando CLI del agente ACP (claude, codex, gemini, etc.)
cwd: "." # Directorio de trabajo para el agente
# === Experimento ===
experiment:
mode: "sandbox" # simulated | sandbox | docker | ssh_remote
time_budget_sec: 300 # Tiempo maximo de ejecucion por corrida (por defecto: 300s)
max_iterations: 10 # Maximo de iteraciones de optimizacion
metric_key: "val_loss" # Nombre de la metrica principal
metric_direction: "minimize" # minimize | maximize
sandbox:
python_path: ".venv/bin/python"
gpu_required: false
allowed_imports: [math, random, json, csv, numpy, torch, sklearn]
max_memory_mb: 4096
docker:
image: "researchclaw/experiment:latest"
network_policy: "setup_only" # none | setup_only | pip_only | full
gpu_enabled: true
memory_limit_mb: 8192
auto_install_deps: true # Deteccion automatica de imports → requirements.txt
ssh_remote:
host: "" # Nombre de host del servidor GPU
gpu_ids: [] # IDs de GPU disponibles
remote_workdir: "/tmp/researchclaw_experiments"
opencode: # OpenCode Beast Mode (auto-instalado via `researchclaw setup`)
enabled: true # Interruptor principal (por defecto: true)
auto: true # Auto-activacion sin confirmacion (por defecto: true)
complexity_threshold: 0.2 # 0.0-1.0 — mas alto = solo se activa para experimentos complejos
model: "" # Modelo a forzar (vacio = usa llm.primary_model)
timeout_sec: 600 # Segundos maximos para generacion OpenCode
max_retries: 1 # Numero de reintentos por fallo
workspace_cleanup: true # Eliminar workspace temporal despues de recoleccion
# === Exportacion ===
export:
target_conference: "neurips_2025" # neurips_2025 | iclr_2026 | icml_2026
authors: "Anonymous"
bib_file: "references"
# === Prompts ===
prompts:
custom_file: "" # Ruta a YAML de prompts personalizados (vacio = valores por defecto)
# === Seguridad ===
security:
hitl_required_stages: [5, 9, 20] # Etapas que requieren aprobacion humana
allow_publish_without_approval: false
redact_sensitive_logs: true
# === Base de conocimiento ===
knowledge_base:
backend: "markdown" # markdown | obsidian
root: "docs/kb"
# === Notificaciones ===
notifications:
channel: "console" # console | discord | slack
target: ""
# === Puente MetaClaw (Opcional) ===
metaclaw_bridge:
enabled: false # Establecer en true para habilitar aprendizaje entre ejecuciones
proxy_url: "http://localhost:30000" # URL del proxy MetaClaw
skills_dir: "~/.metaclaw/skills" # Donde se almacenan las habilidades arc-*
fallback_url: "" # Fallback directo al LLM cuando el proxy esta caido
fallback_api_key: "" # Clave API para el endpoint de fallback
lesson_to_skill:
enabled: true # Convertir lecciones en habilidades automaticamente
min_severity: "warning" # Severidad minima para conversion
max_skills_per_run: 3 # Max de nuevas habilidades por ejecucion del pipeline
# === Bridge de OpenClaw ===
openclaw_bridge:
use_cron: false # Ejecuciones de investigacion programadas
use_message: false # Notificaciones de progreso
use_memory: false # Persistencia de conocimiento entre sesiones
use_sessions_spawn: false # Generar sub-sesiones paralelas
use_web_fetch: false # Busqueda web en vivo
use_browser: false # Recopilacion de articulos basada en navegador
```
</details>
---
## 🙏 Agradecimientos
Inspirado por:
- 🔬 [AI Scientist](https://github.com/SakanaAI/AI-Scientist) (Sakana AI) — Pionero en investigacion automatizada
- 🧠 [AutoResearch](https://github.com/karpathy/autoresearch) (Andrej Karpathy) — Automatizacion de investigacion de principio a fin
- 🌐 [FARS](https://analemma.ai/blog/introducing-fars/) (Analemma) — Sistema de investigacion completamente automatizado
---
## 📄 Licencia
MIT — consulta [LICENSE](../LICENSE) para mas detalles.
---
## 📌 Citacion
Si encuentras AutoResearchClaw util, por favor cita:
```bibtex
@misc{liu2026autoresearchclaw,
  author       = {Liu, Jiaqi and Xia, Peng and Han, Siwei and Qiu, Shi and Zhang, Letian and Chen, Guiming and Tu, Haoqin and Yang, Xinyu and Zhou, Jiawei and Zhu, Hongtu and Li, Yun and Zhou, Yuyin and Zheng, Zeyu and Xie, Cihang and Ding, Mingyu and Yao, Huaxiu},
  title        = {AutoResearchClaw: Fully Autonomous Research from Idea to Paper},
  year         = {2026},
  organization = {GitHub},
  url          = {https://github.com/aiming-lab/AutoResearchClaw},
}
```
<p align="center">
<sub>Construido con 🦞 por el equipo de AutoResearchClaw</sub>
</p>
================================================
FILE: docs/README_FR.md
================================================
<p align="center">
<img src="../image/logo.png" width="700" alt="AutoResearchClaw Logo">
</p>
<h2 align="center"><b>Discutez une idee. Obtenez un article. Entierement autonome & auto-evolutif.</b></h2>
<p align="center">
<b><i><font size="5">Discutez avec <a href="#-integration-openclaw">OpenClaw</a> : "Recherche X" → termine.</font></i></b>
</p>
<p align="center">
<img src="../image/framework_v2.png" width="100%" alt="AutoResearchClaw Framework">
</p>
<p align="center">
<a href="../LICENSE"><img src="https://img.shields.io/badge/License-MIT-yellow.svg" alt="MIT License"></a>
<a href="https://python.org"><img src="https://img.shields.io/badge/Python-3.11%2B-3776AB?logo=python&logoColor=white" alt="Python 3.11+"></a>
<a href="#testing"><img src="https://img.shields.io/badge/Tests-1823%20passed-brightgreen?logo=pytest&logoColor=white" alt="1823 Tests Passed"></a>
<a href="https://github.com/aiming-lab/AutoResearchClaw"><img src="https://img.shields.io/badge/GitHub-AutoResearchClaw-181717?logo=github" alt="GitHub"></a>
<a href="#-integration-openclaw"><img src="https://img.shields.io/badge/OpenClaw-Compatible-ff4444?logo=data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgMCAyNCAyNCI+PHBhdGggZD0iTTEyIDJDNi40OCAyIDIgNi40OCAyIDEyczQuNDggMTAgMTAgMTAgMTAtNC40OCAxMC0xMFMxNy41MiAyIDEyIDJ6IiBmaWxsPSJ3aGl0ZSIvPjwvc3ZnPg==" alt="OpenClaw Compatible"></a>
<a href="https://discord.gg/u4ksqW5P"><img src="https://img.shields.io/badge/Discord-Join%20Community-5865F2?logo=discord&logoColor=white" alt="Discord"></a>
</p>
<p align="center">
<a href="../README.md">🇺🇸 English</a> ·
<a href="README_CN.md">🇨🇳 中文</a> ·
<a href="README_JA.md">🇯🇵 日本語</a> ·
<a href="README_KO.md">🇰🇷 한국어</a> ·
<a href="README_FR.md">🇫🇷 Français</a> ·
<a href="README_DE.md">🇩🇪 Deutsch</a> ·
<a href="README_ES.md">🇪🇸 Español</a> ·
<a href="README_PT.md">🇧🇷 Português</a> ·
<a href="README_RU.md">🇷🇺 Русский</a> ·
<a href="README_AR.md">🇸🇦 العربية</a>
</p>
<p align="center">
<a href="showcase/SHOWCASE.md">🏆 Vitrine des articles</a> · <a href="integration-guide.md">📖 Guide d'integration</a> · <a href="https://discord.gg/u4ksqW5P">💬 Communaute Discord</a>
</p>
---
<table>
<tr>
<td width="18%">
<a href="showcase/SHOWCASE.md"><img src="showcase/thumbnails/paper_I_random_matrix-01.png" width="120" alt="Sample Paper"/></a>
</td>
<td valign="middle">
<b>🏆 Vitrine des articles generes</b><br><br>
<b>8 articles couvrant 8 domaines</b> — mathematiques, statistiques, biologie, informatique, NLP, RL, vision, robustesse — generes de maniere entierement autonome sans aucune intervention humaine.<br><br>
<a href="showcase/SHOWCASE.md"><img src="https://img.shields.io/badge/View_Full_Showcase_→-All_8_Papers-d73a49?style=for-the-badge" alt="View Showcase"></a>
</td>
</tr>
</table>
---
> **🧪 Nous recherchons des testeurs !** Essayez le pipeline avec votre propre idee de recherche — dans n'importe quel domaine — et [dites-nous ce que vous en pensez](TESTER_GUIDE.md). Vos retours faconnent directement la prochaine version. **[→ Testing Guide](TESTER_GUIDE.md)** | **[→ 中文测试指南](TESTER_GUIDE_CN.md)** | **[→ 日本語テストガイド](TESTER_GUIDE_JA.md)**
---
## 🔥 News
- **[03/22/2026]** [v0.3.2](https://github.com/aiming-lab/AutoResearchClaw/releases/tag/v0.3.2) — **Support multiplateforme + stabilite majeure** — AutoResearchClaw fonctionne desormais avec tout agent compatible ACP (Claude Code, Codex CLI, Copilot CLI, Gemini CLI, Kimi CLI) et supporte les plateformes de messagerie (Discord, Telegram, Lark, WeChat) via le pont OpenClaw. Nouveau backend de generation de code CLI-agent qui delegue les Stages 10 et 13 a des agents CLI externes avec controle de budget et gestion des timeouts. Inclut le systeme anti-fabrication (VerifiedRegistry + boucle diagnostic/reparation), 100+ corrections de bugs, refactoring modulaire de l'executor, auto-detection `--resume`, renforcement des retries LLM, et corrections communautaires.
- **[03/18/2026]** [v0.3.1](https://github.com/aiming-lab/AutoResearchClaw/releases/tag/v0.3.1) — **OpenCode Beast Mode + Community Contributions** — New "Beast Mode" routes complex code generation to [OpenCode](https://github.com/anomalyco/opencode) with automatic complexity scoring and graceful fallback. Added Novita AI provider support, thread-safety hardening, improved LLM output parsing robustness, and 20+ bug fixes from community PRs and internal audit.
- **[03/17/2026]** [v0.3.0](https://github.com/aiming-lab/AutoResearchClaw/releases/tag/v0.3.0) — **MetaClaw Integration** — AutoResearchClaw now supports [MetaClaw](https://github.com/aiming-lab/MetaClaw) cross-run learning: pipeline failures → structured lessons → reusable skills, injected into all 23 stages. **+18.3%** robustness in controlled experiments. Opt-in (`metaclaw_bridge.enabled: true`), fully backward-compatible. See [Integration Guide](#-integration-metaclaw).
- **[03/16/2026]** [v0.2.0](https://github.com/aiming-lab/AutoResearchClaw/releases/tag/v0.2.0) — Three multi-agent subsystems (CodeAgent, BenchmarkAgent, FigureAgent), hardened Docker sandbox with network-policy-aware execution, 4-round paper quality audit (AI-slop detection, 7-dim review scoring, NeurIPS checklist), and 15+ bug fixes from production runs.
- **[03/15/2026]** [v0.1.0](https://github.com/aiming-lab/AutoResearchClaw/releases/tag/v0.1.0) — We release AutoResearchClaw: a fully autonomous 23-stage research pipeline that turns a single research idea into a conference-ready paper. No human intervention required.
---
## ⚡ Une commande. Un article.
```bash
pip install -e . && researchclaw setup && researchclaw init && researchclaw run --topic "Your research idea here" --auto-approve
```
---
## 🤔 De quoi s'agit-il ?
**Vous y pensez. AutoResearchClaw l'ecrit.**
Donnez un sujet de recherche — recevez un article academique complet avec de la vraie litterature provenant d'OpenAlex, Semantic Scholar et arXiv, des experiences en sandbox adaptees au materiel (detection automatique GPU/MPS/CPU), une analyse statistique, une relecture multi-agents, et du LaTeX pret pour les conferences ciblant NeurIPS/ICML/ICLR. Aucune supervision. Aucun copier-coller. Aucune reference hallucinee.
<table>
<tr><td>📄</td><td><code>paper_draft.md</code></td><td>Article academique complet (Introduction, Travaux connexes, Methode, Experiences, Resultats, Conclusion)</td></tr>
<tr><td>📐</td><td><code>paper.tex</code></td><td>LaTeX pret pour les conferences (templates NeurIPS / ICLR / ICML)</td></tr>
<tr><td>📚</td><td><code>references.bib</code></td><td>References BibTeX reelles provenant d'OpenAlex, Semantic Scholar et arXiv — auto-elaguees pour correspondre aux citations dans le texte</td></tr>
<tr><td>🔍</td><td><code>verification_report.json</code></td><td>Verification d'integrite et de pertinence des citations sur 4 couches (arXiv, CrossRef, DataCite, LLM)</td></tr>
<tr><td>🧪</td><td><code>experiment runs/</code></td><td>Code genere + resultats sandbox + metriques JSON structurees</td></tr>
<tr><td>📊</td><td><code>charts/</code></td><td>Graphiques de comparaison de conditions auto-generes avec barres d'erreur et intervalles de confiance</td></tr>
<tr><td>📝</td><td><code>reviews.md</code></td><td>Relecture multi-agents avec verification de coherence methodologie-preuves</td></tr>
<tr><td>🧬</td><td><code>evolution/</code></td><td>Lecons d'auto-apprentissage extraites de chaque execution</td></tr>
<tr><td>📦</td><td><code>deliverables/</code></td><td>Tous les livrables finaux dans un seul dossier — pret a compiler pour Overleaf</td></tr>
</table>
Le pipeline s'execute **de bout en bout sans intervention humaine**. Quand les experiences echouent, il s'auto-repare. Quand les hypotheses ne tiennent pas, il pivote. Quand les citations sont fausses, il les supprime.
🌍 **Utilisable partout.** AutoResearchClaw n'est pas verrouille sur une seule plateforme. Utilisez-le en CLI autonome, connectez-le a [OpenClaw](https://github.com/openclaw/openclaw), ou integrez-le avec n'importe quel agent compatible ACP — 🤖 Claude Code, 💻 Codex CLI, 🐙 Copilot CLI, ♊ Gemini CLI, 🌙 Kimi CLI, et bien d'autres. Grace au pont de messagerie d'OpenClaw, vous pouvez lancer une recherche complete depuis 💬 Discord, ✈️ Telegram, 🐦 Lark (飞书), 💚 WeChat, ou la plateforme que votre equipe utilise deja. Un sujet en entree, un article en sortie — peu importe d'ou vous l'envoyez.
---
## 🚀 Demarrage rapide
```bash
# 1. Cloner & installer
git clone https://github.com/aiming-lab/AutoResearchClaw.git
cd AutoResearchClaw
python3 -m venv .venv && source .venv/bin/activate
pip install -e .
# 2. Setup (interactif — installe OpenCode beast mode, verifie Docker/LaTeX)
researchclaw setup
# 3. Configurer
researchclaw init # Interactif : choisir le fournisseur LLM, cree config.arc.yaml
# Ou manuellement : cp config.researchclaw.example.yaml config.arc.yaml
# 4. Executer
export OPENAI_API_KEY="sk-..."
researchclaw run --config config.arc.yaml --topic "Your research idea" --auto-approve
```
Sortie → `artifacts/rc-YYYYMMDD-HHMMSS-<hash>/deliverables/` — LaTeX prêt à compiler, BibTeX, code d'expérience, graphiques.
<details>
<summary>📝 Configuration minimale requise</summary>
```yaml
project:
name: "my-research"
research:
topic: "Your research topic here"
llm:
base_url: "https://api.openai.com/v1"
api_key_env: "OPENAI_API_KEY"
primary_model: "gpt-4o"
fallback_models: ["gpt-4o-mini"]
experiment:
mode: "sandbox"
sandbox:
python_path: ".venv/bin/python"
```
</details>
---
## 🧠 Ce qui le distingue
| Capacité | Fonctionnement |
|----------|---------------|
| **🔄 Boucle PIVOT / REFINE** | L'étape 15 décide de manière autonome : PROCEED, REFINE (ajuster les paramètres) ou PIVOT (nouvelle direction). Artefacts auto-versionnés. |
| **🤖 Débat multi-agents** | La génération d'hypothèses, l'analyse de résultats et la relecture par les pairs utilisent chacune un débat structuré multi-perspectives. |
| **🧬 Auto-apprentissage** | Leçons extraites à chaque exécution (justification des décisions, avertissements d'exécution, anomalies de métriques) avec décroissance temporelle à 30 jours. Les exécutions futures apprennent des erreurs passées. |
| **📚 Base de connaissances** | Chaque exécution construit une KB structurée couvrant 6 catégories (décisions, expériences, résultats, littérature, questions, relectures). |
| **🛡️ Sentinel Watchdog** | Moniteur de qualité en arrière-plan : détection NaN/Inf, cohérence article-preuves, score de pertinence des citations, protection anti-fabrication. |
---
## 🦞 Intégration OpenClaw
<table>
<tr>
<td>

**AutoResearchClaw est un service compatible [OpenClaw](https://github.com/openclaw/openclaw).** Installez-le dans OpenClaw et lancez une recherche autonome avec un seul message — ou utilisez-le de manière autonome via CLI, Claude Code, ou tout assistant de codage IA.

</td>
</tr>
</table>
### 🚀 Utilisation avec OpenClaw (recommandé)
Si vous utilisez déjà [OpenClaw](https://github.com/openclaw/openclaw) comme assistant IA :
```
1️⃣ Partagez l'URL du dépôt GitHub avec OpenClaw
2️⃣ OpenClaw lit automatiquement RESEARCHCLAW_AGENTS.md → comprend le pipeline
3️⃣ Dites : "Research [votre sujet]"
4️⃣ C'est fait — OpenClaw clone, installe, configure, exécute et renvoie les résultats
```
**C'est tout.** OpenClaw gère `git clone`, `pip install`, la configuration et l'exécution du pipeline automatiquement. Vous n'avez qu'à discuter.
<details>
<summary>💡 Ce qui se passe en coulisses</summary>
1. OpenClaw lit `RESEARCHCLAW_AGENTS.md` → apprend le rôle d'orchestrateur de recherche
2. OpenClaw lit `README.md` → comprend l'installation et la structure du pipeline
3. OpenClaw copie `config.researchclaw.example.yaml` → `config.arc.yaml`
4. Demande votre clé API LLM (ou utilise votre variable d'environnement)
5. Exécute `pip install -e .` + `researchclaw run --topic "..." --auto-approve`
6. Renvoie l'article, le LaTeX, les expériences et les citations
</details>
### 🔌 Pont OpenClaw (avancé)
Pour une intégration plus poussée, AutoResearchClaw inclut un **système d'adaptateurs pont** avec 6 fonctionnalités optionnelles :
```yaml
# config.arc.yaml
openclaw_bridge:
use_cron: true # ⏰ Exécutions de recherche planifiées
use_message: true # 💬 Notifications de progression (Discord/Slack/Telegram)
use_memory: true # 🧠 Persistance des connaissances inter-sessions
use_sessions_spawn: true # 🔀 Lancement de sous-sessions parallèles pour les étapes concurrentes
use_web_fetch: true # 🌐 Recherche web en direct pendant la revue de littérature
use_browser: false # 🖥️ Collecte d'articles via navigateur
```
Chaque option active un protocole d'adaptateur type. Qua
gitextract_tp1xyq09/
├── .gitignore
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── config.researchclaw.example.yaml
├── docs/
│ ├── BUG_FIX_DOCUMENT_20260316.md
│ ├── BUG_TRACKER.md
│ ├── CHANGELOG_ANTHROPIC_ADAPTER.md
│ ├── PIPELINE_TEST_LOG_R5.md
│ ├── README_AR.md
│ ├── README_CN.md
│ ├── README_DE.md
│ ├── README_ES.md
│ ├── README_FR.md
│ ├── README_JA.md
│ ├── README_KO.md
│ ├── README_PT.md
│ ├── README_RU.md
│ ├── TESTER_GUIDE.md
│ ├── TESTER_GUIDE_CN.md
│ ├── TESTER_GUIDE_JA.md
│ ├── agent_figure_and_benchmark_plan.md
│ ├── figure_prompts/
│ │ ├── case_a_meta_learning.md
│ │ └── case_b_rlhf_alignment.md
│ ├── integration-guide.md
│ ├── issue_tracker_v9.md
│ ├── iteration_plan_v8.md
│ ├── iteration_showcase_narrative.md
│ ├── metaclaw-integration-plan.md
│ ├── next_phase_showcase_plan.md
│ ├── pipeline_critical_fixes_v8.md
│ ├── rate_limit_fix_plan.md
│ ├── sandbox_environment_fix_plan.md
│ └── showcase/
│ └── SHOWCASE.md
├── prompts.default.yaml
├── pyproject.toml
├── researchclaw/
│ ├── __init__.py
│ ├── __main__.py
│ ├── adapters.py
│ ├── agents/
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── benchmark_agent/
│ │ │ ├── __init__.py
│ │ │ ├── acquirer.py
│ │ │ ├── orchestrator.py
│ │ │ ├── selector.py
│ │ │ ├── surveyor.py
│ │ │ └── validator.py
│ │ ├── code_searcher/
│ │ │ ├── __init__.py
│ │ │ ├── agent.py
│ │ │ ├── cache.py
│ │ │ ├── github_client.py
│ │ │ ├── pattern_extractor.py
│ │ │ └── query_gen.py
│ │ └── figure_agent/
│ │ ├── __init__.py
│ │ ├── codegen.py
│ │ ├── critic.py
│ │ ├── decision.py
│ │ ├── integrator.py
│ │ ├── nano_banana.py
│ │ ├── orchestrator.py
│ │ ├── planner.py
│ │ ├── renderer.py
│ │ └── style_config.py
│ ├── assessor/
│ │ ├── __init__.py
│ │ ├── comparator.py
│ │ ├── rubrics.py
│ │ ├── scorer.py
│ │ └── venue_recommender.py
│ ├── calendar/
│ │ ├── __init__.py
│ │ ├── data/
│ │ │ └── conferences.yaml
│ │ ├── deadlines.py
│ │ ├── planner.py
│ │ └── reminder.py
│ ├── cli.py
│ ├── collaboration/
│ │ ├── __init__.py
│ │ ├── dedup.py
│ │ ├── publisher.py
│ │ ├── repository.py
│ │ └── subscriber.py
│ ├── config.py
│ ├── copilot/
│ │ ├── __init__.py
│ │ ├── branching.py
│ │ ├── controller.py
│ │ ├── feedback.py
│ │ └── modes.py
│ ├── dashboard/
│ │ ├── __init__.py
│ │ ├── broadcaster.py
│ │ ├── collector.py
│ │ └── metrics.py
│ ├── data/
│ │ ├── __init__.py
│ │ ├── benchmark_knowledge.yaml
│ │ ├── dataset_registry.yaml
│ │ ├── docker_profiles.yaml
│ │ ├── framework_docs/
│ │ │ ├── axolotl.md
│ │ │ ├── llamafactory.md
│ │ │ ├── peft.md
│ │ │ ├── transformers_training.md
│ │ │ └── trl.md
│ │ └── seminal_papers.yaml
│ ├── docker/
│ │ ├── Dockerfile
│ │ ├── Dockerfile.biology
│ │ ├── Dockerfile.chemistry
│ │ ├── Dockerfile.economics
│ │ ├── Dockerfile.generic
│ │ ├── Dockerfile.math
│ │ ├── Dockerfile.physics
│ │ └── entrypoint.sh
│ ├── domains/
│ │ ├── __init__.py
│ │ ├── adapters/
│ │ │ ├── __init__.py
│ │ │ ├── biology.py
│ │ │ ├── chemistry.py
│ │ │ ├── economics.py
│ │ │ ├── generic.py
│ │ │ ├── math.py
│ │ │ ├── ml.py
│ │ │ ├── neuroscience.py
│ │ │ ├── physics.py
│ │ │ ├── robotics.py
│ │ │ └── security.py
│ │ ├── detector.py
│ │ ├── experiment_schema.py
│ │ ├── profiles/
│ │ │ ├── _generic.yaml
│ │ │ ├── biology_genomics.yaml
│ │ │ ├── biology_protein.yaml
│ │ │ ├── biology_singlecell.yaml
│ │ │ ├── chemistry_molprop.yaml
│ │ │ ├── chemistry_qm.yaml
│ │ │ ├── economics_empirical.yaml
│ │ │ ├── mathematics_numerical.yaml
│ │ │ ├── mathematics_optimization.yaml
│ │ │ ├── ml_compression.yaml
│ │ │ ├── ml_generative.yaml
│ │ │ ├── ml_generic.yaml
│ │ │ ├── ml_graph.yaml
│ │ │ ├── ml_nlp.yaml
│ │ │ ├── ml_rl.yaml
│ │ │ ├── ml_tabular.yaml
│ │ │ ├── ml_vision.yaml
│ │ │ ├── neuroscience_computational.yaml
│ │ │ ├── neuroscience_imaging.yaml
│ │ │ ├── physics_pde.yaml
│ │ │ ├── physics_quantum.yaml
│ │ │ ├── physics_simulation.yaml
│ │ │ ├── robotics_control.yaml
│ │ │ └── security_detection.yaml
│ │ └── prompt_adapter.py
│ ├── evolution.py
│ ├── experiment/
│ │ ├── __init__.py
│ │ ├── agentic_sandbox.py
│ │ ├── code_agent.py
│ │ ├── colab_sandbox.py
│ │ ├── docker_sandbox.py
│ │ ├── evaluators/
│ │ │ ├── __init__.py
│ │ │ └── convergence.py
│ │ ├── factory.py
│ │ ├── git_manager.py
│ │ ├── harness_template.py
│ │ ├── metrics.py
│ │ ├── runner.py
│ │ ├── sandbox.py
│ │ ├── ssh_sandbox.py
│ │ ├── validator.py
│ │ └── visualize.py
│ ├── feedback/
│ │ └── FEEDBACK_ANALYSIS_PROMPT.md
│ ├── hardware.py
│ ├── health.py
│ ├── knowledge/
│ │ ├── __init__.py
│ │ ├── base.py
│ │ └── graph/
│ │ ├── __init__.py
│ │ ├── builder.py
│ │ ├── entities.py
│ │ ├── query.py
│ │ ├── relations.py
│ │ └── visualizer.py
│ ├── literature/
│ │ ├── __init__.py
│ │ ├── arxiv_client.py
│ │ ├── cache.py
│ │ ├── models.py
│ │ ├── novelty.py
│ │ ├── openalex_client.py
│ │ ├── search.py
│ │ ├── semantic_scholar.py
│ │ ├── trends.py
│ │ └── verify.py
│ ├── llm/
│ │ ├── __init__.py
│ │ ├── acp_client.py
│ │ ├── anthropic_adapter.py
│ │ └── client.py
│ ├── mcp/
│ │ ├── __init__.py
│ │ ├── client.py
│ │ ├── registry.py
│ │ ├── server.py
│ │ ├── tools.py
│ │ └── transport.py
│ ├── memory/
│ │ ├── __init__.py
│ │ ├── decay.py
│ │ ├── embeddings.py
│ │ ├── experiment_memory.py
│ │ ├── ideation_memory.py
│ │ ├── retriever.py
│ │ ├── store.py
│ │ └── writing_memory.py
│ ├── metaclaw_bridge/
│ │ ├── __init__.py
│ │ ├── config.py
│ │ ├── lesson_to_skill.py
│ │ ├── prm_gate.py
│ │ ├── session.py
│ │ ├── skill_feedback.py
│ │ └── stage_skill_map.py
│ ├── overleaf/
│ │ ├── __init__.py
│ │ ├── conflict.py
│ │ ├── formatter.py
│ │ ├── sync.py
│ │ └── watcher.py
│ ├── pipeline/
│ │ ├── __init__.py
│ │ ├── _domain.py
│ │ ├── _helpers.py
│ │ ├── code_agent.py
│ │ ├── contracts.py
│ │ ├── executor.py
│ │ ├── experiment_diagnosis.py
│ │ ├── experiment_repair.py
│ │ ├── opencode_bridge.py
│ │ ├── paper_verifier.py
│ │ ├── runner.py
│ │ ├── stage_impls/
│ │ │ ├── __init__.py
│ │ │ ├── _analysis.py
│ │ │ ├── _code_generation.py
│ │ │ ├── _execution.py
│ │ │ ├── _experiment_design.py
│ │ │ ├── _literature.py
│ │ │ ├── _paper_writing.py
│ │ │ ├── _review_publish.py
│ │ │ ├── _synthesis.py
│ │ │ └── _topic.py
│ │ ├── stages.py
│ │ └── verified_registry.py
│ ├── project/
│ │ ├── __init__.py
│ │ ├── idea_pool.py
│ │ ├── manager.py
│ │ ├── models.py
│ │ └── scheduler.py
│ ├── prompts.py
│ ├── quality.py
│ ├── report.py
│ ├── server/
│ │ ├── __init__.py
│ │ ├── app.py
│ │ ├── dialog/
│ │ │ ├── __init__.py
│ │ │ ├── intents.py
│ │ │ ├── router.py
│ │ │ └── session.py
│ │ ├── middleware/
│ │ │ ├── __init__.py
│ │ │ └── auth.py
│ │ ├── routes/
│ │ │ ├── __init__.py
│ │ │ ├── chat.py
│ │ │ ├── pipeline.py
│ │ │ ├── projects.py
│ │ │ └── voice.py
│ │ └── websocket/
│ │ ├── __init__.py
│ │ ├── events.py
│ │ └── manager.py
│ ├── servers/
│ │ ├── __init__.py
│ │ ├── cloud_executor.py
│ │ ├── dispatcher.py
│ │ ├── monitor.py
│ │ ├── registry.py
│ │ ├── slurm_executor.py
│ │ └── ssh_executor.py
│ ├── skills/
│ │ ├── __init__.py
│ │ ├── builtin/
│ │ │ ├── __init__.py
│ │ │ ├── domain/
│ │ │ │ ├── cv-classification/
│ │ │ │ │ └── SKILL.md
│ │ │ │ ├── cv-detection/
│ │ │ │ │ └── SKILL.md
│ │ │ │ ├── nlp-alignment/
│ │ │ │ │ └── SKILL.md
│ │ │ │ ├── nlp-pretraining/
│ │ │ │ │ └── SKILL.md
│ │ │ │ └── rl-policy-optimization/
│ │ │ │ └── SKILL.md
│ │ │ ├── experiment/
│ │ │ │ ├── experimental-design/
│ │ │ │ │ └── SKILL.md
│ │ │ │ ├── meta-analysis/
│ │ │ │ │ └── SKILL.md
│ │ │ │ └── systematic-review/
│ │ │ │ └── SKILL.md
│ │ │ └── tooling/
│ │ │ ├── data-loading/
│ │ │ │ └── SKILL.md
│ │ │ ├── distributed-training/
│ │ │ │ └── SKILL.md
│ │ │ ├── mixed-precision/
│ │ │ │ └── SKILL.md
│ │ │ └── pytorch-training/
│ │ │ └── SKILL.md
│ │ ├── loader.py
│ │ ├── matcher.py
│ │ ├── registry.py
│ │ └── schema.py
│ ├── templates/
│ │ ├── __init__.py
│ │ ├── compiler.py
│ │ ├── conference.py
│ │ ├── converter.py
│ │ ├── results_table_builder.py
│ │ └── styles/
│ │ ├── iclr_2025/
│ │ │ ├── iclr2025_conference.bst
│ │ │ └── iclr2025_conference.sty
│ │ ├── iclr_2026/
│ │ │ ├── iclr2026_conference.bst
│ │ │ └── iclr2026_conference.sty
│ │ ├── icml_2025/
│ │ │ ├── icml2025.bst
│ │ │ └── icml2025.sty
│ │ ├── icml_2026/
│ │ │ ├── icml2026.bst
│ │ │ └── icml2026.sty
│ │ ├── neurips_2024/
│ │ │ └── neurips_2024.sty
│ │ └── neurips_2025/
│ │ └── neurips_2025.sty
│ ├── trends/
│ │ ├── __init__.py
│ │ ├── auto_topic.py
│ │ ├── daily_digest.py
│ │ ├── feeds.py
│ │ ├── opportunity_finder.py
│ │ └── trend_analyzer.py
│ ├── utils/
│ │ ├── __init__.py
│ │ ├── sanitize.py
│ │ └── thinking_tags.py
│ ├── voice/
│ │ ├── __init__.py
│ │ ├── commands.py
│ │ ├── synthesizer.py
│ │ └── transcriber.py
│ ├── web/
│ │ ├── __init__.py
│ │ ├── _ssrf.py
│ │ ├── agent.py
│ │ ├── crawler.py
│ │ ├── pdf_extractor.py
│ │ ├── scholar.py
│ │ └── search.py
│ ├── wizard/
│ │ ├── __init__.py
│ │ ├── quickstart.py
│ │ ├── templates.py
│ │ └── validator.py
│ └── writing_guide.py
├── scripts/
│ ├── metaclaw_start.sh
│ ├── plot_iteration_showcase.py
│ ├── test_beast_mode_e2e.py
│ ├── test_code_agent_live.py
│ ├── test_code_agent_sandbox.py
│ └── test_codegen_v2.py
├── sentinel.sh
├── tests/
│ ├── __init__.py
│ ├── conftest.py
│ ├── e2e_docker_sandbox.py
│ ├── e2e_real_llm.py
│ ├── test_anthropic.py
│ ├── test_assessor.py
│ ├── test_benchmark_agent.py
│ ├── test_calendar.py
│ ├── test_cli.py
│ ├── test_code_agent.py
│ ├── test_code_searcher.py
│ ├── test_collaboration.py
│ ├── test_compiler.py
│ ├── test_convergence_evaluator.py
│ ├── test_copilot.py
│ ├── test_decision_agent.py
│ ├── test_domain_detector.py
│ ├── test_entry_point_validation.py
│ ├── test_experiment_diagnosis.py
│ ├── test_experiment_repair.py
│ ├── test_experiment_schema.py
│ ├── test_figure_agent.py
│ ├── test_knowledge_graph.py
│ ├── test_mcp.py
│ ├── test_memory_system.py
│ ├── test_metaclaw_bridge/
│ │ ├── __init__.py
│ │ ├── test_config.py
│ │ ├── test_lesson_to_skill.py
│ │ ├── test_prm_gate.py
│ │ ├── test_session.py
│ │ ├── test_skill_feedback.py
│ │ └── test_stage_skill_map.py
│ ├── test_metric_parser.py
│ ├── test_minimax_provider.py
│ ├── test_neuroscience_domain.py
│ ├── test_opencode_bridge.py
│ ├── test_overleaf.py
│ ├── test_paper_verifier.py
│ ├── test_project_manager.py
│ ├── test_prompt_adapter.py
│ ├── test_rc_adapters.py
│ ├── test_rc_cache.py
│ ├── test_rc_checkpoint.py
│ ├── test_rc_citation_resolve.py
│ ├── test_rc_citation_verify.py
│ ├── test_rc_cli.py
│ ├── test_rc_config.py
│ ├── test_rc_contracts.py
│ ├── test_rc_docker_sandbox.py
│ ├── test_rc_e2e_regression.py
│ ├── test_rc_evolution.py
│ ├── test_rc_executor.py
│ ├── test_rc_hardware.py
│ ├── test_rc_health.py
│ ├── test_rc_kb.py
│ ├── test_rc_literature.py
│ ├── test_rc_llm.py
│ ├── test_rc_novelty.py
│ ├── test_rc_preflight.py
│ ├── test_rc_prompts.py
│ ├── test_rc_quality.py
│ ├── test_rc_report.py
│ ├── test_rc_runner.py
│ ├── test_rc_sanitization.py
│ ├── test_rc_sentinel.py
│ ├── test_rc_stages.py
│ ├── test_rc_templates.py
│ ├── test_rc_validator.py
│ ├── test_results_table_builder.py
│ ├── test_robotics_adapter.py
│ ├── test_servers.py
│ ├── test_skills_library.py
│ ├── test_ssh_and_colab_sandbox.py
│ ├── test_trends.py
│ ├── test_universal_codegen_integration.py
│ ├── test_v6_improvements.py
│ ├── test_verified_registry.py
│ ├── test_web_crawler.py
│ ├── test_web_integration.py
│ ├── test_web_pdf_extractor.py
│ ├── test_web_platform.py
│ ├── test_web_scholar.py
│ └── test_web_search.py
└── website/
├── features.html
├── getting-started.html
├── index.html
├── papers.html
├── pipeline.html
└── style.css
Showing preview only (414K chars total). Download the full file or copy to clipboard to get everything.
SYMBOL INDEX (4464 symbols across 269 files)
FILE: researchclaw/adapters.py
class FetchResponse (line 10) | class FetchResponse:
class BrowserPage (line 17) | class BrowserPage:
class CronAdapter (line 22) | class CronAdapter(Protocol):
method schedule_resume (line 23) | def schedule_resume(self, run_id: str, stage_id: int, reason: str) -> ...
class MessageAdapter (line 26) | class MessageAdapter(Protocol):
method notify (line 27) | def notify(self, channel: str, subject: str, body: str) -> str: ...
class MemoryAdapter (line 30) | class MemoryAdapter(Protocol):
method append (line 31) | def append(self, namespace: str, content: str) -> str: ...
class SessionsAdapter (line 34) | class SessionsAdapter(Protocol):
method spawn (line 35) | def spawn(self, name: str, command: tuple[str, ...]) -> str: ...
class WebFetchAdapter (line 38) | class WebFetchAdapter(Protocol):
method fetch (line 39) | def fetch(self, url: str) -> FetchResponse: ...
class BrowserAdapter (line 42) | class BrowserAdapter(Protocol):
method open (line 43) | def open(self, url: str) -> BrowserPage: ...
class RecordingCronAdapter (line 47) | class RecordingCronAdapter:
method schedule_resume (line 50) | def schedule_resume(self, run_id: str, stage_id: int, reason: str) -> ...
class RecordingMessageAdapter (line 56) | class RecordingMessageAdapter:
method notify (line 59) | def notify(self, channel: str, subject: str, body: str) -> str:
class RecordingMemoryAdapter (line 65) | class RecordingMemoryAdapter:
method append (line 68) | def append(self, namespace: str, content: str) -> str:
class RecordingSessionsAdapter (line 74) | class RecordingSessionsAdapter:
method spawn (line 77) | def spawn(self, name: str, command: tuple[str, ...]) -> str:
class RecordingWebFetchAdapter (line 83) | class RecordingWebFetchAdapter:
method fetch (line 86) | def fetch(self, url: str) -> FetchResponse:
class RecordingBrowserAdapter (line 92) | class RecordingBrowserAdapter:
method open (line 95) | def open(self, url: str) -> BrowserPage:
class MCPMessageAdapter (line 101) | class MCPMessageAdapter:
method notify (line 106) | def notify(self, channel: str, subject: str, body: str) -> str:
class MCPWebFetchAdapter (line 111) | class MCPWebFetchAdapter:
method fetch (line 116) | def fetch(self, url: str) -> FetchResponse:
class AdapterBundle (line 121) | class AdapterBundle:
method from_config (line 130) | def from_config(cls, config: object) -> AdapterBundle:
FILE: researchclaw/agents/base.py
class _LLMResponseLike (line 25) | class _LLMResponseLike(Protocol): # pragma: no cover
class _LLMClientLike (line 32) | class _LLMClientLike(Protocol): # pragma: no cover
method chat (line 33) | def chat(
class AgentStepResult (line 50) | class AgentStepResult:
class BaseAgent (line 65) | class BaseAgent:
method __init__ (line 73) | def __init__(self, llm: _LLMClientLike) -> None:
method _chat (line 81) | def _chat(
method _chat_json (line 102) | def _chat_json(
method _parse_json (line 122) | def _parse_json(text: str) -> dict[str, Any] | None:
method execute (line 163) | def execute(self, context: dict[str, Any]) -> AgentStepResult:
method _make_result (line 167) | def _make_result(
class AgentOrchestrator (line 189) | class AgentOrchestrator:
method __init__ (line 196) | def __init__(self, llm: _LLMClientLike, *, max_iterations: int = 3) ->...
method _accumulate (line 203) | def _accumulate(self, result: AgentStepResult) -> None:
method orchestrate (line 208) | def orchestrate(self, context: dict[str, Any]) -> dict[str, Any]:
FILE: researchclaw/agents/benchmark_agent/acquirer.py
class AcquirerAgent (line 19) | class AcquirerAgent(BaseAgent):
method _generate_data_loader (line 24) | def _generate_data_loader(
method _generate_baseline_code (line 63) | def _generate_baseline_code(
method _generate_setup_script (line 106) | def _generate_setup_script(
method _generate_requirements (line 192) | def _generate_requirements(self, required_pip: list[str]) -> str:
method _strip_fences (line 212) | def _strip_fences(code: str) -> str:
method execute (line 225) | def execute(self, context: dict[str, Any]) -> AgentStepResult:
FILE: researchclaw/agents/benchmark_agent/orchestrator.py
class BenchmarkAgentConfig (line 33) | class BenchmarkAgentConfig:
class BenchmarkPlan (line 58) | class BenchmarkPlan:
method to_dict (line 89) | def to_dict(self) -> dict[str, Any]:
method to_prompt_block (line 108) | def to_prompt_block(self) -> str:
class BenchmarkOrchestrator (line 163) | class BenchmarkOrchestrator(AgentOrchestrator):
method __init__ (line 166) | def __init__(
method _save_artifact (line 201) | def _save_artifact(self, name: str, data: Any) -> None:
method orchestrate (line 215) | def orchestrate(self, context: dict[str, Any]) -> BenchmarkPlan:
FILE: researchclaw/agents/benchmark_agent/selector.py
class SelectorAgent (line 30) | class SelectorAgent(BaseAgent):
method __init__ (line 35) | def __init__(
method _filter_benchmarks (line 58) | def _filter_benchmarks(
method _filter_baselines (line 85) | def _filter_baselines(
method _rank_benchmarks (line 100) | def _rank_benchmarks(
method _rank_baselines (line 120) | def _rank_baselines(
method _select_with_llm (line 133) | def _select_with_llm(
method _resolve_selection (line 187) | def _resolve_selection(
method _inject_required_baselines (line 218) | def _inject_required_baselines(
method execute (line 266) | def execute(self, context: dict[str, Any]) -> AgentStepResult:
FILE: researchclaw/agents/benchmark_agent/surveyor.py
class SurveyorAgent (line 48) | class SurveyorAgent(BaseAgent):
method __init__ (line 53) | def __init__(
method _load_knowledge (line 68) | def _load_knowledge() -> dict[str, Any]:
method _match_domains (line 77) | def _match_domains(self, topic: str) -> list[str]:
method _get_local_candidates (line 89) | def _get_local_candidates(self, domain_ids: list[str]) -> dict[str, Any]:
method _search_hf_datasets (line 113) | def _search_hf_datasets(self, topic: str, domain_ids: list[str]) -> li...
method _extract_search_keywords (line 176) | def _extract_search_keywords(topic: str) -> list[str]:
method _llm_suggest_benchmarks (line 198) | def _llm_suggest_benchmarks(self, topic: str, hypothesis: str) -> dict...
method execute (line 244) | def execute(self, context: dict[str, Any]) -> AgentStepResult:
FILE: researchclaw/agents/benchmark_agent/validator.py
class ValidatorAgent (line 37) | class ValidatorAgent(BaseAgent):
method _check_syntax (line 42) | def _check_syntax(self, code: str, label: str) -> list[str]:
method _check_imports (line 52) | def _check_imports(
method _llm_review (line 94) | def _llm_review(
method execute (line 141) | def execute(self, context: dict[str, Any]) -> AgentStepResult:
FILE: researchclaw/agents/code_searcher/agent.py
class CodeSearchResult (line 33) | class CodeSearchResult:
method to_prompt_context (line 43) | def to_prompt_context(self) -> str:
method to_cache_dict (line 49) | def to_cache_dict(self) -> dict[str, Any]:
method from_cache_dict (line 69) | def from_cache_dict(cls, data: dict[str, Any]) -> CodeSearchResult:
class CodeSearchAgent (line 94) | class CodeSearchAgent:
method __init__ (line 108) | def __init__(
method search (line 122) | def search(
method _analyze_repo (line 249) | def _analyze_repo(self, repo: RepoInfo) -> RepoAnalysis | None:
FILE: researchclaw/agents/code_searcher/cache.py
class SearchCache (line 24) | class SearchCache:
method __init__ (line 34) | def __init__(
method get (line 42) | def get(self, domain_id: str, topic: str) -> dict[str, Any] | None:
method put (line 61) | def put(self, domain_id: str, topic: str, data: dict[str, Any]) -> None:
method clear (line 79) | def clear(self, domain_id: str | None = None) -> int:
method stats (line 95) | def stats(self) -> dict[str, int]:
method _cache_path (line 117) | def _cache_path(self, domain_id: str, topic: str) -> Path:
method _topic_hash (line 121) | def _topic_hash(topic: str) -> str:
FILE: researchclaw/agents/code_searcher/github_client.py
class RepoInfo (line 30) | class RepoInfo:
class CodeSnippet (line 43) | class CodeSnippet:
class RepoAnalysis (line 53) | class RepoAnalysis:
class GitHubClient (line 62) | class GitHubClient:
method __init__ (line 69) | def __init__(self, token: str | None = None) -> None:
method _headers (line 75) | def _headers(self) -> dict[str, str]:
method _rate_limit_wait (line 84) | def _rate_limit_wait(self) -> None:
method _get (line 93) | def _get(self, url: str, params: dict[str, str] | None = None) -> dict...
method search_repos (line 122) | def search_repos(
method search_code (line 176) | def search_code(
method get_file_content (line 224) | def get_file_content(
method get_readme (line 268) | def get_readme(self, repo_full_name: str) -> str | None:
method get_repo_tree (line 286) | def get_repo_tree(
method request_count (line 302) | def request_count(self) -> int:
method has_token (line 306) | def has_token(self) -> bool:
FILE: researchclaw/agents/code_searcher/pattern_extractor.py
class CodePatterns (line 22) | class CodePatterns:
method to_prompt_context (line 31) | def to_prompt_context(self) -> str:
method has_content (line 54) | def has_content(self) -> bool:
function extract_patterns (line 95) | def extract_patterns(
function _llm_extract (line 127) | def _llm_extract(
function _heuristic_extract (line 183) | def _heuristic_extract(snippets: list[str]) -> CodePatterns:
FILE: researchclaw/agents/code_searcher/query_gen.py
function generate_search_queries (line 41) | def generate_search_queries(
function _heuristic_generate (line 77) | def _heuristic_generate(
function _llm_generate (line 116) | def _llm_generate(
function _extract_key_phrases (line 160) | def _extract_key_phrases(text: str, max_words: int = 5) -> str:
FILE: researchclaw/agents/figure_agent/codegen.py
function _esc (line 29) | def _esc(s: str) -> str:
function _is_degenerate_data (line 38) | def _is_degenerate_data(values: list[float]) -> bool:
function _humanize_label (line 75) | def _humanize_label(raw: str) -> str:
class CodeGenAgent (line 404) | class CodeGenAgent(BaseAgent):
method __init__ (line 414) | def __init__(self, llm: Any, *, output_format: str = "python", use_doc...
method execute (line 423) | def execute(self, context: dict[str, Any]) -> AgentStepResult:
method _generate_script (line 493) | def _generate_script(
method _fill_template (line 569) | def _fill_template(
method _fill_bar_template (line 646) | def _fill_bar_template(
method _fill_grouped_bar_template (line 711) | def _fill_grouped_bar_template(
method _fill_heatmap_template (line 756) | def _fill_heatmap_template(
method _llm_generate_script (line 822) | def _llm_generate_script(
method _llm_generate_latex (line 921) | def _llm_generate_latex(
method _strip_fences (line 990) | def _strip_fences(text: str) -> str:
method _strip_latex_fences (line 998) | def _strip_latex_fences(text: str) -> str:
FILE: researchclaw/agents/figure_agent/critic.py
class CriticAgent (line 23) | class CriticAgent(BaseAgent):
method __init__ (line 28) | def __init__(
method execute (line 41) | def execute(self, context: dict[str, Any]) -> AgentStepResult:
method _review_figure (line 124) | def _review_figure(
method _check_numerical_accuracy (line 179) | def _check_numerical_accuracy(
method _check_text_correctness (line 252) | def _check_text_correctness(
method _check_visual_quality (line 309) | def _check_visual_quality(
method _check_rendered_image (line 368) | def _check_rendered_image(
FILE: researchclaw/agents/figure_agent/decision.py
function _safe_priority (line 30) | def _safe_priority(val: object, default: int = 2) -> int:
class FigureDecisionAgent (line 95) | class FigureDecisionAgent(BaseAgent):
method __init__ (line 108) | def __init__(
method execute (line 123) | def execute(self, context: dict[str, Any]) -> AgentStepResult:
method _llm_decide (line 196) | def _llm_decide(
method _parse_decisions (line 253) | def _parse_decisions(self, raw: str) -> list[dict[str, Any]]:
method _heuristic_decide (line 295) | def _heuristic_decide(
method _infer_backend (line 369) | def _infer_backend(figure_type: str) -> str:
method _enforce_bounds (line 380) | def _enforce_bounds(
FILE: researchclaw/agents/figure_agent/integrator.py
class IntegratorAgent (line 35) | class IntegratorAgent(BaseAgent):
method __init__ (line 40) | def __init__(self, llm: Any) -> None:
method execute (line 47) | def execute(self, context: dict[str, Any]) -> AgentStepResult:
method _build_manifest (line 107) | def _build_manifest(
method _section_order (line 153) | def _section_order(section: str) -> int:
method _generate_markdown_refs (line 174) | def _generate_markdown_refs(
method _generate_descriptions (line 194) | def _generate_descriptions(
FILE: researchclaw/agents/figure_agent/nano_banana.py
class NanoBananaAgent (line 61) | class NanoBananaAgent(BaseAgent):
method __init__ (line 70) | def __init__(
method execute (line 110) | def execute(self, context: dict[str, Any]) -> AgentStepResult:
method _build_prompt (line 219) | def _build_prompt(
method _get_type_guidelines (line 243) | def _get_type_guidelines(figure_type: str) -> str:
method _generate_image (line 305) | def _generate_image(
method _generate_via_sdk (line 318) | def _generate_via_sdk(
method _generate_via_rest (line 358) | def _generate_via_rest(
FILE: researchclaw/agents/figure_agent/orchestrator.py
class FigureAgentConfig (line 39) | class FigureAgentConfig:
class FigurePlan (line 70) | class FigurePlan:
method to_dict (line 97) | def to_dict(self) -> dict[str, Any]:
method get_chart_files (line 112) | def get_chart_files(self) -> list[str]:
class FigureOrchestrator (line 126) | class FigureOrchestrator(AgentOrchestrator):
method __init__ (line 129) | def __init__(
method _save_artifact (line 182) | def _save_artifact(self, name: str, data: Any) -> None:
method orchestrate (line 196) | def orchestrate(self, context: dict[str, Any]) -> FigurePlan:
method _run_code_pipeline (line 309) | def _run_code_pipeline(
method _run_nano_banana (line 449) | def _run_nano_banana(
FILE: researchclaw/agents/figure_agent/planner.py
class PlannerAgent (line 85) | class PlannerAgent(BaseAgent):
method __init__ (line 90) | def __init__(
method execute (line 105) | def execute(self, context: dict[str, Any]) -> AgentStepResult:
method _detect_domain (line 156) | def _detect_domain(self, topic: str) -> str:
method _analyze_data (line 172) | def _analyze_data(
method _generate_plan (line 233) | def _generate_plan(
method _fallback_plan (line 325) | def _fallback_plan(
method _augment_plan (line 407) | def _augment_plan(
FILE: researchclaw/agents/figure_agent/renderer.py
function _docker_available (line 38) | def _docker_available() -> bool:
class RendererAgent (line 52) | class RendererAgent(BaseAgent):
method __init__ (line 67) | def __init__(
method execute (line 103) | def execute(self, context: dict[str, Any]) -> AgentStepResult:
method _render_one (line 157) | def _render_one(
method _execute_local (line 256) | def _execute_local(
method _execute_in_docker (line 284) | def _execute_in_docker(
FILE: researchclaw/agents/figure_agent/style_config.py
function get_font_sizes (line 49) | def get_font_sizes(width_key: str = "single_column") -> dict[str, int]:
function get_style_preamble (line 166) | def get_style_preamble(
FILE: researchclaw/assessor/comparator.py
class HistoryComparator (line 14) | class HistoryComparator:
method __init__ (line 17) | def __init__(self, history_dir: Path | None = None):
method _load_history (line 23) | def _load_history(self) -> None:
method record (line 37) | def record(
method _save_history (line 54) | def _save_history(self) -> None:
method compare (line 64) | def compare(
method get_best_run (line 114) | def get_best_run(self) -> dict[str, Any] | None:
method get_history (line 120) | def get_history(self) -> list[dict[str, Any]]:
FILE: researchclaw/assessor/rubrics.py
class Rubric (line 9) | class Rubric:
FILE: researchclaw/assessor/scorer.py
class PaperScorer (line 15) | class PaperScorer:
method __init__ (line 18) | def __init__(
method score (line 26) | async def score(
method _score_dimension (line 69) | async def _score_dimension(
method _parse_score_response (line 102) | def _parse_score_response(
method _heuristic_score (line 120) | def _heuristic_score(
FILE: researchclaw/assessor/venue_recommender.py
class VenueRecommender (line 8) | class VenueRecommender:
method recommend (line 55) | def recommend(
method _get_suggestion (line 96) | def _get_suggestion(venue: str, scores: dict[str, Any]) -> str:
method format_recommendations (line 111) | def format_recommendations(
FILE: researchclaw/calendar/deadlines.py
class Conference (line 19) | class Conference:
method from_dict (line 34) | def from_dict(cls, data: dict[str, Any]) -> Conference:
method next_deadline (line 57) | def next_deadline(self) -> date | None:
method days_until_deadline (line 68) | def days_until_deadline(self) -> int | None:
class ConferenceCalendar (line 76) | class ConferenceCalendar:
method __init__ (line 79) | def __init__(self, conferences: list[Conference] | None = None):
method load_builtin (line 83) | def load_builtin(cls) -> ConferenceCalendar:
method load (line 92) | def load(cls, path: Path | str) -> ConferenceCalendar:
method conferences (line 107) | def conferences(self) -> list[Conference]:
method get_upcoming (line 110) | def get_upcoming(
method get_by_name (line 134) | def get_by_name(self, name: str) -> Conference | None:
method get_by_domain (line 142) | def get_by_domain(self, domain: str) -> list[Conference]:
method format_upcoming (line 146) | def format_upcoming(
FILE: researchclaw/calendar/planner.py
class SubmissionPlanner (line 11) | class SubmissionPlanner:
method __init__ (line 26) | def __init__(self, calendar: ConferenceCalendar):
method plan (line 29) | def plan(
method format_plan (line 74) | def format_plan(
FILE: researchclaw/calendar/reminder.py
class Reminder (line 13) | class Reminder:
class ReminderCalculator (line 23) | class ReminderCalculator:
method __init__ (line 26) | def __init__(
method check (line 32) | def check(
method get_active_reminders (line 64) | def get_active_reminders(
method _classify_urgency (line 96) | def _classify_urgency(days_until: int) -> str:
method format_reminders (line 103) | def format_reminders(self, reminders: list[Reminder]) -> str:
FILE: researchclaw/cli.py
function _is_opencode_installed (line 29) | def _is_opencode_installed() -> bool:
function _is_npm_installed (line 44) | def _is_npm_installed() -> bool:
function _install_opencode (line 49) | def _install_opencode() -> bool:
function _prompt_opencode_install (line 78) | def _prompt_opencode_install() -> bool:
function _resolve_config_or_exit (line 128) | def _resolve_config_or_exit(args: argparse.Namespace) -> Path | None:
function _generate_run_id (line 149) | def _generate_run_id(topic: str) -> str:
function cmd_run (line 155) | def cmd_run(args: argparse.Namespace) -> int:
function cmd_validate (line 307) | def cmd_validate(args: argparse.Namespace) -> int:
function cmd_doctor (line 343) | def cmd_doctor(args: argparse.Namespace) -> int:
function cmd_project (line 356) | def cmd_project(args: argparse.Namespace) -> int:
function cmd_mcp (line 404) | def cmd_mcp(args: argparse.Namespace) -> int:
function cmd_overleaf (line 426) | def cmd_overleaf(args: argparse.Namespace) -> int:
function cmd_serve (line 464) | def cmd_serve(args: argparse.Namespace) -> int:
function cmd_dashboard (line 490) | def cmd_dashboard(args: argparse.Namespace) -> int:
function cmd_wizard (line 516) | def cmd_wizard(args: argparse.Namespace) -> int:
function cmd_init (line 560) | def cmd_init(args: argparse.Namespace) -> int:
function cmd_setup (line 656) | def cmd_setup(args: argparse.Namespace) -> int:
function cmd_report (line 699) | def cmd_report(args: argparse.Namespace) -> int:
function cmd_trends (line 721) | def cmd_trends(args: argparse.Namespace) -> int:
function cmd_calendar (line 775) | def cmd_calendar(args: argparse.Namespace) -> int:
function main (line 796) | def main(argv: list[str] | None = None) -> int:
FILE: researchclaw/collaboration/dedup.py
function content_hash (line 12) | def content_hash(content: Any) -> str:
function deduplicate_artifacts (line 30) | def deduplicate_artifacts(
FILE: researchclaw/collaboration/publisher.py
class ArtifactPublisher (line 15) | class ArtifactPublisher:
method __init__ (line 22) | def __init__(self, repository: ResearchRepository) -> None:
method publish_from_run_dir (line 25) | def publish_from_run_dir(
method _extract_literature (line 67) | def _extract_literature(self, run_dir: Path) -> Any:
method _extract_experiments (line 76) | def _extract_experiments(self, run_dir: Path) -> Any:
method _extract_code (line 87) | def _extract_code(self, run_dir: Path) -> Any:
method _extract_review (line 95) | def _extract_review(self, run_dir: Path) -> Any:
FILE: researchclaw/collaboration/repository.py
class ResearchRepository (line 21) | class ResearchRepository:
method __init__ (line 28) | def __init__(self, repo_dir: str | Path = ".researchclaw/shared") -> N...
method repo_dir (line 32) | def repo_dir(self) -> Path:
method publish (line 36) | def publish(self, run_id: str, artifacts: dict[str, Any]) -> int:
method search (line 75) | def search(
method list_runs (line 122) | def list_runs(self) -> list[str]:
method get_run_artifacts (line 135) | def get_run_artifacts(self, run_id: str) -> dict[str, Any]:
method import_literature (line 158) | def import_literature(self, source_run_id: str) -> list[dict[str, Any]]:
method import_code_template (line 176) | def import_code_template(
FILE: researchclaw/collaboration/subscriber.py
class ArtifactSubscriber (line 13) | class ArtifactSubscriber:
method __init__ (line 20) | def __init__(self, repository: ResearchRepository) -> None:
method find_relevant_literature (line 23) | def find_relevant_literature(
method find_similar_experiments (line 41) | def find_similar_experiments(
method find_code_templates (line 59) | def find_code_templates(
method import_best_practices (line 77) | def import_best_practices(
FILE: researchclaw/config.py
function _safe_int (line 17) | def _safe_int(val: Any, default: int) -> int:
function _validate_network_policy (line 30) | def _validate_network_policy(val: object, default: str = "setup_only") -...
function _safe_float (line 42) | def _safe_float(val: Any, default: float) -> float:
function resolve_config_path (line 60) | def resolve_config_path(explicit: str | None) -> Path | None:
function _get_by_path (line 94) | def _get_by_path(data: dict[str, Any], dotted_key: str) -> Any:
function _is_blank (line 103) | def _is_blank(value: Any) -> bool:
class ValidationResult (line 108) | class ValidationResult:
class ProjectConfig (line 115) | class ProjectConfig:
class ResearchConfig (line 121) | class ResearchConfig:
class RuntimeConfig (line 130) | class RuntimeConfig:
class NotificationsConfig (line 138) | class NotificationsConfig:
class KnowledgeBaseConfig (line 147) | class KnowledgeBaseConfig:
class OpenClawBridgeConfig (line 154) | class OpenClawBridgeConfig:
class AcpConfig (line 164) | class AcpConfig:
class LlmConfig (line 175) | class LlmConfig:
class SecurityConfig (line 188) | class SecurityConfig:
class SandboxConfig (line 195) | class SandboxConfig:
class SshRemoteConfig (line 211) | class SshRemoteConfig:
class ColabDriveConfig (line 231) | class ColabDriveConfig:
class DockerSandboxConfig (line 241) | class DockerSandboxConfig:
class AgenticConfig (line 257) | class AgenticConfig:
class CodeAgentConfig (line 278) | class CodeAgentConfig:
class OpenCodeConfig (line 302) | class OpenCodeConfig:
class BenchmarkAgentConfig (line 318) | class BenchmarkAgentConfig:
class FigureAgentConfig (line 339) | class FigureAgentConfig:
class ExperimentRepairConfig (line 365) | class ExperimentRepairConfig:
class CliAgentConfig (line 385) | class CliAgentConfig:
class ExperimentConfig (line 405) | class ExperimentConfig:
class MetaClawPRMConfig (line 427) | class MetaClawPRMConfig:
class MetaClawLessonToSkillConfig (line 441) | class MetaClawLessonToSkillConfig:
class MetaClawBridgeConfig (line 450) | class MetaClawBridgeConfig:
class WebSearchConfig (line 465) | class WebSearchConfig:
class ExportConfig (line 480) | class ExportConfig:
class PromptsConfig (line 488) | class PromptsConfig:
class MemoryConfig (line 498) | class MemoryConfig:
class SkillsConfig (line 511) | class SkillsConfig:
class KnowledgeGraphConfig (line 524) | class KnowledgeGraphConfig:
class ServerConfig (line 537) | class ServerConfig:
class DashboardConfig (line 551) | class DashboardConfig:
class MultiProjectConfig (line 564) | class MultiProjectConfig:
class ServerEntryConfig (line 574) | class ServerEntryConfig:
class ServersConfig (line 589) | class ServersConfig:
class MCPIntegrationConfig (line 600) | class MCPIntegrationConfig:
class OverleafConfig (line 610) | class OverleafConfig:
class TrendsConfig (line 626) | class TrendsConfig:
class CoPilotConfig (line 639) | class CoPilotConfig:
class QualityAssessorConfig (line 651) | class QualityAssessorConfig:
class CalendarConfig (line 663) | class CalendarConfig:
class RCConfig (line 673) | class RCConfig:
method to_dict (line 707) | def to_dict(self) -> dict[str, Any]:
method from_dict (line 711) | def from_dict(
method load (line 835) | def load(
function validate_config (line 858) | def validate_config(
function _parse_llm_config (line 923) | def _parse_llm_config(data: dict[str, Any]) -> LlmConfig:
function _parse_agentic_config (line 944) | def _parse_agentic_config(data: dict[str, Any]) -> AgenticConfig:
function _parse_experiment_config (line 963) | def _parse_experiment_config(data: dict[str, Any]) -> ExperimentConfig:
function _parse_benchmark_agent_config (line 1040) | def _parse_benchmark_agent_config(data: dict[str, Any]) -> BenchmarkAgen...
function _parse_figure_agent_config (line 1058) | def _parse_figure_agent_config(data: dict[str, Any]) -> FigureAgentConfig:
function _parse_experiment_repair_config (line 1081) | def _parse_experiment_repair_config(data: dict[str, Any]) -> ExperimentR...
function _parse_cli_agent_config (line 1094) | def _parse_cli_agent_config(data: dict[str, Any]) -> CliAgentConfig:
function _parse_code_agent_config (line 1108) | def _parse_code_agent_config(data: dict[str, Any]) -> CodeAgentConfig:
function _parse_opencode_config (line 1129) | def _parse_opencode_config(data: dict[str, Any]) -> OpenCodeConfig:
function _parse_metaclaw_bridge_config (line 1143) | def _parse_metaclaw_bridge_config(data: dict[str, Any]) -> MetaClawBridg...
function _parse_memory_config (line 1172) | def _parse_memory_config(data: dict[str, Any]) -> MemoryConfig:
function _parse_skills_config (line 1187) | def _parse_skills_config(data: dict[str, Any]) -> SkillsConfig:
function _parse_knowledge_graph_config (line 1201) | def _parse_knowledge_graph_config(data: dict[str, Any]) -> KnowledgeGrap...
function _parse_multi_project_config (line 1212) | def _parse_multi_project_config(data: dict[str, Any]) -> MultiProjectCon...
function _parse_servers_config (line 1223) | def _parse_servers_config(data: dict[str, Any]) -> ServersConfig:
function _parse_mcp_config (line 1250) | def _parse_mcp_config(data: dict[str, Any]) -> MCPIntegrationConfig:
function _parse_overleaf_config (line 1261) | def _parse_overleaf_config(data: dict[str, Any]) -> OverleafConfig:
function _parse_server_config (line 1274) | def _parse_server_config(data: dict[str, Any]) -> ServerConfig:
function _parse_dashboard_config (line 1296) | def _parse_dashboard_config(data: dict[str, Any]) -> DashboardConfig:
function _parse_trends_config (line 1309) | def _parse_trends_config(data: dict[str, Any]) -> TrendsConfig:
function _parse_copilot_config (line 1329) | def _parse_copilot_config(data: dict[str, Any]) -> CoPilotConfig:
function _parse_quality_assessor_config (line 1342) | def _parse_quality_assessor_config(data: dict[str, Any]) -> QualityAsses...
function _parse_calendar_config (line 1356) | def _parse_calendar_config(data: dict[str, Any]) -> CalendarConfig:
function load_config (line 1373) | def load_config(
FILE: researchclaw/copilot/branching.py
class BranchManager (line 15) | class BranchManager:
method __init__ (line 18) | def __init__(self, run_dir: Path, max_branches: int = 3):
method create_branch (line 23) | def create_branch(
method list_branches (line 62) | def list_branches(self) -> list[dict[str, Any]]:
method switch_branch (line 91) | def switch_branch(self, name: str) -> Path:
method delete_branch (line 98) | def delete_branch(self, name: str) -> None:
method compare_branches (line 106) | def compare_branches(
method _count_stages (line 138) | def _count_stages(branch_dir: Path) -> int:
method _read_experiment_summary (line 147) | def _read_experiment_summary(
FILE: researchclaw/copilot/controller.py
class CoPilotController (line 18) | class CoPilotController:
method __init__ (line 21) | def __init__(
method should_pause (line 34) | def should_pause(self, stage_num: int, is_gate: bool) -> bool:
method present_stage_result (line 45) | def present_stage_result(
method request_feedback (line 72) | def request_feedback(
method handle_feedback (line 99) | def handle_feedback(
method from_config (line 149) | def from_config(
FILE: researchclaw/copilot/feedback.py
class Feedback (line 27) | class Feedback:
class FeedbackHandler (line 39) | class FeedbackHandler:
method __init__ (line 42) | def __init__(self, run_dir: Path):
method write_feedback_request (line 45) | def write_feedback_request(
method read_feedback_response (line 69) | def read_feedback_response(self) -> Feedback | None:
method wait_for_feedback (line 97) | def wait_for_feedback(
method clear_request (line 123) | def clear_request(self) -> None:
FILE: researchclaw/copilot/modes.py
class ResearchMode (line 8) | class ResearchMode(Enum):
FILE: researchclaw/dashboard/broadcaster.py
class DashboardBroadcaster (line 16) | class DashboardBroadcaster:
method __init__ (line 19) | def __init__(
method tick (line 28) | async def tick(self) -> None:
function start_dashboard_loop (line 70) | async def start_dashboard_loop(
FILE: researchclaw/dashboard/collector.py
class RunSnapshot (line 16) | class RunSnapshot:
method to_dict (line 34) | def to_dict(self) -> dict[str, Any]:
class DashboardCollector (line 52) | class DashboardCollector:
method __init__ (line 55) | def __init__(
method collect_all (line 63) | def collect_all(self) -> list[RunSnapshot]:
method collect_run (line 78) | def collect_run(self, run_dir: str | Path) -> RunSnapshot:
method _collect_run (line 82) | def _collect_run(self, run_dir: Path) -> RunSnapshot:
FILE: researchclaw/dashboard/metrics.py
function aggregate_metrics (line 8) | def aggregate_metrics(runs: list[dict[str, Any]]) -> dict[str, Any]:
function extract_training_curve (line 28) | def extract_training_curve(metrics: dict[str, Any]) -> list[dict[str, fl...
FILE: researchclaw/data/__init__.py
function detect_frameworks (line 47) | def detect_frameworks(topic: str, hypothesis: str = "", plan: str = "") ...
function load_framework_docs (line 62) | def load_framework_docs(framework_ids: list[str], max_chars: int = 8000)...
function _load_all (line 102) | def _load_all() -> list[dict[str, Any]]:
function load_seminal_papers (line 116) | def load_seminal_papers(topic: str) -> list[dict[str, Any]]:
FILE: researchclaw/domains/adapters/biology.py
class BiologyPromptAdapter (line 14) | class BiologyPromptAdapter(PromptAdapter):
method get_code_generation_blocks (line 17) | def get_code_generation_blocks(self, context: dict[str, Any]) -> Promp...
method get_experiment_design_blocks (line 46) | def get_experiment_design_blocks(self, context: dict[str, Any]) -> Pro...
method get_result_analysis_blocks (line 66) | def get_result_analysis_blocks(self, context: dict[str, Any]) -> Promp...
method _default_hints (line 77) | def _default_hints(self) -> str:
FILE: researchclaw/domains/adapters/chemistry.py
class ChemistryPromptAdapter (line 10) | class ChemistryPromptAdapter(PromptAdapter):
method get_code_generation_blocks (line 13) | def get_code_generation_blocks(self, context: dict[str, Any]) -> Promp...
method get_experiment_design_blocks (line 37) | def get_experiment_design_blocks(self, context: dict[str, Any]) -> Pro...
method get_result_analysis_blocks (line 54) | def get_result_analysis_blocks(self, context: dict[str, Any]) -> Promp...
method _default_hints (line 64) | def _default_hints(self) -> str:
FILE: researchclaw/domains/adapters/economics.py
class EconomicsPromptAdapter (line 14) | class EconomicsPromptAdapter(PromptAdapter):
method get_code_generation_blocks (line 17) | def get_code_generation_blocks(self, context: dict[str, Any]) -> Promp...
method get_experiment_design_blocks (line 42) | def get_experiment_design_blocks(self, context: dict[str, Any]) -> Pro...
method get_result_analysis_blocks (line 65) | def get_result_analysis_blocks(self, context: dict[str, Any]) -> Promp...
method _default_hints (line 80) | def _default_hints(self) -> str:
method _output_format (line 94) | def _output_format(self) -> str:
FILE: researchclaw/domains/adapters/math.py
class MathPromptAdapter (line 10) | class MathPromptAdapter(PromptAdapter):
method get_code_generation_blocks (line 13) | def get_code_generation_blocks(self, context: dict[str, Any]) -> Promp...
method get_experiment_design_blocks (line 34) | def get_experiment_design_blocks(self, context: dict[str, Any]) -> Pro...
method get_result_analysis_blocks (line 46) | def get_result_analysis_blocks(self, context: dict[str, Any]) -> Promp...
method _hints (line 56) | def _hints(self, paradigm: str) -> str:
method _output_format (line 75) | def _output_format(self, paradigm: str) -> str:
FILE: researchclaw/domains/adapters/ml.py
class MLPromptAdapter (line 15) | class MLPromptAdapter(PromptAdapter):
method get_code_generation_blocks (line 18) | def get_code_generation_blocks(self, context: dict[str, Any]) -> Promp...
method get_experiment_design_blocks (line 21) | def get_experiment_design_blocks(self, context: dict[str, Any]) -> Pro...
method get_result_analysis_blocks (line 24) | def get_result_analysis_blocks(self, context: dict[str, Any]) -> Promp...
FILE: researchclaw/domains/adapters/neuroscience.py
class NeurosciencePromptAdapter (line 15) | class NeurosciencePromptAdapter(PromptAdapter):
method get_code_generation_blocks (line 18) | def get_code_generation_blocks(self, context: dict[str, Any]) -> Promp...
method get_experiment_design_blocks (line 39) | def get_experiment_design_blocks(self, context: dict[str, Any]) -> Pro...
method get_result_analysis_blocks (line 75) | def get_result_analysis_blocks(self, context: dict[str, Any]) -> Promp...
method _default_code_hints (line 98) | def _default_code_hints(self, paradigm: str) -> str:
method _default_compute_budget (line 120) | def _default_compute_budget(self) -> str:
method _default_dataset_guidance (line 129) | def _default_dataset_guidance(self) -> str:
method _default_hp_reporting (line 139) | def _default_hp_reporting(self) -> str:
method _output_format (line 147) | def _output_format(self, paradigm: str) -> str:
FILE: researchclaw/domains/adapters/physics.py
class PhysicsPromptAdapter (line 14) | class PhysicsPromptAdapter(PromptAdapter):
method get_code_generation_blocks (line 17) | def get_code_generation_blocks(self, context: dict[str, Any]) -> Promp...
method get_experiment_design_blocks (line 32) | def get_experiment_design_blocks(self, context: dict[str, Any]) -> Pro...
method get_result_analysis_blocks (line 55) | def get_result_analysis_blocks(self, context: dict[str, Any]) -> Promp...
method _default_code_hints (line 67) | def _default_code_hints(self, paradigm: str) -> str:
method _default_compute_budget (line 87) | def _default_compute_budget(self) -> str:
method _default_dataset_guidance (line 96) | def _default_dataset_guidance(self) -> str:
method _default_hp_reporting (line 105) | def _default_hp_reporting(self) -> str:
method _output_format (line 112) | def _output_format(self, paradigm: str) -> str:
FILE: researchclaw/domains/adapters/robotics.py
class RoboticsPromptAdapter (line 14) | class RoboticsPromptAdapter(PromptAdapter):
method get_code_generation_blocks (line 17) | def get_code_generation_blocks(self, context: dict[str, Any]) -> Promp...
method get_experiment_design_blocks (line 37) | def get_experiment_design_blocks(self, context: dict[str, Any]) -> Pro...
method get_result_analysis_blocks (line 73) | def get_result_analysis_blocks(self, context: dict[str, Any]) -> Promp...
method _default_code_hints (line 95) | def _default_code_hints(self) -> str:
method _default_compute_budget (line 108) | def _default_compute_budget(self) -> str:
method _default_dataset_guidance (line 117) | def _default_dataset_guidance(self) -> str:
method _default_hp_reporting (line 126) | def _default_hp_reporting(self) -> str:
method _output_format (line 134) | def _output_format(self) -> str:
FILE: researchclaw/domains/adapters/security.py
class SecurityPromptAdapter (line 10) | class SecurityPromptAdapter(PromptAdapter):
method get_code_generation_blocks (line 13) | def get_code_generation_blocks(self, context: dict[str, Any]) -> Promp...
method get_experiment_design_blocks (line 36) | def get_experiment_design_blocks(self, context: dict[str, Any]) -> Pro...
method get_result_analysis_blocks (line 45) | def get_result_analysis_blocks(self, context: dict[str, Any]) -> Promp...
FILE: researchclaw/domains/detector.py
class ExperimentParadigm (line 35) | class ExperimentParadigm(str, Enum):
class MetricType (line 45) | class MetricType(str, Enum):
class DomainProfile (line 61) | class DomainProfile:
function _load_profile (line 121) | def _load_profile(path: Path) -> DomainProfile:
function load_all_profiles (line 154) | def load_all_profiles() -> dict[str, DomainProfile]:
function get_profile (line 175) | def get_profile(domain_id: str) -> DomainProfile | None:
function get_generic_profile (line 181) | def get_generic_profile() -> DomainProfile:
function _keyword_detect (line 295) | def _keyword_detect(text: str) -> str | None:
function _llm_detect (line 345) | def _llm_detect(
function detect_domain (line 385) | def detect_domain(
function detect_domain_async (line 448) | async def detect_domain_async(
function detect_domain_id (line 485) | def detect_domain_id(topic: str, hypotheses: str = "", literature: str =...
function is_ml_domain (line 494) | def is_ml_domain(domain: DomainProfile) -> bool:
FILE: researchclaw/domains/experiment_schema.py
class ConditionRole (line 17) | class ConditionRole(str, Enum):
class ExperimentType (line 24) | class ExperimentType(str, Enum):
class Condition (line 33) | class Condition:
class MetricSpec (line 44) | class MetricSpec:
class EvaluationSpec (line 53) | class EvaluationSpec:
class UniversalExperimentPlan (line 63) | class UniversalExperimentPlan:
method references (line 92) | def references(self) -> list[Condition]:
method proposed (line 97) | def proposed(self) -> list[Condition]:
method variants (line 102) | def variants(self) -> list[Condition]:
method to_legacy_format (line 106) | def to_legacy_format(self) -> dict[str, Any]:
method to_yaml (line 141) | def to_yaml(self) -> str:
function from_legacy_exp_plan (line 179) | def from_legacy_exp_plan(
FILE: researchclaw/domains/prompt_adapter.py
class PromptBlocks (line 27) | class PromptBlocks:
class PromptAdapter (line 44) | class PromptAdapter(ABC):
method __init__ (line 52) | def __init__(self, domain: DomainProfile) -> None:
method get_code_generation_blocks (line 56) | def get_code_generation_blocks(self, context: dict[str, Any]) -> Promp...
method get_experiment_design_blocks (line 60) | def get_experiment_design_blocks(self, context: dict[str, Any]) -> Pro...
method get_result_analysis_blocks (line 64) | def get_result_analysis_blocks(self, context: dict[str, Any]) -> Promp...
method get_blueprint_context (line 67) | def get_blueprint_context(self) -> str:
method get_condition_terminology (line 87) | def get_condition_terminology(self) -> dict[str, str]:
class MLPromptAdapter (line 97) | class MLPromptAdapter(PromptAdapter):
method get_code_generation_blocks (line 102) | def get_code_generation_blocks(self, context: dict[str, Any]) -> Promp...
method get_experiment_design_blocks (line 106) | def get_experiment_design_blocks(self, context: dict[str, Any]) -> Pro...
method get_result_analysis_blocks (line 109) | def get_result_analysis_blocks(self, context: dict[str, Any]) -> Promp...
class GenericPromptAdapter (line 118) | class GenericPromptAdapter(PromptAdapter):
method get_code_generation_blocks (line 125) | def get_code_generation_blocks(self, context: dict[str, Any]) -> Promp...
method get_experiment_design_blocks (line 142) | def get_experiment_design_blocks(self, context: dict[str, Any]) -> Pro...
method get_result_analysis_blocks (line 167) | def get_result_analysis_blocks(self, context: dict[str, Any]) -> Promp...
method _default_code_hints (line 184) | def _default_code_hints(self, paradigm: str, libs: str) -> str:
method _default_dataset_guidance (line 217) | def _default_dataset_guidance(self, paradigm: str) -> str:
method _default_hp_guidance (line 226) | def _default_hp_guidance(self) -> str:
method _output_format_guidance (line 232) | def _output_format_guidance(self) -> str:
function _build_adapter_registry (line 256) | def _build_adapter_registry() -> dict[str, type[PromptAdapter]]:
function register_adapter (line 308) | def register_adapter(domain_prefix: str, adapter_cls: type[PromptAdapter...
function get_adapter (line 313) | def get_adapter(domain: DomainProfile) -> PromptAdapter:
FILE: researchclaw/evolution.py
class LessonCategory (line 40) | class LessonCategory(str, Enum):
class LessonEntry (line 52) | class LessonEntry:
method to_dict (line 63) | def to_dict(self) -> dict[str, object]:
method from_dict (line 67) | def from_dict(cls, data: dict[str, object]) -> LessonEntry:
function _classify_error (line 106) | def _classify_error(stage_name: str, error_text: str) -> str:
function extract_lessons (line 136) | def extract_lessons(
function _extract_decision_rationale (line 210) | def _extract_decision_rationale(run_dir: Path) -> str:
function _parse_justification_from_excerpt (line 237) | def _parse_justification_from_excerpt(text: str) -> str:
function _extract_runtime_lessons (line 261) | def _extract_runtime_lessons(
function _time_weight (line 326) | def _time_weight(timestamp_iso: str) -> float:
class EvolutionStore (line 350) | class EvolutionStore:
method __init__ (line 353) | def __init__(self, store_dir: Path) -> None:
method lessons_path (line 359) | def lessons_path(self) -> Path:
method append (line 362) | def append(self, lesson: LessonEntry) -> None:
method append_many (line 367) | def append_many(self, lessons: list[LessonEntry]) -> None:
method load_all (line 376) | def load_all(self) -> list[LessonEntry]:
method query_for_stage (line 392) | def query_for_stage(
method build_overlay (line 416) | def build_overlay(
method count (line 476) | def count(self) -> int:
method export_to_memory (line 480) | def export_to_memory(self, memory_store: object) -> int:
method get_lessons_for_stage_with_memory (line 523) | def get_lessons_for_stage_with_memory(
FILE: researchclaw/experiment/agentic_sandbox.py
function _next_container_name (line 29) | def _next_container_name() -> str:
class AgenticResult (line 37) | class AgenticResult:
class AgenticSandbox (line 51) | class AgenticSandbox:
method __init__ (line 54) | def __init__(
method run_agent_session (line 68) | def run_agent_session(
method to_sandbox_result (line 165) | def to_sandbox_result(self, result: AgenticResult) -> SandboxResult:
method _start_container (line 178) | def _start_container(self, container: str, workspace: Path) -> None:
method _docker_exec (line 205) | def _docker_exec(
method _build_agent_command (line 222) | def _build_agent_command(self, prompt: str) -> str:
method _cleanup_container (line 245) | def _cleanup_container(self, container: str) -> None:
method _collect_outputs (line 267) | def _collect_outputs(workspace: Path) -> tuple[list[str], list[str]]:
method _parse_result_metrics (line 282) | def _parse_result_metrics(
method _count_agent_steps (line 311) | def _count_agent_steps(stdout: str) -> int:
method check_docker_available (line 336) | def check_docker_available() -> bool:
FILE: researchclaw/experiment/code_agent.py
class CodeAgentResult (line 40) | class CodeAgentResult:
method ok (line 50) | def ok(self) -> bool:
class CodeAgentProvider (line 58) | class CodeAgentProvider(Protocol):
method name (line 62) | def name(self) -> str: ...
method generate (line 64) | def generate(
method refine (line 79) | def refine(
method repair (line 94) | def repair(
function _to_text (line 110) | def _to_text(value: str | bytes | None) -> str:
function _collect_py_files (line 118) | def _collect_py_files(workdir: Path) -> dict[str, str]:
function _seed_workdir (line 128) | def _seed_workdir(workdir: Path, files: dict[str, str]) -> None:
function format_feedback_for_agent (line 135) | def format_feedback_for_agent(
class LlmCodeAgent (line 162) | class LlmCodeAgent:
method __init__ (line 170) | def __init__(
method name (line 181) | def name(self) -> str:
method generate (line 184) | def generate(
method refine (line 265) | def refine(
method repair (line 332) | def repair(
class _CliAgentBase (line 386) | class _CliAgentBase:
method __init__ (line 391) | def __init__(
method name (line 406) | def name(self) -> str:
method _run_subprocess (line 409) | def _run_subprocess(
method _build_result (line 457) | def _build_result(
method _generate_prompt (line 482) | def _generate_prompt(
method _refine_prompt (line 509) | def _refine_prompt(
method _repair_prompt (line 538) | def _repair_prompt(
class ClaudeCodeAgent (line 561) | class ClaudeCodeAgent(_CliAgentBase):
method _build_cmd (line 566) | def _build_cmd(self, prompt: str, workdir: Path) -> list[str]:
method generate (line 582) | def generate(
method refine (line 603) | def refine(
method repair (line 626) | def repair(
class CodexAgent (line 647) | class CodexAgent(_CliAgentBase):
method _build_cmd (line 652) | def _build_cmd(self, prompt: str, workdir: Path) -> list[str]:
method generate (line 665) | def generate(
method refine (line 686) | def refine(
method repair (line 709) | def repair(
function create_code_agent (line 730) | def create_code_agent(
FILE: researchclaw/experiment/colab_sandbox.py
class ColabDriveSandbox (line 120) | class ColabDriveSandbox:
method __init__ (line 126) | def __init__(self, config: ColabDriveConfig, workdir: Path) -> None:
method run (line 141) | def run(self, code: str, *, timeout_sec: int = 300) -> SandboxResult:
method run_project (line 155) | def run_project(
method check_drive_available (line 213) | def check_drive_available(config: ColabDriveConfig) -> tuple[bool, str]:
method write_worker_notebook (line 230) | def write_worker_notebook(output_path: Path) -> None:
method _inject_harness (line 236) | def _inject_harness(target_dir: Path) -> None:
method _write_setup_script (line 244) | def _write_setup_script(self, staging: Path) -> None:
method _submit_and_wait (line 257) | def _submit_and_wait(
method _collect_result (line 311) | def _collect_result(
FILE: researchclaw/experiment/docker_sandbox.py
function _next_container_name (line 39) | def _next_container_name() -> str:
class DockerSandbox (line 92) | class DockerSandbox:
method __init__ (line 110) | def __init__(self, config: DockerSandboxConfig, workdir: Path) -> None:
method run (line 120) | def run(self, code: str, *, timeout_sec: int = 300) -> SandboxResult:
method run_project (line 134) | def run_project(
method check_docker_available (line 198) | def check_docker_available() -> bool:
method check_nvidia_runtime (line 212) | def check_nvidia_runtime() -> bool:
method ensure_image (line 228) | def ensure_image(image: str) -> bool:
method _inject_harness (line 242) | def _inject_harness(target_dir: Path) -> None:
method _execute (line 255) | def _execute(
method _build_run_command (line 345) | def _build_run_command(
method _write_requirements_txt (line 452) | def _write_requirements_txt(self, staging_dir: Path) -> None:
method _detect_pip_packages (line 501) | def _detect_pip_packages(staging_dir: Path) -> list[str]:
method _kill_container (line 529) | def _kill_container(name: str) -> None:
method _remove_container (line 541) | def _remove_container(name: str) -> None:
FILE: researchclaw/experiment/evaluators/convergence.py
class ConvergenceResult (line 20) | class ConvergenceResult:
class ConvergenceReport (line 32) | class ConvergenceReport:
function compute_convergence_order (line 39) | def compute_convergence_order(
function analyze_convergence (line 97) | def analyze_convergence(
FILE: researchclaw/experiment/factory.py
function create_sandbox (line 18) | def create_sandbox(config: ExperimentConfig, workdir: Path) -> SandboxPr...
function create_agentic_sandbox (line 93) | def create_agentic_sandbox(
FILE: researchclaw/experiment/git_manager.py
class ExperimentGitManager (line 14) | class ExperimentGitManager:
method __init__ (line 21) | def __init__(self, repo_dir: Path) -> None:
method create_experiment_branch (line 26) | def create_experiment_branch(self, tag: str) -> str:
method commit_experiment (line 35) | def commit_experiment(
method discard_experiment (line 57) | def discard_experiment(self, run_id: str, reason: str) -> bool:
method get_experiment_history (line 65) | def get_experiment_history(self) -> list[dict[str, str]]:
method is_git_repo (line 78) | def is_git_repo(self) -> bool:
method get_current_branch (line 83) | def get_current_branch(self) -> str:
method return_to_original_branch (line 88) | def return_to_original_branch(self) -> bool:
method get_experiment_diff (line 99) | def get_experiment_diff(self) -> str:
method clean_untracked (line 106) | def clean_untracked(self) -> bool:
method _run_git (line 111) | def _run_git(self, args: list[str]) -> subprocess.CompletedProcess[str...
method _format_commit_message (line 126) | def _format_commit_message(
method _clean_output (line 133) | def _clean_output(output: str) -> str:
method _parse_experiment_log_line (line 137) | def _parse_experiment_log_line(line: str) -> dict[str, str] | None:
method _log_git_failure (line 146) | def _log_git_failure(
method _detect_current_branch (line 160) | def _detect_current_branch(self) -> str | None:
FILE: researchclaw/experiment/harness_template.py
class ExperimentHarness (line 20) | class ExperimentHarness:
method __init__ (line 23) | def __init__(self, time_budget: int = 120):
method elapsed (line 32) | def elapsed(self) -> float:
method progress (line 37) | def progress(self) -> float:
method should_stop (line 41) | def should_stop(self) -> bool:
method check_value (line 45) | def check_value(self, value: float, name: str = "metric") -> bool:
method report_metric (line 64) | def report_metric(self, name: str, value: float) -> None:
method log_result (line 83) | def log_result(self, result_dict: dict[str, object]) -> None:
method finalize (line 87) | def finalize(self) -> None:
method step (line 105) | def step(self) -> None:
function get_harness (line 114) | def get_harness(time_budget: int = 120) -> ExperimentHarness:
FILE: researchclaw/experiment/metrics.py
class MetricType (line 27) | class MetricType(str, Enum):
class ExperimentResults (line 38) | class ExperimentResults:
method to_flat_metrics (line 66) | def to_flat_metrics(self) -> dict[str, float]:
class UniversalMetricParser (line 103) | class UniversalMetricParser:
method parse (line 113) | def parse(self, run_dir: Path, stdout: str = "") -> ExperimentResults:
method _parse_json (line 155) | def _parse_json(self, path: Path) -> ExperimentResults:
method _parse_csv (line 209) | def _parse_csv(self, path: Path) -> ExperimentResults:
method _parse_stdout (line 253) | def _parse_stdout(self, stdout: str) -> ExperimentResults:
FILE: researchclaw/experiment/runner.py
class ExperimentResult (line 20) | class ExperimentResult:
class ExperimentHistory (line 35) | class ExperimentHistory:
method add (line 40) | def add(self, result: ExperimentResult) -> None:
method to_dict (line 45) | def to_dict(self) -> dict[str, object]:
method from_dict (line 53) | def from_dict(cls, data: dict[str, object]) -> ExperimentHistory:
class _ChatResponse (line 88) | class _ChatResponse(Protocol):
class _ChatClient (line 92) | class _ChatClient(Protocol):
method chat (line 93) | def chat(
class _GitManager (line 97) | class _GitManager(Protocol):
method is_git_repo (line 98) | def is_git_repo(self) -> bool: ...
method create_experiment_branch (line 99) | def create_experiment_branch(self, tag: str) -> str: ...
method commit_experiment (line 100) | def commit_experiment(self, run_id: str, metrics: dict[str, object], d...
method discard_experiment (line 101) | def discard_experiment(self, run_id: str, reason: str) -> bool: ...
method return_to_original_branch (line 102) | def return_to_original_branch(self) -> bool: ...
class ExperimentRunner (line 104) | class ExperimentRunner:
method __init__ (line 105) | def __init__(
method run_experiment (line 128) | def run_experiment(
method run_loop (line 181) | def run_loop(
method _improve_code (line 239) | def _improve_code(
method save_history (line 283) | def save_history(self, path: Path) -> None:
method _is_improvement (line 289) | def _is_improvement(self, new_value: float, best_value: float) -> bool:
method _to_float (line 295) | def _to_float(value: object) -> float | None:
method _extract_python_code (line 308) | def _extract_python_code(content: str) -> str:
function _result_from_dict (line 315) | def _result_from_dict(data: dict[str, object]) -> ExperimentResult | None:
FILE: researchclaw/experiment/sandbox.py
function validate_entry_point (line 21) | def validate_entry_point(entry_point: str) -> str | None:
function validate_entry_point_resolved (line 37) | def validate_entry_point_resolved(staging: Path, entry_point: str) -> st...
function _to_text (line 79) | def _to_text(value: str | bytes | None) -> str:
function parse_metrics (line 87) | def parse_metrics(stdout: str) -> dict[str, float]:
function extract_paired_comparisons (line 199) | def extract_paired_comparisons(stdout: str) -> list[dict[str, object]]:
function detect_nan_divergence (line 237) | def detect_nan_divergence(stdout: str, stderr: str) -> str | None:
class SandboxResult (line 280) | class SandboxResult:
class SandboxProtocol (line 289) | class SandboxProtocol(Protocol):
method run (line 292) | def run(self, code: str, *, timeout_sec: int = 300) -> SandboxResult: ...
method run_project (line 294) | def run_project(
class ExperimentSandbox (line 303) | class ExperimentSandbox:
method __init__ (line 304) | def __init__(self, config: SandboxConfig, workdir: Path) -> None:
method run (line 310) | def run(self, code: str, *, timeout_sec: int = 300) -> SandboxResult:
method run_project (line 347) | def run_project(
method _inject_harness (line 442) | def _inject_harness(target_dir: Path) -> None:
method _next_script_path (line 452) | def _next_script_path(self) -> Path:
method _write_script (line 457) | def _write_script(script_path: Path, code: str) -> None:
method _build_command (line 460) | def _build_command(self, script_path: Path) -> list[str]:
method _result_from_completed (line 472) | def _result_from_completed(
method _result_from_timeout (line 485) | def _result_from_timeout(
method _result_from_exception (line 505) | def _result_from_exception(exc: Exception, *, elapsed_sec: float) -> S...
method _should_cleanup (line 516) | def _should_cleanup(result: SandboxResult) -> bool:
method _cleanup_script (line 520) | def _cleanup_script(script_path: Path) -> None:
FILE: researchclaw/experiment/ssh_sandbox.py
class SshRemoteSandbox (line 30) | class SshRemoteSandbox:
method __init__ (line 45) | def __init__(self, config: SshRemoteConfig, workdir: Path) -> None:
method run (line 55) | def run(self, code: str, *, timeout_sec: int = 300) -> SandboxResult:
method run_project (line 68) | def run_project(
method check_ssh_available (line 129) | def check_ssh_available(config: SshRemoteConfig) -> tuple[bool, str]:
method _inject_harness (line 148) | def _inject_harness(target_dir: Path) -> None:
method _execute (line 160) | def _execute(
method _build_bare_exec_cmd (line 244) | def _build_bare_exec_cmd(
method _build_docker_exec_cmd (line 276) | def _build_docker_exec_cmd(
method _ssh_run (line 315) | def _ssh_run(
method _scp_upload (line 355) | def _scp_upload(self, local_dir: Path, remote_dir: str) -> bool:
class _SshResult (line 390) | class _SshResult:
method __init__ (line 393) | def __init__(
function _ssh_target (line 406) | def _ssh_target(cfg: SshRemoteConfig) -> str:
function _build_ssh_base (line 413) | def _build_ssh_base(
FILE: researchclaw/experiment/validator.py
class ValidationIssue (line 21) | class ValidationIssue:
class CodeValidation (line 32) | class CodeValidation:
method ok (line 38) | def ok(self) -> bool:
method errors (line 42) | def errors(self) -> list[ValidationIssue]:
method warnings (line 46) | def warnings(self) -> list[ValidationIssue]:
method summary (line 49) | def summary(self) -> str:
class _SecurityVisitor (line 209) | class _SecurityVisitor(ast.NodeVisitor):
method __init__ (line 212) | def __init__(self) -> None:
method visit_Call (line 217) | def visit_Call(self, node: ast.Call) -> None:
method visit_Import (line 243) | def visit_Import(self, node: ast.Import) -> None:
method visit_ImportFrom (line 257) | def visit_ImportFrom(self, node: ast.ImportFrom) -> None:
function _resolve_call_name (line 272) | def _resolve_call_name(node: ast.expr) -> str:
function extract_imports (line 289) | def extract_imports(code: str) -> set[str]:
function validate_syntax (line 314) | def validate_syntax(code: str) -> CodeValidation:
function validate_security (line 332) | def validate_security(code: str) -> CodeValidation:
function validate_imports (line 346) | def validate_imports(
function validate_code (line 372) | def validate_code(
function format_issues_for_llm (line 412) | def format_issues_for_llm(validation: CodeValidation) -> str:
function check_code_complexity (line 430) | def check_code_complexity(code: str) -> list[str]:
function check_class_quality (line 497) | def check_class_quality(all_files: dict[str, str]) -> list[str]:
function check_variable_scoping (line 698) | def check_variable_scoping(code: str, fname: str = "main.py") -> list[str]:
function _collect_if_only_assignments (line 748) | def _collect_if_only_assignments(
function _extract_assign_targets (line 760) | def _extract_assign_targets(node: ast.AST) -> list[str]:
function auto_fix_unbound_locals (line 776) | def auto_fix_unbound_locals(code: str) -> tuple[str, int]:
function check_api_correctness (line 855) | def check_api_correctness(code: str, fname: str = "main.py") -> list[str]:
function check_undefined_calls (line 945) | def check_undefined_calls(code: str, fname: str = "main.py") -> list[str]:
function check_filename_collisions (line 1069) | def check_filename_collisions(files: dict[str, str]) -> list[str]:
function deep_validate_files (line 1099) | def deep_validate_files(
FILE: researchclaw/experiment/visualize.py
function _is_excluded_metric (line 68) | def _is_excluded_metric(name: str) -> bool:
function _shorten_label (line 76) | def _shorten_label(name: str, max_len: int = 22) -> str:
function _format_cond_name (line 83) | def _format_cond_name(name: str) -> str:
function _ensure_dir (line 88) | def _ensure_dir(path: Path) -> Path:
function _setup_academic_style (line 93) | def _setup_academic_style() -> None:
function plot_condition_comparison (line 121) | def plot_condition_comparison(
function plot_metric_heatmap (line 201) | def plot_metric_heatmap(
function plot_ablation_deltas (line 293) | def plot_ablation_deltas(
function plot_metric_trajectory (line 389) | def plot_metric_trajectory(
function plot_experiment_comparison (line 455) | def plot_experiment_comparison(
function plot_pipeline_timeline (line 517) | def plot_pipeline_timeline(
function plot_iteration_scores (line 564) | def plot_iteration_scores(
function generate_all_charts (line 611) | def generate_all_charts(
FILE: researchclaw/hardware.py
class HardwareProfile (line 30) | class HardwareProfile:
method to_dict (line 40) | def to_dict(self) -> dict[str, object]:
function detect_hardware (line 44) | def detect_hardware() -> HardwareProfile:
function _detect_nvidia (line 76) | def _detect_nvidia() -> HardwareProfile | None:
function _detect_mps (line 128) | def _detect_mps() -> HardwareProfile | None:
function ensure_torch_available (line 165) | def ensure_torch_available(python_path: str, gpu_type: str) -> bool:
function is_metric_name (line 218) | def is_metric_name(name: str) -> bool:
FILE: researchclaw/health.py
class CheckResult (line 27) | class CheckResult:
class DoctorReport (line 35) | class DoctorReport:
method actionable_fixes (line 41) | def actionable_fixes(self) -> list[str]:
method to_dict (line 44) | def to_dict(self) -> dict[str, object]:
function check_python_version (line 61) | def check_python_version() -> CheckResult:
function check_yaml_import (line 87) | def check_yaml_import() -> CheckResult:
function check_config_valid (line 100) | def check_config_valid(config_path: str | Path) -> CheckResult:
function _models_url (line 151) | def _models_url(base_url: str) -> str:
function _is_timeout (line 155) | def _is_timeout(exc: BaseException) -> bool:
function check_llm_connectivity (line 164) | def check_llm_connectivity(base_url: str) -> CheckResult:
function _fetch_models (line 250) | def _fetch_models(base_url: str, api_key: str = "") -> tuple[int, dict[s...
function _read_response_bytes (line 264) | def _read_response_bytes(response: object) -> bytes:
function _urlopen (line 277) | def _urlopen(req: str | urllib.request.Request, timeout: int) -> Context...
function _load_yaml_object (line 281) | def _load_yaml_object(content: str) -> object:
function _load_json_mapping (line 285) | def _load_json_mapping(content: str) -> Mapping[object, object]:
function check_api_key_valid (line 292) | def check_api_key_valid(base_url: str, api_key: str) -> CheckResult:
function check_model_available (line 346) | def check_model_available(base_url: str, api_key: str, model: str) -> Ch...
function check_model_chain (line 371) | def check_model_chain(
function _check_models_against_endpoint (line 423) | def _check_models_against_endpoint(
function check_sandbox_python (line 464) | def check_sandbox_python(python_path: str) -> CheckResult:
function check_matplotlib (line 488) | def check_matplotlib() -> CheckResult:
function check_experiment_mode (line 501) | def check_experiment_mode(mode: str) -> CheckResult:
function check_acp_agent (line 516) | def check_acp_agent(agent_command: str) -> CheckResult:
function check_docker_runtime (line 533) | def check_docker_runtime(config: RCConfig) -> CheckResult:
function run_doctor (line 561) | def run_doctor(config_path: str | Path) -> DoctorReport:
function print_doctor_report (line 624) | def print_doctor_report(report: DoctorReport) -> None:
function write_doctor_report (line 648) | def write_doctor_report(report: DoctorReport, path: Path) -> None:
FILE: researchclaw/knowledge/base.py
function _utcnow_iso (line 23) | def _utcnow_iso() -> str:
class KBEntry (line 33) | class KBEntry:
function _markdown_frontmatter (line 54) | def _markdown_frontmatter(entry: KBEntry) -> str:
function _obsidian_enhancements (line 74) | def _obsidian_enhancements(entry: KBEntry) -> str:
function write_kb_entry (line 86) | def write_kb_entry(
function write_stage_to_kb (line 146) | def write_stage_to_kb(
function generate_weekly_report (line 213) | def generate_weekly_report(
FILE: researchclaw/knowledge/graph/builder.py
class KnowledgeGraphBuilder (line 16) | class KnowledgeGraphBuilder:
method __init__ (line 23) | def __init__(self, max_entities: int = 10000) -> None:
method entity_count (line 29) | def entity_count(self) -> int:
method relation_count (line 34) | def relation_count(self) -> int:
method add_entity (line 38) | def add_entity(self, entity: Entity) -> bool:
method add_relation (line 66) | def add_relation(self, relation: Relation) -> bool:
method get_entity (line 94) | def get_entity(self, entity_id: str) -> Entity | None:
method get_entities_by_type (line 98) | def get_entities_by_type(self, entity_type: EntityType) -> list[Entity]:
method get_relations_for (line 104) | def get_relations_for(
method remove_entity (line 126) | def remove_entity(self, entity_id: str) -> bool:
method add_paper (line 138) | def add_paper(
method add_method (line 175) | def add_method(
method add_dataset (line 191) | def add_dataset(
method save (line 207) | def save(self, path: str | Path) -> None:
method load (line 230) | def load(self, path: str | Path) -> int:
FILE: researchclaw/knowledge/graph/entities.py
class EntityType (line 10) | class EntityType(str, Enum):
class Entity (line 22) | class Entity:
method to_dict (line 37) | def to_dict(self) -> dict[str, Any]:
method from_dict (line 44) | def from_dict(cls, data: dict[str, Any]) -> Entity:
FILE: researchclaw/knowledge/graph/query.py
class KnowledgeGraphQuery (line 16) | class KnowledgeGraphQuery:
method __init__ (line 23) | def __init__(self, graph: KnowledgeGraphBuilder) -> None:
method find_research_gaps (line 26) | def find_research_gaps(self, domain: str = "") -> list[str]:
method find_trending_methods (line 60) | def find_trending_methods(self, min_citations: int = 2) -> list[str]:
method get_method_comparison (line 91) | def get_method_comparison(
method suggest_topics (line 144) | def suggest_topics(
method _find_method (line 197) | def _find_method(self, name_or_id: str) -> Any:
method _get_datasets_for_method (line 208) | def _get_datasets_for_method(self, method_id: str) -> dict[str, Any]:
FILE: researchclaw/knowledge/graph/relations.py
class RelationType (line 10) | class RelationType(str, Enum):
class Relation (line 24) | class Relation:
method to_dict (line 39) | def to_dict(self) -> dict[str, Any]:
method from_dict (line 46) | def from_dict(cls, data: dict[str, Any]) -> Relation:
FILE: researchclaw/knowledge/graph/visualizer.py
function export_to_dot (line 26) | def export_to_dot(graph: KnowledgeGraphBuilder, path: str | Path) -> None:
function export_to_json_cytoscape (line 59) | def export_to_json_cytoscape(
function graph_summary (line 102) | def graph_summary(graph: KnowledgeGraphBuilder) -> str:
FILE: researchclaw/literature/arxiv_client.py
function _reset_circuit_breaker (line 54) | def _reset_circuit_breaker() -> None:
function _cb_should_allow (line 66) | def _cb_should_allow() -> bool:
function _cb_on_success (line 81) | def _cb_on_success() -> None:
function _cb_on_failure (line 91) | def _cb_on_failure() -> bool:
function _get_client (line 117) | def _get_client() -> arxiv.Client:
function search_arxiv (line 134) | def search_arxiv(
function get_paper_by_id (line 206) | def get_paper_by_id(arxiv_id: str) -> Paper | None:
function download_pdf (line 221) | def download_pdf(
function search_arxiv_advanced (line 261) | def search_arxiv_advanced(
function _convert_result (line 296) | def _convert_result(result: arxiv.Result) -> Paper:
FILE: researchclaw/literature/cache.py
function _cache_dir (line 32) | def _cache_dir(base: Path | None = None) -> Path:
function cache_key (line 38) | def cache_key(query: str, source: str, limit: int) -> str:
function get_cached (line 44) | def get_cached(
function _format_age (line 87) | def _format_age(seconds: float) -> str:
function put_cache (line 98) | def put_cache(
function clear_cache (line 122) | def clear_cache(*, cache_base: Path | None = None) -> int:
function cache_stats (line 132) | def cache_stats(*, cache_base: Path | None = None) -> dict[str, Any]:
FILE: researchclaw/literature/models.py
class Author (line 16) | class Author:
method last_name (line 22) | def last_name(self) -> str:
class Paper (line 33) | class Paper:
method cite_key (line 58) | def cite_key(self) -> str:
method to_bibtex (line 78) | def to_bibtex(self) -> str:
method to_dict (line 158) | def to_dict(self) -> dict[str, object]:
FILE: researchclaw/literature/novelty.py
function _extract_keywords (line 133) | def _extract_keywords(text: str) -> list[str]:
function _jaccard_keywords (line 150) | def _jaccard_keywords(keywords_a: list[str], keywords_b: list[str]) -> f...
function _title_similarity (line 159) | def _title_similarity(title_a: str, title_b: str) -> float:
function _compute_similarity (line 164) | def _compute_similarity(
function check_novelty (line 185) | def check_novelty(
function _build_novelty_queries (line 338) | def _build_novelty_queries(topic: str, hypotheses_text: str) -> list[str]:
function _assess_novelty (line 359) | def _assess_novelty(
FILE: researchclaw/literature/openalex_client.py
function search_openalex (line 49) | def search_openalex(
function _request_with_retry (line 126) | def _request_with_retry(
function _reconstruct_abstract (line 206) | def _reconstruct_abstract(inverted_index: dict[str, list[int]] | None) -...
function _parse_openalex_work (line 219) | def _parse_openalex_work(item: dict[str, Any]) -> Paper:
FILE: researchclaw/literature/search.py
function _cache_api (line 43) | def _cache_api() -> tuple[CacheGet, CachePut]:
function _papers_to_dicts (line 48) | def _papers_to_dicts(papers: list[Paper]) -> list[dict[str, object]]:
function _as_int (line 53) | def _as_int(value: object, default: int = 0) -> int:
function _dicts_to_papers (line 66) | def _dicts_to_papers(dicts: list[dict[str, object]]) -> list[Paper]:
function search_papers (line 104) | def search_papers(
function search_papers_multi_query (line 233) | def search_papers_multi_query(
function _normalise_title (line 272) | def _normalise_title(title: str) -> str:
function _deduplicate (line 279) | def _deduplicate(papers: list[Paper]) -> list[Paper]:
function papers_to_bibtex (line 361) | def papers_to_bibtex(papers: Sequence[Paper]) -> str:
FILE: researchclaw/literature/semantic_scholar.py
function _reset_circuit_breaker (line 63) | def _reset_circuit_breaker() -> None:
function _cb_should_allow (line 75) | def _cb_should_allow() -> bool:
function _cb_on_success (line 96) | def _cb_on_success() -> None:
function _cb_on_429 (line 107) | def _cb_on_429() -> bool:
function search_semantic_scholar (line 148) | def search_semantic_scholar(
function _request_with_retry (line 220) | def _request_with_retry(
function batch_fetch_papers (line 271) | def batch_fetch_papers(
function _post_with_retry (line 336) | def _post_with_retry(
function _parse_s2_paper (line 384) | def _parse_s2_paper(item: dict[str, Any]) -> Paper:
FILE: researchclaw/literature/trends.py
class LiteratureTrendAnalyzer (line 11) | class LiteratureTrendAnalyzer:
method __init__ (line 14) | def __init__(self, search_client: Any = None):
method get_daily_papers (line 17) | def get_daily_papers(
method analyze_keyword_trends (line 47) | def analyze_keyword_trends(
method find_emerging_topics (line 67) | def find_emerging_topics(
FILE: researchclaw/literature/verify.py
class VerifyStatus (line 43) | class VerifyStatus(str, Enum):
class CitationResult (line 53) | class CitationResult:
method to_dict (line 65) | def to_dict(self) -> dict[str, object]:
class VerificationReport (line 87) | class VerificationReport:
method integrity_score (line 98) | def integrity_score(self) -> float:
method to_dict (line 105) | def to_dict(self) -> dict[str, object]:
function parse_bibtex_entries (line 134) | def parse_bibtex_entries(bib_text: str) -> list[dict[str, str]]:
function title_similarity (line 158) | def title_similarity(a: str, b: str) -> float:
function verify_by_arxiv_id (line 183) | def verify_by_arxiv_id(arxiv_id: str, expected_title: str) -> CitationRe...
function _verify_doi_datacite (line 277) | def _verify_doi_datacite(doi: str, expected_title: str) -> CitationResul...
function verify_by_doi (line 358) | def verify_by_doi(doi: str, expected_title: str) -> CitationResult | None:
function verify_by_openalex (line 454) | def verify_by_openalex(title: str) -> CitationResult | None:
function _cache_key (line 540) | def _cache_key(title: str) -> str:
function _read_cache (line 544) | def _read_cache(title: str) -> CitationResult | None:
function _write_cache (line 563) | def _write_cache(title: str, result: CitationResult) -> None:
function verify_by_title_search (line 577) | def verify_by_title_search(
function verify_citations (line 663) | def verify_citations(
function filter_verified_bibtex (line 867) | def filter_verified_bibtex(
function annotate_paper_hallucinations (line 904) | def annotate_paper_hallucinations(
FILE: researchclaw/llm/__init__.py
function create_llm_client (line 41) | def create_llm_client(config: RCConfig) -> LLMClient | ACPClient:
FILE: researchclaw/llm/acp_client.py
class ACPConfig (line 36) | class ACPConfig:
function _find_acpx (line 46) | def _find_acpx() -> str | None:
class ACPClient (line 60) | class ACPClient:
method __init__ (line 71) | def __init__(self, acp_config: ACPConfig) -> None:
method from_rc_config (line 80) | def from_rc_config(cls, rc_config: Any) -> ACPClient:
method chat (line 95) | def chat(
method preflight (line 124) | def preflight(self) -> tuple[bool, str]:
method close (line 143) | def close(self) -> None:
method __del__ (line 161) | def __del__(self) -> None:
method _atexit_cleanup (line 169) | def _atexit_cleanup(cls) -> None:
method _resolve_acpx (line 184) | def _resolve_acpx(self) -> str | None:
method _abs_cwd (line 191) | def _abs_cwd(self) -> str:
method _ensure_session (line 194) | def _ensure_session(self) -> None:
method _send_prompt (line 246) | def _send_prompt(self, prompt: str) -> str:
method _force_reconnect (line 316) | def _force_reconnect(self) -> None:
method _send_prompt_cli (line 324) | def _send_prompt_cli(self, acpx: str, prompt: str) -> str:
method _send_prompt_via_file (line 345) | def _send_prompt_via_file(self, acpx: str, prompt: str) -> str:
method _extract_response (line 388) | def _extract_response(raw_output: str | None) -> str:
method _messages_to_prompt (line 425) | def _messages_to_prompt(
FILE: researchclaw/llm/anthropic_adapter.py
class AnthropicAdapter (line 31) | class AnthropicAdapter:
method __init__ (line 34) | def __init__(self, base_url: str, api_key: str, timeout_sec: int = 300):
method close (line 44) | def close(self) -> None:
method chat_completion (line 53) | def chat_completion(
FILE: researchclaw/llm/client.py
class LLMResponse (line 45) | class LLMResponse:
class LLMConfig (line 59) | class LLMConfig:
class LLMClient (line 81) | class LLMClient:
method __init__ (line 84) | def __init__(self, config: LLMConfig) -> None:
method from_rc_config (line 90) | def from_rc_config(cls, rc_config: Any) -> LLMClient:
method chat (line 146) | def chat(
method preflight (line 208) | def preflight(self) -> tuple[bool, str]:
method _call_with_retry (line 251) | def _call_with_retry(
method _raw_call (line 322) | def _raw_call(
function create_client_from_yaml (line 457) | def create_client_from_yaml(yaml_path: str | None = None) -> LLMClient:
FILE: researchclaw/mcp/client.py
class MCPClient (line 12) | class MCPClient:
method __init__ (line 19) | def __init__(self, server_uri: str, transport: str = "stdio") -> None:
method connect (line 27) | async def connect(self) -> None:
method disconnect (line 32) | async def disconnect(self) -> None:
method is_connected (line 38) | def is_connected(self) -> bool:
method list_tools (line 43) | async def list_tools(self) -> list[dict[str, Any]]:
method call_tool (line 55) | async def call_tool(self, name: str, arguments: dict[str, Any]) -> dic...
method list_resources (line 63) | async def list_resources(self) -> list[dict[str, Any]]:
method read_resource (line 70) | async def read_resource(self, uri: str) -> str:
method _send_request (line 82) | async def _send_request(self, method: str, params: dict[str, Any]) -> ...
FILE: researchclaw/mcp/registry.py
class MCPServerRegistry (line 13) | class MCPServerRegistry:
method __init__ (line 16) | def __init__(self) -> None:
method register (line 19) | async def register(self, name: str, uri: str, transport: str = "stdio"...
method unregister (line 27) | async def unregister(self, name: str) -> None:
method get (line 33) | def get(self, name: str) -> MCPClient | None:
method list_all (line 37) | def list_all(self) -> list[dict[str, Any]]:
method close_all (line 44) | async def close_all(self) -> None:
method count (line 50) | def count(self) -> int:
FILE: researchclaw/mcp/server.py
class ResearchClawMCPServer (line 15) | class ResearchClawMCPServer:
method __init__ (line 22) | def __init__(self, config: Any = None) -> None:
method get_tools (line 27) | def get_tools(self) -> list[dict[str, Any]]:
method handle_tool_call (line 31) | async def handle_tool_call(self, name: str, arguments: dict[str, Any])...
method _handle_run_pipeline (line 57) | async def _handle_run_pipeline(self, args: dict[str, Any]) -> dict[str...
method _handle_get_status (line 67) | async def _handle_get_status(self, args: dict[str, Any]) -> dict[str, ...
method _handle_get_results (line 80) | async def _handle_get_results(self, args: dict[str, Any]) -> dict[str,...
method _handle_search_literature (line 90) | async def _handle_search_literature(self, args: dict[str, Any]) -> dic...
method _handle_review_paper (line 99) | async def _handle_review_paper(self, args: dict[str, Any]) -> dict[str...
method _handle_get_paper (line 107) | async def _handle_get_paper(self, args: dict[str, Any]) -> dict[str, A...
method start (line 122) | async def start(self, transport: str = "stdio") -> None:
method stop (line 127) | async def stop(self) -> None:
method is_running (line 133) | def is_running(self) -> bool:
FILE: researchclaw/mcp/tools.py
function get_tool_schema (line 84) | def get_tool_schema(name: str) -> dict[str, Any] | None:
function list_tool_names (line 92) | def list_tool_names() -> list[str]:
FILE: researchclaw/mcp/transport.py
class MCPTransport (line 14) | class MCPTransport(Protocol):
method send (line 17) | async def send(self, message: dict[str, Any]) -> None: ...
method receive (line 18) | async def receive(self) -> dict[str, Any]: ...
method close (line 19) | async def close(self) -> None: ...
class StdioTransport (line 22) | class StdioTransport:
method __init__ (line 25) | def __init__(self) -> None:
method start (line 29) | async def start(self) -> None:
method send (line 40) | async def send(self, message: dict[str, Any]) -> None:
method receive (line 49) | async def receive(self) -> dict[str, Any]:
method close (line 67) | async def close(self) -> None:
class SSETransport (line 73) | class SSETransport:
method __init__ (line 79) | def __init__(self, host: str = "0.0.0.0", port: int = 3000) -> None:
method start (line 84) | async def start(self) -> None:
method send (line 89) | async def send(self, message: dict[str, Any]) -> None:
method receive (line 93) | async def receive(self) -> dict[str, Any]:
method close (line 97) | async def close(self) -> None:
FILE: researchclaw/memory/decay.py
function time_decay_weight (line 9) | def time_decay_weight(
function confidence_update (line 45) | def confidence_update(
FILE: researchclaw/memory/embeddings.py
function _tokenize (line 25) | def _tokenize(text: str) -> list[str]:
function _hash_token (line 30) | def _hash_token(token: str, dim: int = _TFIDF_DIM) -> int:
class EmbeddingProvider (line 36) | class EmbeddingProvider:
method __init__ (line 39) | def __init__(
method backend (line 55) | def backend(self) -> str:
method dimension (line 62) | def dimension(self) -> int:
method _detect_backend (line 68) | def _detect_backend(self) -> None:
method embed (line 94) | def embed(self, text: str) -> list[float]:
method embed_batch (line 113) | def embed_batch(self, texts: list[str]) -> list[list[float]]:
method _embed_api (line 131) | def _embed_api(self, text: str) -> list[float]:
method _embed_local (line 157) | def _embed_local(self, text: str) -> list[float]:
method _embed_tfidf (line 164) | def _embed_tfidf(self, text: str) -> list[float]:
FILE: researchclaw/memory/experiment_memory.py
class ExperimentMemory (line 17) | class ExperimentMemory:
method __init__ (line 24) | def __init__(
method record_hyperparams (line 34) | def record_hyperparams(
method record_architecture (line 76) | def record_architecture(
method record_training_trick (line 113) | def record_training_trick(
method recall_best_configs (line 150) | def recall_best_configs(
FILE: researchclaw/memory/ideation_memory.py
class IdeationMemory (line 16) | class IdeationMemory:
method __init__ (line 23) | def __init__(
method record_topic_outcome (line 33) | def record_topic_outcome(
method record_hypothesis (line 73) | def record_hypothesis(
method recall_similar_topics (line 109) | def recall_similar_topics(
method get_anti_patterns (line 142) | def get_anti_patterns(self) -> list[str]:
FILE: researchclaw/memory/retriever.py
function cosine_similarity (line 20) | def cosine_similarity(a: list[float], b: list[float]) -> float:
class MemoryRetriever (line 43) | class MemoryRetriever:
method __init__ (line 53) | def __init__(
method recall (line 69) | def recall(
method recall_by_text (line 134) | def recall_by_text(
method format_for_prompt (line 159) | def format_for_prompt(
FILE: researchclaw/memory/store.py
class MemoryEntry (line 22) | class MemoryEntry:
method to_dict (line 35) | def to_dict(self) -> dict[str, Any]:
method from_dict (line 40) | def from_dict(cls, data: dict[str, Any]) -> MemoryEntry:
class MemoryStore (line 58) | class MemoryStore:
method __init__ (line 65) | def __init__(
method store_dir (line 80) | def store_dir(self) -> Path:
method add (line 84) | def add(
method get (line 138) | def get(self, entry_id: str) -> MemoryEntry | None:
method get_all (line 146) | def get_all(self, category: str | None = None) -> list[MemoryEntry]:
method update_confidence (line 155) | def update_confidence(self, entry_id: str, delta: float) -> bool:
method mark_accessed (line 185) | def mark_accessed(self, entry_id: str) -> bool:
method prune (line 206) | def prune(
method save (line 248) | def save(self) -> None:
method load (line 262) | def load(self) -> int:
method count (line 290) | def count(self, category: str | None = None) -> int:
FILE: researchclaw/memory/writing_memory.py
class WritingMemory (line 16) | class WritingMemory:
method __init__ (line 23) | def __init__(
method record_review_feedback (line 33) | def record_review_feedback(
method record_successful_structure (line 68) | def record_successful_structure(
method recall_writing_tips (line 104) | def recall_writing_tips(
FILE: researchclaw/metaclaw_bridge/config.py
class PRMConfig (line 9) | class PRMConfig:
class LessonToSkillConfig (line 23) | class LessonToSkillConfig:
class MetaClawBridgeConfig (line 32) | class MetaClawBridgeConfig:
FILE: researchclaw/metaclaw_bridge/lesson_to_skill.py
function _format_lessons (line 61) | def _format_lessons(lessons: list[LessonEntry]) -> str:
function _list_existing_skill_names (line 72) | def _list_existing_skill_names(skills_dir: Path) -> list[str]:
function _parse_skills_response (line 82) | def _parse_skills_response(text: str) -> list[dict[str, str]]:
function _write_skill (line 109) | def _write_skill(skills_dir: Path, skill: dict[str, str]) -> Path | None:
function _severity_at_least (line 132) | def _severity_at_least(severity: str, min_severity: str) -> bool:
function convert_lessons_to_skills (line 137) | def convert_lessons_to_skills(
FILE: researchclaw/metaclaw_bridge/prm_gate.py
function _single_judge_call (line 61) | def _single_judge_call(
class ResearchPRMGate (line 111) | class ResearchPRMGate:
method __init__ (line 114) | def __init__(
method from_bridge_config (line 129) | def from_bridge_config(cls, prm_config: object) -> ResearchPRMGate | N...
method evaluate_stage (line 154) | def evaluate_stage(
method should_gate (line 206) | def should_gate(self, stage_num: int) -> bool:
FILE: researchclaw/metaclaw_bridge/session.py
class MetaClawSession (line 15) | class MetaClawSession:
method __init__ (line 18) | def __init__(self, run_id: str) -> None:
method get_headers (line 23) | def get_headers(self, stage_name: str = "") -> dict[str, str]:
method end (line 40) | def end(self) -> dict[str, str]:
method is_active (line 55) | def is_active(self) -> bool:
FILE: researchclaw/metaclaw_bridge/skill_feedback.py
class SkillEffectivenessRecord (line 19) | class SkillEffectivenessRecord:
class SkillFeedbackStore (line 29) | class SkillFeedbackStore:
method __init__ (line 32) | def __init__(self, store_path: Path) -> None:
method append (line 36) | def append(self, record: SkillEffectivenessRecord) -> None:
method append_many (line 40) | def append_many(self, records: list[SkillEffectivenessRecord]) -> None:
method load_all (line 48) | def load_all(self) -> list[SkillEffectivenessRecord]:
method compute_skill_stats (line 71) | def compute_skill_stats(self) -> dict[str, dict[str, int | float]]:
function record_stage_skills (line 98) | def record_stage_skills(
FILE: researchclaw/metaclaw_bridge/stage_skill_map.py
function get_stage_config (line 143) | def get_stage_config(stage_name: str) -> dict[str, Any]:
FILE: researchclaw/overleaf/conflict.py
class ConflictResolver (line 20) | class ConflictResolver:
method has_conflicts (line 23) | def has_conflicts(self, repo_dir: Path) -> bool:
method detect (line 31) | def detect(self, repo_dir: Path) -> list[dict[str, str]]:
method resolve (line 42) | def resolve(self, repo_dir: Path, strategy: str = "ours") -> list[str]:
function _extract_conflicts (line 60) | def _extract_conflicts(content: str) -> list[dict[str, str]]:
function _resolve_content (line 87) | def _resolve_content(content: str, strategy: str) -> str:
FILE: researchclaw/overleaf/formatter.py
class LatexFormatter (line 9) | class LatexFormatter:
method normalize_paths (line 13) | def normalize_paths(content: str, figures_prefix: str = "figures/") ->...
method ensure_document_class (line 24) | def ensure_document_class(content: str) -> str:
method strip_local_comments (line 31) | def strip_local_comments(content: str) -> str:
method fix_encoding (line 40) | def fix_encoding(content: str) -> str:
method format_for_overleaf (line 52) | def format_for_overleaf(self, tex_path: Path) -> str:
FILE: researchclaw/overleaf/sync.py
class OverleafSync (line 17) | class OverleafSync:
method __init__ (line 20) | def __init__(
method setup (line 35) | def setup(self, run_dir: Path) -> Path:
method push_paper (line 47) | def push_paper(
method pull_changes (line 91) | def pull_changes(self) -> list[str]:
method get_status (line 110) | def get_status(self) -> dict[str, Any]:
method resolve_conflicts (line 125) | def resolve_conflicts(self, strategy: str = "ours") -> list[str]:
method _git (line 133) | def _git(self, *args: str) -> str:
method _git_clone (line 146) | def _git_clone(self) -> None:
FILE: researchclaw/overleaf/watcher.py
class FileWatcher (line 13) | class FileWatcher:
method __init__ (line 16) | def __init__(self, watch_dir: Path, extensions: tuple[str, ...] = (".t...
method _take_snapshot (line 22) | def _take_snapshot(self) -> None:
method check_changes (line 31) | def check_changes(self) -> list[str]:
method poll_loop (line 55) | def poll_loop(self, interval_sec: int = 300, callback: Any = None) -> ...
FILE: researchclaw/pipeline/_domain.py
function _detect_domain (line 75) | def _detect_domain(topic: str, domains: tuple[str, ...] = ()) -> tuple[s...
function _is_ml_domain (line 113) | def _is_ml_domain(domain_id: str) -> bool:
FILE: researchclaw/pipeline/_helpers.py
class StageResult (line 34) | class StageResult:
function _utcnow_iso (line 151) | def _utcnow_iso() -> str:
function _build_fallback_queries (line 160) | def _build_fallback_queries(topic: str) -> list[str]:
function _write_stage_meta (line 235) | def _write_stage_meta(
function _ensure_sandbox_deps (line 260) | def _ensure_sandbox_deps(code: str, python_path: str) -> list[str]:
function _read_best_analysis (line 307) | def _read_best_analysis(run_dir: Path) -> str:
function _read_prior_artifact (line 320) | def _read_prior_artifact(run_dir: Path, filename: str) -> str | None:
function _find_prior_file (line 343) | def _find_prior_file(run_dir: Path, filename: str) -> Path | None:
function _load_hardware_profile (line 362) | def _load_hardware_profile(run_dir: Path) -> dict[str, Any] | None:
function _extract_yaml_block (line 379) | def _extract_yaml_block(text: str) -> str:
function _safe_json_loads (line 430) | def _safe_json_loads(text: str, default: Any) -> Any:
function _extract_code_block (line 501) | def _extract_code_block(content: str) -> str:
function _extract_multi_file_blocks (line 508) | def _extract_multi_file_blocks(content: str) -> dict[str, str]:
function _parse_jsonl_rows (line 590) | def _parse_jsonl_rows(text: str) -> list[dict[str, Any]]:
function _write_jsonl (line 602) | def _write_jsonl(path: Path, rows: list[dict[str, Any]]) -> None:
function _parse_metrics_from_stdout (line 608) | def _parse_metrics_from_stdout(stdout: str) -> dict[str, Any]:
function _chat_with_prompt (line 663) | def _chat_with_prompt(
function _get_evolution_overlay (line 715) | def _get_evolution_overlay(run_dir: Path | None, stage_name: str) -> str:
function _collect_json_context (line 741) | def _collect_json_context(
function _collect_experiment_results (line 775) | def _collect_experiment_results(
function _build_context_preamble (line 918) | def _build_context_preamble(
function _extract_topic_keywords (line 990) | def _extract_topic_keywords(
function _topic_constraint_block (line 1016) | def _topic_constraint_block(topic: str) -> str:
function _detect_runtime_issues (line 1046) | def _detect_runtime_issues(sandbox_result: Any) -> str:
function _generate_neurips_checklist (line 1172) | def _generate_neurips_checklist(
function _extract_paper_title (line 1220) | def _extract_paper_title(md_text: str) -> str:
function _generate_framework_diagram_prompt (line 1308) | def _generate_framework_diagram_prompt(
function _safe_filename (line 1439) | def _safe_filename(name: str) -> str:
function _default_hypotheses (line 1450) | def _default_hypotheses(topic: str) -> str:
function _default_paper_outline (line 1467) | def _default_paper_outline(topic: str) -> str:
function _default_quality_report (line 1513) | def _default_quality_report(threshold: float) -> dict[str, Any]:
function _multi_perspective_generate (line 1548) | def _multi_perspective_generate(
function _synthesize_perspectives (line 1583) | def _synthesize_perspectives(
function reconcile_figure_refs (line 1602) | def reconcile_figure_refs(
FILE: researchclaw/pipeline/code_agent.py
class CodeAgentConfig (line 42) | class CodeAgentConfig:
class SolutionNode (line 85) | class SolutionNode:
class CodeAgentResult (line 104) | class CodeAgentResult:
class _SandboxResult (line 122) | class _SandboxResult(Protocol): # pragma: no cover
class _SandboxLike (line 131) | class _SandboxLike(Protocol): # pragma: no cover
method run_project (line 132) | def run_project(
class CodeAgent (line 146) | class CodeAgent:
method __init__ (line 166) | def __init__(
method generate (line 192) | def generate(
method _phase1_blueprint (line 283) | def _phase1_blueprint(
method _build_domain_context (line 328) | def _build_domain_context(self) -> str:
method _parse_blueprint (line 366) | def _parse_blueprint(self, yaml_text: str) -> dict[str, Any] | None:
method _is_valid_blueprint (line 458) | def _is_valid_blueprint(blueprint: dict[str, Any]) -> bool:
method _phase2_sequential_generate (line 472) | def _phase2_sequential_generate(
method _extract_single_file_code (line 579) | def _extract_single_file_code(content: str, expected_name: str) -> str:
method _build_code_summary (line 609) | def _build_code_summary(
method _hard_validate_and_repair (line 657) | def _hard_validate_and_repair(
method _hard_validate (line 707) | def _hard_validate(
method _repair_critical_issues (line 872) | def _repair_critical_issues(
method _phase2_generate_and_fix (line 940) | def _phase2_generate_and_fix(
method _exec_fix_loop (line 962) | def _exec_fix_loop(self, files: dict[str, str]) -> dict[str, str]:
method _generate_code (line 981) | def _generate_code(
method _fix_runtime_error (line 1029) | def _fix_runtime_error(
method _parse_error_location (line 1075) | def _parse_error_location(
method _targeted_file_repair (line 1103) | def _targeted_file_repair(
method _phase3_tree_search (line 1196) | def _phase3_tree_search(
method _evaluate_node (line 1280) | def _evaluate_node(self, node: SolutionNode, metric_key: str) -> None:
method _score_node (line 1299) | def _score_node(node: SolutionNode, metric_key: str) -> float:
method _phase4_review (line 1316) | def _phase4_review(
method _chat (line 1381) | def _chat(self, system: str, user: str, max_tokens: int = 8192) -> Any:
method _get_or_create_sandbox (line 1391) | def _get_or_create_sandbox(self) -> _SandboxLike:
method _run_in_sandbox (line 1401) | def _run_in_sandbox(
method _extract_files (line 1439) | def _extract_files(self, content: str) -> dict[str, str]:
method _format_files (line 1447) | def _format_files(files: dict[str, str]) -> str:
method _parse_json (line 1455) | def _parse_json(text: str) -> dict[str, Any] | None:
method _log_event (line 1488) | def _log_event(self, msg: str) -> None:
class _SimpleResult (line 1500) | class _SimpleResult:
FILE: researchclaw/pipeline/contracts.py
class StageContract (line 19) | class StageContract:
FILE: researchclaw/pipeline/executor.py
function execute_stage (line 211) | def execute_stage(
FILE: researchclaw/pipeline/experiment_diagnosis.py
class DeficiencyType (line 23) | class DeficiencyType(enum.Enum):
class Deficiency (line 43) | class Deficiency:
class ExperimentDiagnosis (line 55) | class ExperimentDiagnosis:
method has_critical (line 67) | def has_critical(self) -> bool:
method to_repair_prompt (line 70) | def to_repair_prompt(self) -> str:
method to_dict (line 93) | def to_dict(self) -> dict:
class PaperMode (line 122) | class PaperMode(enum.Enum):
class ExperimentQualityAssessment (line 132) | class ExperimentQualityAssessment:
function assess_experiment_quality (line 142) | def assess_experiment_quality(
function _select_paper_mode (line 190) | def _select_paper_mode(
function diagnose_experiment (line 244) | def diagnose_experiment(
function _check_missing_deps (line 344) | def _check_missing_deps(diag: ExperimentDiagnosis, output: str) -> None:
function _check_permission_errors (line 368) | def _check_permission_errors(diag: ExperimentDiagnosis, output: str) -> ...
function _check_gpu_oom (line 391) | def _check_gpu_oom(diag: ExperimentDiagnosis, output: str) -> None:
function _check_time_guard (line 406) | def _check_time_guard(
function _check_synthetic_data (line 435) | def _check_synthetic_data(diag: ExperimentDiagnosis, output: str) -> None:
function _check_dataset_issues (line 459) | def _check_dataset_issues(diag: ExperimentDiagnosis, output: str) -> None:
function _check_code_crashes (line 484) | def _check_code_crashes(diag: ExperimentDiagnosis, stderr: str, output: ...
function _check_hyperparams (line 516) | def _check_hyperparams(diag: ExperimentDiagnosis, output: str, summary: ...
function _check_near_random_accuracy (line 546) | def _check_near_random_accuracy(diag: ExperimentDiagnosis, summary: dict...
function _check_identical_conditions (line 589) | def _check_identical_conditions(diag: ExperimentDiagnosis, summary: dict...
function _check_insufficient_seeds (line 613) | def _check_insufficient_seeds(diag: ExperimentDiagnosis, summary: dict) ...
function _get_planned_conditions (line 640) | def _get_planned_conditions(plan: dict | None, summary: dict) -> set[str]:
function _get_completed_conditions (line 650) | def _get_completed_conditions(summary: dict) -> set[str]:
function _extract_stdout (line 663) | def _extract_stdout(summary: dict, ref_log: dict | None) -> str:
function _extract_stderr (line 682) | def _extract_stderr(summary: dict, ref_log: dict | None) -> str:
function _extract_context (line 699) | def _extract_context(text: str, pattern: str, context_chars: int = 200) ...
function _assess_repairability (line 709) | def _assess_repairability(diag: ExperimentDiagnosis, prior: list[dict] |...
FILE: researchclaw/pipeline/experiment_repair.py
class RepairCycleResult (line 49) | class RepairCycleResult:
class ExperimentRepairResult (line 61) | class ExperimentRepairResult:
method to_dict (line 71) | def to_dict(self) -> dict:
function build_repair_prompt (line 94) | def build_repair_prompt(
function select_best_results (line 196) | def select_best_results(
function _try_load_summary (line 235) | def _try_load_summary(path: Path) -> dict | None:
function _summary_quality_score (line 245) | def _summary_quality_score(summary: dict) -> float:
function run_repair_loop (line 274) | def run_repair_loop(
function _load_experiment_summary (line 503) | def _load_experiment_summary(run_dir: Path) -> dict | None:
function _load_refinement_log (line 513) | def _load_refinement_log(run_dir: Path) -> dict | None:
function _load_experiment_code (line 523) | def _load_experiment_code(run_dir: Path) -> dict[str, str]:
function _load_experiment_plan (line 580) | def _load_experiment_plan(run_dir: Path) -> dict | None:
function _collect_experiment_output (line 590) | def _collect_experiment_output(run_dir: Path) -> tuple[str, str]:
function _get_repaired_code (line 616) | def _get_repaired_code(
function _repair_via_opencode (line 641) | def _repair_via_opencode(
function _repair_via_llm (line 689) | def _repair_via_llm(
function _extract_code_blocks (line 737) | def _extract_code_blocks(text: str) -> dict[str, str]:
function _run_experiment_in_sandbox (line 772) | def _run_experiment_in_sandbox(
function _build_experiment_summary_from_run (line 809) | def _build_experiment_summary_from_run(
FILE: researchclaw/pipeline/opencode_bridge.py
class ComplexityScore (line 111) | class ComplexityScore:
function _count_keyword_hits (line 120) | def _count_keyword_hits(text: str, keywords: tuple[str, ...]) -> int:
function score_complexity (line 125) | def score_complexity(
class OpenCodeResult (line 223) | class OpenCodeResult:
class OpenCodeBridge (line 260) | class OpenCodeBridge:
method __init__ (line 263) | def __init__(
method check_available (line 285) | def check_available() -> bool:
method _prepare_workspace (line 308) | def _prepare_workspace(
method _is_azure (line 374) | def _is_azure(self) -> bool:
method _build_opencode_config (line 381) | def _build_opencode_config(self) -> dict[str, Any]:
method _resolve_opencode_model (line 431) | def _resolve_opencode_model(self) -> str:
method _invoke_opencode (line 450) | def _invoke_opencode(
method _collect_files (line 499) | def _collect_files(workspace: Path) -> dict[str, str]:
method _has_main_guard (line 536) | def _has_main_guard(source: str) -> bool:
method _ensure_main_entry_point (line 553) | def _ensure_main_entry_point(files: dict[str, str]) -> dict[str, str]:
method generate (line 633) | def generate(
function count_historical_failures (line 749) | def count_historical_failures(run_dir: Path, stage_name: str = "stage-10...
FILE: researchclaw/pipeline/paper_verifier.py
class UnverifiedNumber (line 101) | class UnverifiedNumber:
class FabricatedCondition (line 112) | class FabricatedCondition:
class VerificationResult (line 121) | class VerificationResult:
method fabrication_rate (line 136) | def fabrication_rate(self) -> float:
function verify_paper (line 143) | def verify_paper(
function verify_paper_file (line 277) | def verify_paper_file(
function _parse_sections (line 292) | def _parse_sections(tex_text: str) -> list[tuple[int, str]]:
function _section_at_line (line 306) | def _section_at_line(sections: list[tuple[int, str]], line_idx: int) -> ...
function _is_strict_section (line 323) | def _is_strict_section(section_lower: str, strict_set: set[str]) -> bool:
function _find_table_ranges (line 342) | def _find_table_ranges(tex_text: str) -> list[tuple[int, int, bool]]:
function _build_skip_mask (line 371) | def _build_skip_mask(tex_text: str) -> list[bool]:
function _line_offset (line 382) | def _line_offset(lines: list[str], line_idx: int) -> int:
function _check_condition_names (line 390) | def _check_condition_names(
function _check_training_config (line 484) | def _check_training_config(
function _build_summary (line 540) | def _build_summary(result: VerificationResult) -> str:
FILE: researchclaw/pipeline/runner.py
function _utcnow_iso (line 27) | def _utcnow_iso() -> str:
function _should_start (line 33) | def _should_start(stage: Stage, from_stage: Stage, started: bool) -> bool:
function _build_pipeline_summary (line 39) | def _build_pipeline_summary(
function _write_pipeline_summary (line 66) | def _write_pipeline_summary(run_dir: Path, summary: dict[str, object]) -...
function _write_checkpoint (line 73) | def _write_checkpoint(run_dir: Path, stage: Stage, run_id: str) -> None:
function _write_heartbeat (line 93) | def _write_heartbeat(run_dir: Path, stage: Stage, run_id: str) -> None:
function read_checkpoint (line 109) | def read_checkpoint(run_dir: Path) -> Stage | None:
function resume_from_checkpoint (line 129) | def resume_from_checkpoint(
function _collect_content_metrics (line 137) | def _collect_content_metrics(run_dir: Path | None) -> dict[str, object]:
function _run_experiment_diagnosis (line 192) | def _run_experiment_diagnosis(run_dir: Path, config: RCConfig, run_id: s...
function _run_experiment_repair (line 322) | def _run_experiment_repair(run_dir: Path, config: RCConfig, run_id: str)...
function execute_pipeline (line 407) | def execute_pipeline(
function _package_deliverables (line 646) | def _package_deliverables(
function _version_rollback_stages (line 939) | def _version_rollback_stages(
function _consecutive_empty_metrics (line 967) | def _consecutive_empty_metrics(run_dir: Path, pivot_count: int) -> bool:
function _promote_best_stage14 (line 1001) | def _promote_best_stage14(run_dir: Path, config: RCConfig) -> None:
function _check_experiment_quality (line 1117) | def _check_experiment_quality(
function _read_pivot_count (line 1199) | def _read_pivot_count(run_dir: Path) -> int:
function _record_decision_history (line 1213) | def _record_decision_history(
function _read_quality_score (line 1241) | def _read_quality_score(run_dir: Path) -> float | None:
function _write_iteration_context (line 1258) | def _write_iteration_context(
function execute_iterative_pipeline (line 1273) | def execute_iterative_pipeline(
function _metaclaw_post_pipeline (line 1399) | def _metaclaw_post_pipeline(
FILE: researchclaw/pipeline/stage_impls/_analysis.py
function _execute_result_analysis (line 34) | def _execute_result_analysis(
function _parse_decision (line 736) | def _parse_decision(text: str) -> str:
function _execute_research_decision (line 789) | def _execute_research_decision(
FILE: researchclaw/pipeline/stage_impls/_code_generation.py
function _check_rl_compatibility (line 46) | def _check_rl_compatibility(code: str) -> list[str]:
function _execute_code_generation (line 67) | def _execute_code_generation(
FILE: researchclaw/pipeline/stage_impls/_execution.py
function _execute_resource_planning (line 44) | def _execute_resource_planning(
function _execute_experiment_run (line 104) | def _execute_experiment_run(
function _execute_iterative_refine (line 333) | def _execute_iterative_refine(
FILE: researchclaw/pipeline/stage_impls/_experiment_design.py
function _execute_experiment_design (line 34) | def _execute_experiment_design(
FILE: researchclaw/pipeline/stage_impls/_literature.py
function _expand_search_queries (line 41) | def _expand_search_queries(queries: list[str], topic: str) -> list[str]:
function _execute_search_strategy (line 86) | def _execute_search_strategy(
function _execute_literature_collect (line 318) | def _execute_literature_collect(
function _execute_literature_screen (line 593) | def _execute_literature_screen(
function _execute_knowledge_extract (line 708) | def _execute_knowledge_extract(
FILE: researchclaw/pipeline/stage_impls/_paper_writing.py
function _execute_paper_outline (line 40) | def _execute_paper_outline(
function _collect_raw_experiment_metrics (line 129) | def _collect_raw_experiment_metrics(run_dir: Path) -> tuple[str, bool]:
function _write_paper_sections (line 312) | def _write_paper_sections(
function _validate_draft_quality (line 566) | def _validate_draft_quality(
function _review_compiled_pdf (line 928) | def _review_compiled_pdf(
function _check_ablation_effectiveness (line 1032) | def _check_ablation_effectiveness(
function _detect_result_contradictions (line 1132) | def _detect_result_contradictions(
function _execute_paper_draft (line 1207) | def _execute_paper_draft(
FILE: researchclaw/pipeline/stage_impls/_review_publish.py
function _get_collect_raw_experiment_metrics (line 50) | def _get_collect_raw_experiment_metrics():
function _get_review_compiled_pdf (line 55) | def _get_review_compiled_pdf():
function _collect_experiment_evidence (line 64) | def _collect_experiment_evidence(run_dir: Path) -> str:
function _execute_peer_review (line 138) | def _execute_peer_review(
function _execute_paper_revision (line 211) | def _execute_paper_revision(
function _execute_quality_gate (line 370) | def _execute_quality_gate(
function _execute_knowledge_archive (line 630) | def _execute_knowledge_archive(
function _sanitize_fabricated_data (line 706) | def _sanitize_fabricated_data(
function _load_seminal_papers_by_key (line 1168) | def _load_seminal_papers_by_key() -> dict[str, dict]:
function _seminal_to_bibtex (line 1185) | def _seminal_to_bibtex(paper: dict, cite_key: str) -> str:
function _resolve_missing_citations (line 1218) | def _resolve_missing_citations(
function _execute_export_publish (line 1411) | def _execute_export_publish(
function _check_citation_relevance (line 2492) | def _check_citation_relevance(
function _remove_bibtex_entries (line 2544) | def _remove_bibtex_entries(bib_text: str, keys_to_remove: set[str]) -> str:
function _remove_citations_from_text (line 2569) | def _remove_citations_from_text(text: str, keys_to_remove: set[str]) -> ...
function _execute_citation_verify (line 2592) | def _execute_citation_verify(
FILE: researchclaw/pipeline/stage_impls/_synthesis.py
function _execute_synthesis (line 29) | def _execute_synthesis(
function _execute_hypothesis_gen (line 90) | def _execute_hypothesis_gen(
FILE: researchclaw/pipeline/stage_impls/_topic.py
function _execute_topic_init (line 28) | def _execute_topic_init(
function _execute_problem_decompose (line 115) | def _execute_problem_decompose(
FILE: researchclaw/pipeline/stages.py
class Stage (line 22) | class Stage(IntEnum):
class StageStatus (line 65) | class StageStatus(str, Enum):
class TransitionEvent (line 77) | class TransitionEvent(str, Enum):
class TransitionOutcome (line 204) | class TransitionOutcome:
function gate_required (line 213) | def gate_required(
function default_rollback_stage (line 225) | def default_rollback_stage(stage: Stage) -> Stage:
function advance (line 230) | def advance(
FILE: researchclaw/pipeline/verified_registry.py
class ConditionResult (line 39) | class ConditionResult:
method compute_stats (line 49) | def compute_stats(self) -> None:
class VerifiedRegistry (line 64) | class VerifiedRegistry:
method add_value (line 75) | def add_value(self, value: float, source: str) -> None:
method _add_variants (line 83) | def _add_variants(self, value: float, source: str) -> None:
method is_verified (line 107) | def is_verified(self, number: float, tolerance: float = 0.01) -> bool:
method lookup (line 119) | def lookup(self, number: float, tolerance: float = 0.01) -> str | None:
method verify_condition (line 131) | def verify_condition(self, name: str) -> bool:
method from_experiment (line 136) | def from_experiment(
method from_run_dir (line 246) | def from_run_dir(
method from_files (line 350) | def from_files(
function _merge_into (line 367) | def _merge_into(target: VerifiedRegistry, source: VerifiedRegistry) -> N...
function _enrich_from_refinement_log (line 396) | def _enrich_from_refinement_log(reg: VerifiedRegistry, refinement_log: d...
function _extract_primary_metric (line 435) | def _extract_primary_metric(metrics: dict) -> float | None:
function _is_finite (line 443) | def _is_finite(value: Any) -> bool:
FILE: researchclaw/project/idea_pool.py
class IdeaPool (line 16) | class IdeaPool:
method __init__ (line 19) | def __init__(self, pool_path: str | Path) -> None:
method _load (line 26) | def _load(self) -> None:
method _save (line 37) | def _save(self) -> None:
method add (line 46) | def add(self, title: str, description: str, domains: list[str] | None ...
method remove (line 60) | def remove(self, idea_id: str) -> None:
method get (line 67) | def get(self, idea_id: str) -> Idea:
method evaluate (line 75) | def evaluate(self, idea_id: str, feasibility: float, novelty: float) -...
method rank (line 89) | def rank(self) -> list[Idea]:
method to_project (line 95) | def to_project(self, idea_id: str, config_path: str, projects_dir: str...
method list_all (line 110) | def list_all(self) -> list[Idea]:
FILE: researchclaw/project/manager.py
class ProjectManager (line 18) | class ProjectManager:
method __init__ (line 21) | def __init__(self, projects_dir: str | Path) -> None:
method _registry_path (line 29) | def _registry_path(self) -> Path:
method _load_registry (line 32) | def _load_registry(self) -> None:
method _save_registry (line 46) | def _save_registry(self) -> None:
method create (line 59) | def create(
method delete (line 97) | def delete(self, name: str) -> None:
method get (line 107) | def get(self, name: str) -> Project:
method list_all (line 113) | def list_all(self) -> list[Project]:
method get_status (line 117) | def get_status(self) -> dict[str, Any]:
method switch (line 132) | def switch(self, name: str) -> Project:
method active (line 141) | def active(self) -> Project | None:
method compare (line 149) | def compare(self, name_a: str, name_b: str) -> dict[str, Any]:
method start_run (line 161) | def start_run(self, name: str, run_id: str) -> str:
method finish_run (line 169) | def finish_run(self, name: str, status: str, metrics: dict[str, Any] |...
function _count_by (line 178) | def _count_by(projects: list[Project], attr: str) -> dict[str, int]:
function _metric_diff (line 186) | def _metric_diff(a: dict[str, Any], b: dict[str, Any]) -> dict[str, Any]:
FILE: researchclaw/project/models.py
class Project (line 11) | class Project:
method to_dict (line 23) | def to_dict(self) -> dict[str, Any]:
method from_dict (line 37) | def from_dict(cls, data: dict[str, Any]) -> Project:
class Idea (line 57) | class Idea:
method score (line 70) | def score(self) -> float:
method to_dict (line 74) | def to_dict(self) -> dict[str, Any]:
method from_dict (line 88) | def from_dict(cls, data: dict[str, Any]) -> Idea:
FILE: researchclaw/project/scheduler.py
class _QueueEntry (line 16) | class _QueueEntry:
class ProjectScheduler (line 23) | class ProjectScheduler:
method __init__ (line 26) | def __init__(self, manager: ProjectManager, max_concurrent: int = 2) -...
method enqueue (line 32) | def enqueue(self, project_name: str, priority: int = 0) -> None:
method dequeue (line 47) | def dequeue(self) -> str | None:
method next (line 54) | def next(self) -> str | None:
method can_start (line 63) | def can_start(self) -> bool:
method mark_done (line 67) | def mark_done(self, project_name: str) -> None:
method queue_size (line 72) | def queue_size(self) -> int:
method running_count (line 77) | def running_count(self) -> int:
method get_status (line 81) | def get_status(self) -> dict[str, Any]:
FILE: researchclaw/prompts.py
function _render (line 51) | def _render(template: str, variables: dict[str, str]) -> str:
class RenderedPrompt (line 73) | class RenderedPrompt:
class PromptManager (line 87) | class PromptManager:
method __init__ (line 90) | def __init__(self, overrides_path: str | Path | None = None) -> None:
method _load_overrides (line 104) | def _load_overrides(self, path: Path) -> None:
method for_stage (line 132) | def for_stage(
method system (line 156) | def system(self, stage: str) -> str:
method user (line 160) | def user(self, stage: str, **kwargs: Any) -> str:
method json_mode (line 167) | def json_mode(self, stage: str) -> bool:
method max_tokens (line 170) | def max_tokens(self, stage: str) -> int | None:
method block (line 175) | def block(self, name: str, **kwargs: Any) -> str:
method sub_prompt (line 184) | def sub_prompt(self, name: str, **kwargs: Any) -> RenderedPrompt:
method stage_names (line 195) | def stage_names(self) -> list[str]:
method has_stage (line 198) | def has_stage(self, stage: str) -> bool:
method export_yaml (line 201) | def export_yaml(self, path: Path) -> None:
FILE: researchclaw/quality.py
class TemplateMatch (line 47) | class TemplateMatch:
class QualityReport (line 56) | class QualityReport:
method has_template_content (line 65) | def has_template_content(self) -> bool:
method match_count (line 69) | def match_count(self) -> int:
method to_dict (line 72) | def to_dict(self) -> dict[str, object]:
function detect_template_content (line 91) | def detect_template_content(text: str) -> list[TemplateMatch]:
function compute_template_ratio (line 118) | def compute_template_ratio(text: str) -> float:
function assess_quality (line 146) | def assess_quality(text: str) -> QualityReport:
function check_strict_quality (line 169) | def check_strict_quality(text: str, *, threshold: float = 0.05) -> tuple...
FILE: researchclaw/report.py
function generate_report (line 15) | def generate_report(run_dir: Path) -> str:
function _header (line 48) | def _header(summary: dict[str, Any], run_dir: Path) -> str:
function _paper_section (line 68) | def _paper_section(run_dir: Path) -> str:
function _experiment_section (line 96) | def _experiment_section(run_dir: Path) -> str:
function _citation_section (line 135) | def _citation_section(run_dir: Path) -> str:
function _warnings_section (line 172) | def _warnings_section(summary: dict[str, Any]) -> str:
function print_report (line 197) | def print_report(run_dir: Path) -> None:
function write_report (line 201) | def write_report(run_dir: Path, output_path: Path) -> None:
FILE: researchclaw/server/app.py
function create_app (line 25) | def create_app(
FILE: researchclaw/server/dialog/intents.py
class Intent (line 10) | class Intent(str, Enum):
function classify_intent (line 54) | def classify_intent(message: str) -> tuple[Intent, float]:
FILE: researchclaw/server/dialog/router.py
function route_message (line 18) | async def route_message(raw_message: str, client_id: str) -> str:
function _handle_help (line 40) | async def _handle_help(text: str, session: ChatSession) -> str:
function _handle_status (line 53) | async def _handle_status(text: str, session: ChatSession) -> str:
function _handle_start (line 80) | async def _handle_start(text: str, session: ChatSession) -> str:
function _handle_topic (line 92) | async def _handle_topic(text: str, session: ChatSession) -> str:
function _handle_config (line 103) | async def _handle_config(text: str, session: ChatSession) -> str:
function _handle_results (line 114) | async def _handle_results(text: str, session: ChatSession) -> str:
function _handle_paper (line 133) | async def _handle_paper(text: str, session: ChatSession) -> str:
function _handle_general (line 145) | async def _handle_general(text: str, session: ChatSession) -> str:
FILE: researchclaw/server/dialog/session.py
class ChatMessage (line 16) | class ChatMessage:
method to_dict (line 23) | def to_dict(self) -> dict[str, Any]:
class ChatSession (line 28) | class ChatSession:
method add_message (line 39) | def add_message(self, role: str, content: str) -> ChatMessage:
method get_context (line 47) | def get_context(self, last_n: int = 10) -> list[dict[str, str]]:
method to_dict (line 54) | def to_dict(self) -> dict[str, Any]:
class SessionManager (line 64) | class SessionManager:
method __init__ (line 67) | def __init__(self, persist_dir: str = ".researchclaw/sessions") -> None:
method get_or_create (line 71) | def get_or_create(self, client_id: str) -> ChatSession:
method remove (line 77) | def remove(self, client_id: str) -> None:
method save (line 81) | def save(self, client_id: str) -> None:
method load (line 94) | def load(self, client_id: str) -> ChatSession | None:
FILE: researchclaw/server/middleware/auth.py
class TokenAuthMiddleware (line 12) | class TokenAuthMiddleware(BaseHTTPMiddleware):
method __init__ (line 21) | def __init__(self, app: object, token: str = "") -> None:
method dispatch (line 25) | async def dispatch(
FILE: researchclaw/server/routes/chat.py
function set_chat_manager (line 21) | def set_chat_manager(manager: ConnectionManager) -> None:
function get_chat_manager (line 27) | def get_chat_manager() -> ConnectionManager:
function chat_websocket (line 35) | async def chat_websocket(websocket: WebSocket) -> None:
FILE: researchclaw/server/routes/pipeline.py
function _validated_run_dir (line 20) | def _validated_run_dir(run_id: str) -> Path:
class PipelineStartRequest (line 33) | class PipelineStartRequest(BaseModel):
class PipelineStartResponse (line 41) | class PipelineStartResponse(BaseModel):
function _get_app_state (line 54) | def _get_app_state() -> dict[str, Any]:
function start_pipeline (line 61) | async def start_pipeline(req: PipelineStartRequest) -> PipelineStartResp...
function stop_pipeline (line 137) | async def stop_pipeline() -> dict[str, str]:
function pipeline_status (line 150) | async def pipeline_status() -> dict[str, Any]:
function pipeline_stages (line 158) | async def pipeline_stages() -> dict[str, Any]:
function list_runs (line 174) | async def list_runs() -> dict[str, Any]:
function get_run (line 195) | async def get_run(run_id: str) -> dict[str, Any]:
function get_run_metrics (line 227) | async def get_run_metrics(run_id: str) -> dict[str, Any]:
FILE: researchclaw/server/routes/projects.py
function list_projects (line 15) | async def list_projects() -> dict[str, Any]:
FILE: researchclaw/server/routes/voice.py
function transcribe_audio (line 16) | async def transcribe_audio(
FILE: researchclaw/server/websocket/events.py
class EventType (line 12) | class EventType(str, Enum):
class Event (line 41) | class Event:
method to_json (line 48) | def to_json(self) -> str:
method from_json (line 59) | def from_json(cls, raw: str) -> Event:
FILE: researchclaw/server/websocket/manager.py
class ConnectionManager (line 17) | class ConnectionManager:
method __init__ (line 20) | def __init__(self) -> None:
method active_count (line 25) | def active_count(self) -> int:
method connect (line 28) | async def connect(self, websocket: WebSocket, client_id: str) -> None:
method disconnect (line 39) | def disconnect(self, client_id: str) -> None:
method broadcast (line 44) | async def broadcast(self, event: Event) -> None:
method send_to (line 55) | async def send_to(self, client_id: str, event: Event) -> None:
method _send (line 64) | async def _send(self, ws: WebSocket, event: Event) -> None:
method publish (line 67) | def publish(self, event: Event) -> None:
method drain_queue (line 74) | async def drain_queue(self) -> None:
method heartbeat_loop (line 80) | async def heartbeat_loop(self, interval: float = 15.0) -> None:
FILE: researchclaw/servers/cloud_executor.py
class CloudExecutor (line 13) | class CloudExecutor:
method __init__ (line 20) | def __init__(self, server: ServerEntry) -> None:
method launch_instance (line 26) | async def launch_instance(self) -> dict[str, Any]:
method terminate_instance (line 43) | async def terminate_instance(self, instance_id: str) -> None:
method get_instance_status (line 47) | async def get_instance_status(self, instance_id: str) -> dict[str, Any]:
FILE: researchclaw/servers/dispatcher.py
class TaskDispatcher (line 18) | class TaskDispatcher:
method __init__ (line 21) | def __init__(
method dispatch (line 35) | async def dispatch(self, task: dict[str, Any]) -> str:
method execute_task (line 67) | async def execute_task(self, task_id: str) -> dict[str, Any]:
method get_task_status (line 131) | def get_task_status(self, task_id: str) -> dict[str, Any]:
FILE: researchclaw/servers/monitor.py
class ServerMonitor (line 14) | class ServerMonitor:
method __init__ (line 17) | def __init__(self, registry: ServerRegistry) -> None:
method check_status (line 21) | async def check_status(self, server: ServerEntry) -> dict[str, Any]:
method check_all (line 34) | async def check_all(self) -> dict[str, dict[str, Any]]:
method get_cached (line 47) | def get_cached(self, name: str) -> dict[str, Any] | None:
method get_gpu_usage (line 51) | def get_gpu_usage(self, server: ServerEntry) -> dict[str, Any]:
function _ssh_command (line 57) | async def _ssh_command(host: str, command: str) -> str:
function _parse_status_output (line 71) | def _parse_status_output(raw: str, server: ServerEntry) -> dict[str, Any]:
FILE: researchclaw/servers/registry.py
class ServerEntry (line 11) | class ServerEntry:
method __init__ (line 14) | def __init__(
method to_dict (line 38) | def to_dict(self) -> dict[str, Any]:
method from_dict (line 53) | def from_dict(cls, data: dict[str, Any]) -> ServerEntry:
class ServerRegistry (line 68) | class ServerRegistry:
method __init__ (line 71) | def __init__(self, servers: list[ServerEntry] | None = None) -> None:
method add (line 76) | def add(self, server: ServerEntry) -> None:
method remove (line 81) | def remove(self, name: str) -> None:
method get (line 87) | def get(self, name: str) -> ServerEntry:
method list_all (line 93) | def list_all(self) -> list[ServerEntry]:
method get_available (line 97) | def get_available(self, exclude: set[str] | None = None) -> list[Serve...
method get_best_match (line 102) | def get_best_match(
method count (line 141) | def count(self) -> int:
FILE: researchclaw/servers/slurm_executor.py
class SlurmExecutor (line 15) | class SlurmExecutor:
method __init__ (line 18) | def __init__(self, server: ServerEntry) -> None:
method _generate_sbatch_script (line 24) | def _generate_sbatch_script(
method submit_job (line 52) | async def submit_job(
method check_job (line 87) | async def check_job(self, job_id: str) -> dict[str, Any]:
method cancel_job (line 100) | async def cancel_job(self, job_id: str) -> None:
FILE: researchclaw/servers/ssh_executor.py
class SSHExecutor (line 16) | class SSHExecutor:
method __init__ (line 19) | def __init__(self, server: ServerEntry) -> None:
method upload_code (line 23) | async def upload_code(self, local_dir: Path, remote_dir: str) -> None:
method run_experiment (line 39) | async def run_experiment(
method download_results (line 68) | async def download_results(self, remote_dir: str, local_dir: Path) -> ...
method cleanup (line 85) | async def cleanup(self, remote_dir: str) -> None:
FILE: researchclaw/skills/loader.py
function load_skill_from_skillmd (line 18) | def load_skill_from_skillmd(path: Path) -> Skill | None:
function load_skillmd_from_directory (line 96) | def load_skillmd_from_directory(directory: Path) -> list[Skill]:
function load_skill_file (line 117) | def load_skill_file(path: Path) -> Skill | None:
function load_skills_from_directory (line 151) | def load_skills_from_directory(directory: Path) -> list[Skill]:
FILE: researchclaw/skills/matcher.py
function _tokenize (line 13) | def _tokenize(text: str) -> set[str]:
function _resolve_stage (line 18) | def _resolve_stage(stage: int | str) -> int:
function match_skills (line 25) | def match_skills(
function format_skills_for_prompt (line 95) | def format_skills_for_prompt(skills: list[Skill], max_chars: int = 4000)...
FILE: researchclaw/skills/registry.py
class SkillRegistry (line 18) | class SkillRegistry:
method __init__ (line 25) | def __init__(
method _load_from_dir (line 51) | def _load_from_dir(self, directory: Path) -> None:
method register (line 57) | def register(self, skill: Skill) -> None:
method unregister (line 66) | def unregister(self, skill_id: str) -> bool:
method get (line 80) | def get(self, skill_id: str) -> Skill | None:
method list_all (line 84) | def list_all(self) -> list[Skill]:
method list_by_category (line 88) | def list_by_category(self, category: str) -> list[Skill]:
method list_by_stage (line 92) | def list_by_stage(self, stage: int) -> list[Skill]:
method match (line 99) | def match(
method export_for_prompt (line 124) | def export_for_prompt(
method count (line 140) | def count(self) -> int:
FILE: researchclaw/skills/schema.py
class Skill (line 42) | class Skill:
method id (line 65) | def id(self) -> str: # noqa: A003
method category (line 70) | def category(self) -> str:
method trigger_keywords (line 74) | def trigger_keywords(self) -> list[str]:
method applicable_stages (line 79) | def applicable_stages(self) -> list[int]:
method priority (line 91) | def priority(self) -> int:
method prompt_template (line 95) | def prompt_template(self) -> str:
method code_template (line 100) | def code_template(self) -> str | None:
method references (line 104) | def references(self) -> list[str]:
method version (line 109) | def version(self) -> str:
method to_dict (line 114) | def to_dict(self) -> dict[str, Any]:
method from_dict (line 131) | def from_dict(cls, data: dict[str, Any]) -> Skill:
FILE: researchclaw/templates/compiler.py
class CompileResult (line 43) | class CompileResult:
function compile_latex (line 54) | def compile_latex(
function fix_common_latex_errors (line 164) | def fix_common_latex_errors(
function _parse_log (line 370) | def _parse_log(log_text: str) -> tuple[list[str], list[str]]:
class QualityCheckResult (line 400) | class QualityCheckResult:
method has_critical_issues (line 413) | def has_critical_issues(self) -> bool:
function check_compiled_quality (line 417) | def check_compiled_quality(
function remove_missing_figures (line 514) | def remove_missing_figures(tex_text: str, stage_dir: Path) -> tuple[str,...
function _sanitize_tex_unicode (line 574) | def _sanitize_tex_unicode(tex_path: Path) -> None:
function _sanitize_bib_file (line 643) | def _sanitize_bib_file(bib_path: Path) -> None:
function _fix_escaped_ampersand_in_tabular (line 714) | def _fix_escaped_ampersand_in_tabular(tex: str) -> tuple[str, int]:
function _run_pdflatex (line 745) | def _run_pdflatex(
function _is_fatal_error (line 796) | def _is_fatal_error(err: str) -> bool:
function _run_bibtex (line 829) | def _run_bibtex(work_dir: Path, stem: str, timeout: int = 60) -> bool:
FILE: researchclaw/templates/conference.py
class ConferenceTemplate (line 21) | class ConferenceTemplate:
method render_preamble (line 37) | def render_preamble(
method render_footer (line 93) | def render_footer(self, bib_file: str = "references") -> str:
method get_style_files (line 101) | def get_style_files(self) -> list[Path]:
method _render_authors (line 115) | def _render_authors(self, authors: str) -> str:
function get_template (line 351) | def get_template(name: str) -> ConferenceTemplate:
function list_conferences (line 364) | def list_conferences() -> list[str]:
FILE: researchclaw/templates/converter.py
function _reset_render_counters (line 28) | def _reset_render_counters() -> None:
function _next_table_num (line 34) | def _next_table_num() -> int:
function _next_figure_num (line 41) | def _next_figure_num() -> int:
function markdown_to_latex (line 52) | def markdown_to_latex(
function _sanitize_latex_output (line 135) | def _sanitize_latex_output(
function _round_raw_metrics (line 308) | def _round_raw_metrics(text: str) -> str:
function _preprocess_markdown (line 334) | def _preprocess_markdown(md: str) -> str:
class _Section (line 490) | class _Section:
method __post_init__ (line 498) | def __post_init__(self) -> None:
function _separate_heading_body (line 571) | def _separate_heading_body(heading: str) -> tuple[str, str]:
function _parse_sections (line 647) | def _parse_sections(md: str) -> list[_Section]:
function _extract_title (line 704) | def _extract_title(sections: list[_Section], raw_md: str) -> str:
function _is_bad_title (line 738) | def _is_bad_title(candidate: str) -> bool:
function _extract_abstract (line 752) | def _extract_abstract(sections: list[_Section]) -> str:
function _build_body (line 772) | def _build_body(sections: list[_Section], *, title: str = "") -> str:
function _deduplicate_tables (line 860) | def _deduplicate_tables(body: str) -> str:
function _convert_block (line 921) | def _convert_block(text: str) -> str:
function _collect_list (line 1060) | def _collect_list(
function _render_itemize (line 1087) | def _render_itemize(items: list[str]) -> str:
function _render_enumerate (line 1092) | def _render_enumerate(items: list[str]) -> str:
function _collect_table (line 1102) | def _collect_table(lines: list[str], start: int) -> tuple[list[str], int]:
function _render_table (line 1112) | def _render_table(table_lines: list[str], caption: str = "") -> str:
function _auto_table_caption (line 1184) | def _auto_table_caption(header: list[str], table_num: int) -> str:
function _parse_table_row (line 1206) | def _parse_table_row(line: str) -> list[str]:
function _parse_alignments (line 1216) | def _parse_alignments(sep_line: str, ncols: int) -> list[str]:
function _escape_algo_line (line 1311) | def _escape_algo_line(line: str) -> str:
function _render_code_block (line 1366) | def _render_code_block(lang: str, code: str) -> str:
function _render_figure (line 1431) | def _render_figure(caption: str, path: str) -> str:
function _convert_inline (line 1467) | def _convert_inline(text: str) -> str:
function check_paper_completeness (line 1610) | def check_paper_completeness(sections: list[_Section]) -> list[str]:
function _escape_latex (line 1783) | def _escape_latex(text: str) -> str:
FILE: researchclaw/templates/results_table_builder.py
class LatexTable (line 23) | class LatexTable:
function build_results_tables (line 34) | def build_results_tables(
function _get_reportable_conditions (line 88) | def _get_reportable_conditions(registry: VerifiedRegistry) -> list[Condi...
function _build_main_table (line 99) | def _build_main_table(
function _build_per_seed_table (line 177) | def _build_per_seed_table(
function build_condition_whitelist (line 238) | def build_condition_whitelist(registry: VerifiedRegistry) -> str:
function _find_best (line 261) | def _find_best(conditions: list[ConditionResult], direction: str) -> int...
function _fmt (line 279) | def _fmt(value: float | None) -> str:
function _escape_latex (line 303) | def _escape_latex(text: str) -> str:
FILE: researchclaw/trends/auto_topic.py
class AutoTopicGenerator (line 14) | class AutoTopicGenerator:
method __init__ (line 17) | def __init__(
method generate_candidates (line 27) | async def generate_candidates(
method auto_select (line 60) | async def auto_select(
method _score_candidate (line 77) | def _score_candidate(
method format_candidates (line 112) | def format_candidates(
FILE: researchclaw/trends/daily_digest.py
class DailyDigest (line 15) | class DailyDigest:
method __init__ (line 18) | def __init__(
method generate (line 26) | async def generate(
method _generate_with_llm (line 52) | async def _generate_with_llm(
method _generate_basic (line 108) | def _generate_basic(
method _parse_summary (line 152) | def _parse_summary(response: str) -> tuple[str, int]:
method generate_and_save (line 170) | async def generate_and_save(
FILE: researchclaw/trends/feeds.py
class FeedManager (line 12) | class FeedManager:
method __init__ (line 17) | def __init__(
method fetch_recent_papers (line 27) | def fetch_recent_papers(
method _fetch_arxiv (line 66) | def _fetch_arxiv(
method _fetch_s2 (line 98) | def _fetch_s2(
method _fetch_openalex (line 137) | def _fetch_openalex(
FILE: researchclaw/trends/opportunity_finder.py
class OpportunityFinder (line 11) | class OpportunityFinder:
method __init__ (line 14) | def __init__(self, llm_client: Any = None):
method find_opportunities (line 17) | async def find_opportunities(
method _llm_find_opportunities (line 27) | async def _llm_find_opportunities(
method _parse_opportunities (line 57) | def _parse_opportunities(response: str) -> list[dict[str, Any]]:
method _heuristic_find_opportunities (line 87) | def _heuristic_find_opportunities(
FILE: researchclaw/trends/trend_analyzer.py
class TrendAnalyzer (line 30) | class TrendAnalyzer:
method __init__ (line 33) | def __init__(self, min_keyword_length: int = 3):
method analyze (line 36) | def analyze(
method _extract_keywords (line 65) | def _extract_keywords(
method _extract_authors (line 101) | def _extract_authors(
method _extract_datasets (line 121) | def _extract_datasets(
method _extract_methods (line 145) | def _extract_methods(
method _source_distribution (line 173) | def _source_distribution(
method _tokenize (line 183) | def _tokenize(text: str) -> list[str]:
method generate_trend_report (line 187) | def generate_trend_report(
FILE: researchclaw/utils/sanitize.py
function sanitize_figure_id (line 8) | def sanitize_figure_id(raw_id: str, *, fallback: str = "figure") -> str:
FILE: researchclaw/utils/thinking_tags.py
function strip_thinking_tags (line 86) | def strip_thinking_tags(text: str) -> str:
FILE: researchclaw/voice/commands.py
class VoiceCommand (line 10) | class VoiceCommand(str, Enum):
class ParsedVoiceInput (line 22) | class ParsedVoiceInput:
function parse_voice_input (line 39) | def parse_voice_input(text: str) -> ParsedVoiceInput:
FILE: researchclaw/voice/synthesizer.py
class VoiceSynthesizer (line 11) | class VoiceSynthesizer:
method __init__ (line 14) | def __init__(self, server_config: Any) -> None:
method synthesize (line 17) | async def synthesize(
FILE: researchclaw/voice/transcriber.py
class VoiceTranscriber (line 11) | class VoiceTranscriber:
method __init__ (line 14) | def __init__(self, server_config: Any) -> None:
method transcribe (line 18) | async def transcribe(
method transcribe_stream (line 57) | async def transcribe_stream(
FILE: researchclaw/web/_ssrf.py
function check_url_ssrf (line 10) | def check_url_ssrf(url: str) -> str | None:
FILE: researchclaw/web/agent.py
class WebSearchAgentResult (line 35) | class WebSearchAgentResult:
method total_results (line 47) | def total_results(self) -> int:
method to_context_string (line 55) | def to_context_string(self, *, max_length: int = 30_000) -> str:
method to_dict (line 119) | def to_dict(self) -> dict[str, Any]:
class WebSearchAgent (line 134) | class WebSearchAgent:
method __init__ (line 155) | def __init__(
method search_and_extract (line 180) | def search_and_extract(
method _run_web_search (line 242) | def _run_web_search(
method _run_scholar_search (line 257) | def _run_scholar_search(
method _run_crawling (line 269) | def _run_crawling(
method _run_pdf_extraction (line 296) | def _run_pdf_extraction(
method _generate_queries (line 313) | def _generate_queries(topic: str) -> list[str]:
method _select_urls_to_crawl (line 322) | def _select_urls_to_crawl(self, result: WebSearchAgentResult) -> list[...
method _find_pdf_urls (line 338) | def _find_pdf_urls(result: WebSearchAgentResult) -> list[str]:
FILE: researchclaw/web/crawler.py
class CrawlResult (line 30) | class CrawlResult:
method has_content (line 42) | def has_content(self) -> bool:
class WebCrawler (line 46) | class WebCrawler:
method __init__ (line 57) | def __init__(
method crawl (line 72) | async def crawl(self, url: str) -> CrawlResult:
method crawl_sync (line 89) | def crawl_sync(self, url: str) -> CrawlResult:
method crawl_many (line 104) | async def crawl_many(self, urls: list[str]) -> list[CrawlResult]:
method _crawl_with_crawl4ai (line 163) | async def _crawl_with_crawl4ai(self, url: str, t0: float) -> CrawlResult:
method _extract_markdown (line 192) | def _extract_markdown(self, raw: Any) -> str:
method _crawl_with_urllib (line 208) | def _crawl_with_urllib(self, url: str, t0: float) -> CrawlResult:
method _html_to_markdown (line 234) | def _html_to_markdown(html: str) -> str:
FILE: researchclaw/web/pdf_extractor.py
class PDFContent (line 36) | class PDFContent:
method has_content (line 52) | def has_content(self) -> bool:
class PDFExtractor (line 56) | class PDFExtractor:
method __init__ (line 67) | def __init__(
method backend (line 77) | def backend(self) -> str:
method extract (line 80) | def extract(self, path: str | Path) -> PDFContent:
method extract_from_url (line 133) | def extract_from_url(self, url: str) -> PDFContent:
method _extract_abstract (line 166) | def _extract_abstract(text: str) -> str:
method _detect_sections (line 183) | def _detect_sections(text: str) -> list[dict[str, str]]:
FILE: researchclaw/web/scholar.py
class ScholarPaper (line 33) | class ScholarPaper:
method to_dict (line 46) | def to_dict(self) -> dict[str, Any]:
method to_literature_paper (line 59) | def to_literature_paper(self) -> Any:
class GoogleScholarClient (line 76) | class GoogleScholarClient:
method __init__ (line 87) | def __init__(
method available (line 111) | def available(self) -> bool:
method search (line 115) | def search(self, query: str, *, limit: int = 10) -> list[ScholarPaper]:
method get_citations (line 134) | def get_citations(self, scholar_id: str, *, limit: int = 20) -> list[S...
method search_author (line 155) | def search_author(self, name: str) -> list[dict[str, Any]]:
method _rate_limit (line 179) | def _rate_limit(self) -> None:
method _parse_pub (line 187) | def _parse_pub(pub: Any) -> ScholarPaper:
FILE: researchclaw/web/search.py
class SearchResult (line 27) | class SearchResult:
method to_dict (line 37) | def to_dict(self) -> dict[str, Any]:
class WebSearchResponse (line 49) | class WebSearchResponse:
method has_results (line 59) | def has_results(self) -> bool:
class WebSearchClient (line 63) | class WebSearchClient:
method __init__ (line 81) | def __init__(
method search (line 94) | def search(
method search_multi (line 115) | def search_multi(
method _search_tavily (line 141) | def _search_tavily(
method _search_duckduckgo (line 191) | def _search_duckduckgo(
method _parse_ddg_html (line 216) | def _parse_ddg_html(html: str, limit: int) -> list[SearchResult]:
FILE: researchclaw/wizard/quickstart.py
class QuickStartWizard (line 11) | class QuickStartWizard:
method run_interactive (line 14) | def run_interactive(self, template: str | None = None) -> dict[str, Any]:
method run_web (line 101) | def run_web(self, steps: list[dict[str, Any]]) -> dict[str, Any]:
method _apply_template (line 117) | def _apply_template(self, name: str) -> dict[str, Any]:
method _template_to_config (line 136) | def _template_to_config(self, tpl: dict[str, Any]) -> dict[str, Any]:
method _ask (line 158) | def _ask(self, prompt: str, default: str = "") -> str:
method _ask_yn (line 167) | def _ask_yn(self, prompt: str, default: bool = True) -> bool:
method _choose (line 178) | def _choose(
method _print_summary (line 204) | def _print_summary(self, config: dict[str, Any], indent: int = 2) -> N...
FILE: researchclaw/wizard/templates.py
function get_template (line 48) | def get_template(name: str) -> dict[str, Any] | None:
function list_templates (line 53) | def list_templates() -> list[dict[str, str]]:
FILE: researchclaw/wizard/validator.py
class EnvironmentReport (line 12) | class EnvironmentReport:
method to_dict (line 26) | def to_dict(self) -> dict[str, Any]:
function detect_environment (line 41) | def detect_environment() -> EnvironmentReport:
FILE: researchclaw/writing_guide.py
function format_writing_tips (line 56) | def format_writing_tips(categories: list[str] | None = None) -> str:
FILE: scripts/test_beast_mode_e2e.py
function main (line 172) | def main() -> None:
FILE: scripts/test_code_agent_live.py
function analyze_code_quality (line 192) | def analyze_code_quality(files: dict[str, str], test_case: dict) -> dict:
function main (line 321) | def main():
FILE: scripts/test_code_agent_sandbox.py
function make_sandbox_factory (line 120) | def make_sandbox_factory(docker_cfg: DockerSandboxConfig):
function main (line 127) | def main():
FILE: scripts/test_codegen_v2.py
function analyze_code_quality (line 269) | def analyze_code_quality(files: dict[str, str], test_case: dict) -> dict:
function _check_identical_classes (line 411) | def _check_identical_classes(files: dict[str, str]) -> list[str]:
function run_in_sandbox (line 446) | def run_in_sandbox(
function load_from_run (line 543) | def load_from_run(run_dir: str) -> dict:
function main (line 602) | def main():
FILE: tests/e2e_docker_sandbox.py
function check (line 30) | def check(name: str, ok: bool, detail: str = "") -> None:
function main (line 39) | def main() -> None:
FILE: tests/e2e_real_llm.py
function main (line 28) | def main() -> None:
FILE: tests/test_anthropic.py
function _create_message (line 21) | def _create_message() -> dict[str, Any]:
function test_anthropic_api (line 40) | def test_anthropic_api() -> None:
FILE: tests/test_assessor.py
function _sample_paper (line 27) | def _sample_paper() -> str:
function _sample_scores (line 37) | def _sample_scores(overall: float = 7.5) -> dict[str, Any]:
class MockLLM (line 50) | class MockLLM:
method __init__ (line 53) | def __init__(self, response: str = "SCORE: 7\nREASON: Solid contributi...
method chat_async (line 56) | async def chat_async(self, prompt: str) -> str:
class FailingLLM (line 60) | class FailingLLM:
method chat_async (line 61) | async def chat_async(self, prompt: str) -> str:
class TestRubrics (line 70) | class TestRubrics:
method test_all_five_dimensions_present (line 71) | def test_all_five_dimensions_present(self):
method test_rubric_is_frozen (line 76) | def test_rubric_is_frozen(self):
method test_rubric_has_criteria_and_scale (line 81) | def test_rubric_has_criteria_and_scale(self):
method test_default_weight (line 86) | def test_default_weight(self):
class TestPaperScorer (line 96) | class TestPaperScorer:
method test_score_without_llm (line 97) | def test_score_without_llm(self):
method test_score_with_mock_llm (line 105) | def test_score_with_mock_llm(self):
method test_score_with_failing_llm_falls_back (line 113) | def test_score_with_failing_llm_falls_back(self):
method test_score_subset_dimensions (line 120) | def test_score_subset_dimensions(self):
method test_parse_score_valid (line 125) | def test_parse_score_valid(self):
method test_parse_score_clamped (line 132) | def test_parse_score_clamped(self):
method test_parse_score_missing (line 138) | def test_parse_score_missing(self):
method test_heuristic_clarity_long_paper (line 143) | def test_heuristic_clarity_long_paper(self):
method test_heuristic_clarity_short_paper (line 149) | def test_heuristic_clarity_short_paper(self):
method test_heuristic_experiments_with_table_and_figure (line 154) | def test_heuristic_experiments_with_table_and_figure(self):
method test_heuristic_experiments_no_evidence (line 159) | def test_heuristic_experiments_no_evidence(self):
method test_heuristic_default_dimension (line 164) | def test_heuristic_default_dimension(self):
class TestVenueRecommender (line 176) | class TestVenueRecommender:
method test_recommend_high_score (line 177) | def test_recommend_high_score(self):
method test_recommend_low_score (line 185) | def test_recommend_low_score(self):
method test_recommend_medium_score_no_tier1 (line 191) | def test_recommend_medium_score_no_tier1(self):
method test_recommend_filter_by_domain (line 198) | def test_recommend_filter_by_domain(self):
method test_get_suggestion_weak_dimension (line 205) | def test_get_suggestion_weak_dimension(self):
method test_get_suggestion_moderate (line 211) | def test_get_suggestion_moderate(self):
method test_get_suggestion_strong (line 216) | def test_get_suggestion_strong(self):
method test_get_suggestion_no_scores (line 221) | def test_get_suggestion_no_scores(self):
method test_format_recommendations_empty (line 226) | def test_format_recommendations_empty(self):
method test_format_recommendations_with_data (line 231) | def test_format_recommendations_with_data(self):
class TestHistoryComparator (line 243) | class TestHistoryComparator:
method test_record_and_get_history (line 244) | def test_record_and_get_history(self, tmp_path: Path):
method test_record_persists_to_disk (line 251) | def test_record_persists_to_disk(self, tmp_path: Path):
method test_compare_no_history (line 258) | def test_compare_no_history(self):
method test_compare_with_previous (line 263) | def test_compare_with_previous(self, tmp_path: Path):
method test_compare_stable_trend (line 271) | def test_compare_stable_trend(self, tmp_path: Path):
method test_compare_declined_trend (line 277) | def test_compare_declined_trend(self, tmp_path: Path):
method test_compare_not_found (line 283) | def test_compare_not_found(self, tmp_path: Path):
method test_get_best_run (line 289) | def test_get_best_run(self, tmp_path: Path):
method test_get_best_run_empty (line 298) | def test_get_best_run_empty(self):
method test_dimension_deltas (line 302) | def test_dimension_deltas(self, tmp_path: Path):
FILE: tests/test_benchmark_agent.py
class FakeLLMResponse (line 20) | class FakeLLMResponse:
class FakeLLM (line 31) | class FakeLLM:
method __init__ (line 34) | def __init__(self, responses: list[str] | None = None) -> None:
method chat (line 39) | def chat(self, messages, **kwargs) -> FakeLLMResponse:
class TestBenchmarkKnowledge (line 54) | class TestBenchmarkKnowledge:
method test_knowledge_file_exists (line 57) | def test_knowledge_file_exists(self) -> None:
method test_knowledge_loads (line 61) | def test_knowledge_loads(self) -> None:
method test_knowledge_has_domains (line 67) | def test_knowledge_has_domains(self) -> None:
method test_each_domain_has_benchmarks_and_baselines (line 73) | def test_each_domain_has_benchmarks_and_baselines(self) -> None:
method test_benchmark_entries_have_required_fields (line 83) | def test_benchmark_entries_have_required_fields(self) -> None:
method test_baseline_entries_have_required_fields (line 92) | def test_baseline_entries_have_required_fields(self) -> None:
class TestSurveyor (line 107) | class TestSurveyor:
method test_domain_matching_image_classification (line 110) | def test_domain_matching_image_classification(self) -> None:
method test_domain_matching_rl (line 118) | def test_domain_matching_rl(self) -> None:
method test_domain_matching_knowledge_distillation (line 126) | def test_domain_matching_knowledge_distillation(self) -> None:
method test_domain_matching_multiple (line 134) | def test_domain_matching_multiple(self) -> None:
method test_local_candidates_returns_benchmarks (line 142) | def test_local_candidates_returns_benchmarks(self) -> None:
method test_execute_returns_benchmarks (line 149) | def test_execute_returns_benchmarks(self) -> None:
method test_execute_with_unknown_topic_uses_llm_fallback (line 159) | def test_execute_with_unknown_topic_uses_llm_fallback(self) -> None:
method test_extract_search_keywords (line 174) | def test_extract_search_keywords(self) -> None:
method test_execute_empty_topic_fails (line 184) | def test_execute_empty_topic_fails(self) -> None:
class TestSelector (line 196) | class TestSelector:
method benchmarks (line 200) | def benchmarks(self) -> list[dict]:
method baselines (line 215) | def baselines(self) -> list[dict]:
method test_filter_excludes_tier3 (line 223) | def test_filter_excludes_tier3(self, benchmarks: list[dict]) -> None:
method test_filter_network_none_only_tier1 (line 231) | def test_filter_network_none_only_tier1(self, benchmarks: list[dict]) ...
method test_ranking_prefers_tier1 (line 238) | def test_ranking_prefers_tier1(self, benchmarks: list[dict]) -> None:
method test_ranking_prefers_knowledge_base (line 246) | def test_ranking_prefers_knowledge_base(self, benchmarks: list[dict]) ...
method test_execute_selects_minimum (line 257) | def test_execute_selects_minimum(self, benchmarks: list[dict],
class TestAcquirer (line 282) | class TestAcquirer:
method test_generate_setup_script_tier1_only (line 285) | def test_generate_setup_script_tier1_only(self) -> None:
method test_generate_setup_script_tier2 (line 294) | def test_generate_setup_script_tier2(self) -> None:
method test_generate_requirements_filters_builtin (line 305) | def test_generate_requirements_filters_builtin(self) -> None:
method test_strip_fences (line 314) | def test_strip_fences(self) -> None:
method test_execute_generates_code (line 319) | def test_execute_generates_code(self) -> None:
class TestValidator (line 349) | class TestValidator:
method test_syntax_check_valid (line 352) | def test_syntax_check_valid(self) -> None:
method test_syntax_check_invalid (line 358) | def test_syntax_check_invalid(self) -> None:
method test_import_check_builtin_ok (line 365) | def test_import_check_builtin_ok(self) -> None:
method test_import_check_unknown (line 371) | def test_import_check_unknown(self) -> None:
method test_import_check_with_requirements (line 377) | def test_import_check_with_requirements(self) -> None:
method test_execute_passes_valid_code (line 385) | def test_execute_passes_valid_code(self) -> None:
method test_execute_fails_syntax_error (line 407) | def test_execute_fails_syntax_error(self) -> None:
class TestOrchestrator (line 429) | class TestOrchestrator:
method test_orchestrate_produces_plan (line 432) | def test_orchestrate_produces_plan(self, tmp_path: Path) -> None:
method test_orchestrate_saves_artifacts (line 477) | def test_orchestrate_saves_artifacts(self, tmp_path: Path) -> None:
method test_plan_to_prompt_block (line 512) | def test_plan_to_prompt_block(self) -> None:
method test_plan_to_dict_serializable (line 532) | def test_plan_to_dict_serializable(self) -> None:
class TestConfig (line 549) | class TestConfig:
method test_default_config_has_benchmark_agent (line 552) | def test_default_config_has_benchmark_agent(self) -> None:
method test_parse_benchmark_agent_config (line 558) | def test_parse_benchmark_agent_config(self) -> None:
method test_parse_benchmark_agent_config_empty (line 569) | def test_parse_benchmark_agent_config_empty(self) -> None:
class TestBaseAgent (line 581) | class TestBaseAgent:
method test_parse_json_direct (line 584) | def test_parse_json_direct(self) -> None:
method test_parse_json_fenced (line 589) | def test_parse_json_fenced(self) -> None:
method test_parse_json_embedded (line 594) | def test_parse_json_embedded(self) -> None:
method test_parse_json_invalid (line 599) | def test_parse_json_invalid(self) -> None:
class TestRequiredBaselines (line 610) | class TestRequiredBaselines:
method test_inject_required_baselines_image_classification (line 613) | def test_inject_required_baselines_image_classification(self) -> None:
method test_inject_required_baselines_no_duplicates (line 633) | def test_inject_required_baselines_no_duplicates(self) -> None:
FILE: tests/test_calendar.py
function _make_conference (line 24) | def _make_conference(
function _future (line 44) | def _future(days: int) -> date:
function _past (line 48) | def _past(days: int) -> date:
class TestConference (line 57) | class TestConference:
method test_from_dict_minimal (line 58) | def test_from_dict_minimal(self):
method test_from_dict_full (line 65) | def test_from_dict_full(self):
method test_from_dict_date_passthrough (line 83) | def test_from_dict_date_passthrough(self):
method test_next_deadline_returns_earliest_future (line 92) | def test_next_deadline_returns_earliest_future(self):
method test_next_deadline_skips_past (line 99) | def test_next_deadline_skips_past(self):
method test_next_deadline_none_when_all_past (line 106) | def test_next_deadline_none_when_all_past(self):
method test_days_until_deadline (line 113) | def test_days_until_deadline(self):
method test_days_until_deadline_none (line 117) | def test_days_until_deadline_none(self):
class TestConferenceCalendar (line 127) | class TestConferenceCalendar:
method test_load_from_yaml (line 128) | def test_load_from_yaml(self, tmp_path: Path):
method test_load_skips_invalid_entries (line 152) | def test_load_skips_invalid_entries(self, tmp_path: Path):
method test_get_upcoming_filters_by_days (line 164) | def test_get_upcoming_filters_by_days(self):
method test_get_upcoming_filters_by_domain (line 174) | def test_get_upcoming_filters_by_domain(self):
method test_get_upcoming_filters_by_tier (line 184) | def test_get_upcoming_filters_by_tier(self):
method test_get_by_name_case_insensitive (line 194) | def test_get_by_name_case_insensitive(self):
method test_get_by_domain (line 201) | def test_get_by_domain(self):
method test_format_upcoming_no_deadlines (line 211) | def test_format_upcoming_no_deadlines(self):
method test_format_upcoming_with_deadlines (line 216) | def test_format_upcoming_with_deadlines(self):
method test_load_builtin (line 226) | def test_load_builtin(self):
class TestSubmissionPlanner (line 237) | class TestSubmissionPlanner:
method test_plan_basic (line 238) | def test_plan_basic(self):
method test_plan_unknown_venue (line 247) | def test_plan_unknown_venue(self):
method test_plan_past_deadline (line 253) | def test_plan_past_deadline(self):
method test_format_plan (line 261) | def test_format_plan(self):
method test_format_plan_error (line 269) | def test_format_plan_error(self):
class TestReminderCalculator (line 281) | class TestReminderCalculator:
method test_check_fires_on_matching_day (line 282) | def test_check_fires_on_matching_day(self):
method test_check_no_fire_on_non_matching_day (line 290) | def test_check_no_fire_on_non_matching_day(self):
method test_check_skips_past_deadlines (line 297) | def test_check_skips_past_deadlines(self):
method test_urgency_critical (line 302) | def test_urgency_critical(self):
method test_urgency_warning (line 306) | def test_urgency_warning(self):
method test_urgency_info (line 310) | def test_urgency_info(self):
method test_get_active_reminders (line 313) | def test_get_active_reminders(self):
method test_format_reminders_empty (line 323) | def test_format_reminders_empty(self):
method test_format_reminders_with_data (line 327) | def test_format_reminders_with_data(self):
method test_reminder_frozen (line 340) | def test_reminder_frozen(self):
FILE: tests/test_cli.py
function test_install_opencode_uses_which_resolved_npm_path (line 10) | def test_install_opencode_uses_which_resolved_npm_path():
function test_install_opencode_returns_false_when_npm_missing (line 24) | def test_install_opencode_returns_false_when_npm_missing():
function test_is_opencode_installed_uses_which_resolved_path (line 29) | def test_is_opencode_installed_uses_which_resolved_path():
FILE: tests/test_code_agent.py
class FakeLLM (line 28) | class FakeLLM:
method __init__ (line 31) | def __init__(self, responses: list[str] | None = None):
method chat (line 36) | def chat(self, messages: list[dict], **kwargs: Any) -> LLMResponse:
class FakeSandboxResult (line 47) | class FakeSandboxResult:
class FakeSandbox (line 56) | class FakeSandbox:
method __init__ (line 59) | def __init__(self, results: list[FakeSandboxResult] | None = None):
method run_project (line 64) | def run_project(
function stage_dir (line 75) | def stage_dir(tmp_path: Path) -> Path:
function pm (line 82) | def pm() -> PromptManager:
class TestCodeAgentConfig (line 91) | class TestCodeAgentConfig:
method test_default_values (line 92) | def test_default_values(self) -> None:
method test_custom_values (line 100) | def test_custom_values(self) -> None:
class TestPhase1Architecture (line 118) | class TestPhase1Architecture:
method test_architecture_planning_produces_spec (line 119) | def test_architecture_planning_produces_spec(
method test_architecture_planning_disabled (line 146) | def test_architecture_planning_disabled(
class TestPhase2ExecFix (line 175) | class TestPhase2ExecFix:
method test_exec_fix_loop_fixes_crashing_code (line 176) | def test_exec_fix_loop_fixes_crashing_code(
method test_exec_fix_skipped_without_sandbox (line 212) | def test_exec_fix_skipped_without_sandbox(
method test_exec_fix_max_iterations_respected (line 232) | def test_exec_fix_max_iterations_respected(
class TestPhase3TreeSearch (line 266) | class TestPhase3TreeSearch:
method test_tree_search_generates_multiple_candidates (line 267) | def test_tree_search_generates_multiple_candidates(
method test_tree_search_fixes_crashing_candidates (line 301) | def test_tree_search_fixes_crashing_candidates(
class TestPhase4Review (line 347) | class TestPhase4Review:
method test_review_approves_on_first_round (line 348) | def test_review_approves_on_first_round(
method test_review_triggers_fix_on_critical_issues (line 369) | def test_review_triggers_fix_on_critical_issues(
method test_review_disabled (line 399) | def test_review_disabled(
class TestFullPipeline (line 427) | class TestFullPipeline:
method test_all_phases_end_to_end (line 428) | def test_all_phases_end_to_end(
method test_agent_writes_attempt_directories (line 463) | def test_agent_writes_attempt_directories(
class TestSolutionNodeScoring (line 493) | class TestSolutionNodeScoring:
method test_score_running_node (line 494) | def test_score_running_node(self) -> None:
method test_score_crashing_node (line 505) | def test_score_crashing_node(self) -> None:
method test_score_partial_output (line 515) | def test_score_partial_output(self) -> None:
class TestHelpers (line 532) | class TestHelpers:
method test_format_files (line 533) | def test_format_files(self) -> None:
method test_parse_json_direct (line 540) | def test_parse_json_direct(self) -> None:
method test_parse_json_fenced (line 544) | def test_parse_json_fenced(self) -> None:
method test_parse_json_embedded (line 549) | def test_parse_json_embedded(self) -> None:
method test_parse_json_invalid (line 555) | def test_parse_json_invalid(self) -> None:
method test_simple_result_defaults (line 559) | def test_simple_result_defaults(self) -> None:
class TestConfigIntegration (line 571) | class TestConfigIntegration:
method test_code_agent_config_in_experiment_config (line 572) | def test_code_agent_config_in_experiment_config(self) -> None:
method test_code_agent_config_from_dict (line 580) | def test_code_agent_config_from_dict(self, tmp_path: Path) -> None:
class TestPromptsIntegration (line 631) | class TestPromptsIntegration:
method test_architecture_planning_prompt_exists (line 632) | def test_architecture_planning_prompt_exists(self, pm: PromptManager) ...
method test_code_exec_fix_prompt_exists (line 643) | def test_code_exec_fix_prompt_exists(self, pm: PromptManager) -> None:
method test_code_reviewer_prompt_exists (line 654) | def test_code_reviewer_prompt_exists(self, pm: PromptManager) -> None:
FILE: tests/test_code_searcher.py
class TestQueryGeneration (line 37) | class TestQueryGeneration:
method test_heuristic_generates_queries (line 38) | def test_heuristic_generates_queries(self):
method test_heuristic_no_duplicates (line 51) | def test_heuristic_no_duplicates(self):
method test_extract_key_phrases (line 61) | def test_extract_key_phrases(self):
method test_generate_without_llm (line 67) | def test_generate_without_llm(self):
class TestPatternExtractor (line 83) | class TestPatternExtractor:
method test_heuristic_extract_imports (line 84) | def test_heuristic_extract_imports(self):
method test_heuristic_extract_functions (line 93) | def test_heuristic_extract_functions(self):
method test_empty_snippets (line 100) | def test_empty_snippets(self):
method test_code_patterns_to_prompt (line 104) | def test_code_patterns_to_prompt(self):
method test_code_patterns_has_content (line 115) | def test_code_patterns_has_content(self):
class TestSearchCache (line 128) | class TestSearchCache:
method test_put_and_get (line 129) | def test_put_and_get(self, tmp_path):
method test_cache_miss (line 138) | def test_cache_miss(self, tmp_path):
method test_cache_expiry (line 143) | def test_cache_expiry(self, tmp_path):
method test_clear_domain (line 158) | def test_clear_domain(self, tmp_path):
method test_clear_all (line 169) | def test_clear_all(self, tmp_path):
method test_stats (line 177) | def test_stats(self, tmp_path):
method test_topic_hash_deterministic (line 187) | def test_topic_hash_deterministic(self):
method test_topic_hash_case_insensitive (line 192) | def test_topic_hash_case_insensitive(self):
class TestGitHubClient (line 203) | class TestGitHubClient:
method test_has_token_false (line 204) | def test_has_token_false(self):
method test_has_token_true (line 210) | def test_has_token_true(self):
method test_headers_with_token (line 214) | def test_headers_with_token(self):
method test_headers_without_token (line 220) | def test_headers_without_token(self):
class TestDataClasses (line 231) | class TestDataClasses:
method test_repo_info_defaults (line 232) | def test_repo_info_defaults(self):
method test_code_snippet (line 237) | def test_code_snippet(self):
method test_repo_analysis (line 244) | def test_repo_analysis(self):
class TestCodeSearchResult (line 258) | class TestCodeSearchResult:
method test_empty_result (line 259) | def test_empty_result(self):
method test_result_with_patterns (line 264) | def test_result_with_patterns(self):
method test_cache_roundtrip (line 274) | def test_cache_roundtrip(self):
class TestCodeSearchAgent (line 299) | class TestCodeSearchAgent:
method _mock_github (line 300) | def _mock_github(self):
method test_search_uses_cache (line 324) | def test_search_uses_cache(self, tmp_path):
method test_search_with_mock_github (line 345) | def test_search_with_mock_github(self, tmp_path):
method test_search_graceful_failure (line 364) | def test_search_graceful_failure(self, tmp_path):
FILE: tests/test_collaboration.py
function repo (line 27) | def repo(tmp_path: Path) -> ResearchRepository:
function populated_repo (line 32) | def populated_repo(repo: ResearchRepository) -> ResearchRepository:
function run_dir (line 51) | def run_dir(tmp_path: Path) -> Path:
class TestResearchRepository (line 87) | class TestResearchRepository:
method test_publish (line 88) | def test_publish(self, repo: ResearchRepository) -> None:
method test_publish_creates_dirs (line 95) | def test_publish_creates_dirs(self, repo: ResearchRepository) -> None:
method test_publish_unknown_type_skipped (line 102) | def test_publish_unknown_type_skipped(self, repo: ResearchRepository) ...
method test_search_by_query (line 109) | def test_search_by_query(self, populated_repo: ResearchRepository) -> ...
method test_search_by_type (line 113) | def test_search_by_type(self, populated_repo: ResearchRepository) -> N...
method test_search_no_results (line 119) | def test_search_no_results(self, populated_repo: ResearchRepository) -...
method test_search_empty_repo (line 123) | def test_search_empty_repo(self, repo: ResearchRepository) -> None:
method test_list_runs (line 127) | def test_list_runs(self, populated_repo: ResearchRepository) -> None:
method test_list_runs_empty (line 132) | def test_list_runs_empty(self, repo: ResearchRepository) -> None:
method test_get_run_artifacts (line 136) | def test_get_run_artifacts(self, populated_repo: ResearchRepository) -...
method test_get_run_artifacts_missing (line 141) | def test_get_run_artifacts_missing(self, populated_repo: ResearchRepos...
method test_import_literature (line 145) | def test_import_literature(self, populated_repo: ResearchRepository) -...
method test_import_literature_missing_run (line 150) | def test_import_literature_missing_run(self, populated_repo: ResearchR...
method test_import_code_template (line 154) | def test_import_code_template(self, populated_repo: ResearchRepository...
method test_import_code_template_no_match (line 159) | def test_import_code_template_no_match(self, populated_repo: ResearchR...
class TestArtifactPublisher (line 167) | class TestArtifactPublisher:
method test_publish_from_run_dir (line 168) | def test_publish_from_run_dir(self, run_dir: Path, tmp_path: Path) -> ...
method test_publish_empty_dir (line 174) | def test_publish_empty_dir(self, tmp_path: Path) -> None:
method test_publish_nonexistent_dir (line 182) | def test_publish_nonexistent_dir(self, tmp_path: Path) -> None:
class TestArtifactSubscriber (line 192) | class TestArtifactSubscriber:
method test_find_relevant_literature (line 193) | def test_find_relevant_literature(self, populated_repo: ResearchReposi...
method test_find_similar_experiments (line 198) | def test_find_similar_experiments(self, populated_repo: ResearchReposi...
method test_find_code_templates (line 203) | def test_find_code_templates(self, populated_repo: ResearchRepository)...
method test_import_best_practices (line 208) | def test_import_best_practices(self, populated_repo: ResearchRepositor...
method test_import_best_practices_empty (line 213) | def test_import_best_practices_empty(self, repo: ResearchRepository) -...
class TestDedup (line 222) | class TestDedup:
method test_content_hash_deterministic (line 223) | def test_content_hash_deterministic(self) -> None:
method test_content_hash_different (line 228) | def test_content_hash_different(self) -> None:
method test_deduplicate_artifacts (line 233) | def test_deduplicate_artifacts(self) -> None:
method test_deduplicate_empty (line 242) | def test_deduplicate_empty(self) -> None:
FILE: tests/test_compiler.py
class TestIsFatalError (line 28) | class TestIsFatalError:
method test_unicode_char_not_set_up_is_nonfatal (line 31) | def test_unicode_char_not_set_up_is_nonfatal(self):
method test_unicode_char_various_codepoints_nonfatal (line 41) | def test_unicode_char_various_codepoints_nonfatal(self):
method test_undefined_control_sequence_is_fatal (line 47) | def test_undefined_control_sequence_is_fatal(self):
method test_missing_dollar_is_fatal (line 51) | def test_missing_dollar_is_fatal(self):
method test_overfull_hbox_is_nonfatal (line 55) | def test_overfull_hbox_is_nonfatal(self):
method test_float_lost_is_nonfatal (line 59) | def test_float_lost_is_nonfatal(self):
method test_unavailable_in_encoding_is_nonfatal (line 63) | def test_unavailable_in_encoding_is_nonfatal(self):
method test_emergency_stop_is_fatal (line 67) | def test_emergency_stop_is_fatal(self):
method test_non_bang_file_not_found_is_fatal (line 71) | def test_non_bang_file_not_found_is_fatal(self):
class TestSanitizeTexUnicode (line 80) | class TestSanitizeTexUnicode:
method test_replaces_narrow_no_break_space (line 83) | def test_replaces_narrow_no_break_space(self, tmp_path: Path):
method test_replaces_no_break_space (line 90) | def test_replaces_no_break_space(self, tmp_path: Path):
method test_removes_zero_width_space (line 97) | def test_removes_zero_width_space(self, tmp_path: Path):
method test_removes_bom (line 104) | def test_removes_bom(self, tmp_path: Path):
method test_preserves_normal_text (line 111) | def test_preserves_normal_text(self, tmp_path: Path):
method test_handles_multiple_types (line 119) | def test_handles_multiple_types(self, tmp_path: Path):
method test_nonexistent_file (line 130) | def test_nonexistent_file(self, tmp_path: Path):
method test_cyrillic_transliterated_to_latin (line 134) | def test_cyrillic_transliterated_to_latin(self, tmp_path: Path):
class TestSanitizeBibFile (line 152) | class TestSanitizeBibFile:
method test_cyrillic_author_transliterated (line 155) | def test_cyrillic_author_transliterated(self, tmp_path: Path):
class TestFixUnicodeErrors (line 178) | class TestFixUnicodeErrors:
method test_unicode_u202f_replaced_with_space (line 181) | def test_unicode_u202f_replaced_with_space(self):
method test_unicode_u200b_removed (line 192) | def test_unicode_u200b_removed(self):
method test_no_unicode_error_no_change (line 202) | def test_no_unicode_error_no_change(self):
class TestRunPdflatexByteMode (line 218) | class TestRunPdflatexByteMode:
method test_broken_utf8_in_stdout_does_not_crash (line 222) | def test_broken_utf8_in_stdout_does_not_crash(self, mock_run):
method test_valid_utf8_works (line 240) | def test_valid_utf8_works(self, mock_run):
class TestRunBibtex (line 261) | class TestRunBibtex:
method test_bibtex_failure_logged (line 266) | def test_bibtex_failure_logged(self, mock_run, mock_which, tmp_path):
method test_bibtex_success_with_bbl (line 281) | def test_bibtex_success_with_bbl(self, mock_run, mock_which, tmp_path):
method test_bibtex_not_found (line 298) | def test_bibtex_not_found(self, mock_which, tmp_path):
method test_bibtex_broken_utf8 (line 307) | def test_bibtex_broken_utf8(self, mock_run, mock_which, tmp_path):
FILE: tests/test_convergence_evaluator.py
class TestComputeConvergenceOrder (line 21) | class TestComputeConvergenceOrder:
method test_second_order (line 22) | def test_second_order(self):
method test_fourth_order (line 30) | def test_fourth_order(self):
method test_first_order (line 38) | def test_first_order(self):
method test_too_few_points (line 45) | def test_too_few_points(self):
method test_empty_input (line 50) | def test_empty_input(self):
method test_filters_invalid (line 54) | def test_filters_invalid(self):
class TestAnalyzeConvergence (line 67) | class TestAnalyzeConvergence:
method test_single_method (line 68) | def test_single_method(self):
method test_multiple_methods (line 82) | def test_multiple_methods(self):
method test_expected_orders (line 102) | def test_expected_orders(self):
method test_non_converging (line 114) | def test_non_converging(self):
method test_summary_string (line 126) | def test_summary_string(self):
method test_l2_error_key (line 137) | def test_l2_error_key(self):
method test_empty_data (line 149) | def test_empty_data(self):
FILE: tests/test_copilot.py
class TestResearchMode (line 34) | class TestResearchMode:
method test_all_modes (line 35) | def test_all_modes(self):
method test_from_value (line 40) | def test_from_value(self):
method test_invalid_mode_raises (line 45) | def test_invalid_mode_raises(self):
method test_mode_count (line 49) | def test_mode_count(self):
class TestFeedback (line 58) | class TestFeedback:
method test_feedback_actions_defined (line 59) | def test_feedback_actions_defined(self):
method test_feedback_frozen (line 63) | def test_feedback_frozen(self):
method test_feedback_defaults (line 68) | def test_feedback_defaults(self):
method test_feedback_with_modifications (line 75) | def test_feedback_with_modifications(self):
class TestFeedbackHandler (line 90) | class TestFeedbackHandler:
method test_write_feedback_request (line 91) | def test_write_feedback_request(self, tmp_path: Path):
method test_read_feedback_response_valid (line 105) | def test_read_feedback_response_valid(self, tmp_path: Path):
method test_read_feedback_response_invalid_action (line 120) | def test_read_feedback_response_invalid_action(self, tmp_path: Path):
method test_read_feedback_response_missing (line 128) | def test_read_feedback_response_missing(self, tmp_path: Path):
method test_read_feedback_response_malformed (line 132) | def test_read_feedback_response_malformed(self, tmp_path: Path):
method test_read_feedback_response_with_rollback (line 138) | def test_read_feedback_response_with_rollback(self, tmp_path: Path):
method test_read_feedback_response_branch (line 152) | def test_read_feedback_response_branch(self, tmp_path: Path):
method test_clear_request (line 165) | def test_clear_request(self, tmp_path: Path):
method test_clear_request_no_file (line 171) | def test_clear_request_no_file(self, tmp_path: Path):
method test_wait_for_feedback_timeout (line 175) | def test_wait_for_feedback_timeout(self, tmp_path: Path):
method test_wait_for_feedback_finds_response (line 180) | def test_wait_for_feedback_finds_response(self, tmp_path: Path):
class TestBranchManager (line 206) | class TestBranchManager:
method test_create_branch (line 207) | def test_create_branch(self, tmp_path: Path):
method test_create_branch_max_reached (line 228) | def test_create_branch_max_reached(self, tmp_path: Path):
method test_create_branch_duplicate_name (line 234) | def test_create_branch_duplicate_name(self, tmp_path: Path):
method test_list_branches_empty (line 240) | def test_list_branches_empty(self, tmp_path: Path):
method test_list_branches (line 244) | def test_list_branches(self, tmp_path: Path):
method test_switch_branch (line 253) | def test_switch_branch(self, tmp_path: Path):
method test_switch_branch_nonexistent (line 259) | def test_switch_branch_nonexistent(self, tmp_path: Path):
method test_delete_branch (line 264) | def test_delete_branch(self, tmp_path: Path):
method test_delete_branch_nonexistent (line 271) | def test_delete_branch_nonexistent(self, tmp_path: Path):
method test_compare_branches (line 276) | def test_compare_branches(self, tmp_path: Path):
method test_compare_branches_nonexistent (line 287) | def test_compare_branches_nonexistent(self, tmp_path: Path):
method test_count_stages (line 293) | def test_count_stages(self, tmp_path: Path):
class TestCoPilotController (line 305) | class TestCoPilotController:
method _make_config (line 306) | def _make_config(self, **overrides) -> CoPilotConfig:
method test_should_pause_zero_touch (line 318) | def test_should_pause_zero_touch(self, tmp_path: Path):
method test_should_pause_auto_pilot_gate (line 324) | def test_should_pause_auto_pilot_gate(self, tmp_path: Path):
method test_should_pause_auto_pilot_gates_disabled (line 330) | def test_should_pause_auto_pilot_gates_disabled(self, tmp_path: Path):
method test_should_pause_copilot_every_stage (line 335) | def test_should_pause_copilot_every_stage(self, tmp_path: Path):
method test_should_pause_copilot_gates_only (line 341) | def test_should_pause_copilot_gates_only(self, tmp_path: Path):
method test_present_stage_result (line 347) | def test_present_stage_result(self, tmp_path: Path):
method test_present_stage_result_with_error (line 360) | def test_present_stage_result_with_error(self, tmp_path: Path):
method test_handle_feedback_approve (line 372) | def test_handle_feedback_approve(self, tmp_path: Path):
method test_handle_feedback_modify (line 379) | def test_handle_feedback_modify(self, tmp_path: Path):
method test_handle_feedback_retry (line 392) | def test_handle_feedback_retry(self, tmp_path: Path):
method test_handle_feedback_skip (line 399) | def test_handle_feedback_skip(self, tmp_path: Path):
method test_handle_feedback_branch (line 406) | def test_handle_feedback_branch(self, tmp_path: Path):
method test_handle_feedback_branch_disabled (line 414) | def test_handle_feedback_branch_disabled(self, tmp_path: Path):
method test_handle_feedback_branch_max_reached (line 421) | def test_handle_feedback_branch_max_reached(self, tmp_path: Path):
method test_handle_feedback_rollback (line 432) | def test_handle_feedback_rollback(self, tmp_path: Path):
method test_handle_feedback_unknown_action (line 440) | def test_handle_feedback_unknown_action(self, tmp_path: Path):
method test_from_config_zero_touch_returns_none (line 448) | def test_from_config_zero_touch_returns_none(self, tmp_path: Path):
method test_from_config_copilot_returns_controller (line 453) | def test_from_config_copilot_returns_controller(self, tmp_path: Path):
method test_from_config_auto_pilot_returns_controller (line 459) | def test_from_config_auto_pilot_returns_controller(self, tmp_path: Path):
method test_handle_feedback_branch_default_name (line 464) | def test_handle_feedback_branch_default_name(self, tmp_path: Path):
FILE: tests/test_decision_agent.py
class _FakeLLMResponse (line 34) | class _FakeLLMResponse:
method __post_init__ (line 44) | def __post_init__(self):
class _FakeLLM (line 49) | class _FakeLLM:
method __init__ (line 52) | def __init__(self, response: str = "{}"):
method chat (line 56) | def chat(self, messages, *, system=None, max_tokens=None,
class TestParseDecisions (line 70) | class TestParseDecisions:
method _agent (line 73) | def _agent(self):
method test_valid_json_array (line 77) | def test_valid_json_array(self):
method test_json_inside_markdown_fences (line 100) | def test_json_inside_markdown_fences(self):
method test_json_with_surrounding_text (line 107) | def test_json_with_surrounding_text(self):
method test_no_json_array_raises (line 113) | def test_no_json_array_raises(self):
method test_empty_array (line 118) | def test_empty_array(self):
method test_non_dict_items_skipped (line 123) | def test_non_dict_items_skipped(self):
method test_invalid_backend_auto_inferred (line 134) | def test_invalid_backend_auto_inferred(self):
method test_missing_fields_get_defaults (line 143) | def test_missing_fields_get_defaults(self):
class TestHeuristicDecide (line 158) | class TestHeuristicDecide:
method _agent (line 161) | def _agent(self, min_figures=3, max_figures=10):
method test_with_experiments (line 167) | def test_with_experiments(self):
method test_without_experiments (line 180) | def test_without_experiments(self):
method test_ablation_trigger (line 191) | def test_ablation_trigger(self):
method test_max_figures_respected (line 202) | def test_max_figures_respected(self):
class TestInferBackend (line 216) | class TestInferBackend:
method test_code_types (line 217) | def test_code_types(self):
method test_image_types (line 226) | def test_image_types(self):
method test_unknown_defaults_to_image (line 235) | def test_unknown_defaults_to_image(self):
class TestEnforceBounds (line 244) | class TestEnforceBounds:
method _agent (line 245) | def _agent(self, min_figures=3, max_figures=6):
method test_min_padding (line 251) | def test_min_padding(self):
method test_max_truncation (line 261) | def test_max_truncation(self):
method test_ensures_image_figure (line 272) | def test_ensures_image_figure(self):
method test_ensures_code_figure_with_experiments (line 282) | def test_ensures_code_figure_with_experiments(self):
class TestBuildPrompt (line 297) | class TestBuildPrompt:
method _agent (line 298) | def _agent(self):
method test_prompt_contains_description (line 304) | def test_prompt_contains_description(self):
method test_prompt_contains_style (line 316) | def test_prompt_contains_style(self):
method test_prompt_varies_by_type (line 327) | def test_prompt_varies_by_type(self):
class TestGetTypeGuidelines (line 345) | class TestGetTypeGuidelines:
method test_known_types (line 346) | def test_known_types(self):
method test_unknown_type_falls_back (line 357) | def test_unknown_type_falls_back(self):
class TestNanoBananaNoKey (line 368) | class TestNanoBananaNoKey:
method test_execute_without_key_fails (line 369) | def test_execute_without_key_fails(self, tmp_path):
method test_execute_empty_figures_succeeds (line 387) | def test_execute_empty_figures_succeeds(self, tmp_path):
class TestDockerRenderer (line 406) | class TestDockerRenderer:
method _agent (line 407) | def _agent(self):
method test_docker_command_construction (line 416) | def test_docker_command_construction(self, tmp_path):
method test_docker_timeout_kills_container (line 447) | def test_docker_timeout_kills_container(self, tmp_path):
method test_docker_not_found (line 467) | def test_docker_not_found(self, tmp_path):
method test_docker_script_failure (line 485) | def test_docker_script_failure(self, tmp_path):
class TestStripThinkingTags (line 512) | class TestStripThinkingTags:
method test_closed_tags_removed (line 513) | def test_closed_tags_removed(self):
method test_no_tags (line 518) | def test_no_tags(self):
method test_empty_string (line 523) | def test_empty_string(self):
method test_nested_code_preserved (line 527) | def test_nested_code_preserved(self):
method test_unclosed_tag_behavior (line 534) | def test_unclosed_tag_behavior(self):
class TestDecisionAgentExecute (line 548) | class TestDecisionAgentExecute:
method test_llm_decision (line 549) | def test_llm_decision(self):
method test_fallback_on_bad_llm (line 572) | def test_fallback_on_bad_llm(self):
method test_fallback_on_no_llm (line 587) | def test_fallback_on_no_llm(self):
class TestRendererCwd (line 604) | class TestRendererCwd:
method test_local_cwd_is_output_dir (line 607) | def test_local_cwd_is_output_dir(self, tmp_path):
class TestChatStripThinking (line 631) | class TestChatStripThinking:
method test_strip_thinking_false_by_default (line 634) | def test_strip_thinking_false_by_default(self):
method test_strip_thinking_true_removes_tags (line 671) | def test_strip_thinking_true_removes_tags(self):
class TestLatexDisplayMath (line 714) | class TestLatexDisplayMath:
method test_dollar_dollar_to_equation (line 717) | def test_dollar_dollar_to_equation(self):
method test_multiline_dollar_dollar (line 735) | def test_multiline_dollar_dollar(self):
method test_inline_dollar_dollar_not_escaped (line 748) | def test_inline_dollar_dollar_not_escaped(self):
class TestLatexFigurePlacement (line 762) | class TestLatexFigurePlacement:
method test_figure_uses_top_placement (line 765) | def test_figure_uses_top_placement(self):
method test_figure_has_centering (line 772) | def test_figure_has_centering(self):
class TestChatWithPromptStripThinking (line 787) | class TestChatWithPromptStripThinking:
method test_default_strips_thinking (line 790) | def test_default_strips_thinking(self):
method test_can_disable_stripping (line 806) | def test_can_disable_stripping(self):
FILE: tests/test_domain_detector.py
class TestProfileLoading (line 28) | class TestProfileLoading:
method setup_method (line 29) | def setup_method(self):
method test_load_all_profiles_returns_dict (line 32) | def test_load_all_profiles_returns_dict(self):
method test_profiles_have_required_fields (line 37) | def test_profiles_have_required_fields(self):
method test_get_profile_existing (line 45) | def test_get_profile_existing(self):
method test_get_profile_nonexistent (line 52) | def test_get_profile_nonexistent(self):
method test_get_generic_profile (line 56) | def test_get_generic_profile(self):
method test_ml_profiles_exist (line 61) | def test_ml_profiles_exist(self):
method test_physics_profiles_exist (line 66) | def test_physics_profiles_exist(self):
method test_other_domain_profiles_exist (line 71) | def test_other_domain_profiles_exist(self):
method test_physics_profile_paradigm (line 84) | def test_physics_profile_paradigm(self):
method test_economics_profile_paradigm (line 90) | def test_economics_profile_paradigm(self):
class TestKeywordDetection (line 102) | class TestKeywordDetection:
method test_ml_vision_keywords (line 103) | def test_ml_vision_keywords(self):
method test_ml_nlp_keywords (line 107) | def test_ml_nlp_keywords(self):
method test_ml_rl_keywords (line 111) | def test_ml_rl_keywords(self):
method test_physics_keywords (line 115) | def test_physics_keywords(self):
method test_chemistry_keywords (line 119) | def test_chemistry_keywords(self):
method test_biology_keywords (line 123) | def test_biology_keywords(self):
method test_economics_keywords (line 126) | def test_economics_keywords(self):
method test_math_keywords (line 130) | def test_math_keywords(self):
method test_security_keywords (line 134) | def test_security_keywords(self):
method test_robotics_keywords (line 137) | def test_robotics_keywords(self):
method test_generic_ml_fallback (line 140) | def test_generic_ml_fallback(self):
method test_unknown_topic (line 144) | def test_unknown_topic(self):
method test_case_insensitive (line 147) | def test_case_insensitive(self):
class TestDetectDomain (line 157) | class TestDetectDomain:
method test_detect_ml_vision (line 158) | def test_detect_ml_vision(self):
method test_detect_physics (line 163) | def test_detect_physics(self):
method test_detect_with_hypotheses (line 168) | def test_detect_with_hypotheses(self):
method test_detect_generic_fallback (line 175) | def test_detect_generic_fallback(self):
method test_detect_domain_id_shortcut (line 179) | def test_detect_domain_id_shortcut(self):
class TestIsMLDomain (line 192) | class TestIsMLDomain:
method test_ml_domains (line 193) | def test_ml_domains(self):
method test_non_ml_domains (line 199) | def test_non_ml_domains(self):
method test_generic_not_ml (line 205) | def test_generic_not_ml(self):
class TestDomainProfile (line 215) | class TestDomainProfile:
method test_default_values (line 216) | def test_default_values(self):
method test_custom_values (line 223) | def test_custom_values(self):
class TestEnums (line 241) | class TestEnums:
method test_experiment_paradigm_values (line 242) | def test_experiment_paradigm_values(self):
method test_metric_type_values (line 248) | def test_metric_type_values(self):
class TestDetectionAccuracy (line 259) | class TestDetectionAccuracy:
method test_keyword_detection_accuracy (line 309) | def test_keyword_detection_accuracy(self):
method test_full_detection_accuracy (line 325) | def test_full_detection_accuracy(self):
FILE: tests/test_entry_point_validation.py
class TestValidateEntryPoint (line 18) | class TestValidateEntryPoint:
method test_valid_entry_point (line 21) | def test_valid_entry_point(self) -> None:
method test_valid_nested_entry_point (line 24) | def test_valid_nested_entry_point(self) -> None:
method test_valid_dot_slash_prefix (line 27) | def test_valid_dot_slash_prefix(self) -> None:
method test_valid_dot_in_middle (line 30) | def test_valid_dot_in_middle(self) -> None:
method test_valid_deeply_nested (line 33) | def test_valid_deeply_nested(self) -> None:
method test_rejects_absolute_path (line 36) | def test_rejects_absolute_path(self) -> None:
method test_rejects_path_traversal (line 41) | def test_rejects_path_traversal(self) -> None:
method test_rejects_dotdot_in_middle (line 46) | def test_rejects_dotdot_in_middle(self) -> None:
method test_rejects_empty_string (line 51) | def test_rejects_empty_string(self) -> None:
method test_rejects_whitespace_only (line 56) | def test_rejects_whitespace_only(self) -> None:
class TestValidateEntryPointResolved (line 65) | class TestValidateEntryPointResolved:
method test_valid_path_passes (line 68) | def test_valid_path_passes(self, tmp_path: Path) -> None:
method test_symlink_escape_rejected (line 72) | def test_symlink_escape_rejected(self, tmp_path: Path) -> None:
method test_nested_valid_path_passes (line 86) | def test_nested_valid_path_passes(self, tmp_path: Path) -> None:
class TestExperimentSandboxEntryPointValidation (line 96) | class TestExperimentSandboxEntryPointValidation:
method _make_sandbox (line 99) | def _make_sandbox(self, tmp_path: Path) -> ExperimentSandbox:
method test_rejects_path_traversal (line 105) | def test_rejects_path_traversal(self, tmp_path: Path) -> None:
method test_rejects_absolute_path (line 123) | def test_rejects_absolute_path(self, tmp_path: Path) -> None:
FILE: tests/test_experiment_diagnosis.py
class TestMissingDependency (line 27) | class TestMissingDependency:
method test_detects_module_not_found (line 28) | def test_detects_module_not_found(self):
method test_detects_box2d (line 37) | def test_detects_box2d(self):
class TestPermissionError (line 47) | class TestPermissionError:
method test_detects_hf_permission (line 48) | def test_detects_hf_permission(self):
class TestTimeGuard (line 58) | class TestTimeGuard:
method test_detects_dominant_time_guard (line 59) | def test_detects_dominant_time_guard(self):
method test_no_time_guard_if_most_complete (line 73) | def test_no_time_guard_if_most_complete(self):
class TestSyntheticData (line 89) | class TestSyntheticData:
method test_detects_synthetic_fallback (line 90) | def test_detects_synthetic_fallback(self):
class TestGPUOOM (line 99) | class TestGPUOOM:
method test_detects_oom (line 100) | def test_detects_oom(self):
class TestIdenticalConditions (line 109) | class TestIdenticalConditions:
method test_detects_from_ablation_warnings (line 110) | def test_detects_from_ablation_warnings(self):
class TestCodeCrash (line 123) | class TestCodeCrash:
method test_detects_traceback (line 124) | def test_detects_traceback(self):
class TestQualityAssessment (line 143) | class TestQualityAssessment:
method test_full_paper_mode (line 144) | def test_full_paper_mode(self):
method test_preliminary_study_mode (line 163) | def test_preliminary_study_mode(self):
method test_technical_report_no_conditions (line 175) | def test_technical_report_no_conditions(self):
method test_technical_report_synthetic_data (line 184) | def test_technical_report_synthetic_data(self):
class TestRepairPrompt (line 198) | class TestRepairPrompt:
method test_generates_prompt (line 199) | def test_generates_prompt(self):
method test_serialization (line 209) | def test_serialization(self):
class TestRealArtifacts (line 224) | class TestRealArtifacts:
method _load (line 225) | def _load(self, run_id: str) -> tuple[dict, dict | None]:
method test_run_e57360_diagnosis (line 239) | def test_run_e57360_diagnosis(self):
method test_run_8b4a1b_diagnosis (line 246) | def test_run_8b4a1b_diagnosis(self):
class TestDatasetNotFoundError (line 253) | class TestDatasetNotFoundError:
method test_detects_hf_dataset_not_found (line 256) | def test_detects_hf_dataset_not_found(self):
method test_suggested_fix_mentions_precached (line 274) | def test_suggested_fix_mentions_precached(self):
class TestNearRandomAccuracy (line 287) | class TestNearRandomAccuracy:
method test_detects_near_random_cifar10 (line 290) | def test_detects_near_random_cifar10(self):
method test_normal_accuracy_not_flagged (line 302) | def test_normal_accuracy_not_flagged(self):
method test_zero_accuracy_not_flagged (line 314) | def test_zero_accuracy_not_flagged(self):
class TestRealArtifactsContinued (line 327) | class TestRealArtifactsContinued(TestRealArtifacts):
method test_run_acbdfa_diagnosis (line 330) | def test_run_acbdfa_diagnosis(self):
FILE: tests/test_experiment_repair.py
class TestBuildRepairPrompt (line 36) | class TestBuildRepairPrompt:
method test_basic_prompt (line 37) | def test_basic_prompt(self):
method test_scope_reduction_included (line 63) | def test_scope_reduction_included(self):
method test_dep_fix_section (line 83) | def test_dep_fix_section(self):
method test_long_code_truncated (line 98) | def test_long_code_truncated(self):
method test_output_format_section (line 104) | def test_output_format_section(self):
class TestRepairResult (line 116) | class TestRepairResult:
method test_serialization (line 117) | def test_serialization(self):
method test_serialization_with_cycles (line 128) | def test_serialization_with_cycles(self):
class TestExtractCodeBlocks (line 155) | class TestExtractCodeBlocks:
method test_named_blocks (line 156) | def test_named_blocks(self):
method test_unnamed_block_fallback (line 175) | def test_unnamed_block_fallback(self):
method test_no_blocks (line 185) | def test_no_blocks(self):
method test_path_normalization (line 190) | def test_path_normalization(self):
class TestBuildExperimentSummary (line 204) | class TestBuildExperimentSummary:
method test_basic_summary (line 205) | def test_basic_summary(self):
method test_failed_run (line 225) | def test_failed_run(self):
method test_multi_seed_grouping (line 238) | def test_multi_seed_grouping(self):
class TestLoadExperimentCode (line 265) | class TestLoadExperimentCode:
method test_loads_from_stage_13 (line 266) | def test_loads_from_stage_13(self, tmp_path):
method test_loads_from_stage_10 (line 276) | def test_loads_from_stage_10(self, tmp_path):
method test_empty_when_no_code (line 284) | def test_empty_when_no_code(self, tmp_path):
class TestLoadExperimentSummary (line 289) | class TestLoadExperimentSummary:
method test_loads_summary (line 290) | def test_loads_summary(self, tmp_path):
class TestSelectBestResults (line 306) | class TestSelectBestResults:
method test_picks_best_across_cycles (line 307) | def test_picks_best_across_cycles(self, tmp_path):
method test_returns_none_when_empty (line 328) | def test_returns_none_when_empty(self, tmp_path):
class TestRunRepairLoop (line 338) | class TestRunRepairLoop:
method _make_run_dir (line 339) | def _make_run_dir(self, tmp_path, n_conditions=1, has_code=True):
method test_skips_when_already_sufficient (line 368) | def test_skips_when_already_sufficient(self, tmp_path):
method test_returns_failure_when_no_code (line 405) |
Condensed preview — 422 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (4,540K chars).
[
{
"path": ".gitignore",
"chars": 1517,
"preview": "HANDOFF_METACLAW_SKILL_LOOP.md\n.venv/\n__pycache__/\n*.pyc\n*.egg-info/\ndist/\nbuild/\nworkspaces/\n.claude/*\n!.claude/agents/"
},
{
"path": "CONTRIBUTING.md",
"chars": 800,
"preview": "# Contributing to AutoResearchClaw\n\n## Setup\n\n1. Fork and clone the repo\n2. Create a venv and install with dev extras:\n "
},
{
"path": "LICENSE",
"chars": 1067,
"preview": "MIT License\n\nCopyright (c) 2026 Aiming Lab\n\nPermission is hereby granted, free of charge, to any person obtaining a copy"
},
{
"path": "README.md",
"chars": 29584,
"preview": "<p align=\"center\">\n <img src=\"image/logo.png\" width=\"700\" alt=\"AutoResearchClaw Logo\">\n</p>\n\n<h2 align=\"center\"><b>Chat"
},
{
"path": "config.researchclaw.example.yaml",
"chars": 7497,
"preview": "project:\n name: \"my-research\"\n mode: \"full-auto\"\n\nresearch:\n topic: \"Your research topic here\"\n domains:\n - \"mach"
},
{
"path": "docs/BUG_FIX_DOCUMENT_20260316.md",
"chars": 13171,
"preview": "# Bug Fix Document — AutoResearchClaw Pipeline\n\n> 生成日期:2026-03-16\n> 反馈来源:2 位测试者(user1: CV 方向 / GPU 环境, user2: Windows 环境"
},
{
"path": "docs/BUG_TRACKER.md",
"chars": 5112,
"preview": "# Bug Tracker & TODO\n\n> 实验运行期间发现的 bug 和待修复事项。实验结束后统一修复。\n\n## 已发现的 Bug\n\n### BUG-01: Stage 2 合约缺少 queries.json 输出 (已修复)\n- *"
},
{
"path": "docs/CHANGELOG_ANTHROPIC_ADAPTER.md",
"chars": 8935,
"preview": "# Anthropic Messages API Adapter — 改动说明\n\n> 本文档详细描述了为 ResearchClaw LLM 模块引入 Anthropic Messages API 原生支持的改动内容,\n> 并通过架构图说明本"
},
{
"path": "docs/PIPELINE_TEST_LOG_R5.md",
"chars": 14018,
"preview": "# Pipeline Test Log — Round 5 (main branch)\n\n> **分支**: `main` @ `e95527f`\n> **日期**: 2026-03-18\n> **目的**: 全面测试 main 分支 Pi"
},
{
"path": "docs/README_AR.md",
"chars": 25199,
"preview": "<p align=\"center\">\n <img src=\"../image/logo.png\" width=\"700\" alt=\"AutoResearchClaw Logo\">\n</p>\n\n<h2 align=\"center\"><b>ش"
},
{
"path": "docs/README_CN.md",
"chars": 21891,
"preview": "<p align=\"center\">\n <img src=\"../image/logo.png\" width=\"700\" alt=\"AutoResearchClaw Logo\">\n</p>\n\n<h2 align=\"center\"><b>聊"
},
{
"path": "docs/README_DE.md",
"chars": 28023,
"preview": "<p align=\"center\">\n <img src=\"../image/logo.png\" width=\"700\" alt=\"AutoResearchClaw Logo\">\n</p>\n\n<h2 align=\"center\"><b>I"
},
{
"path": "docs/README_ES.md",
"chars": 28642,
"preview": "<p align=\"center\">\n <img src=\"../image/logo.png\" width=\"700\" alt=\"AutoResearchClaw Logo\">\n</p>\n\n<h2 align=\"center\"><b>C"
},
{
"path": "docs/README_FR.md",
"chars": 28878,
"preview": "<p align=\"center\">\n <img src=\"../image/logo.png\" width=\"700\" alt=\"AutoResearchClaw Logo\">\n</p>\n\n<h2 align=\"center\"><b>D"
},
{
"path": "docs/README_JA.md",
"chars": 20930,
"preview": "<p align=\"center\">\n <img src=\"../image/logo.png\" width=\"700\" alt=\"AutoResearchClaw Logo\">\n</p>\n\n<h2 align=\"center\"><b>ア"
},
{
"path": "docs/README_KO.md",
"chars": 21211,
"preview": "<p align=\"center\">\n <img src=\"../image/logo.png\" width=\"700\" alt=\"AutoResearchClaw Logo\">\n</p>\n\n<h2 align=\"center\"><b>아"
},
{
"path": "docs/README_PT.md",
"chars": 28082,
"preview": "<p align=\"center\">\n <img src=\"../image/logo.png\" width=\"700\" alt=\"AutoResearchClaw Logo\">\n</p>\n\n<h2 align=\"center\"><b>C"
},
{
"path": "docs/README_RU.md",
"chars": 27598,
"preview": "<p align=\"center\">\n <img src=\"../image/logo.png\" width=\"700\" alt=\"AutoResearchClaw Logo\">\n</p>\n\n<h2 align=\"center\"><b>Н"
},
{
"path": "docs/TESTER_GUIDE.md",
"chars": 18228,
"preview": "<p align=\"center\">\n <img src=\"../image/logo.png\" width=\"500\" alt=\"AutoResearchClaw Logo\">\n</p>\n\n<h2 align=\"center\">🧪 Co"
},
{
"path": "docs/TESTER_GUIDE_CN.md",
"chars": 11304,
"preview": "<p align=\"center\">\n <img src=\"../image/logo.png\" width=\"500\" alt=\"AutoResearchClaw Logo\">\n</p>\n\n<h2 align=\"center\">🧪 社区"
},
{
"path": "docs/TESTER_GUIDE_JA.md",
"chars": 12493,
"preview": "<p align=\"center\">\n <img src=\"../image/logo.png\" width=\"500\" alt=\"AutoResearchClaw Logo\">\n</p>\n\n<h2 align=\"center\">🧪 コミ"
},
{
"path": "docs/agent_figure_and_benchmark_plan.md",
"chars": 19290,
"preview": "# Multi-Agent Figure Generation & Benchmark Selection — Task Requirements\n\n> **Created**: 2026-03-15\n> **Updated**: 2026"
},
{
"path": "docs/figure_prompts/case_a_meta_learning.md",
"chars": 3528,
"preview": "# Case A: Continual Meta-Learning — Image Generation Prompt\n\n## Prompt\n\nA premium, modern data visualization infographic"
},
{
"path": "docs/figure_prompts/case_b_rlhf_alignment.md",
"chars": 3621,
"preview": "# Case B: RLHF with Curriculum Reward Shaping — Image Generation Prompt\n\n## Prompt\n\nA premium, modern data visualization"
},
{
"path": "docs/integration-guide.md",
"chars": 35526,
"preview": "# AutoResearchClaw Integration Guide\n\n> **The simplest way to use AutoResearchClaw**: give the repo URL to [OpenClaw](ht"
},
{
"path": "docs/issue_tracker_v9.md",
"chars": 26374,
"preview": "# AutoResearchClaw — Issue Tracker v9\n\n> Created: 2026-03-15\n> Status: **Active** — tracking all known issues from Phase"
},
{
"path": "docs/iteration_plan_v8.md",
"chars": 13064,
"preview": "# AutoResearchClaw Pipeline — 持续迭代改进方案 V8\n\n> 创建日期: 2026-03-15\n> 基于: V7 质量修复 (P1-P14) + Run 1-7 测试反馈\n> 目标: 将 pipeline 从 3"
},
{
"path": "docs/iteration_showcase_narrative.md",
"chars": 7496,
"preview": "# AutoResearchClaw: Self-Iterating Experiment Optimization — Showcase\n\n> Figure: `docs/figures/iteration_improvement_sho"
},
{
"path": "docs/metaclaw-integration-plan.md",
"chars": 21440,
"preview": "# MetaClaw × AutoResearchClaw 集成方案\n\n> **Status**: ✅ **Implemented & Merged to main** (v0.3.0, 2026-03-16)\n>\n> **目标**: 将 "
},
{
"path": "docs/next_phase_showcase_plan.md",
"chars": 10348,
"preview": "# AutoResearchClaw — Phase 5: Showcase Website & Sample Papers\n\n> Created: 2026-03-15\n> Status: **Website Built** — stat"
},
{
"path": "docs/pipeline_critical_fixes_v8.md",
"chars": 4842,
"preview": "# Pipeline Critical Fixes V8 — 投稿级论文质量修复\n\n## 目标\n修复所有阻止 Pipeline 产出符合 AI 顶会投稿标准论文的问题。\n\n---\n\n## Tier 1: 阻断性问题(必须立即修复)\n\n###"
},
{
"path": "docs/rate_limit_fix_plan.md",
"chars": 10083,
"preview": "# arXiv / 文献检索限流问题 — 调研报告与修复方案\n\n> Created: 2026-03-15\n> Status: **DONE** — All 7 tasks completed, 1117/1117 tests passin"
},
{
"path": "docs/sandbox_environment_fix_plan.md",
"chars": 23542,
"preview": "# AutoResearchClaw — Docker Sandbox Environment Enhancement Plan\n\n> Created: 2026-03-15\n> Status: **DONE** — All 10 issu"
},
{
"path": "docs/showcase/SHOWCASE.md",
"chars": 22884,
"preview": "<h1 align=\"center\">🏆 Generated Paper Showcase</h1>\n\n<p align=\"center\">\n <i>From a one-line idea to a conference-ready p"
},
{
"path": "prompts.default.yaml",
"chars": 21975,
"preview": "# =============================================================================\n# AutoResearchClaw — Default Prompt Temp"
},
{
"path": "pyproject.toml",
"chars": 1015,
"preview": "[project]\nname = \"researchclaw\"\nversion = \"0.3.1\"\ndescription = \"ResearchClaw — Autonomous Research Pipeline. Turn any r"
},
{
"path": "researchclaw/__init__.py",
"chars": 74,
"preview": "\"\"\"ResearchClaw — Autonomous Research Pipeline.\"\"\"\n\n__version__ = \"0.3.1\"\n"
},
{
"path": "researchclaw/__main__.py",
"chars": 113,
"preview": "\"\"\"Allow running as `python -m researchclaw`.\"\"\"\n\nimport sys\nfrom researchclaw.cli import main\n\nsys.exit(main())\n"
},
{
"path": "researchclaw/adapters.py",
"chars": 4072,
"preview": "\"\"\"Typed adapter interfaces and deterministic recording stubs.\"\"\"\n\nfrom __future__ import annotations\n\nfrom dataclasses "
},
{
"path": "researchclaw/agents/__init__.py",
"chars": 60,
"preview": "\"\"\"Multi-agent subsystems for AutoResearchClaw pipeline.\"\"\"\n"
},
{
"path": "researchclaw/agents/base.py",
"chars": 6685,
"preview": "\"\"\"Base classes for multi-agent subsystems.\n\nProvides ``BaseAgent`` (individual agent) and ``AgentOrchestrator``\n(coordi"
},
{
"path": "researchclaw/agents/benchmark_agent/__init__.py",
"chars": 947,
"preview": "\"\"\"BenchmarkAgent — multi-agent benchmark, dataset, and baseline selection.\n\nArchitecture\n------------\n1. **Surveyor** —"
},
{
"path": "researchclaw/agents/benchmark_agent/acquirer.py",
"chars": 11264,
"preview": "\"\"\"Acquirer Agent — generates data loading code and download scripts.\n\nProduces three outputs consumed by the code gener"
},
{
"path": "researchclaw/agents/benchmark_agent/orchestrator.py",
"chars": 12317,
"preview": "\"\"\"BenchmarkAgent Orchestrator — coordinates the four sub-agents.\n\nFlow: Surveyor → Selector → Acquirer → Validator (→ r"
},
{
"path": "researchclaw/agents/benchmark_agent/selector.py",
"chars": 14054,
"preview": "\"\"\"Selector Agent — filters and ranks benchmark candidates.\n\nApplies hardware constraints, time budget, network policy, "
},
{
"path": "researchclaw/agents/benchmark_agent/surveyor.py",
"chars": 13357,
"preview": "\"\"\"Surveyor Agent — searches for domain-relevant benchmarks and baselines.\n\nData sources (in priority order):\n1. Local `"
},
{
"path": "researchclaw/agents/benchmark_agent/validator.py",
"chars": 7471,
"preview": "\"\"\"Validator Agent — validates generated code for correctness.\n\nPerforms three levels of validation:\n1. **Syntax check**"
},
{
"path": "researchclaw/agents/code_searcher/__init__.py",
"chars": 422,
"preview": "\"\"\"Code Searcher agent — searches GitHub for reference code before generation.\n\nThis agent searches GitHub repositories "
},
{
"path": "researchclaw/agents/code_searcher/agent.py",
"chars": 9935,
"preview": "\"\"\"Code Search Agent — orchestrates GitHub search, pattern extraction, and caching.\n\nThis is the main entry point for co"
},
{
"path": "researchclaw/agents/code_searcher/cache.py",
"chars": 4021,
"preview": "\"\"\"Disk-based cache for code search results.\n\nCaches search results by domain + topic hash with a configurable TTL\n(defa"
},
{
"path": "researchclaw/agents/code_searcher/github_client.py",
"chars": 9471,
"preview": "\"\"\"GitHub REST API client for code and repository search.\n\nHandles rate limiting, authentication, and response parsing f"
},
{
"path": "researchclaw/agents/code_searcher/pattern_extractor.py",
"chars": 6397,
"preview": "\"\"\"Extract reusable code patterns from GitHub search results.\n\nUses LLM to analyze reference code and extract:\n - API c"
},
{
"path": "researchclaw/agents/code_searcher/query_gen.py",
"chars": 5120,
"preview": "\"\"\"LLM-based search query generation for code search.\n\nGiven a research topic and domain, generates targeted search quer"
},
{
"path": "researchclaw/agents/figure_agent/__init__.py",
"chars": 951,
"preview": "\"\"\"FigureAgent — multi-agent intelligent chart generation system.\n\nArchitecture\n------------\n1. **Planner** — analyzes e"
},
{
"path": "researchclaw/agents/figure_agent/codegen.py",
"chars": 35339,
"preview": "\"\"\"CodeGen Agent — generates visualization code for each figure.\n\nTakes the Planner's figure specifications and experime"
},
{
"path": "researchclaw/agents/figure_agent/critic.py",
"chars": 15746,
"preview": "\"\"\"Critic Agent — tri-modal review of rendered charts.\n\nReviews each chart on three dimensions (inspired by PlotGen):\n1."
},
{
"path": "researchclaw/agents/figure_agent/decision.py",
"chars": 15325,
"preview": "\"\"\"Decision Agent — decides what figures are needed and how to generate them.\n\nAnalyzes the paper draft/outline and expe"
},
{
"path": "researchclaw/agents/figure_agent/integrator.py",
"chars": 7817,
"preview": "\"\"\"Integrator Agent — determines figure placement in the paper.\n\nMaps each rendered figure to the correct paper section,"
},
{
"path": "researchclaw/agents/figure_agent/nano_banana.py",
"chars": 15524,
"preview": "\"\"\"Nano Banana Agent — generates conceptual/architectural images via Gemini.\n\nUses Google's Gemini native image generati"
},
{
"path": "researchclaw/agents/figure_agent/orchestrator.py",
"chars": 18046,
"preview": "\"\"\"FigureAgent Orchestrator — coordinates the figure generation sub-agents.\n\nFlow:\n Decision Agent → analyzes paper → d"
},
{
"path": "researchclaw/agents/figure_agent/planner.py",
"chars": 19424,
"preview": "\"\"\"Planner Agent — analyzes experiment results and determines chart plan.\n\nExamines the experiment results data structur"
},
{
"path": "researchclaw/agents/figure_agent/renderer.py",
"chars": 13163,
"preview": "\"\"\"Renderer Agent — executes plotting scripts and verifies output.\n\nRuns generated Python scripts in a subprocess (or Do"
},
{
"path": "researchclaw/agents/figure_agent/style_config.py",
"chars": 5685,
"preview": "\"\"\"Academic chart styling configuration for FigureAgent.\n\nDefines global constants for chart styling that conform to AI "
},
{
"path": "researchclaw/assessor/__init__.py",
"chars": 413,
"preview": "\"\"\"Paper quality assessment and venue recommendation.\"\"\"\n\nfrom researchclaw.assessor.rubrics import RUBRICS, Rubric\nfrom"
},
{
"path": "researchclaw/assessor/comparator.py",
"chars": 4024,
"preview": "\"\"\"Historical score comparison and tracking.\"\"\"\n\nfrom __future__ import annotations\n\nimport json\nimport logging\nfrom dat"
},
{
"path": "researchclaw/assessor/rubrics.py",
"chars": 1471,
"preview": "\"\"\"Paper quality assessment rubrics.\"\"\"\n\nfrom __future__ import annotations\n\nfrom dataclasses import dataclass\n\n\n@datacl"
},
{
"path": "researchclaw/assessor/scorer.py",
"chars": 4706,
"preview": "\"\"\"Multi-dimensional paper quality scorer.\"\"\"\n\nfrom __future__ import annotations\n\nimport json\nimport logging\nimport re\n"
},
{
"path": "researchclaw/assessor/venue_recommender.py",
"chars": 4382,
"preview": "\"\"\"Venue recommendation based on paper quality scores.\"\"\"\n\nfrom __future__ import annotations\n\nfrom typing import Any\n\n\n"
},
{
"path": "researchclaw/calendar/__init__.py",
"chars": 338,
"preview": "\"\"\"Conference deadline calendar and submission planning.\"\"\"\n\nfrom researchclaw.calendar.deadlines import ConferenceCalen"
},
{
"path": "researchclaw/calendar/data/conferences.yaml",
"chars": 4137,
"preview": "conferences:\n - name: \"NeurIPS 2026\"\n full_name: \"Conference on Neural Information Processing Systems\"\n domains: "
},
{
"path": "researchclaw/calendar/deadlines.py",
"chars": 5888,
"preview": "\"\"\"Conference deadline data management.\"\"\"\n\nfrom __future__ import annotations\n\nimport logging\nfrom dataclasses import d"
},
{
"path": "researchclaw/calendar/planner.py",
"chars": 2944,
"preview": "\"\"\"Submission timeline planner.\"\"\"\n\nfrom __future__ import annotations\n\nfrom datetime import date, timedelta\nfrom typing"
},
{
"path": "researchclaw/calendar/reminder.py",
"chars": 3806,
"preview": "\"\"\"Deadline reminder calculation.\"\"\"\n\nfrom __future__ import annotations\n\nfrom dataclasses import dataclass\nfrom datetim"
},
{
"path": "researchclaw/cli.py",
"chars": 33829,
"preview": "\"\"\"ResearchClaw CLI — run the 23-stage autonomous research pipeline.\"\"\"\n\nfrom __future__ import annotations\n\nimport argp"
},
{
"path": "researchclaw/collaboration/__init__.py",
"chars": 647,
"preview": "\"\"\"Agent collaboration and knowledge sharing system.\n\nEnables multiple AutoResearchClaw instances to share research arti"
},
{
"path": "researchclaw/collaboration/dedup.py",
"chars": 1366,
"preview": "\"\"\"Cross-instance deduplication for shared artifacts.\"\"\"\n\nfrom __future__ import annotations\n\nimport hashlib\nimport logg"
},
{
"path": "researchclaw/collaboration/publisher.py",
"chars": 3457,
"preview": "\"\"\"Artifact publisher — extracts and publishes research artifacts from pipeline runs.\"\"\"\n\nfrom __future__ import annotat"
},
{
"path": "researchclaw/collaboration/repository.py",
"chars": 6013,
"preview": "\"\"\"Shared knowledge repository for cross-instance collaboration.\"\"\"\n\nfrom __future__ import annotations\n\nimport json\nimp"
},
{
"path": "researchclaw/collaboration/subscriber.py",
"chars": 3297,
"preview": "\"\"\"Artifact subscriber — queries and imports shared artifacts.\"\"\"\n\nfrom __future__ import annotations\n\nimport logging\nfr"
},
{
"path": "researchclaw/config.py",
"chars": 50407,
"preview": "\"\"\"ResearchClaw config loading and validation.\"\"\"\n\nfrom __future__ import annotations\n\nfrom dataclasses import asdict, d"
},
{
"path": "researchclaw/copilot/__init__.py",
"chars": 403,
"preview": "\"\"\"Interactive Co-Pilot mode for human-AI research collaboration.\"\"\"\n\nfrom researchclaw.copilot.modes import ResearchMod"
},
{
"path": "researchclaw/copilot/branching.py",
"chars": 5411,
"preview": "\"\"\"Exploration branch management for Co-Pilot mode.\"\"\"\n\nfrom __future__ import annotations\n\nimport json\nimport logging\ni"
},
{
"path": "researchclaw/copilot/controller.py",
"chars": 4850,
"preview": "\"\"\"Co-Pilot controller — orchestrates pause/feedback/branch logic.\"\"\"\n\nfrom __future__ import annotations\n\nimport loggin"
},
{
"path": "researchclaw/copilot/feedback.py",
"chars": 4201,
"preview": "\"\"\"User feedback processing for Co-Pilot mode.\"\"\"\n\nfrom __future__ import annotations\n\nimport json\nimport logging\nimport"
},
{
"path": "researchclaw/copilot/modes.py",
"chars": 362,
"preview": "\"\"\"Research mode definitions for Co-Pilot.\"\"\"\n\nfrom __future__ import annotations\n\nfrom enum import Enum\n\n\nclass Researc"
},
{
"path": "researchclaw/dashboard/__init__.py",
"chars": 47,
"preview": "\"\"\"Real-time pipeline monitoring dashboard.\"\"\"\n"
},
{
"path": "researchclaw/dashboard/broadcaster.py",
"chars": 2917,
"preview": "\"\"\"Dashboard state broadcaster — pushes updates via WebSocket.\"\"\"\n\nfrom __future__ import annotations\n\nimport asyncio\nim"
},
{
"path": "researchclaw/dashboard/collector.py",
"chars": 4403,
"preview": "\"\"\"Run data collector — scans artifacts/ for pipeline state.\"\"\"\n\nfrom __future__ import annotations\n\nimport json\nimport "
},
{
"path": "researchclaw/dashboard/metrics.py",
"chars": 1538,
"preview": "\"\"\"Metric aggregation and computation for the dashboard.\"\"\"\n\nfrom __future__ import annotations\n\nfrom typing import Any\n"
},
{
"path": "researchclaw/data/__init__.py",
"chars": 4758,
"preview": "\"\"\"Static data assets for the ResearchClaw pipeline.\"\"\"\n\nfrom __future__ import annotations\n\nimport logging\nfrom pathlib"
},
{
"path": "researchclaw/data/benchmark_knowledge.yaml",
"chars": 29137,
"preview": "# BenchmarkAgent Knowledge Base\n# Domain-indexed registry of standard benchmarks, datasets, and baselines\n# for automate"
},
{
"path": "researchclaw/data/dataset_registry.yaml",
"chars": 3938,
"preview": "# ResearchClaw Dataset Registry\n# Tiers:\n# 1 = pre-cached in Docker image (no network needed)\n# 2 = downloadable via"
},
{
"path": "researchclaw/data/docker_profiles.yaml",
"chars": 2968,
"preview": "# Docker image profiles for domain-specific experiment execution.\n# Each domain can specify its own image with pre-insta"
},
{
"path": "researchclaw/data/framework_docs/axolotl.md",
"chars": 3017,
"preview": "# Axolotl — API Quick Reference\n\n## Installation\n```bash\npip install axolotl\n# or\ngit clone https://github.com/axolotl-a"
},
{
"path": "researchclaw/data/framework_docs/llamafactory.md",
"chars": 3136,
"preview": "# LLaMA-Factory — API Quick Reference\n\n## Installation\n```bash\npip install llamafactory\n# or\ngit clone https://github.co"
},
{
"path": "researchclaw/data/framework_docs/peft.md",
"chars": 3003,
"preview": "# PEFT (Parameter-Efficient Fine-Tuning) — API Quick Reference\n\n## Installation\n```bash\npip install peft\n```\n\n## LoRA (L"
},
{
"path": "researchclaw/data/framework_docs/transformers_training.md",
"chars": 3465,
"preview": "# HuggingFace Transformers Training — API Quick Reference\n\n## TrainingArguments (key parameters)\n```python\nfrom transfor"
},
{
"path": "researchclaw/data/framework_docs/trl.md",
"chars": 4989,
"preview": "# TRL (Transformer Reinforcement Learning) — API Quick Reference\n\n## Installation\n```bash\npip install trl\n```\n\n## SFTTra"
},
{
"path": "researchclaw/data/seminal_papers.yaml",
"chars": 8019,
"preview": "# Foundational ML papers that should be cited when discussing specific topics.\n# Indexed by keyword/technique for automa"
},
{
"path": "researchclaw/docker/Dockerfile",
"chars": 3494,
"preview": "# ResearchClaw experiment sandbox — GPU-enabled, isolated environment.\n# Build: docker build -t researchclaw/experiment:"
},
{
"path": "researchclaw/docker/Dockerfile.biology",
"chars": 982,
"preview": "# ResearchClaw Biology sandbox — bioinformatics, single-cell analysis.\n# Build: docker build -f Dockerfile.biology -t re"
},
{
"path": "researchclaw/docker/Dockerfile.chemistry",
"chars": 997,
"preview": "# ResearchClaw Chemistry sandbox — quantum chemistry, molecular property.\n# Build: docker build -f Dockerfile.chemistry "
},
{
"path": "researchclaw/docker/Dockerfile.economics",
"chars": 897,
"preview": "# ResearchClaw Economics sandbox — econometrics, regression analysis.\n# Build: docker build -f Dockerfile.economics -t r"
},
{
"path": "researchclaw/docker/Dockerfile.generic",
"chars": 871,
"preview": "# ResearchClaw Generic sandbox — lightweight Python scientific stack.\n# Build: docker build -f Dockerfile.generic -t res"
},
{
"path": "researchclaw/docker/Dockerfile.math",
"chars": 900,
"preview": "# ResearchClaw Math sandbox — numerical methods, optimization.\n# Build: docker build -f Dockerfile.math -t researchclaw/"
},
{
"path": "researchclaw/docker/Dockerfile.physics",
"chars": 1105,
"preview": "# ResearchClaw Physics sandbox — CPU-focused, numerical computing.\n# Build: docker build -f Dockerfile.physics -t resear"
},
{
"path": "researchclaw/docker/entrypoint.sh",
"chars": 2305,
"preview": "#!/bin/bash\n# ResearchClaw experiment entrypoint — unified three-phase execution.\n#\n# Phase 0: pip install from requirem"
},
{
"path": "researchclaw/domains/__init__.py",
"chars": 371,
"preview": "\"\"\"Universal cross-domain research code generation framework.\n\nThis package provides domain detection, prompt adaptation"
},
{
"path": "researchclaw/domains/adapters/__init__.py",
"chars": 993,
"preview": "\"\"\"Domain-specific prompt adapters.\n\nEach adapter customizes prompt blocks for a specific research domain\nwhile the ML a"
},
{
"path": "researchclaw/domains/adapters/biology.py",
"chars": 3536,
"preview": "\"\"\"Biology domain prompt adapter.\n\nProvides domain-specific prompt blocks for bioinformatics\nexperiments (single-cell an"
},
{
"path": "researchclaw/domains/adapters/chemistry.py",
"chars": 3414,
"preview": "\"\"\"Chemistry domain prompt adapter.\"\"\"\n\nfrom __future__ import annotations\n\nfrom typing import Any\n\nfrom researchclaw.do"
},
{
"path": "researchclaw/domains/adapters/economics.py",
"chars": 4440,
"preview": "\"\"\"Economics domain prompt adapter.\n\nProvides domain-specific prompt blocks for empirical economics\nexperiments (regress"
},
{
"path": "researchclaw/domains/adapters/generic.py",
"chars": 285,
"preview": "\"\"\"Generic domain adapter — fallback for unknown/new domains.\n\nRe-exports GenericPromptAdapter from prompt_adapter.py so"
},
{
"path": "researchclaw/domains/adapters/math.py",
"chars": 3601,
"preview": "\"\"\"Mathematics domain prompt adapter.\"\"\"\n\nfrom __future__ import annotations\n\nfrom typing import Any\n\nfrom researchclaw."
},
{
"path": "researchclaw/domains/adapters/ml.py",
"chars": 872,
"preview": "\"\"\"ML domain prompt adapter — preserves existing behavior exactly.\n\nThis adapter returns empty PromptBlocks for all stag"
},
{
"path": "researchclaw/domains/adapters/neuroscience.py",
"chars": 6495,
"preview": "\"\"\"Computational neuroscience domain prompt adapter.\n\nProvides domain-specific prompt blocks for neural simulation\nexper"
},
{
"path": "researchclaw/domains/adapters/physics.py",
"chars": 5336,
"preview": "\"\"\"Physics domain prompt adapter.\n\nProvides domain-specific prompt blocks for computational physics\nexperiments (simulat"
},
{
"path": "researchclaw/domains/adapters/robotics.py",
"chars": 5725,
"preview": "\"\"\"Robotics & control domain prompt adapter.\n\nProvides domain-specific prompt blocks for robotics experiments\n(control p"
},
{
"path": "researchclaw/domains/adapters/security.py",
"chars": 2127,
"preview": "\"\"\"Security domain prompt adapter.\"\"\"\n\nfrom __future__ import annotations\n\nfrom typing import Any\n\nfrom researchclaw.dom"
},
{
"path": "researchclaw/domains/detector.py",
"chars": 19539,
"preview": "\"\"\"Domain detection and profile loading.\n\nProvides :class:`DomainProfile` (the canonical representation of a research\ndo"
},
{
"path": "researchclaw/domains/experiment_schema.py",
"chars": 8744,
"preview": "\"\"\"Universal experiment schema — domain-agnostic experiment plan structure.\n\nReplaces the fixed ``baselines/proposed_met"
},
{
"path": "researchclaw/domains/profiles/_generic.yaml",
"chars": 1414,
"preview": "domain_id: generic\ndisplay_name: Generic Computational Research\nparent_domain: \"\"\n\nexperiment_paradigm: comparison\ncondi"
},
{
"path": "researchclaw/domains/profiles/biology_genomics.yaml",
"chars": 916,
"preview": "domain_id: biology_genomics\ndisplay_name: Genomics Analysis\nparent_domain: biology\nexperiment_paradigm: comparison\ncondi"
},
{
"path": "researchclaw/domains/profiles/biology_protein.yaml",
"chars": 908,
"preview": "domain_id: biology_protein\ndisplay_name: Protein Science\nparent_domain: biology\nexperiment_paradigm: comparison\nconditio"
},
{
"path": "researchclaw/domains/profiles/biology_singlecell.yaml",
"chars": 1646,
"preview": "domain_id: biology_singlecell\ndisplay_name: Single-Cell Analysis\nparent_domain: biology\n\nexperiment_paradigm: comparison"
},
{
"path": "researchclaw/domains/profiles/chemistry_molprop.yaml",
"chars": 1572,
"preview": "domain_id: chemistry_molprop\ndisplay_name: Molecular Property Prediction\nparent_domain: chemistry\n\nexperiment_paradigm: "
},
{
"path": "researchclaw/domains/profiles/chemistry_qm.yaml",
"chars": 2203,
"preview": "domain_id: chemistry_qm\ndisplay_name: Quantum Chemistry\nparent_domain: chemistry\n\nexperiment_paradigm: comparison\ncondit"
},
{
"path": "researchclaw/domains/profiles/economics_empirical.yaml",
"chars": 2509,
"preview": "domain_id: economics_empirical\ndisplay_name: Empirical Economics\nparent_domain: economics\n\nexperiment_paradigm: progress"
},
{
"path": "researchclaw/domains/profiles/mathematics_numerical.yaml",
"chars": 2354,
"preview": "domain_id: mathematics_numerical\ndisplay_name: Numerical Mathematics\nparent_domain: mathematics\n\nexperiment_paradigm: co"
},
{
"path": "researchclaw/domains/profiles/mathematics_optimization.yaml",
"chars": 1015,
"preview": "domain_id: mathematics_optimization\ndisplay_name: Mathematical Optimization\nparent_domain: mathematics\nexperiment_paradi"
},
{
"path": "researchclaw/domains/profiles/ml_compression.yaml",
"chars": 1555,
"preview": "domain_id: ml_compression\ndisplay_name: Model Compression (ML)\nparent_domain: ml\n\nexperiment_paradigm: comparison\ncondit"
},
{
"path": "researchclaw/domains/profiles/ml_generative.yaml",
"chars": 1455,
"preview": "domain_id: ml_generative\ndisplay_name: Generative Models (ML)\nparent_domain: ml\n\nexperiment_paradigm: comparison\nconditi"
},
{
"path": "researchclaw/domains/profiles/ml_generic.yaml",
"chars": 1039,
"preview": "domain_id: ml_generic\ndisplay_name: Machine Learning (General)\nparent_domain: ml\n\nexperiment_paradigm: comparison\ncondit"
},
{
"path": "researchclaw/domains/profiles/ml_graph.yaml",
"chars": 1463,
"preview": "domain_id: ml_graph\ndisplay_name: Graph Neural Networks (ML)\nparent_domain: ml\n\nexperiment_paradigm: comparison\nconditio"
},
{
"path": "researchclaw/domains/profiles/ml_nlp.yaml",
"chars": 1287,
"preview": "domain_id: ml_nlp\ndisplay_name: Natural Language Processing (ML)\nparent_domain: ml\n\nexperiment_paradigm: comparison\ncond"
},
{
"path": "researchclaw/domains/profiles/ml_rl.yaml",
"chars": 1244,
"preview": "domain_id: ml_rl\ndisplay_name: Reinforcement Learning\nparent_domain: ml\n\nexperiment_paradigm: comparison\ncondition_termi"
},
{
"path": "researchclaw/domains/profiles/ml_tabular.yaml",
"chars": 1516,
"preview": "domain_id: ml_tabular\ndisplay_name: Tabular ML\nparent_domain: ml\n\nexperiment_paradigm: comparison\ncondition_terminology:"
},
{
"path": "researchclaw/domains/profiles/ml_vision.yaml",
"chars": 1406,
"preview": "domain_id: ml_vision\ndisplay_name: Computer Vision (ML)\nparent_domain: ml\n\nexperiment_paradigm: comparison\ncondition_ter"
},
{
"path": "researchclaw/domains/profiles/neuroscience_computational.yaml",
"chars": 3816,
"preview": "domain_id: neuroscience_computational\ndisplay_name: Computational Neuroscience\nparent_domain: neuroscience\n\nexperiment_p"
},
{
"path": "researchclaw/domains/profiles/neuroscience_imaging.yaml",
"chars": 3317,
"preview": "domain_id: neuroscience_imaging\ndisplay_name: Brain Imaging Analysis\nparent_domain: neuroscience\n\nexperiment_paradigm: c"
},
{
"path": "researchclaw/domains/profiles/physics_pde.yaml",
"chars": 2873,
"preview": "domain_id: physics_pde\ndisplay_name: PDE Solvers\nparent_domain: physics\n\nexperiment_paradigm: convergence\ncondition_term"
},
{
"path": "researchclaw/domains/profiles/physics_quantum.yaml",
"chars": 1104,
"preview": "domain_id: physics_quantum\ndisplay_name: Quantum Mechanics / Quantum Physics\nparent_domain: physics\nexperiment_paradigm:"
},
{
"path": "researchclaw/domains/profiles/physics_simulation.yaml",
"chars": 3099,
"preview": "domain_id: physics_simulation\ndisplay_name: Computational Physics (Simulation)\nparent_domain: physics\n\nexperiment_paradi"
},
{
"path": "researchclaw/domains/profiles/robotics_control.yaml",
"chars": 1253,
"preview": "domain_id: robotics_control\ndisplay_name: Robotics & Control\nparent_domain: robotics\n\nexperiment_paradigm: comparison\nco"
},
{
"path": "researchclaw/domains/profiles/security_detection.yaml",
"chars": 1534,
"preview": "domain_id: security_detection\ndisplay_name: Security / Intrusion Detection\nparent_domain: security\n\nexperiment_paradigm:"
},
{
"path": "researchclaw/domains/prompt_adapter.py",
"chars": 12701,
"preview": "\"\"\"Domain-aware prompt adaptation layer.\n\nInstead of rewriting ``prompts.py`` (2395+ lines of battle-tested code),\nthis "
},
{
"path": "researchclaw/evolution.py",
"chars": 20919,
"preview": "\"\"\"Self-evolution system for the ResearchClaw pipeline.\n\nRecords lessons from each pipeline run (failures, slow stages, "
},
{
"path": "researchclaw/experiment/__init__.py",
"chars": 376,
"preview": "\"\"\"Experiment execution — sandbox, runner, git manager.\"\"\"\n\nfrom researchclaw.experiment.factory import create_sandbox\nf"
},
{
"path": "researchclaw/experiment/agentic_sandbox.py",
"chars": 11957,
"preview": "\"\"\"Agentic sandbox: launches a coding agent inside a Docker container.\n\nThe agent (e.g. Claude Code, Codex) gets full sh"
},
{
"path": "researchclaw/experiment/code_agent.py",
"chars": 25559,
"preview": "\"\"\"Pluggable code-generation backends for experiment Stages 10 & 13.\n\nSupports three providers:\n - ``llm`` — ex"
},
{
"path": "researchclaw/experiment/colab_sandbox.py",
"chars": 12853,
"preview": "\"\"\"Google Drive-based async sandbox for Colab experiment execution.\n\nExecution model:\n 1. Write experiment code to a sh"
},
{
"path": "researchclaw/experiment/docker_sandbox.py",
"chars": 21014,
"preview": "\"\"\"Docker-based sandbox for experiment code execution with GPU passthrough.\n\nUses a single-container, three-phase execut"
},
{
"path": "researchclaw/experiment/evaluators/__init__.py",
"chars": 45,
"preview": "\"\"\"Domain-specific experiment evaluators.\"\"\"\n"
},
{
"path": "researchclaw/experiment/evaluators/convergence.py",
"chars": 4918,
"preview": "\"\"\"Convergence study evaluator for physics/math domains.\n\nAnalyzes convergence data (error vs grid size/timestep) to det"
},
{
"path": "researchclaw/experiment/factory.py",
"chars": 3910,
"preview": "\"\"\"Factory for creating sandbox backends based on experiment config.\"\"\"\n\nfrom __future__ import annotations\n\nimport logg"
},
{
"path": "researchclaw/experiment/git_manager.py",
"chars": 6441,
"preview": "\"\"\"Git-native experiment version management inspired by autoresearch.\"\"\"\n\nfrom __future__ import annotations\n\nimport jso"
},
{
"path": "researchclaw/experiment/harness_template.py",
"chars": 4235,
"preview": "\"\"\"Experiment harness — immutable evaluation infrastructure.\n\nThis file is injected into the sandbox project directory a"
},
{
"path": "researchclaw/experiment/metrics.py",
"chars": 9929,
"preview": "\"\"\"Universal metric parser — supports JSON, CSV, and stdout regex formats.\n\nParse priority:\n 1. ``results.json`` — stru"
},
{
"path": "researchclaw/experiment/runner.py",
"chars": 12977,
"preview": "\"\"\"Experiment execution engine inspired by autoresearch's edit→run→eval→keep/discard loop.\"\"\"\n\nfrom __future__ import an"
},
{
"path": "researchclaw/experiment/sandbox.py",
"chars": 19859,
"preview": "\"\"\"Sandbox environment for safe experiment code execution.\"\"\"\n\nfrom __future__ import annotations\n\nimport logging\nimport"
},
{
"path": "researchclaw/experiment/ssh_sandbox.py",
"chars": 15245,
"preview": "\"\"\"SSH remote sandbox for experiment code execution on remote GPU servers.\n\nUploads experiment code via scp, executes vi"
},
{
"path": "researchclaw/experiment/validator.py",
"chars": 40628,
"preview": "\"\"\"Experiment code validation: syntax, security, and import checks.\n\nThis module provides pre-execution validation for L"
},
{
"path": "researchclaw/experiment/visualize.py",
"chars": 23693,
"preview": "\"\"\"Experiment result visualization.\n\nGenerates publication-quality charts from experiment run data:\n- Condition comparis"
},
{
"path": "researchclaw/feedback/FEEDBACK_ANALYSIS_PROMPT.md",
"chars": 5608,
"preview": "# Tester Feedback Analysis — Claude Code Prompt\n\n> **用途:** 在 Claude Code agent 窗口中读取本文件,agent 将自动完成「测试反馈分析 → Bug 修复文档生成」"
},
{
"path": "researchclaw/hardware.py",
"chars": 7054,
"preview": "\"\"\"Hardware detection for GPU-aware experiment execution.\"\"\"\n\nfrom __future__ import annotations\n\nimport logging\nimport "
},
{
"path": "researchclaw/health.py",
"chars": 21835,
"preview": "from __future__ import annotations\n\nimport importlib\nimport json\nimport logging\nimport os\nimport shutil\nimport socket\nim"
},
{
"path": "researchclaw/knowledge/__init__.py",
"chars": 45,
"preview": "\"\"\"Knowledge management — base, adapters.\"\"\"\n"
},
{
"path": "researchclaw/knowledge/base.py",
"chars": 8818,
"preview": "\"\"\"Knowledge base integration for ARC pipeline.\n\nSupports two backends:\n- ``markdown`` (default): Plain Markdown files i"
},
{
"path": "researchclaw/knowledge/graph/__init__.py",
"chars": 664,
"preview": "\"\"\"Research knowledge graph built on NetworkX.\n\nExtracts entities (Papers, Methods, Datasets, Metrics) and relations\n(CI"
},
{
"path": "researchclaw/knowledge/graph/builder.py",
"chars": 8213,
"preview": "\"\"\"Knowledge graph builder — constructs graph from literature and experiments.\"\"\"\n\nfrom __future__ import annotations\n\ni"
},
{
"path": "researchclaw/knowledge/graph/entities.py",
"chars": 1331,
"preview": "\"\"\"Entity definitions for the research knowledge graph.\"\"\"\n\nfrom __future__ import annotations\n\nfrom dataclasses import "
},
{
"path": "researchclaw/knowledge/graph/query.py",
"chars": 7469,
"preview": "\"\"\"Knowledge graph query engine.\"\"\"\n\nfrom __future__ import annotations\n\nimport logging\nfrom collections import Counter,"
},
{
"path": "researchclaw/knowledge/graph/relations.py",
"chars": 1559,
"preview": "\"\"\"Relation definitions for the research knowledge graph.\"\"\"\n\nfrom __future__ import annotations\n\nfrom dataclasses impor"
},
{
"path": "researchclaw/knowledge/graph/visualizer.py",
"chars": 3753,
"preview": "\"\"\"Knowledge graph visualization export.\"\"\"\n\nfrom __future__ import annotations\n\nimport json\nimport logging\nfrom pathlib"
},
{
"path": "researchclaw/literature/__init__.py",
"chars": 664,
"preview": "\"\"\"Real literature search and citation management for ResearchClaw.\n\nProvides API clients for Semantic Scholar and arXiv"
},
{
"path": "researchclaw/literature/arxiv_client.py",
"chars": 10218,
"preview": "\"\"\"arXiv API client powered by the ``arxiv`` library.\n\nThe ``arxiv`` pip package (2.4+) provides robust arXiv search wit"
},
{
"path": "researchclaw/literature/cache.py",
"chars": 4119,
"preview": "\"\"\"Local query cache for literature search results.\n\nCaches search results by (query, source, limit) hash to avoid\nredun"
},
{
"path": "researchclaw/literature/models.py",
"chars": 6607,
"preview": "\"\"\"Data models for literature search results.\n\nPaper and Author are frozen dataclasses — immutable after creation.\n``Pap"
},
{
"path": "researchclaw/literature/novelty.py",
"chars": 12392,
"preview": "\"\"\"Novelty checker — detects similar existing work before paper generation.\n\nSearches real academic APIs (Semantic Schol"
},
{
"path": "researchclaw/literature/openalex_client.py",
"chars": 9137,
"preview": "\"\"\"OpenAlex API client.\n\nUses stdlib ``urllib`` + ``json`` — zero extra dependencies.\n\nPublic API\n----------\n- ``search_"
},
{
"path": "researchclaw/literature/search.py",
"chars": 12292,
"preview": "\"\"\"Unified literature search with deduplication.\n\nCombines results from OpenAlex, Semantic Scholar, and arXiv,\ndeduplica"
},
{
"path": "researchclaw/literature/semantic_scholar.py",
"chars": 13172,
"preview": "\"\"\"Semantic Scholar API client.\n\nUses only stdlib ``urllib`` — zero extra dependencies.\n\nPublic API\n----------\n- ``searc"
},
{
"path": "researchclaw/literature/trends.py",
"chars": 2874,
"preview": "\"\"\"Literature trend analysis — analyze trends from search results.\"\"\"\n\nfrom __future__ import annotations\n\nimport loggin"
},
{
"path": "researchclaw/literature/verify.py",
"chars": 32029,
"preview": "\"\"\"Citation verification engine — detect hallucinated references.\n\nVerifies each BibTeX entry against real academic APIs"
},
{
"path": "researchclaw/llm/__init__.py",
"chars": 2325,
"preview": "\"\"\"LLM integration — OpenAI-compatible and ACP agent clients.\"\"\"\n\nfrom __future__ import annotations\n\nfrom typing import"
},
{
"path": "researchclaw/llm/acp_client.py",
"chars": 16284,
"preview": "\"\"\"ACP (Agent Client Protocol) LLM client via acpx.\n\nUses acpx as the ACP bridge to communicate with any ACP-compatible "
},
{
"path": "researchclaw/llm/anthropic_adapter.py",
"chars": 6894,
"preview": "\"\"\"Anthropic Messages API adapter for ResearchClaw.\"\"\"\n\nimport json\nimport logging\nimport urllib.error\nfrom typing impor"
},
{
"path": "researchclaw/llm/client.py",
"chars": 18238,
"preview": "\"\"\"Lightweight OpenAI-compatible LLM client — stdlib only.\n\nFeatures:\n - Model fallback chain (gpt-5.2 → gpt-5.1 → gpt-"
},
{
"path": "researchclaw/mcp/__init__.py",
"chars": 314,
"preview": "\"\"\"MCP (Model Context Protocol) standardized integration for AutoResearchClaw.\"\"\"\n\nfrom researchclaw.mcp.server import R"
},
{
"path": "researchclaw/mcp/client.py",
"chars": 3420,
"preview": "\"\"\"MCP Client: connect to external MCP servers for enhanced capabilities.\"\"\"\n\nfrom __future__ import annotations\n\nimport"
},
{
"path": "researchclaw/mcp/registry.py",
"chars": 1565,
"preview": "\"\"\"Registry of connected MCP servers.\"\"\"\n\nfrom __future__ import annotations\n\nimport logging\nfrom typing import Any\n\nfro"
},
{
"path": "researchclaw/mcp/server.py",
"chars": 5359,
"preview": "\"\"\"ResearchClaw MCP Server: expose pipeline capabilities to external agents.\"\"\"\n\nfrom __future__ import annotations\n\nimp"
},
{
"path": "researchclaw/mcp/tools.py",
"chars": 3300,
"preview": "\"\"\"MCP tool definitions for ResearchClaw capabilities.\"\"\"\n\nfrom __future__ import annotations\n\nfrom typing import Any\n\n#"
},
{
"path": "researchclaw/mcp/transport.py",
"chars": 3419,
"preview": "\"\"\"MCP transport layer: stdio and SSE implementations.\"\"\"\n\nfrom __future__ import annotations\n\nimport asyncio\nimport jso"
},
{
"path": "researchclaw/memory/__init__.py",
"chars": 727,
"preview": "\"\"\"Persistent evolutionary memory system for AutoResearchClaw.\n\nProvides three categories of memory:\n- **Ideation**: Res"
},
{
"path": "researchclaw/memory/decay.py",
"chars": 1666,
"preview": "\"\"\"Time-decay and confidence scoring for memory entries.\"\"\"\n\nfrom __future__ import annotations\n\nimport math\nfrom dateti"
},
{
"path": "researchclaw/memory/embeddings.py",
"chars": 5604,
"preview": "\"\"\"Vector embedding management for memory retrieval.\n\nSupports three backends (auto-fallback):\n1. OpenAI-compatible API "
},
{
"path": "researchclaw/memory/experiment_memory.py",
"chars": 5469,
"preview": "\"\"\"Experiment memory — records and retrieves experiment experiences.\"\"\"\n\nfrom __future__ import annotations\n\nimport json"
},
{
"path": "researchclaw/memory/ideation_memory.py",
"chars": 4845,
"preview": "\"\"\"Ideation memory — records and retrieves research direction experiences.\"\"\"\n\nfrom __future__ import annotations\n\nimpor"
},
{
"path": "researchclaw/memory/retriever.py",
"chars": 5487,
"preview": "\"\"\"Similarity-based memory retrieval engine.\n\nCombines cosine similarity with time-decay and confidence weighting\nto ret"
}
]
// ... and 222 more files (download for full content)
About this extraction
This page contains the full source code of the aiming-lab/AutoResearchClaw GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 422 files (4.1 MB), approximately 1.1M tokens, and a symbol index with 4464 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.