Repository: Stanford-Trinity/ARTEMIS
Branch: main
Commit: f309242d2a74
Files: 421
Total size: 7.0 MB
Directory structure:
gitextract_2yw_apz6/
├── .env.example
├── .github/
│ └── workflows/
│ └── ci.yml
├── .gitignore
├── Dockerfile
├── LICENSE
├── NOTICE
├── README.md
├── codex-rs/
│ ├── .gitignore
│ ├── Cargo.toml
│ ├── README.md
│ ├── ansi-escape/
│ │ ├── Cargo.toml
│ │ ├── README.md
│ │ └── src/
│ │ └── lib.rs
│ ├── apply-patch/
│ │ ├── Cargo.toml
│ │ ├── apply_patch_tool_instructions.md
│ │ ├── src/
│ │ │ ├── lib.rs
│ │ │ ├── main.rs
│ │ │ ├── parser.rs
│ │ │ ├── seek_sequence.rs
│ │ │ └── standalone_executable.rs
│ │ └── tests/
│ │ ├── all.rs
│ │ └── suite/
│ │ ├── cli.rs
│ │ └── mod.rs
│ ├── arg0/
│ │ ├── Cargo.toml
│ │ └── src/
│ │ └── lib.rs
│ ├── chatgpt/
│ │ ├── Cargo.toml
│ │ ├── README.md
│ │ ├── src/
│ │ │ ├── apply_command.rs
│ │ │ ├── chatgpt_client.rs
│ │ │ ├── chatgpt_token.rs
│ │ │ ├── get_task.rs
│ │ │ └── lib.rs
│ │ └── tests/
│ │ ├── all.rs
│ │ ├── suite/
│ │ │ ├── apply_command_e2e.rs
│ │ │ └── mod.rs
│ │ └── task_turn_fixture.json
│ ├── cli/
│ │ ├── Cargo.toml
│ │ └── src/
│ │ ├── debug_sandbox.rs
│ │ ├── exit_status.rs
│ │ ├── lib.rs
│ │ ├── login.rs
│ │ ├── main.rs
│ │ └── proto.rs
│ ├── clippy.toml
│ ├── common/
│ │ ├── Cargo.toml
│ │ ├── README.md
│ │ └── src/
│ │ ├── approval_mode_cli_arg.rs
│ │ ├── approval_presets.rs
│ │ ├── config_override.rs
│ │ ├── config_summary.rs
│ │ ├── elapsed.rs
│ │ ├── fuzzy_match.rs
│ │ ├── lib.rs
│ │ ├── model_presets.rs
│ │ ├── sandbox_mode_cli_arg.rs
│ │ └── sandbox_summary.rs
│ ├── config.md
│ ├── core/
│ │ ├── Cargo.toml
│ │ ├── README.md
│ │ ├── active_directory.md
│ │ ├── approval_prompt.txt
│ │ ├── bugcrowd_approval_prompt.txt
│ │ ├── client_side_web.md
│ │ ├── continuation_prompt.txt
│ │ ├── enumeration.md
│ │ ├── initial_prompt.txt
│ │ ├── linux_privesc.md
│ │ ├── prompt.md
│ │ ├── shelling.md
│ │ ├── src/
│ │ │ ├── apply_patch.rs
│ │ │ ├── bash.rs
│ │ │ ├── chat_completions.rs
│ │ │ ├── client.rs
│ │ │ ├── client_common.rs
│ │ │ ├── codex.rs
│ │ │ ├── codex_conversation.rs
│ │ │ ├── config.rs
│ │ │ ├── config_profile.rs
│ │ │ ├── config_types.rs
│ │ │ ├── conversation_history.rs
│ │ │ ├── conversation_manager.rs
│ │ │ ├── environment_context.rs
│ │ │ ├── error.rs
│ │ │ ├── exec.rs
│ │ │ ├── exec_command/
│ │ │ │ ├── exec_command_params.rs
│ │ │ │ ├── exec_command_session.rs
│ │ │ │ ├── mod.rs
│ │ │ │ ├── responses_api.rs
│ │ │ │ ├── session_id.rs
│ │ │ │ └── session_manager.rs
│ │ │ ├── exec_env.rs
│ │ │ ├── flags.rs
│ │ │ ├── git_info.rs
│ │ │ ├── is_safe_command.rs
│ │ │ ├── landlock.rs
│ │ │ ├── lib.rs
│ │ │ ├── mcp_connection_manager.rs
│ │ │ ├── mcp_tool_call.rs
│ │ │ ├── message_history.rs
│ │ │ ├── model_family.rs
│ │ │ ├── model_provider_info.rs
│ │ │ ├── openai_model_info.rs
│ │ │ ├── openai_tools.rs
│ │ │ ├── parse_command.rs
│ │ │ ├── plan_tool.rs
│ │ │ ├── project_doc.rs
│ │ │ ├── prompt_for_compact_command.md
│ │ │ ├── rollout.rs
│ │ │ ├── safety.rs
│ │ │ ├── seatbelt.rs
│ │ │ ├── seatbelt_base_policy.sbpl
│ │ │ ├── shell.rs
│ │ │ ├── spawn.rs
│ │ │ ├── terminal.rs
│ │ │ ├── tool_apply_patch.rs
│ │ │ ├── turn_diff_tracker.rs
│ │ │ ├── user_agent.rs
│ │ │ ├── user_notification.rs
│ │ │ └── util.rs
│ │ ├── summarization_prompt.txt
│ │ ├── tests/
│ │ │ ├── all.rs
│ │ │ ├── cli_responses_fixture.sse
│ │ │ ├── common/
│ │ │ │ ├── Cargo.toml
│ │ │ │ └── lib.rs
│ │ │ ├── fixtures/
│ │ │ │ ├── completed_template.json
│ │ │ │ └── incomplete_sse.json
│ │ │ └── suite/
│ │ │ ├── cli_stream.rs
│ │ │ ├── client.rs
│ │ │ ├── compact.rs
│ │ │ ├── exec.rs
│ │ │ ├── exec_stream_events.rs
│ │ │ ├── live_cli.rs
│ │ │ ├── mod.rs
│ │ │ ├── prompt_caching.rs
│ │ │ ├── seatbelt.rs
│ │ │ ├── stream_error_allows_next_turn.rs
│ │ │ └── stream_no_completed.rs
│ │ ├── web.md
│ │ ├── web_enumeration.md
│ │ └── windows_privesc.md
│ ├── cve-prompt.md
│ ├── default.nix
│ ├── disclosure-email-prompt.md
│ ├── docs/
│ │ └── protocol_v1.md
│ ├── exec/
│ │ ├── Cargo.toml
│ │ ├── src/
│ │ │ ├── cli.rs
│ │ │ ├── event_processor.rs
│ │ │ ├── event_processor_with_human_output.rs
│ │ │ ├── event_processor_with_json_output.rs
│ │ │ ├── lib.rs
│ │ │ ├── main.rs
│ │ │ └── realtime_logger.rs
│ │ └── tests/
│ │ ├── all.rs
│ │ ├── fixtures/
│ │ │ ├── apply_patch_freeform_final.txt
│ │ │ ├── sse_apply_patch_add.json
│ │ │ ├── sse_apply_patch_freeform_add.json
│ │ │ ├── sse_apply_patch_freeform_update.json
│ │ │ ├── sse_apply_patch_update.json
│ │ │ └── sse_response_completed.json
│ │ └── suite/
│ │ ├── apply_patch.rs
│ │ ├── common.rs
│ │ ├── mod.rs
│ │ └── sandbox.rs
│ ├── execpolicy/
│ │ ├── Cargo.toml
│ │ ├── README.md
│ │ ├── build.rs
│ │ ├── src/
│ │ │ ├── arg_matcher.rs
│ │ │ ├── arg_resolver.rs
│ │ │ ├── arg_type.rs
│ │ │ ├── default.policy
│ │ │ ├── error.rs
│ │ │ ├── exec_call.rs
│ │ │ ├── execv_checker.rs
│ │ │ ├── lib.rs
│ │ │ ├── main.rs
│ │ │ ├── opt.rs
│ │ │ ├── policy.rs
│ │ │ ├── policy_parser.rs
│ │ │ ├── program.rs
│ │ │ ├── sed_command.rs
│ │ │ └── valid_exec.rs
│ │ └── tests/
│ │ ├── all.rs
│ │ └── suite/
│ │ ├── bad.rs
│ │ ├── cp.rs
│ │ ├── good.rs
│ │ ├── head.rs
│ │ ├── literal.rs
│ │ ├── ls.rs
│ │ ├── mod.rs
│ │ ├── parse_sed_command.rs
│ │ ├── pwd.rs
│ │ └── sed.rs
│ ├── file-search/
│ │ ├── Cargo.toml
│ │ ├── README.md
│ │ └── src/
│ │ ├── cli.rs
│ │ ├── lib.rs
│ │ └── main.rs
│ ├── justfile
│ ├── linux-sandbox/
│ │ ├── Cargo.toml
│ │ ├── README.md
│ │ ├── src/
│ │ │ ├── landlock.rs
│ │ │ ├── lib.rs
│ │ │ ├── linux_run_main.rs
│ │ │ └── main.rs
│ │ └── tests/
│ │ ├── all.rs
│ │ └── suite/
│ │ ├── landlock.rs
│ │ └── mod.rs
│ ├── login/
│ │ ├── Cargo.toml
│ │ ├── src/
│ │ │ ├── assets/
│ │ │ │ └── success.html
│ │ │ ├── auth_manager.rs
│ │ │ ├── lib.rs
│ │ │ ├── pkce.rs
│ │ │ ├── server.rs
│ │ │ └── token_data.rs
│ │ └── tests/
│ │ ├── all.rs
│ │ └── suite/
│ │ ├── login_server_e2e.rs
│ │ └── mod.rs
│ ├── mcp-client/
│ │ ├── Cargo.toml
│ │ └── src/
│ │ ├── lib.rs
│ │ ├── main.rs
│ │ └── mcp_client.rs
│ ├── mcp-server/
│ │ ├── Cargo.toml
│ │ ├── src/
│ │ │ ├── codex_message_processor.rs
│ │ │ ├── codex_tool_config.rs
│ │ │ ├── codex_tool_runner.rs
│ │ │ ├── error_code.rs
│ │ │ ├── exec_approval.rs
│ │ │ ├── json_to_toml.rs
│ │ │ ├── lib.rs
│ │ │ ├── main.rs
│ │ │ ├── message_processor.rs
│ │ │ ├── outgoing_message.rs
│ │ │ ├── patch_approval.rs
│ │ │ └── tool_handlers/
│ │ │ └── mod.rs
│ │ └── tests/
│ │ ├── all.rs
│ │ ├── common/
│ │ │ ├── Cargo.toml
│ │ │ ├── lib.rs
│ │ │ ├── mcp_process.rs
│ │ │ ├── mock_model_server.rs
│ │ │ └── responses.rs
│ │ └── suite/
│ │ ├── auth.rs
│ │ ├── codex_message_processor_flow.rs
│ │ ├── codex_tool.rs
│ │ ├── config.rs
│ │ ├── create_conversation.rs
│ │ ├── interrupt.rs
│ │ ├── login.rs
│ │ ├── mod.rs
│ │ └── send_message.rs
│ ├── mcp-types/
│ │ ├── Cargo.toml
│ │ ├── README.md
│ │ ├── generate_mcp_types.py
│ │ ├── schema/
│ │ │ ├── 2025-03-26/
│ │ │ │ └── schema.json
│ │ │ └── 2025-06-18/
│ │ │ └── schema.json
│ │ ├── src/
│ │ │ └── lib.rs
│ │ └── tests/
│ │ ├── all.rs
│ │ └── suite/
│ │ ├── initialize.rs
│ │ ├── mod.rs
│ │ └── progress_notification.rs
│ ├── ollama/
│ │ ├── Cargo.toml
│ │ └── src/
│ │ ├── client.rs
│ │ ├── lib.rs
│ │ ├── parser.rs
│ │ ├── pull.rs
│ │ └── url.rs
│ ├── protocol/
│ │ ├── Cargo.toml
│ │ ├── README.md
│ │ └── src/
│ │ ├── config_types.rs
│ │ ├── lib.rs
│ │ ├── mcp_protocol.rs
│ │ ├── message_history.rs
│ │ ├── models.rs
│ │ ├── parse_command.rs
│ │ ├── plan_tool.rs
│ │ └── protocol.rs
│ ├── protocol-ts/
│ │ ├── Cargo.toml
│ │ ├── generate-ts
│ │ └── src/
│ │ ├── lib.rs
│ │ └── main.rs
│ ├── rust-toolchain.toml
│ ├── rustfmt.toml
│ ├── scripts/
│ │ └── create_github_release.sh
│ └── tui/
│ ├── Cargo.toml
│ ├── prompt_for_init_command.md
│ ├── src/
│ │ ├── app.rs
│ │ ├── app_backtrack.rs
│ │ ├── app_event.rs
│ │ ├── app_event_sender.rs
│ │ ├── backtrack_helpers.rs
│ │ ├── bottom_pane/
│ │ │ ├── approval_modal_view.rs
│ │ │ ├── bottom_pane_view.rs
│ │ │ ├── chat_composer.rs
│ │ │ ├── chat_composer_history.rs
│ │ │ ├── command_popup.rs
│ │ │ ├── file_search_popup.rs
│ │ │ ├── list_selection_view.rs
│ │ │ ├── mod.rs
│ │ │ ├── popup_consts.rs
│ │ │ ├── scroll_state.rs
│ │ │ ├── selection_popup_common.rs
│ │ │ ├── snapshots/
│ │ │ │ ├── codex_tui__bottom_pane__chat_composer__tests__backspace_after_pastes.snap
│ │ │ │ ├── codex_tui__bottom_pane__chat_composer__tests__empty.snap
│ │ │ │ ├── codex_tui__bottom_pane__chat_composer__tests__large.snap
│ │ │ │ ├── codex_tui__bottom_pane__chat_composer__tests__multiple_pastes.snap
│ │ │ │ └── codex_tui__bottom_pane__chat_composer__tests__small.snap
│ │ │ └── textarea.rs
│ │ ├── chatwidget/
│ │ │ ├── agent.rs
│ │ │ ├── interrupts.rs
│ │ │ ├── snapshots/
│ │ │ │ ├── codex_tui__chatwidget__tests__approval_modal_exec.snap
│ │ │ │ ├── codex_tui__chatwidget__tests__approval_modal_patch.snap
│ │ │ │ ├── codex_tui__chatwidget__tests__chat_small_idle_h1.snap
│ │ │ │ ├── codex_tui__chatwidget__tests__chat_small_idle_h2.snap
│ │ │ │ ├── codex_tui__chatwidget__tests__chat_small_idle_h3.snap
│ │ │ │ ├── codex_tui__chatwidget__tests__chat_small_running_h1.snap
│ │ │ │ ├── codex_tui__chatwidget__tests__chat_small_running_h2.snap
│ │ │ │ ├── codex_tui__chatwidget__tests__chat_small_running_h3.snap
│ │ │ │ ├── codex_tui__chatwidget__tests__deltas_then_same_final_message_are_rendered_snapshot.snap
│ │ │ │ ├── codex_tui__chatwidget__tests__final_reasoning_then_message_without_deltas_are_rendered.snap
│ │ │ │ ├── codex_tui__chatwidget__tests__interrupt_exec_marks_failed.snap
│ │ │ │ ├── codex_tui__chatwidget__tests__status_widget_active.snap
│ │ │ │ └── codex_tui__chatwidget__tests__status_widget_and_approval_modal.snap
│ │ │ └── tests.rs
│ │ ├── chatwidget.rs
│ │ ├── chatwidget_stream_tests.rs
│ │ ├── citation_regex.rs
│ │ ├── cli.rs
│ │ ├── clipboard_paste.rs
│ │ ├── common.rs
│ │ ├── custom_terminal.rs
│ │ ├── diff_render.rs
│ │ ├── exec_command.rs
│ │ ├── file_search.rs
│ │ ├── get_git_diff.rs
│ │ ├── history_cell.rs
│ │ ├── insert_history.rs
│ │ ├── lib.rs
│ │ ├── live_wrap.rs
│ │ ├── main.rs
│ │ ├── markdown.rs
│ │ ├── markdown_stream.rs
│ │ ├── onboarding/
│ │ │ ├── auth.rs
│ │ │ ├── mod.rs
│ │ │ ├── onboarding_screen.rs
│ │ │ ├── trust_directory.rs
│ │ │ └── welcome.rs
│ │ ├── pager_overlay.rs
│ │ ├── render/
│ │ │ ├── line_utils.rs
│ │ │ ├── markdown_utils.rs
│ │ │ └── mod.rs
│ │ ├── session_log.rs
│ │ ├── shimmer.rs
│ │ ├── slash_command.rs
│ │ ├── snapshots/
│ │ │ ├── codex_tui__diff_render__tests__add_details.snap
│ │ │ ├── codex_tui__diff_render__tests__blank_context_line.snap
│ │ │ ├── codex_tui__diff_render__tests__single_line_replacement_counts.snap
│ │ │ ├── codex_tui__diff_render__tests__update_details_with_rename.snap
│ │ │ ├── codex_tui__diff_render__tests__vertical_ellipsis_between_hunks.snap
│ │ │ ├── codex_tui__diff_render__tests__wrap_behavior_insert.snap
│ │ │ ├── codex_tui__pager_overlay__tests__static_overlay_snapshot_basic.snap
│ │ │ ├── codex_tui__pager_overlay__tests__transcript_overlay_snapshot_basic.snap
│ │ │ ├── codex_tui__status_indicator_widget__tests__renders_truncated.snap
│ │ │ ├── codex_tui__status_indicator_widget__tests__renders_with_queued_messages.snap
│ │ │ └── codex_tui__status_indicator_widget__tests__renders_with_working_header.snap
│ │ ├── status_indicator_widget.rs
│ │ ├── streaming/
│ │ │ ├── controller.rs
│ │ │ └── mod.rs
│ │ ├── text_formatting.rs
│ │ ├── tui.rs
│ │ ├── updates.rs
│ │ └── user_approval_widget.rs
│ ├── styles.md
│ └── tests/
│ ├── all.rs
│ ├── fixtures/
│ │ ├── binary-size-log.jsonl
│ │ ├── ideal-binary-response.txt
│ │ └── oss-story.jsonl
│ └── suite/
│ ├── mod.rs
│ ├── status_indicator.rs
│ ├── vt100_history.rs
│ ├── vt100_live_commit.rs
│ └── vt100_streaming_no_dup.rs
├── configs/
│ ├── stanford/
│ │ └── example.yaml
│ └── tests/
│ ├── ctf_easy.yaml
│ └── simple.yaml
├── docs/
│ ├── SYSTEM_ARCHITECTURES.md
│ ├── TRIAGE_ARCHITECTURE.md
│ ├── license.md
│ └── supervisor-usage.md
├── pyproject.toml
├── run_docker.sh
├── supervisor/
│ ├── __init__.py
│ ├── config.py
│ ├── context_manager.py
│ ├── orchestration/
│ │ ├── __init__.py
│ │ ├── instance_manager.py
│ │ ├── log_reader.py
│ │ ├── orchestrator.py
│ │ ├── prompt_generator.py
│ │ └── router.py
│ ├── prompts/
│ │ ├── __init__.py
│ │ ├── continuation_context_prompt.py
│ │ ├── router_prompt.py
│ │ ├── summarization_prompt.py
│ │ └── supervisor_prompt.py
│ ├── submissions/
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── ctf.py
│ │ ├── registry.py
│ │ └── vulnerability.py
│ ├── supervisor.py
│ ├── todo_generator.py
│ ├── tools.py
│ ├── triage/
│ │ ├── prompts/
│ │ │ ├── __init__.py
│ │ │ ├── initial_review_prompt.py
│ │ │ ├── severity_prompt.py
│ │ │ ├── system_prompt.py
│ │ │ └── validation_prompt.py
│ │ ├── triage_manager.py
│ │ └── triage_tools.py
│ ├── vulnerability_storage.py
│ └── working_hours.py
└── test_files/
└── it_has_begun/
└── script.sh
================================================
FILE CONTENTS
================================================
================================================
FILE: .env.example
================================================
# OpenRouter API Configuration
OPENROUTER_API_KEY=your-openrouter-api-key-here
# Optional: Override default models
# SUPERVISOR_MODEL=openai/o1-preview
# SUMMARIZATION_MODEL=openai/o4-mini
# ROUTER_MODEL=openai/o4-mini
# Optional: Override TODO generator models
# TODO_GENERATOR_OPENROUTER_MODEL=anthropic/claude-opus-4.1
# TODO_GENERATOR_OPENAI_MODEL=gpt-5
# Optional: Override prompt generator model for custom system prompts
# PROMPT_GENERATOR_MODEL=anthropic/claude-opus-4.1
# Optional: Override available models for model switching (comma-separated)
# OPENROUTER_AVAILABLE_MODELS=anthropic/claude-sonnet-4,openai/o3,anthropic/claude-opus-4,google/gemini-2.5-pro,openai/o3-pro
# OPENAI_AVAILABLE_MODELS=o3,gpt-5
================================================
FILE: .github/workflows/ci.yml
================================================
name: CI
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
jobs:
build-and-test:
strategy:
matrix:
include:
- os: ubuntu-latest
arch: amd64
runs-on: ${{ matrix.os }}
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Install uv
run: |
curl -LsSf https://astral.sh/uv/install.sh | sh
echo "$HOME/.cargo/bin" >> $GITHUB_PATH
- name: Install Rust
uses: dtolnay/rust-toolchain@stable
- name: Install system dependencies
run: |
sudo apt-get update
sudo apt-get install -y libssl-dev
- name: Build codex-rs binary (${{ matrix.arch }})
run: |
cargo build --release --manifest-path codex-rs/Cargo.toml
ls -lh codex-rs/target/release/codex
- name: Verify binary is executable
run: |
file codex-rs/target/release/codex
codex-rs/target/release/codex --version || echo "Binary built successfully"
- name: Setup Python environment
run: |
uv sync
- name: Check Python package formatting
run: |
source .venv/bin/activate
python -m py_compile supervisor/*.py || true
- name: Test supervisor imports
run: |
source .venv/bin/activate
python -c "import supervisor.supervisor; print('✓ supervisor.supervisor imports successfully')"
python -c "from supervisor import supervisor; print('✓ supervisor module imports successfully')"
- name: Summary
run: |
echo "====== Build Summary ======"
echo "Architecture: ${{ matrix.arch }}"
echo "Codex binary: ✓ Built"
echo "Python environment: ✓ Set up"
echo "Supervisor imports: ✓ Working"
echo "=========================="
================================================
FILE: .gitignore
================================================
# deps
# Node.js dependencies
node_modules
.pnpm-store
.pnpm-debug.log
# Keep pnpm-lock.yaml
!pnpm-lock.yaml
# build
dist/
build/
out/
storybook-static/
# ignore README for publishing
codex-cli/README.md
# ignore Nix derivation results
result
# editor
.vscode/
.idea/
.history/
.zed/
*.swp
*~
# cli tools
CLAUDE.md
.claude/
# caches
.cache/
.turbo/
.parcel-cache/
.eslintcache
.nyc_output/
.jest/
*.tsbuildinfo
# logs
logs/
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
# env
.env*
!.env.example
# package
*.tgz
# ci
.vercel/
.netlify/
# patches
apply_patch/
# coverage
coverage/
# os
.DS_Store
Thumbs.db
Icon?
.Spotlight-V100/
# Unwanted package managers
.yarn/
yarn.lock
# release
package.json-e
session.ts-e
CHANGELOG.ignore.md
# nix related
.direnv
.envrc
# Python stuff
*.pyc
*.pyc
__pycache__/
*.pyo
*.pyd
================================================
FILE: Dockerfile
================================================
FROM ghcr.io/astral-sh/uv:python3.11-bookworm
ENV DEBIAN_FRONTEND=noninteractive
RUN apt-get update && \
apt-get install -y --no-install-recommends \
build-essential \
curl \
libssl-dev \
pkg-config \
&& rm -rf /var/lib/apt/lists/*
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y && \
~/.cargo/bin/rustup install stable && \
~/.cargo/bin/rustup default stable
ENV PATH="/root/.cargo/bin:${PATH}"
WORKDIR /app
COPY . /app/trinity/ARTEMIS
WORKDIR /app/trinity/ARTEMIS
RUN cargo build --release --manifest-path codex-rs/Cargo.toml
RUN uv sync
ENV VIRTUAL_ENV=/app/trinity/ARTEMIS/.venv
ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
================================================
FILE: LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright 2025 OpenAI
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: NOTICE
================================================
OpenAI Codex
Copyright 2025 OpenAI
This project includes code derived from [Ratatui](https://github.com/ratatui/ratatui), licensed under the MIT license.
Copyright (c) 2016-2022 Florian Dehau
Copyright (c) 2023-2025 The Ratatui Developers
================================================
FILE: README.md
================================================
🏹 ARTEMIS
Automated Red Teaming Engine with Multi-agent Intelligent Supervision
ARTEMIS is an autonomous agent created by the Stanford Trinity project to automate vulnerability discovery.
#### Quickstart
Install `uv` if you haven't already:
```bash
curl -LsSf https://astral.sh/uv/install.sh | sh
```
Install the latest version of Rust (required for building):
```bash
# Remove old Rust if installed via apt
sudo apt remove rustc cargo
sudo apt install libssl-dev
# Install rustup (the official Rust toolchain installer)
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
# Restart shell or source the environment
source ~/.cargo/env
# Install latest stable Rust
rustup install stable
rustup default stable
```
First, we have to build the codex binary:
```bash
cargo build --release --manifest-path codex-rs/Cargo.toml
```
Now we can setup the Python environment:
```bash
uv sync
source .venv/bin/activate
```
### Environment Configuration
Copy the example configuration and add your API keys:
```bash
cp .env.example .env
# Edit .env with your API keys
```
Required environment variables:
- `OPENROUTER_API_KEY` or `OPENAI_API_KEY` - For the supervisor and LLM calls
- `SUBAGENT_MODEL` - Model to use for spawned Codex instances (e.g., `anthropic/claude-sonnet-4`)
### Quick Test Run
Try a simple CTF challenge to verify everything works:
```bash
python -m supervisor.supervisor \
--config-file configs/tests/ctf_easy.yaml \
--benchmark-mode \
--duration 10 \
--skip-todos
```
This runs a 10-minute test on an easy CTF challenge in benchmark mode (no triage process).
For detailed configuration options and usage, see [supervisor-usage.md](docs/supervisor-usage.md).
---
## Docker
### Docker Quickstart
Build the Docker image:
```bash
docker build -t artemis .
```
### Environment Configuration
Same as above - copy the example configuration and add your API keys:
```bash
cp .env.example .env
# Edit .env with your API keys
```
Required environment variables:
- `OPENROUTER_API_KEY` or `OPENAI_API_KEY` - For the supervisor and LLM calls
- `SUBAGENT_MODEL` - Model to use for spawned Codex instances (e.g., `anthropic/claude-sonnet-4`)
### Codex Configuration for OpenRouter
If using OpenRouter, you'll need to configure the codex binary. Create `~/.codex/config.toml`:
```bash
mkdir -p ~/.codex
cat > ~/.codex/config.toml <<'EOF'
model_provider = "openrouter"
[model_providers.openrouter]
name = "OpenRouter"
base_url = "https://openrouter.ai/api/v1"
env_key = "OPENROUTER_API_KEY"
[sandbox]
mode = "workspace-write"
network_access = true
EOF
```
### Running with Docker
Use the provided `run_docker.sh` script:
```bash
# Run with OpenRouter (mounts ~/.codex/config.toml)
./run_docker.sh openrouter
# Run with OpenAI only (no config mount needed)
./run_docker.sh openai
```
The script will:
- Mount your `~/.codex/config.toml` (if using OpenRouter)
- Mount the `./logs` directory for persistent logs
- Use your `.env` file for API keys
- Run a 10-minute test on an easy CTF challenge
**Manual Docker Run:**
If you prefer to run docker manually:
```bash
# With OpenRouter
docker run -it \
--env-file .env \
-v $HOME/.codex/config.toml:/root/.codex/config.toml:ro \
-v $(pwd)/logs:/app/trinity/ARTEMIS/logs \
artemis \
python -m supervisor.supervisor \
--config-file configs/tests/ctf_easy.yaml \
--benchmark-mode \
--duration 10 \
--skip-todos
# With OpenAI only
docker run -it \
--env-file .env \
-v $(pwd)/logs:/app/trinity/ARTEMIS/logs \
artemis \
python -m supervisor.supervisor \
--config-file configs/tests/ctf_easy.yaml \
--benchmark-mode \
--duration 10 \
--skip-todos
```
---
## Acknowledgments
This project uses [OpenAI Codex](https://github.com/openai/codex) as a base, forked from [commit c221eab](https://github.com/openai/codex/commit/c221eab0b5cad59ce3dafebf7ca630f217263cc6).
---
## License
This repository is licensed under the [Apache-2.0 License](LICENSE).
================================================
FILE: codex-rs/.gitignore
================================================
/target/
# Recommended value of CARGO_TARGET_DIR when using Docker as explained in .devcontainer/README.md.
/target-amd64/
# Value of CARGO_TARGET_DIR when using .devcontainer/devcontainer.json.
/target-arm64/
================================================
FILE: codex-rs/Cargo.toml
================================================
[workspace]
members = [
"ansi-escape",
"apply-patch",
"arg0",
"cli",
"common",
"core",
"exec",
"execpolicy",
"file-search",
"linux-sandbox",
"login",
"mcp-client",
"mcp-server",
"mcp-types",
"ollama",
"protocol",
"protocol-ts",
"tui",
]
resolver = "2"
[workspace.package]
version = "0.0.0"
# Track the edition for all workspace crates in one place. Individual
# crates can still override this value, but keeping it here means new
# crates created with `cargo new -w ...` automatically inherit the 2024
# edition.
edition = "2024"
[workspace.lints]
rust = {}
[workspace.lints.clippy]
expect_used = "deny"
unwrap_used = "deny"
[profile.release]
lto = "fat"
# Because we bundle some of these executables with the TypeScript CLI, we
# remove everything to make the binary as small as possible.
strip = "symbols"
# See https://github.com/openai/codex/issues/1411 for details.
codegen-units = 1
[patch.crates-io]
# ratatui = { path = "../../ratatui" }
ratatui = { git = "https://github.com/nornagon/ratatui", branch = "nornagon-v0.29.0-patch" }
================================================
FILE: codex-rs/README.md
================================================
# Codex CLI (Rust Implementation)
We provide Codex CLI as a standalone, native executable to ensure a zero-dependency install.
## Installing Codex
Today, the easiest way to install Codex is via `npm`, though we plan to publish Codex to other package managers soon.
```shell
npm i -g @openai/codex@native
codex
```
You can also download a platform-specific release directly from our [GitHub Releases](https://github.com/openai/codex/releases).
## What's new in the Rust CLI
While we are [working to close the gap between the TypeScript and Rust implementations of Codex CLI](https://github.com/openai/codex/issues/1262), note that the Rust CLI has a number of features that the TypeScript CLI does not!
### Config
Codex supports a rich set of configuration options. Note that the Rust CLI uses `config.toml` instead of `config.json`. See [`docs/config.md`](../docs/config.md) for details.
### Model Context Protocol Support
Codex CLI functions as an MCP client that can connect to MCP servers on startup. See the [`mcp_servers`](../docs/config.md#mcp_servers) section in the configuration documentation for details.
It is still experimental, but you can also launch Codex as an MCP _server_ by running `codex mcp`. Use the [`@modelcontextprotocol/inspector`](https://github.com/modelcontextprotocol/inspector) to try it out:
```shell
npx @modelcontextprotocol/inspector codex mcp
```
### Notifications
You can enable notifications by configuring a script that is run whenever the agent finishes a turn. The [notify documentation](../docs/config.md#notify) includes a detailed example that explains how to get desktop notifications via [terminal-notifier](https://github.com/julienXX/terminal-notifier) on macOS.
### `codex exec` to run Codex programmatially/non-interactively
To run Codex non-interactively, run `codex exec PROMPT` (you can also pass the prompt via `stdin`) and Codex will work on your task until it decides that it is done and exits. Output is printed to the terminal directly. You can set the `RUST_LOG` environment variable to see more about what's going on.
### Use `@` for file search
Typing `@` triggers a fuzzy-filename search over the workspace root. Use up/down to select among the results and Tab or Enter to replace the `@` with the selected path. You can use Esc to cancel the search.
### Esc–Esc to edit a previous message
When the chat composer is empty, press Esc to prime “backtrack” mode. Press Esc again to open a transcript preview highlighting the last user message; press Esc repeatedly to step to older user messages. Press Enter to confirm and Codex will fork the conversation from that point, trim the visible transcript accordingly, and pre‑fill the composer with the selected user message so you can edit and resubmit it.
In the transcript preview, the footer shows an `Esc edit prev` hint while editing is active.
### `--cd`/`-C` flag
Sometimes it is not convenient to `cd` to the directory you want Codex to use as the "working root" before running Codex. Fortunately, `codex` supports a `--cd` option so you can specify whatever folder you want. You can confirm that Codex is honoring `--cd` by double-checking the **workdir** it reports in the TUI at the start of a new session.
### Shell completions
Generate shell completion scripts via:
```shell
codex completion bash
codex completion zsh
codex completion fish
```
### Experimenting with the Codex Sandbox
To test to see what happens when a command is run under the sandbox provided by Codex, we provide the following subcommands in Codex CLI:
```
# macOS
codex debug seatbelt [--full-auto] [COMMAND]...
# Linux
codex debug landlock [--full-auto] [COMMAND]...
```
### Selecting a sandbox policy via `--sandbox`
The Rust CLI exposes a dedicated `--sandbox` (`-s`) flag that lets you pick the sandbox policy **without** having to reach for the generic `-c/--config` option:
```shell
# Run Codex with the default, read-only sandbox
codex --sandbox read-only
# Allow the agent to write within the current workspace while still blocking network access
codex --sandbox workspace-write
# Danger! Disable sandboxing entirely (only do this if you are already running in a container or other isolated env)
codex --sandbox danger-full-access
```
The same setting can be persisted in `~/.codex/config.toml` via the top-level `sandbox_mode = "MODE"` key, e.g. `sandbox_mode = "workspace-write"`.
## Code Organization
This folder is the root of a Cargo workspace. It contains quite a bit of experimental code, but here are the key crates:
- [`core/`](./core) contains the business logic for Codex. Ultimately, we hope this to be a library crate that is generally useful for building other Rust/native applications that use Codex.
- [`exec/`](./exec) "headless" CLI for use in automation.
- [`tui/`](./tui) CLI that launches a fullscreen TUI built with [Ratatui](https://ratatui.rs/).
- [`cli/`](./cli) CLI multitool that provides the aforementioned CLIs via subcommands.
# Open Router Support
Add the following to your `~/.codex/config.toml` to enable Open Router support:
```toml
[model_providers.openrouter]
name = "OpenRouter"
base_url = "[https://api.openrouter.ai/v1](https://openrouter.ai/api/v1)"
api_key = "OPENROUTER_API_KEY"
[profiles.openrouter]
name = "OpenRouter"
[profiles.openrouter.sandbox]
network_access = true
```
Call `cargo run --bin codex --release -- --profile OpenRouter --model ` to run Codex with Open Router support and model of your choice. See available models at [Open Router](https://openrouter.ai/models).
================================================
FILE: codex-rs/ansi-escape/Cargo.toml
================================================
[package]
edition = "2024"
name = "codex-ansi-escape"
version = { workspace = true }
[lib]
name = "codex_ansi_escape"
path = "src/lib.rs"
[dependencies]
ansi-to-tui = "7.0.0"
ratatui = { version = "0.29.0", features = [
"unstable-rendered-line-info",
"unstable-widget-ref",
] }
tracing = { version = "0.1.41", features = ["log"] }
================================================
FILE: codex-rs/ansi-escape/README.md
================================================
# oai-codex-ansi-escape
Small helper functions that wrap functionality from
:
```rust
pub fn ansi_escape_line(s: &str) -> Line<'static>
pub fn ansi_escape<'a>(s: &'a str) -> Text<'a>
```
Advantages:
- `ansi_to_tui::IntoText` is not in scope for the entire TUI crate
- we `panic!()` and log if `IntoText` returns an `Err` and log it so that
the caller does not have to deal with it
================================================
FILE: codex-rs/ansi-escape/src/lib.rs
================================================
use ansi_to_tui::Error;
use ansi_to_tui::IntoText;
use ratatui::text::Line;
use ratatui::text::Text;
/// This function should be used when the contents of `s` are expected to match
/// a single line. If multiple lines are found, a warning is logged and only the
/// first line is returned.
pub fn ansi_escape_line(s: &str) -> Line<'static> {
let text = ansi_escape(s);
match text.lines.as_slice() {
[] => Line::from(""),
[only] => only.clone(),
[first, rest @ ..] => {
tracing::warn!("ansi_escape_line: expected a single line, got {first:?} and {rest:?}");
first.clone()
}
}
}
pub fn ansi_escape(s: &str) -> Text<'static> {
// to_text() claims to be faster, but introduces complex lifetime issues
// such that it's not worth it.
match s.into_text() {
Ok(text) => text,
Err(err) => match err {
Error::NomError(message) => {
tracing::error!(
"ansi_to_tui NomError docs claim should never happen when parsing `{s}`: {message}"
);
panic!();
}
Error::Utf8Error(utf8error) => {
tracing::error!("Utf8Error: {utf8error}");
panic!();
}
},
}
}
================================================
FILE: codex-rs/apply-patch/Cargo.toml
================================================
[package]
edition = "2024"
name = "codex-apply-patch"
version = { workspace = true }
[lib]
name = "codex_apply_patch"
path = "src/lib.rs"
[[bin]]
name = "apply_patch"
path = "src/main.rs"
[lints]
workspace = true
[dependencies]
anyhow = "1"
similar = "2.7.0"
thiserror = "2.0.12"
tree-sitter = "0.25.8"
tree-sitter-bash = "0.25.0"
[dev-dependencies]
assert_cmd = "2"
pretty_assertions = "1.4.1"
tempfile = "3.13.0"
================================================
FILE: codex-rs/apply-patch/apply_patch_tool_instructions.md
================================================
## `apply_patch`
Use the `apply_patch` shell command to edit files.
Your patch language is a stripped‑down, file‑oriented diff format designed to be easy to parse and safe to apply. You can think of it as a high‑level envelope:
*** Begin Patch
[ one or more file sections ]
*** End Patch
Within that envelope, you get a sequence of file operations.
You MUST include a header to specify the action you are taking.
Each operation starts with one of three headers:
*** Add File: - create a new file. Every following line is a + line (the initial contents).
*** Delete File: - remove an existing file. Nothing follows.
*** Update File: - patch an existing file in place (optionally with a rename).
May be immediately followed by *** Move to: if you want to rename the file.
Then one or more “hunks”, each introduced by @@ (optionally followed by a hunk header).
Within a hunk each line starts with:
For instructions on [context_before] and [context_after]:
- By default, show 3 lines of code immediately above and 3 lines immediately below each change. If a change is within 3 lines of a previous change, do NOT duplicate the first change’s [context_after] lines in the second change’s [context_before] lines.
- If 3 lines of context is insufficient to uniquely identify the snippet of code within the file, use the @@ operator to indicate the class or function to which the snippet belongs. For instance, we might have:
@@ class BaseClass
[3 lines of pre-context]
- [old_code]
+ [new_code]
[3 lines of post-context]
- If a code block is repeated so many times in a class or function such that even a single `@@` statement and 3 lines of context cannot uniquely identify the snippet of code, you can use multiple `@@` statements to jump to the right context. For instance:
@@ class BaseClass
@@ def method():
[3 lines of pre-context]
- [old_code]
+ [new_code]
[3 lines of post-context]
The full grammar definition is below:
Patch := Begin { FileOp } End
Begin := "*** Begin Patch" NEWLINE
End := "*** End Patch" NEWLINE
FileOp := AddFile | DeleteFile | UpdateFile
AddFile := "*** Add File: " path NEWLINE { "+" line NEWLINE }
DeleteFile := "*** Delete File: " path NEWLINE
UpdateFile := "*** Update File: " path NEWLINE [ MoveTo ] { Hunk }
MoveTo := "*** Move to: " newPath NEWLINE
Hunk := "@@" [ header ] NEWLINE { HunkLine } [ "*** End of File" NEWLINE ]
HunkLine := (" " | "-" | "+") text NEWLINE
A full patch can combine several operations:
*** Begin Patch
*** Add File: hello.txt
+Hello world
*** Update File: src/app.py
*** Move to: src/main.py
@@ def greet():
-print("Hi")
+print("Hello, world!")
*** Delete File: obsolete.txt
*** End Patch
It is important to remember:
- You must include a header with your intended action (Add/Delete/Update)
- You must prefix new lines with `+` even when creating a new file
- File references can only be relative, NEVER ABSOLUTE.
You can invoke apply_patch like:
```
shell {"command":["apply_patch","*** Begin Patch\n*** Add File: hello.txt\n+Hello, world!\n*** End Patch\n"]}
```
================================================
FILE: codex-rs/apply-patch/src/lib.rs
================================================
mod parser;
mod seek_sequence;
mod standalone_executable;
use std::collections::HashMap;
use std::path::Path;
use std::path::PathBuf;
use std::str::Utf8Error;
use anyhow::Context;
use anyhow::Result;
pub use parser::Hunk;
pub use parser::ParseError;
use parser::ParseError::*;
use parser::UpdateFileChunk;
pub use parser::parse_patch;
use similar::TextDiff;
use thiserror::Error;
use tree_sitter::LanguageError;
use tree_sitter::Parser;
use tree_sitter_bash::LANGUAGE as BASH;
pub use standalone_executable::main;
/// Detailed instructions for gpt-4.1 on how to use the `apply_patch` tool.
pub const APPLY_PATCH_TOOL_INSTRUCTIONS: &str = include_str!("../apply_patch_tool_instructions.md");
const APPLY_PATCH_COMMANDS: [&str; 2] = ["apply_patch", "applypatch"];
#[derive(Debug, Error, PartialEq)]
pub enum ApplyPatchError {
#[error(transparent)]
ParseError(#[from] ParseError),
#[error(transparent)]
IoError(#[from] IoError),
/// Error that occurs while computing replacements when applying patch chunks
#[error("{0}")]
ComputeReplacements(String),
}
impl From for ApplyPatchError {
fn from(err: std::io::Error) -> Self {
ApplyPatchError::IoError(IoError {
context: "I/O error".to_string(),
source: err,
})
}
}
impl From<&std::io::Error> for ApplyPatchError {
fn from(err: &std::io::Error) -> Self {
ApplyPatchError::IoError(IoError {
context: "I/O error".to_string(),
source: std::io::Error::new(err.kind(), err.to_string()),
})
}
}
#[derive(Debug, Error)]
#[error("{context}: {source}")]
pub struct IoError {
context: String,
#[source]
source: std::io::Error,
}
impl PartialEq for IoError {
fn eq(&self, other: &Self) -> bool {
self.context == other.context && self.source.to_string() == other.source.to_string()
}
}
#[derive(Debug, PartialEq)]
pub enum MaybeApplyPatch {
Body(ApplyPatchArgs),
ShellParseError(ExtractHeredocError),
PatchParseError(ParseError),
NotApplyPatch,
}
/// Both the raw PATCH argument to `apply_patch` as well as the PATCH argument
/// parsed into hunks.
#[derive(Debug, PartialEq)]
pub struct ApplyPatchArgs {
pub patch: String,
pub hunks: Vec,
}
pub fn maybe_parse_apply_patch(argv: &[String]) -> MaybeApplyPatch {
match argv {
[cmd, body] if APPLY_PATCH_COMMANDS.contains(&cmd.as_str()) => match parse_patch(body) {
Ok(source) => MaybeApplyPatch::Body(source),
Err(e) => MaybeApplyPatch::PatchParseError(e),
},
[bash, flag, script]
if bash == "bash"
&& flag == "-lc"
&& APPLY_PATCH_COMMANDS
.iter()
.any(|cmd| script.trim_start().starts_with(cmd)) =>
{
match extract_heredoc_body_from_apply_patch_command(script) {
Ok(body) => match parse_patch(&body) {
Ok(source) => MaybeApplyPatch::Body(source),
Err(e) => MaybeApplyPatch::PatchParseError(e),
},
Err(e) => MaybeApplyPatch::ShellParseError(e),
}
}
_ => MaybeApplyPatch::NotApplyPatch,
}
}
#[derive(Debug, PartialEq)]
pub enum ApplyPatchFileChange {
Add {
content: String,
},
Delete,
Update {
unified_diff: String,
move_path: Option,
/// new_content that will result after the unified_diff is applied.
new_content: String,
},
}
#[derive(Debug, PartialEq)]
pub enum MaybeApplyPatchVerified {
/// `argv` corresponded to an `apply_patch` invocation, and these are the
/// resulting proposed file changes.
Body(ApplyPatchAction),
/// `argv` could not be parsed to determine whether it corresponds to an
/// `apply_patch` invocation.
ShellParseError(ExtractHeredocError),
/// `argv` corresponded to an `apply_patch` invocation, but it could not
/// be fulfilled due to the specified error.
CorrectnessError(ApplyPatchError),
/// `argv` decidedly did not correspond to an `apply_patch` invocation.
NotApplyPatch,
}
/// ApplyPatchAction is the result of parsing an `apply_patch` command. By
/// construction, all paths should be absolute paths.
#[derive(Debug, PartialEq)]
pub struct ApplyPatchAction {
changes: HashMap,
/// The raw patch argument that can be used with `apply_patch` as an exec
/// call. i.e., if the original arg was parsed in "lenient" mode with a
/// heredoc, this should be the value without the heredoc wrapper.
pub patch: String,
/// The working directory that was used to resolve relative paths in the patch.
pub cwd: PathBuf,
}
impl ApplyPatchAction {
pub fn is_empty(&self) -> bool {
self.changes.is_empty()
}
/// Returns the changes that would be made by applying the patch.
pub fn changes(&self) -> &HashMap {
&self.changes
}
/// Should be used exclusively for testing. (Not worth the overhead of
/// creating a feature flag for this.)
pub fn new_add_for_test(path: &Path, content: String) -> Self {
if !path.is_absolute() {
panic!("path must be absolute");
}
#[expect(clippy::expect_used)]
let filename = path
.file_name()
.expect("path should not be empty")
.to_string_lossy();
let patch = format!(
r#"*** Begin Patch
*** Update File: {filename}
@@
+ {content}
*** End Patch"#,
);
let changes = HashMap::from([(path.to_path_buf(), ApplyPatchFileChange::Add { content })]);
#[expect(clippy::expect_used)]
Self {
changes,
cwd: path
.parent()
.expect("path should have parent")
.to_path_buf(),
patch,
}
}
}
/// cwd must be an absolute path so that we can resolve relative paths in the
/// patch.
pub fn maybe_parse_apply_patch_verified(argv: &[String], cwd: &Path) -> MaybeApplyPatchVerified {
match maybe_parse_apply_patch(argv) {
MaybeApplyPatch::Body(ApplyPatchArgs { patch, hunks }) => {
let mut changes = HashMap::new();
for hunk in hunks {
let path = hunk.resolve_path(cwd);
match hunk {
Hunk::AddFile { contents, .. } => {
changes.insert(path, ApplyPatchFileChange::Add { content: contents });
}
Hunk::DeleteFile { .. } => {
changes.insert(path, ApplyPatchFileChange::Delete);
}
Hunk::UpdateFile {
move_path, chunks, ..
} => {
let ApplyPatchFileUpdate {
unified_diff,
content: contents,
} = match unified_diff_from_chunks(&path, &chunks) {
Ok(diff) => diff,
Err(e) => {
return MaybeApplyPatchVerified::CorrectnessError(e);
}
};
changes.insert(
path,
ApplyPatchFileChange::Update {
unified_diff,
move_path: move_path.map(|p| cwd.join(p)),
new_content: contents,
},
);
}
}
}
MaybeApplyPatchVerified::Body(ApplyPatchAction {
changes,
patch,
cwd: cwd.to_path_buf(),
})
}
MaybeApplyPatch::ShellParseError(e) => MaybeApplyPatchVerified::ShellParseError(e),
MaybeApplyPatch::PatchParseError(e) => MaybeApplyPatchVerified::CorrectnessError(e.into()),
MaybeApplyPatch::NotApplyPatch => MaybeApplyPatchVerified::NotApplyPatch,
}
}
/// Attempts to extract a heredoc_body object from a string bash command like:
/// Optimistically
///
/// ```bash
/// bash -lc 'apply_patch < std::result::Result {
if !APPLY_PATCH_COMMANDS
.iter()
.any(|cmd| src.trim_start().starts_with(cmd))
{
return Err(ExtractHeredocError::CommandDidNotStartWithApplyPatch);
}
let lang = BASH.into();
let mut parser = Parser::new();
parser
.set_language(&lang)
.map_err(ExtractHeredocError::FailedToLoadBashGrammar)?;
let tree = parser
.parse(src, None)
.ok_or(ExtractHeredocError::FailedToParsePatchIntoAst)?;
let bytes = src.as_bytes();
let mut c = tree.root_node().walk();
loop {
let node = c.node();
if node.kind() == "heredoc_body" {
let text = node
.utf8_text(bytes)
.map_err(ExtractHeredocError::HeredocNotUtf8)?;
return Ok(text.trim_end_matches('\n').to_owned());
}
if c.goto_first_child() {
continue;
}
while !c.goto_next_sibling() {
if !c.goto_parent() {
return Err(ExtractHeredocError::FailedToFindHeredocBody);
}
}
}
}
#[derive(Debug, PartialEq)]
pub enum ExtractHeredocError {
CommandDidNotStartWithApplyPatch,
FailedToLoadBashGrammar(LanguageError),
HeredocNotUtf8(Utf8Error),
FailedToParsePatchIntoAst,
FailedToFindHeredocBody,
}
/// Applies the patch and prints the result to stdout/stderr.
pub fn apply_patch(
patch: &str,
stdout: &mut impl std::io::Write,
stderr: &mut impl std::io::Write,
) -> Result<(), ApplyPatchError> {
let hunks = match parse_patch(patch) {
Ok(source) => source.hunks,
Err(e) => {
match &e {
InvalidPatchError(message) => {
writeln!(stderr, "Invalid patch: {message}").map_err(ApplyPatchError::from)?;
}
InvalidHunkError {
message,
line_number,
} => {
writeln!(
stderr,
"Invalid patch hunk on line {line_number}: {message}"
)
.map_err(ApplyPatchError::from)?;
}
}
return Err(ApplyPatchError::ParseError(e));
}
};
apply_hunks(&hunks, stdout, stderr)?;
Ok(())
}
/// Applies hunks and continues to update stdout/stderr
pub fn apply_hunks(
hunks: &[Hunk],
stdout: &mut impl std::io::Write,
stderr: &mut impl std::io::Write,
) -> Result<(), ApplyPatchError> {
let _existing_paths: Vec<&Path> = hunks
.iter()
.filter_map(|hunk| match hunk {
Hunk::AddFile { .. } => {
// The file is being added, so it doesn't exist yet.
None
}
Hunk::DeleteFile { path } => Some(path.as_path()),
Hunk::UpdateFile {
path, move_path, ..
} => match move_path {
Some(move_path) => {
if std::fs::metadata(move_path)
.map(|m| m.is_file())
.unwrap_or(false)
{
Some(move_path.as_path())
} else {
None
}
}
None => Some(path.as_path()),
},
})
.collect::>();
// Delegate to a helper that applies each hunk to the filesystem.
match apply_hunks_to_files(hunks) {
Ok(affected) => {
print_summary(&affected, stdout).map_err(ApplyPatchError::from)?;
Ok(())
}
Err(err) => {
let msg = err.to_string();
writeln!(stderr, "{msg}").map_err(ApplyPatchError::from)?;
if let Some(io) = err.downcast_ref::() {
Err(ApplyPatchError::from(io))
} else {
Err(ApplyPatchError::IoError(IoError {
context: msg,
source: std::io::Error::other(err),
}))
}
}
}
}
/// Applies each parsed patch hunk to the filesystem.
/// Returns an error if any of the changes could not be applied.
/// Tracks file paths affected by applying a patch.
pub struct AffectedPaths {
pub added: Vec,
pub modified: Vec,
pub deleted: Vec,
}
/// Apply the hunks to the filesystem, returning which files were added, modified, or deleted.
/// Returns an error if the patch could not be applied.
fn apply_hunks_to_files(hunks: &[Hunk]) -> anyhow::Result {
if hunks.is_empty() {
anyhow::bail!("No files were modified.");
}
let mut added: Vec = Vec::new();
let mut modified: Vec = Vec::new();
let mut deleted: Vec = Vec::new();
for hunk in hunks {
match hunk {
Hunk::AddFile { path, contents } => {
if let Some(parent) = path.parent()
&& !parent.as_os_str().is_empty()
{
std::fs::create_dir_all(parent).with_context(|| {
format!("Failed to create parent directories for {}", path.display())
})?;
}
std::fs::write(path, contents)
.with_context(|| format!("Failed to write file {}", path.display()))?;
added.push(path.clone());
}
Hunk::DeleteFile { path } => {
std::fs::remove_file(path)
.with_context(|| format!("Failed to delete file {}", path.display()))?;
deleted.push(path.clone());
}
Hunk::UpdateFile {
path,
move_path,
chunks,
} => {
let AppliedPatch { new_contents, .. } =
derive_new_contents_from_chunks(path, chunks)?;
if let Some(dest) = move_path {
if let Some(parent) = dest.parent()
&& !parent.as_os_str().is_empty()
{
std::fs::create_dir_all(parent).with_context(|| {
format!("Failed to create parent directories for {}", dest.display())
})?;
}
std::fs::write(dest, new_contents)
.with_context(|| format!("Failed to write file {}", dest.display()))?;
std::fs::remove_file(path)
.with_context(|| format!("Failed to remove original {}", path.display()))?;
modified.push(dest.clone());
} else {
std::fs::write(path, new_contents)
.with_context(|| format!("Failed to write file {}", path.display()))?;
modified.push(path.clone());
}
}
}
}
Ok(AffectedPaths {
added,
modified,
deleted,
})
}
struct AppliedPatch {
original_contents: String,
new_contents: String,
}
/// Return *only* the new file contents (joined into a single `String`) after
/// applying the chunks to the file at `path`.
fn derive_new_contents_from_chunks(
path: &Path,
chunks: &[UpdateFileChunk],
) -> std::result::Result {
let original_contents = match std::fs::read_to_string(path) {
Ok(contents) => contents,
Err(err) => {
return Err(ApplyPatchError::IoError(IoError {
context: format!("Failed to read file to update {}", path.display()),
source: err,
}));
}
};
let mut original_lines: Vec = original_contents
.split('\n')
.map(|s| s.to_string())
.collect();
// Drop the trailing empty element that results from the final newline so
// that line counts match the behaviour of standard `diff`.
if original_lines.last().is_some_and(|s| s.is_empty()) {
original_lines.pop();
}
let replacements = compute_replacements(&original_lines, path, chunks)?;
let new_lines = apply_replacements(original_lines, &replacements);
let mut new_lines = new_lines;
if !new_lines.last().is_some_and(|s| s.is_empty()) {
new_lines.push(String::new());
}
let new_contents = new_lines.join("\n");
Ok(AppliedPatch {
original_contents,
new_contents,
})
}
/// Compute a list of replacements needed to transform `original_lines` into the
/// new lines, given the patch `chunks`. Each replacement is returned as
/// `(start_index, old_len, new_lines)`.
fn compute_replacements(
original_lines: &[String],
path: &Path,
chunks: &[UpdateFileChunk],
) -> std::result::Result)>, ApplyPatchError> {
let mut replacements: Vec<(usize, usize, Vec)> = Vec::new();
let mut line_index: usize = 0;
for chunk in chunks {
// If a chunk has a `change_context`, we use seek_sequence to find it, then
// adjust our `line_index` to continue from there.
if let Some(ctx_line) = &chunk.change_context {
if let Some(idx) = seek_sequence::seek_sequence(
original_lines,
std::slice::from_ref(ctx_line),
line_index,
false,
) {
line_index = idx + 1;
} else {
return Err(ApplyPatchError::ComputeReplacements(format!(
"Failed to find context '{}' in {}",
ctx_line,
path.display()
)));
}
}
if chunk.old_lines.is_empty() {
// Pure addition (no old lines). We'll add them at the end or just
// before the final empty line if one exists.
let insertion_idx = if original_lines.last().is_some_and(|s| s.is_empty()) {
original_lines.len() - 1
} else {
original_lines.len()
};
replacements.push((insertion_idx, 0, chunk.new_lines.clone()));
continue;
}
// Otherwise, try to match the existing lines in the file with the old lines
// from the chunk. If found, schedule that region for replacement.
// Attempt to locate the `old_lines` verbatim within the file. In many
// real‑world diffs the last element of `old_lines` is an *empty* string
// representing the terminating newline of the region being replaced.
// This sentinel is not present in `original_lines` because we strip the
// trailing empty slice emitted by `split('\n')`. If a direct search
// fails and the pattern ends with an empty string, retry without that
// final element so that modifications touching the end‑of‑file can be
// located reliably.
let mut pattern: &[String] = &chunk.old_lines;
let mut found =
seek_sequence::seek_sequence(original_lines, pattern, line_index, chunk.is_end_of_file);
let mut new_slice: &[String] = &chunk.new_lines;
if found.is_none() && pattern.last().is_some_and(|s| s.is_empty()) {
// Retry without the trailing empty line which represents the final
// newline in the file.
pattern = &pattern[..pattern.len() - 1];
if new_slice.last().is_some_and(|s| s.is_empty()) {
new_slice = &new_slice[..new_slice.len() - 1];
}
found = seek_sequence::seek_sequence(
original_lines,
pattern,
line_index,
chunk.is_end_of_file,
);
}
if let Some(start_idx) = found {
replacements.push((start_idx, pattern.len(), new_slice.to_vec()));
line_index = start_idx + pattern.len();
} else {
return Err(ApplyPatchError::ComputeReplacements(format!(
"Failed to find expected lines {:?} in {}",
chunk.old_lines,
path.display()
)));
}
}
Ok(replacements)
}
/// Apply the `(start_index, old_len, new_lines)` replacements to `original_lines`,
/// returning the modified file contents as a vector of lines.
fn apply_replacements(
mut lines: Vec,
replacements: &[(usize, usize, Vec)],
) -> Vec {
// We must apply replacements in descending order so that earlier replacements
// don't shift the positions of later ones.
for (start_idx, old_len, new_segment) in replacements.iter().rev() {
let start_idx = *start_idx;
let old_len = *old_len;
// Remove old lines.
for _ in 0..old_len {
if start_idx < lines.len() {
lines.remove(start_idx);
}
}
// Insert new lines.
for (offset, new_line) in new_segment.iter().enumerate() {
lines.insert(start_idx + offset, new_line.clone());
}
}
lines
}
/// Intended result of a file update for apply_patch.
#[derive(Debug, Eq, PartialEq)]
pub struct ApplyPatchFileUpdate {
unified_diff: String,
content: String,
}
pub fn unified_diff_from_chunks(
path: &Path,
chunks: &[UpdateFileChunk],
) -> std::result::Result {
unified_diff_from_chunks_with_context(path, chunks, 1)
}
pub fn unified_diff_from_chunks_with_context(
path: &Path,
chunks: &[UpdateFileChunk],
context: usize,
) -> std::result::Result {
let AppliedPatch {
original_contents,
new_contents,
} = derive_new_contents_from_chunks(path, chunks)?;
let text_diff = TextDiff::from_lines(&original_contents, &new_contents);
let unified_diff = text_diff.unified_diff().context_radius(context).to_string();
Ok(ApplyPatchFileUpdate {
unified_diff,
content: new_contents,
})
}
/// Print the summary of changes in git-style format.
/// Write a summary of changes to the given writer.
pub fn print_summary(
affected: &AffectedPaths,
out: &mut impl std::io::Write,
) -> std::io::Result<()> {
writeln!(out, "Success. Updated the following files:")?;
for path in &affected.added {
writeln!(out, "A {}", path.display())?;
}
for path in &affected.modified {
writeln!(out, "M {}", path.display())?;
}
for path in &affected.deleted {
writeln!(out, "D {}", path.display())?;
}
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
use pretty_assertions::assert_eq;
use std::fs;
use tempfile::tempdir;
/// Helper to construct a patch with the given body.
fn wrap_patch(body: &str) -> String {
format!("*** Begin Patch\n{body}\n*** End Patch")
}
fn strs_to_strings(strs: &[&str]) -> Vec {
strs.iter().map(|s| s.to_string()).collect()
}
#[test]
fn test_literal() {
let args = strs_to_strings(&[
"apply_patch",
r#"*** Begin Patch
*** Add File: foo
+hi
*** End Patch
"#,
]);
match maybe_parse_apply_patch(&args) {
MaybeApplyPatch::Body(ApplyPatchArgs { hunks, patch: _ }) => {
assert_eq!(
hunks,
vec![Hunk::AddFile {
path: PathBuf::from("foo"),
contents: "hi\n".to_string()
}]
);
}
result => panic!("expected MaybeApplyPatch::Body got {result:?}"),
}
}
#[test]
fn test_literal_applypatch() {
let args = strs_to_strings(&[
"applypatch",
r#"*** Begin Patch
*** Add File: foo
+hi
*** End Patch
"#,
]);
match maybe_parse_apply_patch(&args) {
MaybeApplyPatch::Body(ApplyPatchArgs { hunks, patch: _ }) => {
assert_eq!(
hunks,
vec![Hunk::AddFile {
path: PathBuf::from("foo"),
contents: "hi\n".to_string()
}]
);
}
result => panic!("expected MaybeApplyPatch::Body got {result:?}"),
}
}
#[test]
fn test_heredoc() {
let args = strs_to_strings(&[
"bash",
"-lc",
r#"apply_patch <<'PATCH'
*** Begin Patch
*** Add File: foo
+hi
*** End Patch
PATCH"#,
]);
match maybe_parse_apply_patch(&args) {
MaybeApplyPatch::Body(ApplyPatchArgs { hunks, patch: _ }) => {
assert_eq!(
hunks,
vec![Hunk::AddFile {
path: PathBuf::from("foo"),
contents: "hi\n".to_string()
}]
);
}
result => panic!("expected MaybeApplyPatch::Body got {result:?}"),
}
}
#[test]
fn test_heredoc_applypatch() {
let args = strs_to_strings(&[
"bash",
"-lc",
r#"applypatch <<'PATCH'
*** Begin Patch
*** Add File: foo
+hi
*** End Patch
PATCH"#,
]);
match maybe_parse_apply_patch(&args) {
MaybeApplyPatch::Body(ApplyPatchArgs { hunks, patch: _ }) => {
assert_eq!(
hunks,
vec![Hunk::AddFile {
path: PathBuf::from("foo"),
contents: "hi\n".to_string()
}]
);
}
result => panic!("expected MaybeApplyPatch::Body got {result:?}"),
}
}
#[test]
fn test_add_file_hunk_creates_file_with_contents() {
let dir = tempdir().unwrap();
let path = dir.path().join("add.txt");
let patch = wrap_patch(&format!(
r#"*** Add File: {}
+ab
+cd"#,
path.display()
));
let mut stdout = Vec::new();
let mut stderr = Vec::new();
apply_patch(&patch, &mut stdout, &mut stderr).unwrap();
// Verify expected stdout and stderr outputs.
let stdout_str = String::from_utf8(stdout).unwrap();
let stderr_str = String::from_utf8(stderr).unwrap();
let expected_out = format!(
"Success. Updated the following files:\nA {}\n",
path.display()
);
assert_eq!(stdout_str, expected_out);
assert_eq!(stderr_str, "");
let contents = fs::read_to_string(path).unwrap();
assert_eq!(contents, "ab\ncd\n");
}
#[test]
fn test_delete_file_hunk_removes_file() {
let dir = tempdir().unwrap();
let path = dir.path().join("del.txt");
fs::write(&path, "x").unwrap();
let patch = wrap_patch(&format!("*** Delete File: {}", path.display()));
let mut stdout = Vec::new();
let mut stderr = Vec::new();
apply_patch(&patch, &mut stdout, &mut stderr).unwrap();
let stdout_str = String::from_utf8(stdout).unwrap();
let stderr_str = String::from_utf8(stderr).unwrap();
let expected_out = format!(
"Success. Updated the following files:\nD {}\n",
path.display()
);
assert_eq!(stdout_str, expected_out);
assert_eq!(stderr_str, "");
assert!(!path.exists());
}
#[test]
fn test_update_file_hunk_modifies_content() {
let dir = tempdir().unwrap();
let path = dir.path().join("update.txt");
fs::write(&path, "foo\nbar\n").unwrap();
let patch = wrap_patch(&format!(
r#"*** Update File: {}
@@
foo
-bar
+baz"#,
path.display()
));
let mut stdout = Vec::new();
let mut stderr = Vec::new();
apply_patch(&patch, &mut stdout, &mut stderr).unwrap();
// Validate modified file contents and expected stdout/stderr.
let stdout_str = String::from_utf8(stdout).unwrap();
let stderr_str = String::from_utf8(stderr).unwrap();
let expected_out = format!(
"Success. Updated the following files:\nM {}\n",
path.display()
);
assert_eq!(stdout_str, expected_out);
assert_eq!(stderr_str, "");
let contents = fs::read_to_string(&path).unwrap();
assert_eq!(contents, "foo\nbaz\n");
}
#[test]
fn test_update_file_hunk_can_move_file() {
let dir = tempdir().unwrap();
let src = dir.path().join("src.txt");
let dest = dir.path().join("dst.txt");
fs::write(&src, "line\n").unwrap();
let patch = wrap_patch(&format!(
r#"*** Update File: {}
*** Move to: {}
@@
-line
+line2"#,
src.display(),
dest.display()
));
let mut stdout = Vec::new();
let mut stderr = Vec::new();
apply_patch(&patch, &mut stdout, &mut stderr).unwrap();
// Validate move semantics and expected stdout/stderr.
let stdout_str = String::from_utf8(stdout).unwrap();
let stderr_str = String::from_utf8(stderr).unwrap();
let expected_out = format!(
"Success. Updated the following files:\nM {}\n",
dest.display()
);
assert_eq!(stdout_str, expected_out);
assert_eq!(stderr_str, "");
assert!(!src.exists());
let contents = fs::read_to_string(&dest).unwrap();
assert_eq!(contents, "line2\n");
}
/// Verify that a single `Update File` hunk with multiple change chunks can update different
/// parts of a file and that the file is listed only once in the summary.
#[test]
fn test_multiple_update_chunks_apply_to_single_file() {
// Start with a file containing four lines.
let dir = tempdir().unwrap();
let path = dir.path().join("multi.txt");
fs::write(&path, "foo\nbar\nbaz\nqux\n").unwrap();
// Construct an update patch with two separate change chunks.
// The first chunk uses the line `foo` as context and transforms `bar` into `BAR`.
// The second chunk uses `baz` as context and transforms `qux` into `QUX`.
let patch = wrap_patch(&format!(
r#"*** Update File: {}
@@
foo
-bar
+BAR
@@
baz
-qux
+QUX"#,
path.display()
));
let mut stdout = Vec::new();
let mut stderr = Vec::new();
apply_patch(&patch, &mut stdout, &mut stderr).unwrap();
let stdout_str = String::from_utf8(stdout).unwrap();
let stderr_str = String::from_utf8(stderr).unwrap();
let expected_out = format!(
"Success. Updated the following files:\nM {}\n",
path.display()
);
assert_eq!(stdout_str, expected_out);
assert_eq!(stderr_str, "");
let contents = fs::read_to_string(&path).unwrap();
assert_eq!(contents, "foo\nBAR\nbaz\nQUX\n");
}
/// A more involved `Update File` hunk that exercises additions, deletions and
/// replacements in separate chunks that appear in non‑adjacent parts of the
/// file. Verifies that all edits are applied and that the summary lists the
/// file only once.
#[test]
fn test_update_file_hunk_interleaved_changes() {
let dir = tempdir().unwrap();
let path = dir.path().join("interleaved.txt");
// Original file: six numbered lines.
fs::write(&path, "a\nb\nc\nd\ne\nf\n").unwrap();
// Patch performs:
// • Replace `b` → `B`
// • Replace `e` → `E` (using surrounding context)
// • Append new line `g` at the end‑of‑file
let patch = wrap_patch(&format!(
r#"*** Update File: {}
@@
a
-b
+B
@@
c
d
-e
+E
@@
f
+g
*** End of File"#,
path.display()
));
let mut stdout = Vec::new();
let mut stderr = Vec::new();
apply_patch(&patch, &mut stdout, &mut stderr).unwrap();
let stdout_str = String::from_utf8(stdout).unwrap();
let stderr_str = String::from_utf8(stderr).unwrap();
let expected_out = format!(
"Success. Updated the following files:\nM {}\n",
path.display()
);
assert_eq!(stdout_str, expected_out);
assert_eq!(stderr_str, "");
let contents = fs::read_to_string(&path).unwrap();
assert_eq!(contents, "a\nB\nc\nd\nE\nf\ng\n");
}
/// Ensure that patches authored with ASCII characters can update lines that
/// contain typographic Unicode punctuation (e.g. EN DASH, NON-BREAKING
/// HYPHEN). Historically `git apply` succeeds in such scenarios but our
/// internal matcher failed requiring an exact byte-for-byte match. The
/// fuzzy-matching pass that normalises common punctuation should now bridge
/// the gap.
#[test]
fn test_update_line_with_unicode_dash() {
let dir = tempdir().unwrap();
let path = dir.path().join("unicode.py");
// Original line contains EN DASH (\u{2013}) and NON-BREAKING HYPHEN (\u{2011}).
let original = "import asyncio # local import \u{2013} avoids top\u{2011}level dep\n";
std::fs::write(&path, original).unwrap();
// Patch uses plain ASCII dash / hyphen.
let patch = wrap_patch(&format!(
r#"*** Update File: {}
@@
-import asyncio # local import - avoids top-level dep
+import asyncio # HELLO"#,
path.display()
));
let mut stdout = Vec::new();
let mut stderr = Vec::new();
apply_patch(&patch, &mut stdout, &mut stderr).unwrap();
// File should now contain the replaced comment.
let expected = "import asyncio # HELLO\n";
let contents = std::fs::read_to_string(&path).unwrap();
assert_eq!(contents, expected);
// Ensure success summary lists the file as modified.
let stdout_str = String::from_utf8(stdout).unwrap();
let expected_out = format!(
"Success. Updated the following files:\nM {}\n",
path.display()
);
assert_eq!(stdout_str, expected_out);
// No stderr expected.
assert_eq!(String::from_utf8(stderr).unwrap(), "");
}
#[test]
fn test_unified_diff() {
// Start with a file containing four lines.
let dir = tempdir().unwrap();
let path = dir.path().join("multi.txt");
fs::write(&path, "foo\nbar\nbaz\nqux\n").unwrap();
let patch = wrap_patch(&format!(
r#"*** Update File: {}
@@
foo
-bar
+BAR
@@
baz
-qux
+QUX"#,
path.display()
));
let patch = parse_patch(&patch).unwrap();
let update_file_chunks = match patch.hunks.as_slice() {
[Hunk::UpdateFile { chunks, .. }] => chunks,
_ => panic!("Expected a single UpdateFile hunk"),
};
let diff = unified_diff_from_chunks(&path, update_file_chunks).unwrap();
let expected_diff = r#"@@ -1,4 +1,4 @@
foo
-bar
+BAR
baz
-qux
+QUX
"#;
let expected = ApplyPatchFileUpdate {
unified_diff: expected_diff.to_string(),
content: "foo\nBAR\nbaz\nQUX\n".to_string(),
};
assert_eq!(expected, diff);
}
#[test]
fn test_unified_diff_first_line_replacement() {
// Replace the very first line of the file.
let dir = tempdir().unwrap();
let path = dir.path().join("first.txt");
fs::write(&path, "foo\nbar\nbaz\n").unwrap();
let patch = wrap_patch(&format!(
r#"*** Update File: {}
@@
-foo
+FOO
bar
"#,
path.display()
));
let patch = parse_patch(&patch).unwrap();
let chunks = match patch.hunks.as_slice() {
[Hunk::UpdateFile { chunks, .. }] => chunks,
_ => panic!("Expected a single UpdateFile hunk"),
};
let diff = unified_diff_from_chunks(&path, chunks).unwrap();
let expected_diff = r#"@@ -1,2 +1,2 @@
-foo
+FOO
bar
"#;
let expected = ApplyPatchFileUpdate {
unified_diff: expected_diff.to_string(),
content: "FOO\nbar\nbaz\n".to_string(),
};
assert_eq!(expected, diff);
}
#[test]
fn test_unified_diff_last_line_replacement() {
// Replace the very last line of the file.
let dir = tempdir().unwrap();
let path = dir.path().join("last.txt");
fs::write(&path, "foo\nbar\nbaz\n").unwrap();
let patch = wrap_patch(&format!(
r#"*** Update File: {}
@@
foo
bar
-baz
+BAZ
"#,
path.display()
));
let patch = parse_patch(&patch).unwrap();
let chunks = match patch.hunks.as_slice() {
[Hunk::UpdateFile { chunks, .. }] => chunks,
_ => panic!("Expected a single UpdateFile hunk"),
};
let diff = unified_diff_from_chunks(&path, chunks).unwrap();
let expected_diff = r#"@@ -2,2 +2,2 @@
bar
-baz
+BAZ
"#;
let expected = ApplyPatchFileUpdate {
unified_diff: expected_diff.to_string(),
content: "foo\nbar\nBAZ\n".to_string(),
};
assert_eq!(expected, diff);
}
#[test]
fn test_unified_diff_insert_at_eof() {
// Insert a new line at end‑of‑file.
let dir = tempdir().unwrap();
let path = dir.path().join("insert.txt");
fs::write(&path, "foo\nbar\nbaz\n").unwrap();
let patch = wrap_patch(&format!(
r#"*** Update File: {}
@@
+quux
*** End of File
"#,
path.display()
));
let patch = parse_patch(&patch).unwrap();
let chunks = match patch.hunks.as_slice() {
[Hunk::UpdateFile { chunks, .. }] => chunks,
_ => panic!("Expected a single UpdateFile hunk"),
};
let diff = unified_diff_from_chunks(&path, chunks).unwrap();
let expected_diff = r#"@@ -3 +3,2 @@
baz
+quux
"#;
let expected = ApplyPatchFileUpdate {
unified_diff: expected_diff.to_string(),
content: "foo\nbar\nbaz\nquux\n".to_string(),
};
assert_eq!(expected, diff);
}
#[test]
fn test_unified_diff_interleaved_changes() {
// Original file with six lines.
let dir = tempdir().unwrap();
let path = dir.path().join("interleaved.txt");
fs::write(&path, "a\nb\nc\nd\ne\nf\n").unwrap();
// Patch replaces two separate lines and appends a new one at EOF using
// three distinct chunks.
let patch_body = format!(
r#"*** Update File: {}
@@
a
-b
+B
@@
d
-e
+E
@@
f
+g
*** End of File"#,
path.display()
);
let patch = wrap_patch(&patch_body);
// Extract chunks then build the unified diff.
let parsed = parse_patch(&patch).unwrap();
let chunks = match parsed.hunks.as_slice() {
[Hunk::UpdateFile { chunks, .. }] => chunks,
_ => panic!("Expected a single UpdateFile hunk"),
};
let diff = unified_diff_from_chunks(&path, chunks).unwrap();
let expected_diff = r#"@@ -1,6 +1,7 @@
a
-b
+B
c
d
-e
+E
f
+g
"#;
let expected = ApplyPatchFileUpdate {
unified_diff: expected_diff.to_string(),
content: "a\nB\nc\nd\nE\nf\ng\n".to_string(),
};
assert_eq!(expected, diff);
let mut stdout = Vec::new();
let mut stderr = Vec::new();
apply_patch(&patch, &mut stdout, &mut stderr).unwrap();
let contents = fs::read_to_string(path).unwrap();
assert_eq!(
contents,
r#"a
B
c
d
E
f
g
"#
);
}
#[test]
fn test_apply_patch_should_resolve_absolute_paths_in_cwd() {
let session_dir = tempdir().unwrap();
let relative_path = "source.txt";
// Note that we need this file to exist for the patch to be "verified"
// and parsed correctly.
let session_file_path = session_dir.path().join(relative_path);
fs::write(&session_file_path, "session directory content\n").unwrap();
let argv = vec![
"apply_patch".to_string(),
r#"*** Begin Patch
*** Update File: source.txt
@@
-session directory content
+updated session directory content
*** End Patch"#
.to_string(),
];
let result = maybe_parse_apply_patch_verified(&argv, session_dir.path());
// Verify the patch contents - as otherwise we may have pulled contents
// from the wrong file (as we're using relative paths)
assert_eq!(
result,
MaybeApplyPatchVerified::Body(ApplyPatchAction {
changes: HashMap::from([(
session_dir.path().join(relative_path),
ApplyPatchFileChange::Update {
unified_diff: r#"@@ -1 +1 @@
-session directory content
+updated session directory content
"#
.to_string(),
move_path: None,
new_content: "updated session directory content\n".to_string(),
},
)]),
patch: argv[1].clone(),
cwd: session_dir.path().to_path_buf(),
})
);
}
#[test]
fn test_apply_patch_fails_on_write_error() {
let dir = tempdir().unwrap();
let path = dir.path().join("readonly.txt");
fs::write(&path, "before\n").unwrap();
let mut perms = fs::metadata(&path).unwrap().permissions();
perms.set_readonly(true);
fs::set_permissions(&path, perms).unwrap();
let patch = wrap_patch(&format!(
"*** Update File: {}\n@@\n-before\n+after\n*** End Patch",
path.display()
));
let mut stdout = Vec::new();
let mut stderr = Vec::new();
let result = apply_patch(&patch, &mut stdout, &mut stderr);
assert!(result.is_err());
}
}
================================================
FILE: codex-rs/apply-patch/src/main.rs
================================================
pub fn main() -> ! {
codex_apply_patch::main()
}
================================================
FILE: codex-rs/apply-patch/src/parser.rs
================================================
//! This module is responsible for parsing & validating a patch into a list of "hunks".
//! (It does not attempt to actually check that the patch can be applied to the filesystem.)
//!
//! The official Lark grammar for the apply-patch format is:
//!
//! start: begin_patch hunk+ end_patch
//! begin_patch: "*** Begin Patch" LF
//! end_patch: "*** End Patch" LF?
//!
//! hunk: add_hunk | delete_hunk | update_hunk
//! add_hunk: "*** Add File: " filename LF add_line+
//! delete_hunk: "*** Delete File: " filename LF
//! update_hunk: "*** Update File: " filename LF change_move? change?
//! filename: /(.+)/
//! add_line: "+" /(.+)/ LF -> line
//!
//! change_move: "*** Move to: " filename LF
//! change: (change_context | change_line)+ eof_line?
//! change_context: ("@@" | "@@ " /(.+)/) LF
//! change_line: ("+" | "-" | " ") /(.+)/ LF
//! eof_line: "*** End of File" LF
//!
//! The parser below is a little more lenient than the explicit spec and allows for
//! leading/trailing whitespace around patch markers.
use crate::ApplyPatchArgs;
use std::path::Path;
use std::path::PathBuf;
use thiserror::Error;
const BEGIN_PATCH_MARKER: &str = "*** Begin Patch";
const END_PATCH_MARKER: &str = "*** End Patch";
const ADD_FILE_MARKER: &str = "*** Add File: ";
const DELETE_FILE_MARKER: &str = "*** Delete File: ";
const UPDATE_FILE_MARKER: &str = "*** Update File: ";
const MOVE_TO_MARKER: &str = "*** Move to: ";
const EOF_MARKER: &str = "*** End of File";
const CHANGE_CONTEXT_MARKER: &str = "@@ ";
const EMPTY_CHANGE_CONTEXT_MARKER: &str = "@@";
/// Currently, the only OpenAI model that knowingly requires lenient parsing is
/// gpt-4.1. While we could try to require everyone to pass in a strictness
/// param when invoking apply_patch, it is a pain to thread it through all of
/// the call sites, so we resign ourselves allowing lenient parsing for all
/// models. See [`ParseMode::Lenient`] for details on the exceptions we make for
/// gpt-4.1.
const PARSE_IN_STRICT_MODE: bool = false;
#[derive(Debug, PartialEq, Error, Clone)]
pub enum ParseError {
#[error("invalid patch: {0}")]
InvalidPatchError(String),
#[error("invalid hunk at line {line_number}, {message}")]
InvalidHunkError { message: String, line_number: usize },
}
use ParseError::*;
#[derive(Debug, PartialEq, Clone)]
#[allow(clippy::enum_variant_names)]
pub enum Hunk {
AddFile {
path: PathBuf,
contents: String,
},
DeleteFile {
path: PathBuf,
},
UpdateFile {
path: PathBuf,
move_path: Option,
/// Chunks should be in order, i.e. the `change_context` of one chunk
/// should occur later in the file than the previous chunk.
chunks: Vec,
},
}
impl Hunk {
pub fn resolve_path(&self, cwd: &Path) -> PathBuf {
match self {
Hunk::AddFile { path, .. } => cwd.join(path),
Hunk::DeleteFile { path } => cwd.join(path),
Hunk::UpdateFile { path, .. } => cwd.join(path),
}
}
}
use Hunk::*;
#[derive(Debug, PartialEq, Clone)]
pub struct UpdateFileChunk {
/// A single line of context used to narrow down the position of the chunk
/// (this is usually a class, method, or function definition.)
pub change_context: Option,
/// A contiguous block of lines that should be replaced with `new_lines`.
/// `old_lines` must occur strictly after `change_context`.
pub old_lines: Vec,
pub new_lines: Vec,
/// If set to true, `old_lines` must occur at the end of the source file.
/// (Tolerance around trailing newlines should be encouraged.)
pub is_end_of_file: bool,
}
pub fn parse_patch(patch: &str) -> Result {
let mode = if PARSE_IN_STRICT_MODE {
ParseMode::Strict
} else {
ParseMode::Lenient
};
parse_patch_text(patch, mode)
}
enum ParseMode {
/// Parse the patch text argument as is.
Strict,
/// GPT-4.1 is known to formulate the `command` array for the `local_shell`
/// tool call for `apply_patch` call using something like the following:
///
/// ```json
/// [
/// "apply_patch",
/// "<<'EOF'\n*** Begin Patch\n*** Update File: README.md\n@@...\n*** End Patch\nEOF\n",
/// ]
/// ```
///
/// This is a problem because `local_shell` is a bit of a misnomer: the
/// `command` is not invoked by passing the arguments to a shell like Bash,
/// but are invoked using something akin to `execvpe(3)`.
///
/// This is significant in this case because where a shell would interpret
/// `<<'EOF'...` as a heredoc and pass the contents via stdin (which is
/// fine, as `apply_patch` is specified to read from stdin if no argument is
/// passed), `execvpe(3)` interprets the heredoc as a literal string. To get
/// the `local_shell` tool to run a command the way shell would, the
/// `command` array must be something like:
///
/// ```json
/// [
/// "bash",
/// "-lc",
/// "apply_patch <<'EOF'\n*** Begin Patch\n*** Update File: README.md\n@@...\n*** End Patch\nEOF\n",
/// ]
/// ```
///
/// In lenient mode, we check if the argument to `apply_patch` starts with
/// `<<'EOF'` and ends with `EOF\n`. If so, we strip off these markers,
/// trim() the result, and treat what is left as the patch text.
Lenient,
}
fn parse_patch_text(patch: &str, mode: ParseMode) -> Result {
let lines: Vec<&str> = patch.trim().lines().collect();
let lines: &[&str] = match check_patch_boundaries_strict(&lines) {
Ok(()) => &lines,
Err(e) => match mode {
ParseMode::Strict => {
return Err(e);
}
ParseMode::Lenient => check_patch_boundaries_lenient(&lines, e)?,
},
};
let mut hunks: Vec = Vec::new();
// The above checks ensure that lines.len() >= 2.
let last_line_index = lines.len().saturating_sub(1);
let mut remaining_lines = &lines[1..last_line_index];
let mut line_number = 2;
while !remaining_lines.is_empty() {
let (hunk, hunk_lines) = parse_one_hunk(remaining_lines, line_number)?;
hunks.push(hunk);
line_number += hunk_lines;
remaining_lines = &remaining_lines[hunk_lines..]
}
let patch = lines.join("\n");
Ok(ApplyPatchArgs { hunks, patch })
}
/// Checks the start and end lines of the patch text for `apply_patch`,
/// returning an error if they do not match the expected markers.
fn check_patch_boundaries_strict(lines: &[&str]) -> Result<(), ParseError> {
let (first_line, last_line) = match lines {
[] => (None, None),
[first] => (Some(first), Some(first)),
[first, .., last] => (Some(first), Some(last)),
};
check_start_and_end_lines_strict(first_line, last_line)
}
/// If we are in lenient mode, we check if the first line starts with `<(
original_lines: &'a [&'a str],
original_parse_error: ParseError,
) -> Result<&'a [&'a str], ParseError> {
match original_lines {
[first, .., last] => {
if (first == &"<= 4
{
let inner_lines = &original_lines[1..original_lines.len() - 1];
match check_patch_boundaries_strict(inner_lines) {
Ok(()) => Ok(inner_lines),
Err(e) => Err(e),
}
} else {
Err(original_parse_error)
}
}
_ => Err(original_parse_error),
}
}
fn check_start_and_end_lines_strict(
first_line: Option<&&str>,
last_line: Option<&&str>,
) -> Result<(), ParseError> {
match (first_line, last_line) {
(Some(&first), Some(&last)) if first == BEGIN_PATCH_MARKER && last == END_PATCH_MARKER => {
Ok(())
}
(Some(&first), _) if first != BEGIN_PATCH_MARKER => Err(InvalidPatchError(String::from(
"The first line of the patch must be '*** Begin Patch'",
))),
_ => Err(InvalidPatchError(String::from(
"The last line of the patch must be '*** End Patch'",
))),
}
}
/// Attempts to parse a single hunk from the start of lines.
/// Returns the parsed hunk and the number of lines parsed (or a ParseError).
fn parse_one_hunk(lines: &[&str], line_number: usize) -> Result<(Hunk, usize), ParseError> {
// Be tolerant of case mismatches and extra padding around marker strings.
let first_line = lines[0].trim();
if let Some(path) = first_line.strip_prefix(ADD_FILE_MARKER) {
// Add File
let mut contents = String::new();
let mut parsed_lines = 1;
for add_line in &lines[1..] {
if let Some(line_to_add) = add_line.strip_prefix('+') {
contents.push_str(line_to_add);
contents.push('\n');
parsed_lines += 1;
} else {
break;
}
}
return Ok((
AddFile {
path: PathBuf::from(path),
contents,
},
parsed_lines,
));
} else if let Some(path) = first_line.strip_prefix(DELETE_FILE_MARKER) {
// Delete File
return Ok((
DeleteFile {
path: PathBuf::from(path),
},
1,
));
} else if let Some(path) = first_line.strip_prefix(UPDATE_FILE_MARKER) {
// Update File
let mut remaining_lines = &lines[1..];
let mut parsed_lines = 1;
// Optional: move file line
let move_path = remaining_lines
.first()
.and_then(|x| x.strip_prefix(MOVE_TO_MARKER));
if move_path.is_some() {
remaining_lines = &remaining_lines[1..];
parsed_lines += 1;
}
let mut chunks = Vec::new();
// NOTE: we need to know to stop once we reach the next special marker header.
while !remaining_lines.is_empty() {
// Skip over any completely blank lines that may separate chunks.
if remaining_lines[0].trim().is_empty() {
parsed_lines += 1;
remaining_lines = &remaining_lines[1..];
continue;
}
if remaining_lines[0].starts_with("***") {
break;
}
let (chunk, chunk_lines) = parse_update_file_chunk(
remaining_lines,
line_number + parsed_lines,
chunks.is_empty(),
)?;
chunks.push(chunk);
parsed_lines += chunk_lines;
remaining_lines = &remaining_lines[chunk_lines..]
}
if chunks.is_empty() {
return Err(InvalidHunkError {
message: format!("Update file hunk for path '{path}' is empty"),
line_number,
});
}
return Ok((
UpdateFile {
path: PathBuf::from(path),
move_path: move_path.map(PathBuf::from),
chunks,
},
parsed_lines,
));
}
Err(InvalidHunkError {
message: format!(
"'{first_line}' is not a valid hunk header. Valid hunk headers: '*** Add File: {{path}}', '*** Delete File: {{path}}', '*** Update File: {{path}}'"
),
line_number,
})
}
fn parse_update_file_chunk(
lines: &[&str],
line_number: usize,
allow_missing_context: bool,
) -> Result<(UpdateFileChunk, usize), ParseError> {
if lines.is_empty() {
return Err(InvalidHunkError {
message: "Update hunk does not contain any lines".to_string(),
line_number,
});
}
// If we see an explicit context marker @@ or @@ , consume it; otherwise, optionally
// allow treating the chunk as starting directly with diff lines.
let (change_context, start_index) = if lines[0] == EMPTY_CHANGE_CONTEXT_MARKER {
(None, 1)
} else if let Some(context) = lines[0].strip_prefix(CHANGE_CONTEXT_MARKER) {
(Some(context.to_string()), 1)
} else {
if !allow_missing_context {
return Err(InvalidHunkError {
message: format!(
"Expected update hunk to start with a @@ context marker, got: '{}'",
lines[0]
),
line_number,
});
}
(None, 0)
};
if start_index >= lines.len() {
return Err(InvalidHunkError {
message: "Update hunk does not contain any lines".to_string(),
line_number: line_number + 1,
});
}
let mut chunk = UpdateFileChunk {
change_context,
old_lines: Vec::new(),
new_lines: Vec::new(),
is_end_of_file: false,
};
let mut parsed_lines = 0;
for line in &lines[start_index..] {
match *line {
EOF_MARKER => {
if parsed_lines == 0 {
return Err(InvalidHunkError {
message: "Update hunk does not contain any lines".to_string(),
line_number: line_number + 1,
});
}
chunk.is_end_of_file = true;
parsed_lines += 1;
break;
}
line_contents => {
match line_contents.chars().next() {
None => {
// Interpret this as an empty line.
chunk.old_lines.push(String::new());
chunk.new_lines.push(String::new());
}
Some(' ') => {
chunk.old_lines.push(line_contents[1..].to_string());
chunk.new_lines.push(line_contents[1..].to_string());
}
Some('+') => {
chunk.new_lines.push(line_contents[1..].to_string());
}
Some('-') => {
chunk.old_lines.push(line_contents[1..].to_string());
}
_ => {
if parsed_lines == 0 {
return Err(InvalidHunkError {
message: format!(
"Unexpected line found in update hunk: '{line_contents}'. Every line should start with ' ' (context line), '+' (added line), or '-' (removed line)"
),
line_number: line_number + 1,
});
}
// Assume this is the start of the next hunk.
break;
}
}
parsed_lines += 1;
}
}
}
Ok((chunk, parsed_lines + start_index))
}
#[test]
fn test_parse_patch() {
assert_eq!(
parse_patch_text("bad", ParseMode::Strict),
Err(InvalidPatchError(
"The first line of the patch must be '*** Begin Patch'".to_string()
))
);
assert_eq!(
parse_patch_text("*** Begin Patch\nbad", ParseMode::Strict),
Err(InvalidPatchError(
"The last line of the patch must be '*** End Patch'".to_string()
))
);
assert_eq!(
parse_patch_text(
"*** Begin Patch\n\
*** Update File: test.py\n\
*** End Patch",
ParseMode::Strict
),
Err(InvalidHunkError {
message: "Update file hunk for path 'test.py' is empty".to_string(),
line_number: 2,
})
);
assert_eq!(
parse_patch_text(
"*** Begin Patch\n\
*** End Patch",
ParseMode::Strict
)
.unwrap()
.hunks,
Vec::new()
);
assert_eq!(
parse_patch_text(
"*** Begin Patch\n\
*** Add File: path/add.py\n\
+abc\n\
+def\n\
*** Delete File: path/delete.py\n\
*** Update File: path/update.py\n\
*** Move to: path/update2.py\n\
@@ def f():\n\
- pass\n\
+ return 123\n\
*** End Patch",
ParseMode::Strict
)
.unwrap()
.hunks,
vec![
AddFile {
path: PathBuf::from("path/add.py"),
contents: "abc\ndef\n".to_string()
},
DeleteFile {
path: PathBuf::from("path/delete.py")
},
UpdateFile {
path: PathBuf::from("path/update.py"),
move_path: Some(PathBuf::from("path/update2.py")),
chunks: vec![UpdateFileChunk {
change_context: Some("def f():".to_string()),
old_lines: vec![" pass".to_string()],
new_lines: vec![" return 123".to_string()],
is_end_of_file: false
}]
}
]
);
// Update hunk followed by another hunk (Add File).
assert_eq!(
parse_patch_text(
"*** Begin Patch\n\
*** Update File: file.py\n\
@@\n\
+line\n\
*** Add File: other.py\n\
+content\n\
*** End Patch",
ParseMode::Strict
)
.unwrap()
.hunks,
vec![
UpdateFile {
path: PathBuf::from("file.py"),
move_path: None,
chunks: vec![UpdateFileChunk {
change_context: None,
old_lines: vec![],
new_lines: vec!["line".to_string()],
is_end_of_file: false
}],
},
AddFile {
path: PathBuf::from("other.py"),
contents: "content\n".to_string()
}
]
);
// Update hunk without an explicit @@ header for the first chunk should parse.
// Use a raw string to preserve the leading space diff marker on the context line.
assert_eq!(
parse_patch_text(
r#"*** Begin Patch
*** Update File: file2.py
import foo
+bar
*** End Patch"#,
ParseMode::Strict
)
.unwrap()
.hunks,
vec![UpdateFile {
path: PathBuf::from("file2.py"),
move_path: None,
chunks: vec![UpdateFileChunk {
change_context: None,
old_lines: vec!["import foo".to_string()],
new_lines: vec!["import foo".to_string(), "bar".to_string()],
is_end_of_file: false,
}],
}]
);
}
#[test]
fn test_parse_patch_lenient() {
let patch_text = r#"*** Begin Patch
*** Update File: file2.py
import foo
+bar
*** End Patch"#;
let expected_patch = vec![UpdateFile {
path: PathBuf::from("file2.py"),
move_path: None,
chunks: vec![UpdateFileChunk {
change_context: None,
old_lines: vec!["import foo".to_string()],
new_lines: vec!["import foo".to_string(), "bar".to_string()],
is_end_of_file: false,
}],
}];
let expected_error =
InvalidPatchError("The first line of the patch must be '*** Begin Patch'".to_string());
let patch_text_in_heredoc = format!("< lines.len()` → returns `None` (cannot match, avoids
/// out‑of‑bounds panic that occurred pre‑2025‑04‑12)
pub(crate) fn seek_sequence(
lines: &[String],
pattern: &[String],
start: usize,
eof: bool,
) -> Option {
if pattern.is_empty() {
return Some(start);
}
// When the pattern is longer than the available input there is no possible
// match. Early‑return to avoid the out‑of‑bounds slice that would occur in
// the search loops below (previously caused a panic when
// `pattern.len() > lines.len()`).
if pattern.len() > lines.len() {
return None;
}
let search_start = if eof && lines.len() >= pattern.len() {
lines.len() - pattern.len()
} else {
start
};
// Exact match first.
for i in search_start..=lines.len().saturating_sub(pattern.len()) {
if lines[i..i + pattern.len()] == *pattern {
return Some(i);
}
}
// Then rstrip match.
for i in search_start..=lines.len().saturating_sub(pattern.len()) {
let mut ok = true;
for (p_idx, pat) in pattern.iter().enumerate() {
if lines[i + p_idx].trim_end() != pat.trim_end() {
ok = false;
break;
}
}
if ok {
return Some(i);
}
}
// Finally, trim both sides to allow more lenience.
for i in search_start..=lines.len().saturating_sub(pattern.len()) {
let mut ok = true;
for (p_idx, pat) in pattern.iter().enumerate() {
if lines[i + p_idx].trim() != pat.trim() {
ok = false;
break;
}
}
if ok {
return Some(i);
}
}
// ------------------------------------------------------------------
// Final, most permissive pass – attempt to match after *normalising*
// common Unicode punctuation to their ASCII equivalents so that diffs
// authored with plain ASCII characters can still be applied to source
// files that contain typographic dashes / quotes, etc. This mirrors the
// fuzzy behaviour of `git apply` which ignores minor byte-level
// differences when locating context lines.
// ------------------------------------------------------------------
fn normalise(s: &str) -> String {
s.trim()
.chars()
.map(|c| match c {
// Various dash / hyphen code-points → ASCII '-'
'\u{2010}' | '\u{2011}' | '\u{2012}' | '\u{2013}' | '\u{2014}' | '\u{2015}'
| '\u{2212}' => '-',
// Fancy single quotes → '\''
'\u{2018}' | '\u{2019}' | '\u{201A}' | '\u{201B}' => '\'',
// Fancy double quotes → '"'
'\u{201C}' | '\u{201D}' | '\u{201E}' | '\u{201F}' => '"',
// Non-breaking space and other odd spaces → normal space
'\u{00A0}' | '\u{2002}' | '\u{2003}' | '\u{2004}' | '\u{2005}' | '\u{2006}'
| '\u{2007}' | '\u{2008}' | '\u{2009}' | '\u{200A}' | '\u{202F}' | '\u{205F}'
| '\u{3000}' => ' ',
other => other,
})
.collect::()
}
for i in search_start..=lines.len().saturating_sub(pattern.len()) {
let mut ok = true;
for (p_idx, pat) in pattern.iter().enumerate() {
if normalise(&lines[i + p_idx]) != normalise(pat) {
ok = false;
break;
}
}
if ok {
return Some(i);
}
}
None
}
#[cfg(test)]
mod tests {
use super::seek_sequence;
fn to_vec(strings: &[&str]) -> Vec {
strings.iter().map(|s| s.to_string()).collect()
}
#[test]
fn test_exact_match_finds_sequence() {
let lines = to_vec(&["foo", "bar", "baz"]);
let pattern = to_vec(&["bar", "baz"]);
assert_eq!(seek_sequence(&lines, &pattern, 0, false), Some(1));
}
#[test]
fn test_rstrip_match_ignores_trailing_whitespace() {
let lines = to_vec(&["foo ", "bar\t\t"]);
// Pattern omits trailing whitespace.
let pattern = to_vec(&["foo", "bar"]);
assert_eq!(seek_sequence(&lines, &pattern, 0, false), Some(0));
}
#[test]
fn test_trim_match_ignores_leading_and_trailing_whitespace() {
let lines = to_vec(&[" foo ", " bar\t"]);
// Pattern omits any additional whitespace.
let pattern = to_vec(&["foo", "bar"]);
assert_eq!(seek_sequence(&lines, &pattern, 0, false), Some(0));
}
#[test]
fn test_pattern_longer_than_input_returns_none() {
let lines = to_vec(&["just one line"]);
let pattern = to_vec(&["too", "many", "lines"]);
// Should not panic – must return None when pattern cannot possibly fit.
assert_eq!(seek_sequence(&lines, &pattern, 0, false), None);
}
}
================================================
FILE: codex-rs/apply-patch/src/standalone_executable.rs
================================================
use std::io::Read;
use std::io::Write;
pub fn main() -> ! {
let exit_code = run_main();
std::process::exit(exit_code);
}
/// We would prefer to return `std::process::ExitCode`, but its `exit_process()`
/// method is still a nightly API and we want main() to return !.
pub fn run_main() -> i32 {
// Expect either one argument (the full apply_patch payload) or read it from stdin.
let mut args = std::env::args_os();
let _argv0 = args.next();
let patch_arg = match args.next() {
Some(arg) => match arg.into_string() {
Ok(s) => s,
Err(_) => {
eprintln!("Error: apply_patch requires a UTF-8 PATCH argument.");
return 1;
}
},
None => {
// No argument provided; attempt to read the patch from stdin.
let mut buf = String::new();
match std::io::stdin().read_to_string(&mut buf) {
Ok(_) => {
if buf.is_empty() {
eprintln!("Usage: apply_patch 'PATCH'\n echo 'PATCH' | apply-patch");
return 2;
}
buf
}
Err(err) => {
eprintln!("Error: Failed to read PATCH from stdin.\n{err}");
return 1;
}
}
}
};
// Refuse extra args to avoid ambiguity.
if args.next().is_some() {
eprintln!("Error: apply_patch accepts exactly one argument.");
return 2;
}
let mut stdout = std::io::stdout();
let mut stderr = std::io::stderr();
match crate::apply_patch(&patch_arg, &mut stdout, &mut stderr) {
Ok(()) => {
// Flush to ensure output ordering when used in pipelines.
let _ = stdout.flush();
0
}
Err(_) => 1,
}
}
================================================
FILE: codex-rs/apply-patch/tests/all.rs
================================================
// Single integration test binary that aggregates all test modules.
// The submodules live in `tests/suite/`.
mod suite;
================================================
FILE: codex-rs/apply-patch/tests/suite/cli.rs
================================================
use assert_cmd::prelude::*;
use std::fs;
use std::process::Command;
use tempfile::tempdir;
#[test]
fn test_apply_patch_cli_add_and_update() -> anyhow::Result<()> {
let tmp = tempdir()?;
let file = "cli_test.txt";
let absolute_path = tmp.path().join(file);
// 1) Add a file
let add_patch = format!(
r#"*** Begin Patch
*** Add File: {file}
+hello
*** End Patch"#
);
Command::cargo_bin("apply_patch")
.expect("should find apply_patch binary")
.arg(add_patch)
.current_dir(tmp.path())
.assert()
.success()
.stdout(format!("Success. Updated the following files:\nA {file}\n"));
assert_eq!(fs::read_to_string(&absolute_path)?, "hello\n");
// 2) Update the file
let update_patch = format!(
r#"*** Begin Patch
*** Update File: {file}
@@
-hello
+world
*** End Patch"#
);
Command::cargo_bin("apply_patch")
.expect("should find apply_patch binary")
.arg(update_patch)
.current_dir(tmp.path())
.assert()
.success()
.stdout(format!("Success. Updated the following files:\nM {file}\n"));
assert_eq!(fs::read_to_string(&absolute_path)?, "world\n");
Ok(())
}
#[test]
fn test_apply_patch_cli_stdin_add_and_update() -> anyhow::Result<()> {
let tmp = tempdir()?;
let file = "cli_test_stdin.txt";
let absolute_path = tmp.path().join(file);
// 1) Add a file via stdin
let add_patch = format!(
r#"*** Begin Patch
*** Add File: {file}
+hello
*** End Patch"#
);
let mut cmd =
assert_cmd::Command::cargo_bin("apply_patch").expect("should find apply_patch binary");
cmd.current_dir(tmp.path());
cmd.write_stdin(add_patch)
.assert()
.success()
.stdout(format!("Success. Updated the following files:\nA {file}\n"));
assert_eq!(fs::read_to_string(&absolute_path)?, "hello\n");
// 2) Update the file via stdin
let update_patch = format!(
r#"*** Begin Patch
*** Update File: {file}
@@
-hello
+world
*** End Patch"#
);
let mut cmd =
assert_cmd::Command::cargo_bin("apply_patch").expect("should find apply_patch binary");
cmd.current_dir(tmp.path());
cmd.write_stdin(update_patch)
.assert()
.success()
.stdout(format!("Success. Updated the following files:\nM {file}\n"));
assert_eq!(fs::read_to_string(&absolute_path)?, "world\n");
Ok(())
}
================================================
FILE: codex-rs/apply-patch/tests/suite/mod.rs
================================================
mod cli;
================================================
FILE: codex-rs/arg0/Cargo.toml
================================================
[package]
edition = "2024"
name = "codex-arg0"
version = { workspace = true }
[lib]
name = "codex_arg0"
path = "src/lib.rs"
[lints]
workspace = true
[dependencies]
anyhow = "1"
codex-apply-patch = { path = "../apply-patch" }
codex-core = { path = "../core" }
codex-linux-sandbox = { path = "../linux-sandbox" }
dotenvy = "0.15.7"
tempfile = "3"
tokio = { version = "1", features = ["rt-multi-thread"] }
================================================
FILE: codex-rs/arg0/src/lib.rs
================================================
use std::future::Future;
use std::path::Path;
use std::path::PathBuf;
use codex_core::CODEX_APPLY_PATCH_ARG1;
#[cfg(unix)]
use std::os::unix::fs::symlink;
use tempfile::TempDir;
const LINUX_SANDBOX_ARG0: &str = "codex-linux-sandbox";
const APPLY_PATCH_ARG0: &str = "apply_patch";
const MISSPELLED_APPLY_PATCH_ARG0: &str = "applypatch";
/// While we want to deploy the Codex CLI as a single executable for simplicity,
/// we also want to expose some of its functionality as distinct CLIs, so we use
/// the "arg0 trick" to determine which CLI to dispatch. This effectively allows
/// us to simulate deploying multiple executables as a single binary on Mac and
/// Linux (but not Windows).
///
/// When the current executable is invoked through the hard-link or alias named
/// `codex-linux-sandbox` we *directly* execute
/// [`codex_linux_sandbox::run_main`] (which never returns). Otherwise we:
///
/// 1. Use [`dotenvy::from_path`] and [`dotenvy::dotenv`] to modify the
/// environment before creating any threads.
/// 2. Construct a Tokio multi-thread runtime.
/// 3. Derive the path to the current executable (so children can re-invoke the
/// sandbox) when running on Linux.
/// 4. Execute the provided async `main_fn` inside that runtime, forwarding any
/// error. Note that `main_fn` receives `codex_linux_sandbox_exe:
/// Option`, as an argument, which is generally needed as part of
/// constructing [`codex_core::config::Config`].
///
/// This function should be used to wrap any `main()` function in binary crates
/// in this workspace that depends on these helper CLIs.
pub fn arg0_dispatch_or_else(main_fn: F) -> anyhow::Result<()>
where
F: FnOnce(Option) -> Fut,
Fut: Future